Browse Source
Debug failed loads
Signed-off-by: Jael Gu <mengjia.gu@zilliz.com>
main
1 changed file with
12 additions and
4 deletions
-
loader.py
|
@ -23,12 +23,20 @@ class TextLoader(PyOperator): |
|
|
file_path = file |
|
|
file_path = file |
|
|
from langchain.document_loaders import UnstructuredFileLoader |
|
|
from langchain.document_loaders import UnstructuredFileLoader |
|
|
loader = UnstructuredFileLoader(file_path, mode='single', strategy='fast') |
|
|
loader = UnstructuredFileLoader(file_path, mode='single', strategy='fast') |
|
|
doc = loader.load()[0] |
|
|
|
|
|
return doc.page_content |
|
|
|
|
|
|
|
|
data = loader.load() |
|
|
|
|
|
if len(data) > 0: |
|
|
|
|
|
doc = data[0] |
|
|
|
|
|
return doc.page_content |
|
|
|
|
|
else: |
|
|
|
|
|
raise RuntimeError(f'Failed to load data from {file}. Invalid output: {data}') |
|
|
|
|
|
|
|
|
def _from_url(self, url: str) -> str: |
|
|
def _from_url(self, url: str) -> str: |
|
|
from langchain.document_loaders import UnstructuredURLLoader |
|
|
from langchain.document_loaders import UnstructuredURLLoader |
|
|
|
|
|
|
|
|
loader = UnstructuredURLLoader(urls=[url]) |
|
|
loader = UnstructuredURLLoader(urls=[url]) |
|
|
doc = loader.load()[0] |
|
|
|
|
|
return doc.page_content |
|
|
|
|
|
|
|
|
data = loader.load() |
|
|
|
|
|
if len(data) > 0: |
|
|
|
|
|
doc = data[0] |
|
|
|
|
|
return doc.page_content |
|
|
|
|
|
else: |
|
|
|
|
|
raise RuntimeError(f'Failed to load data from {url}. Invalid output: {data}') |
|
|