logo
Browse Source

Support more file readers

Signed-off-by: Jael Gu <mengjia.gu@zilliz.com>
main
Jael Gu 2 years ago
parent
commit
15f161db91
  1. 10
      loader.py

10
loader.py

@@ -4,8 +4,9 @@ from typing import List, Optional
class TextLoader(PyOperator):
'''Load data from url or file (paths or file-like objects).'''
def __init__(self) -> None:
def __init__(self, **kwargs) -> None:
super().__init__()
self.unstructured_kwargs = kwargs
def __call__(self, data_src) -> List[str]:
if data_src.startswith('http'):
@@ -20,9 +21,10 @@ class TextLoader(PyOperator):
file_path = file.name
else:
file_path = file
with open(file_path, encoding=encoding) as f:
text = f.read()
return text
from langchain.document_loaders import UnstructuredFileLoader
loader = UnstructuredFileLoader(file_path, mode='single', strategy='fast')
doc = loader.load()[0]
return doc.page_content
def _from_url(self, url: str) -> str:
from langchain.document_loaders import UnstructuredURLLoader

Loading…
Cancel
Save