Browse Source
Support more file readers
Signed-off-by: Jael Gu <mengjia.gu@zilliz.com>
main
1 changed file with
6 additions and
4 deletions
-
loader.py
|
|
@@ -4,8 +4,9 @@ from typing import List, Optional |
|
|
|
|
|
|
|
class TextLoader(PyOperator): |
|
|
|
'''Load data from url or file (paths or file-like objects).''' |
|
|
|
def __init__(self) -> None: |
|
|
|
def __init__(self, **kwargs) -> None: |
|
|
|
super().__init__() |
|
|
|
self.unstructured_kwargs = kwargs |
|
|
|
|
|
|
|
def __call__(self, data_src) -> List[str]: |
|
|
|
if data_src.startswith('http'): |
|
|
@@ -20,9 +21,10 @@ class TextLoader(PyOperator): |
|
|
|
file_path = file.name |
|
|
|
else: |
|
|
|
file_path = file |
|
|
|
with open(file_path, encoding=encoding) as f: |
|
|
|
text = f.read() |
|
|
|
return text |
|
|
|
from langchain.document_loaders import UnstructuredFileLoader |
|
|
|
loader = UnstructuredFileLoader(file_path, mode='single', strategy='fast') |
|
|
|
doc = loader.load()[0] |
|
|
|
return doc.page_content |
|
|
|
|
|
|
|
def _from_url(self, url: str) -> str: |
|
|
|
from langchain.document_loaders import UnstructuredURLLoader |
|
|
|