From 6e6168c9e1876882b3a83345a261ce65b975f777 Mon Sep 17 00:00:00 2001 From: Jael Gu Date: Thu, 7 Sep 2023 13:47:55 +0800 Subject: [PATCH] Debug failed loads Signed-off-by: Jael Gu --- loader.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/loader.py b/loader.py index c38ea01..dadfb02 100644 --- a/loader.py +++ b/loader.py @@ -23,12 +23,20 @@ class TextLoader(PyOperator): file_path = file from langchain.document_loaders import UnstructuredFileLoader loader = UnstructuredFileLoader(file_path, mode='single', strategy='fast') - doc = loader.load()[0] - return doc.page_content + data = loader.load() + if len(data) > 0: + doc = data[0] + return doc.page_content + else: + raise RuntimeError(f'Failed to load data from {file}. Invalid output: {data}') def _from_url(self, url: str) -> str: from langchain.document_loaders import UnstructuredURLLoader loader = UnstructuredURLLoader(urls=[url]) - doc = loader.load()[0] - return doc.page_content \ No newline at end of file + data = loader.load() + if len(data) > 0: + doc = data[0] + return doc.page_content + else: + raise RuntimeError(f'Failed to load data from {url}. Invalid output: {data}')