Browse Source
        
      
      Debug failed loads
      
        Signed-off-by: Jael Gu <mengjia.gu@zilliz.com>
      
      
        main
      
      
     
    
    
    
	
		
			
				 1 changed file with 
12 additions and 
4 deletions
			 
			
		 
		
			
				- 
					
					
					 
					loader.py
				
 
			
		
		
			
			
			
			
			
			
				
				
					
						
							
								
									
	
		
			
				| 
					
					
						
							
						
					
					
				 | 
				@ -23,12 +23,20 @@ class TextLoader(PyOperator): | 
			
		
		
	
		
			
				 | 
				 | 
				            file_path = file | 
				 | 
				 | 
				            file_path = file | 
			
		
		
	
		
			
				 | 
				 | 
				        from langchain.document_loaders import UnstructuredFileLoader | 
				 | 
				 | 
				        from langchain.document_loaders import UnstructuredFileLoader | 
			
		
		
	
		
			
				 | 
				 | 
				        loader = UnstructuredFileLoader(file_path, mode='single', strategy='fast') | 
				 | 
				 | 
				        loader = UnstructuredFileLoader(file_path, mode='single', strategy='fast') | 
			
		
		
	
		
			
				 | 
				 | 
				        doc = loader.load()[0] | 
				 | 
				 | 
				 | 
			
		
		
	
		
			
				 | 
				 | 
				        return doc.page_content | 
				 | 
				 | 
				 | 
			
		
		
	
		
			
				 | 
				 | 
				 | 
				 | 
				 | 
				        data = loader.load() | 
			
		
		
	
		
			
				 | 
				 | 
				 | 
				 | 
				 | 
				        if len(data) > 0: | 
			
		
		
	
		
			
				 | 
				 | 
				 | 
				 | 
				 | 
				            doc = data[0] | 
			
		
		
	
		
			
				 | 
				 | 
				 | 
				 | 
				 | 
				            return doc.page_content | 
			
		
		
	
		
			
				 | 
				 | 
				 | 
				 | 
				 | 
				        else: | 
			
		
		
	
		
			
				 | 
				 | 
				 | 
				 | 
				 | 
				            raise RuntimeError(f'Failed to load data from {file}. Invalid output: {data}') | 
			
		
		
	
		
			
				 | 
				 | 
				     | 
				 | 
				 | 
				     | 
			
		
		
	
		
			
				 | 
				 | 
				    def _from_url(self, url: str) -> str: | 
				 | 
				 | 
				    def _from_url(self, url: str) -> str: | 
			
		
		
	
		
			
				 | 
				 | 
				        from langchain.document_loaders import UnstructuredURLLoader | 
				 | 
				 | 
				        from langchain.document_loaders import UnstructuredURLLoader | 
			
		
		
	
		
			
				 | 
				 | 
				
 | 
				 | 
				 | 
				
 | 
			
		
		
	
		
			
				 | 
				 | 
				        loader = UnstructuredURLLoader(urls=[url]) | 
				 | 
				 | 
				        loader = UnstructuredURLLoader(urls=[url]) | 
			
		
		
	
		
			
				 | 
				 | 
				        doc = loader.load()[0] | 
				 | 
				 | 
				 | 
			
		
		
	
		
			
				 | 
				 | 
				        return doc.page_content | 
				 | 
				 | 
				 | 
			
		
		
	
		
			
				 | 
				 | 
				 | 
				 | 
				 | 
				        data = loader.load() | 
			
		
		
	
		
			
				 | 
				 | 
				 | 
				 | 
				 | 
				        if len(data) > 0: | 
			
		
		
	
		
			
				 | 
				 | 
				 | 
				 | 
				 | 
				            doc = data[0] | 
			
		
		
	
		
			
				 | 
				 | 
				 | 
				 | 
				 | 
				            return doc.page_content | 
			
		
		
	
		
			
				 | 
				 | 
				 | 
				 | 
				 | 
				        else: | 
			
		
		
	
		
			
				 | 
				 | 
				 | 
				 | 
				 | 
				            raise RuntimeError(f'Failed to load data from {url}. Invalid output: {data}') | 
			
		
		
	
	
		
			
				| 
					
					
					
				 | 
				
  |