Browse Source
        
      
      Debug failed loads
      
        Signed-off-by: Jael Gu <mengjia.gu@zilliz.com>
      
      
        main
      
      
     
    
    
    
	
		
			
				 1 changed file with 
12 additions and 
4 deletions
			 
			
		 
		
			
				- 
					
					
					 
					loader.py
				
 
			
		
		
			
			
			
			
			
			
				
				
					
						
							
								
									
	
		
			
				
					| 
						
						
							
								
							
						
						
					 | 
				
				 | 
				
					@ -23,12 +23,20 @@ class TextLoader(PyOperator): | 
				
			
			
		
	
		
			
				
					 | 
					 | 
				
				 | 
				
					            file_path = file | 
				
			
			
		
	
		
			
				
					 | 
					 | 
				
				 | 
				
					        from langchain.document_loaders import UnstructuredFileLoader | 
				
			
			
		
	
		
			
				
					 | 
					 | 
				
				 | 
				
					        loader = UnstructuredFileLoader(file_path, mode='single', strategy='fast') | 
				
			
			
		
	
		
			
				
					 | 
					 | 
				
				 | 
				
					        doc = loader.load()[0] | 
				
			
			
		
	
		
			
				
					 | 
					 | 
				
				 | 
				
					        return doc.page_content | 
				
			
			
		
	
		
			
				
					 | 
					 | 
				
				 | 
				
					        data = loader.load() | 
				
			
			
		
	
		
			
				
					 | 
					 | 
				
				 | 
				
					        if len(data) > 0: | 
				
			
			
		
	
		
			
				
					 | 
					 | 
				
				 | 
				
					            doc = data[0] | 
				
			
			
		
	
		
			
				
					 | 
					 | 
				
				 | 
				
					            return doc.page_content | 
				
			
			
		
	
		
			
				
					 | 
					 | 
				
				 | 
				
					        else: | 
				
			
			
		
	
		
			
				
					 | 
					 | 
				
				 | 
				
					            raise RuntimeError(f'Failed to load data from {file}. Invalid output: {data}') | 
				
			
			
		
	
		
			
				
					 | 
					 | 
				
				 | 
				
					     | 
				
			
			
		
	
		
			
				
					 | 
					 | 
				
				 | 
				
					    def _from_url(self, url: str) -> str:
					        """Load the text content found at *url*.

					        The URL is handed to langchain's ``UnstructuredURLLoader`` and the
					        ``page_content`` of the first returned document is used.

					        Args:
					            url: Address of the resource to load.

					        Returns:
					            The ``page_content`` of the first document produced by the loader.

					        Raises:
					            RuntimeError: If the loader returns no documents for *url*.
					        """
					        # Kept function-local, as in the original code.
					        from langchain.document_loaders import UnstructuredURLLoader

					        loader = UnstructuredURLLoader(urls=[url])
					        # Call load() exactly once and validate the result before indexing,
					        # so an empty result surfaces as a descriptive error rather than
					        # an IndexError.
					        data = loader.load()
					        if not data:
					            raise RuntimeError(f'Failed to load data from {url}. Invalid output: {data}')
					        return data[0].page_content
				
			
			
		
	
	
		
			
				
					| 
						
						
						
					 | 
				
				 | 
				
					
  |