Browse Source
        
      
      Support more suffix of docs
      
        Signed-off-by: Jael Gu <mengjia.gu@zilliz.com>
      
      
        main
      
      
     
    
    
    
	
		
			
				 1 changed files with 
11 additions and 
2 deletions
			 
			
		 
		
			
				- 
					
					
					 
					osschat_insert.py
				
				
				
					
						
							
								
									
	
		
			
				
					|  |  | @ -67,7 +67,16 @@ def _get_embedding_op(config): | 
			
		
	
		
			
				
					|  |  |  |         return False, ops.sentence_embedding.openai(model_name=config.embedding_model, api_key=config.openai_api_key) | 
			
		
	
		
			
				
					|  |  |  |     raise RuntimeError('Unknown model: [%s], only support: %s' % (config.embedding_model, _hf_models + _sbert_models + _openai_models)) | 
			
		
	
		
			
				
					|  |  |  | 
 | 
			
		
	
		
			
				
					|  |  |  | 
 | 
			
		
	
		
			
				
					|  |  |  | def data_loader(path): | 
			
		
	
		
			
				
					|  |  |  |     if path.endswith('pdf'): | 
			
		
	
		
			
				
					|  |  |  |         op = ops.data_loader.pdf_loader() | 
			
		
	
		
			
				
					|  |  |  |     elif path.endswith(('xls', 'xslx')): | 
			
		
	
		
			
				
					|  |  |  |         op = ops.data_loader.excel_loader() | 
			
		
	
		
			
				
					|  |  |  |     elif path.endswith('ppt'): | 
			
		
	
		
			
				
					|  |  |  |         op = ops.data_loader.powerpoint_loader() | 
			
		
	
		
			
				
					|  |  |  |     else: | 
			
		
	
		
			
				
					|  |  |  |         op = ops.text_loader() | 
			
		
	
		
			
				
					|  |  |  |     return op(path) | 
			
		
	
		
			
				
					|  |  |  | 
 | 
			
		
	
		
			
				
					|  |  |  | @AutoPipes.register | 
			
		
	
		
			
				
					|  |  |  | def osschat_insert_pipe(config): | 
			
		
	
	
		
			
				
					|  |  | @ -91,7 +100,7 @@ def osschat_insert_pipe(config): | 
			
		
	
		
			
				
					|  |  |  |      | 
			
		
	
		
			
				
					|  |  |  |     p = ( | 
			
		
	
		
			
				
					|  |  |  |         pipe.input('doc', 'project_name') | 
			
		
	
		
			
				
					|  |  |  |             .map('doc', 'text', ops.text_loader()) | 
			
		
	
		
			
				
					|  |  |  |             .map('doc', 'text', data_loader) | 
			
		
	
		
			
				
					|  |  |  |             .flat_map('text', 'sentence', text_split_op) | 
			
		
	
		
			
				
					|  |  |  |             .map('sentence', 'embedding', sentence_embedding_op, config=sentence_embedding_config) | 
			
		
	
		
			
				
					|  |  |  |     ) | 
			
		
	
	
		
			
				
					|  |  | 
 |