      Fix for model without pad
      
Signed-off-by: Jael Gu <mengjia.gu@zilliz.com>
Branch: main

1 changed file with 6 additions and 4 deletions

auto_transformers.py
@@ -103,9 +103,8 @@ class AutoTransformers(NNOperator):
     def __call__(self, txt: str, return_sentence_emb: bool = False) -> numpy.ndarray:
         try:
             inputs = self.tokenizer(txt, padding=True, truncation=True, return_tensors="pt").to(self.device)
-        except Exception as e:
-            log.error(f'Invalid input for the tokenizer: {self.model_name}')
-            raise e
+        except Exception:
+            inputs = self.tokenizer(dummy_input, truncation=True, return_tensors='pt').to(self.device)
         try:
             outs = self.model(**inputs)
         except Exception as e:
@@ -144,7 +143,10 @@ class AutoTransformers(NNOperator):
                 raise AttributeError('Unsupported model_type.')

         dummy_input = '[CLS]'
-        inputs = self.tokenizer(dummy_input, padding=True, truncation=True, return_tensors='pt')  # a dictionary
+        try:
+            inputs = self.tokenizer(dummy_input, padding=True, truncation=True, return_tensors='pt')  # a dictionary
+        except Exception:
+            inputs = self.tokenizer(dummy_input, truncation=True, return_tensors='pt')
         if model_type == 'pytorch':
             torch.save(self._model, output_file)
         elif model_type == 'torchscript':
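Both hunks apply the same pattern: try tokenizing with padding=True, and if the tokenizer defines no pad token (so asking for padding raises), retry without padding. A minimal standalone sketch of that fallback, assuming the Hugging Face transformers library and PyTorch are installed; 'gpt2' is used purely as an example of a tokenizer that ships without a pad token and is not taken from the commit:

# Sketch of the padding fallback introduced by this commit (not part of the diff).
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained('gpt2')  # example model without a pad token

def encode(txt: str):
    try:
        # Works for tokenizers that define a pad token.
        return tokenizer(txt, padding=True, truncation=True, return_tensors='pt')
    except Exception:
        # Tokenizers without a pad token raise when padding is requested,
        # so retry without padding.
        return tokenizer(txt, truncation=True, return_tensors='pt')

inputs = encode('[CLS]')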