Browse Source
        
      
      Update
      
        Signed-off-by: Jael Gu <mengjia.gu@zilliz.com>
      
      
        main
      
      
     
    
    
    
	
		
			
				 2 changed files with 
12 additions and 
11 deletions
			 
			
		 
		
			
				- 
					
					
					 
					README.md
				
- 
					
					
					 
					nlp_longformer.py
				
				
				
					
						
							
								
									
	
		
			
				|  | @ -1,6 +1,6 @@ | 
		
	
		
			
				|  |  | # Operator: nlp-longformer |  |  | # Operator: nlp-longformer | 
		
	
		
			
				|  |  | 
 |  |  | 
 | 
		
	
		
			
				|  |  | Author: |  |  |  | 
		
	
		
			
				|  |  |  |  |  | Author: Kyle He, Jael Gu | 
		
	
		
			
				|  |  | 
 |  |  | 
 | 
		
	
		
			
				|  |  | ## Overview |  |  | ## Overview | 
		
	
		
			
				|  |  | 
 |  |  | 
 | 
		
	
	
		
			
				|  | @ -27,17 +27,18 @@ __call__(self, call_arg_1: xxx) | 
		
	
		
			
				|  |  | 
 |  |  | 
 | 
		
	
		
			
				|  |  | Args: |  |  | Args: | 
		
	
		
			
				|  |  | 
 |  |  | 
 | 
		
	
		
			
				|  |  | - call_arg_1: |  |  |  | 
		
	
		
			
				|  |  |   - xxx(description about call_arg_1) |  |  |  | 
		
	
		
			
				|  |  |   - supported types: xxx |  |  |  | 
		
	
		
			
				|  |  |   Returns: |  |  |  | 
		
	
		
			
				|  |  |  |  |  | - txt: | 
		
	
		
			
				|  |  |  |  |  |   - input text in words, sentences, or paragraphs | 
		
	
		
			
				|  |  |  |  |  |   - supported types: str | 
		
	
		
			
				|  |  | 
 |  |  | 
 | 
		
	
		
			
				|  |  | The Operator returns a tuple Tuple[('results_1', xxx)] containing following fields: |  |  |  | 
		
	
		
			
				|  |  |  |  |  | Returns: | 
		
	
		
			
				|  |  | 
 |  |  | 
 | 
		
	
		
			
				|  |  | - results_1: |  |  |  | 
		
	
		
			
				|  |  |   - xxx(description of results_1) |  |  |  | 
		
	
		
			
				|  |  |   - data type: xxx |  |  |  | 
		
	
		
			
				|  |  |   - shape: (xxx,) |  |  |  | 
		
	
		
			
				|  |  |  |  |  | The Operator returns a tuple Tuple[('feature_vector', numpy.ndarray)] containing the following fields: | 
		
	
		
			
				|  |  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  |  | - feature_vector: | 
		
	
		
			
				|  |  |  |  |  |   - the embedding of the text | 
		
	
		
			
				|  |  |  |  |  |   - data type: numpy.ndarray | 
		
	
		
			
				|  |  |  |  |  |   - shape: (x, dim), where x is the number of vectors and dim is the vector dimension, which depends on model_name | 
		
	
		
			
				|  |  | 
 |  |  | 
 | 
		
	
		
			
				|  |  | ## Requirements |  |  | ## Requirements | 
		
	
		
			
				|  |  | 
 |  |  | 
 | 
		
	
	
		
			
				|  | 
 | 
		
	
								
							
						
					 
					
				 
			 
		
			
			
			
			
			
			
				
				
					
						
							
								
									
	
		
			
				|  | @ -27,7 +27,7 @@ class NlpLongformer(NNOperator): | 
		
	
		
			
				|  |  |         input_ids = torch.tensor(self.tokenizer.encode(txt)).unsqueeze(0) |  |  |         input_ids = torch.tensor(self.tokenizer.encode(txt)).unsqueeze(0) | 
		
	
		
			
				|  |  |         attention_mask = None |  |  |         attention_mask = None | 
		
	
		
			
				|  |  |         outs = self.model(input_ids, attention_mask=attention_mask, output_hidden_states=True) |  |  |         outs = self.model(input_ids, attention_mask=attention_mask, output_hidden_states=True) | 
		
	
		
			
				|  |  |         feature_vector = outs[1].squeeze() |  |  |  | 
		
	
		
			
				|  |  |  |  |  |         feature_vector = outs[1].squeeze(0) | 
		
	
		
			
				|  |  |         Outputs = NamedTuple('Outputs', [('feature_vector', numpy.ndarray)]) |  |  |         Outputs = NamedTuple('Outputs', [('feature_vector', numpy.ndarray)]) | 
		
	
		
			
				|  |  |         return Outputs(feature_vector.detach().numpy()) |  |  |         return Outputs(feature_vector.detach().numpy()) | 
		
	
		
			
				|  |  | 
 |  |  | 
 | 
		
	
	
		
			
				|  | 
 |