logo
Browse Source

Add operator

Signed-off-by: Jael Gu <mengjia.gu@zilliz.com>
main
Jael Gu 3 years ago
parent
commit
638a038911
  1. 51
      README.md
  2. 0
      __init__.py
  3. 35
      nlp_longformer.py

51
README.md

@ -1,3 +1,50 @@
# nlp-longformer
# Operator: nlp-longformer
2
Author:
## Overview
## Interface
```python
__init__(self, model_name: str, framework: str = 'pytorch')
```
Args:
- model_name:
- the model name for embedding
- supported types: str, for example 'allenai/longformer-base-4096' or 'allenai/longformer-large-4096'
- framework:
- the framework of the model
- supported types: str, default is 'pytorch'
```python
__call__(self, txt: str)
```
Args:
- txt:
- the input text to embed
- supported types: str
Returns:
The Operator returns a tuple Tuple[('feature_vector', numpy.ndarray)] containing the following fields:
- feature_vector:
- the embedding of the input text (the model's pooled output)
- data type: numpy.ndarray
- shape: (dim,), where dim is the hidden size of the chosen model
## Requirements
## How it works
## Reference

0
__init__.py

35
nlp_longformer.py

@ -0,0 +1,35 @@
import numpy
from typing import NamedTuple
import torch
from transformers import LongformerTokenizer, LongformerModel
from towhee.operator import NNOperator
import warnings
warnings.filterwarnings('ignore')
class NlpLongformer(NNOperator):
    """
    NLP embedding operator that uses a pretrained Longformer model from Hugging Face.

    The Longformer model was presented in "Longformer: The Long-Document Transformer"
    by Iz Beltagy, Matthew E. Peters, Arman Cohan.
    Ref: https://huggingface.co/docs/transformers/v4.16.2/en/model_doc/longformer#transformers.LongformerConfig

    Args:
        model_name (`str`):
            Which model to use for the embeddings.
    """

    def __init__(self, model_name: str) -> None:
        # Initialise the operator base class before attaching our own state.
        super().__init__()
        self.model = LongformerModel.from_pretrained(model_name)
        # Inference only: switch off dropout so the same text always yields
        # the same embedding.
        self.model.eval()
        self.tokenizer = LongformerTokenizer.from_pretrained(model_name)

    def __call__(self, txt: str) -> NamedTuple('Outputs', [('feature_vector', numpy.ndarray)]):
        """
        Embed a single text.

        Args:
            txt (`str`):
                The text to embed.

        Returns:
            An `Outputs` named tuple whose single field `feature_vector` is the
            model's second output (the pooled output) as a numpy array with the
            batch axis removed.
        """
        # The tokenizer returns a flat list of token ids; add a batch axis of 1.
        input_ids = torch.tensor(self.tokenizer.encode(txt)).unsqueeze(0)
        # No gradients are needed for embedding extraction; skipping the
        # autograd graph saves memory and time.
        with torch.no_grad():
            outs = self.model(input_ids)
        # outs[1] is the pooled output; drop only the batch axis added above.
        feature_vector = outs[1].squeeze(0)
        Outputs = NamedTuple('Outputs', [('feature_vector', numpy.ndarray)])
        return Outputs(feature_vector.detach().numpy())

    def get_model(self):
        """Return the underlying `LongformerModel` instance."""
        return self.model
Loading…
Cancel
Save