|
|
@@ -96,6 +96,8 @@ class AutoTransformers(NNOperator): |
|
|
|
raise e |
|
|
|
else: |
|
|
|
self.tokenizer = tokenizer |
|
|
|
if not self.tokenizer.pad_token: |
|
|
|
self.tokenizer.pad_token = '[PAD]' |
|
|
|
else: |
|
|
|
log.warning('The operator is initialized without specified model.') |
|
|
|
pass |
|
|
@@ -103,8 +105,9 @@ class AutoTransformers(NNOperator): |
|
|
|
def __call__(self, txt: str, return_sentence_emb: bool = False) -> numpy.ndarray: |
|
|
|
try: |
|
|
|
inputs = self.tokenizer(txt, padding=True, truncation=True, return_tensors="pt").to(self.device) |
|
|
|
except Exception: |
|
|
|
inputs = self.tokenizer(txt, truncation=True, return_tensors='pt').to(self.device) |
|
|
|
except Exception as e: |
|
|
|
log.error(f'Fail to tokenize inputs: {e}') |
|
|
|
raise e |
|
|
|
try: |
|
|
|
outs = self.model(**inputs) |
|
|
|
except Exception as e: |
|
|
@@ -143,10 +146,7 @@ class AutoTransformers(NNOperator): |
|
|
|
raise AttributeError('Unsupported model_type.') |
|
|
|
|
|
|
|
dummy_input = '[CLS]' |
|
|
|
try: |
|
|
|
inputs = self.tokenizer(dummy_input, padding=True, truncation=True, return_tensors='pt') # a dictionary |
|
|
|
except Exception: |
|
|
|
inputs = self.tokenizer(dummy_input, truncation=True, return_tensors='pt') |
|
|
|
if model_type == 'pytorch': |
|
|
|
torch.save(self._model, output_file) |
|
|
|
elif model_type == 'torchscript': |
|
|
|