diff --git a/auto_transformers.py b/auto_transformers.py
index e3fb185..12ff5b9 100644
--- a/auto_transformers.py
+++ b/auto_transformers.py
@@ -96,6 +96,8 @@ class AutoTransformers(NNOperator):
                 raise e
             else:
                 self.tokenizer = tokenizer
+            if not self.tokenizer.pad_token:
+                self.tokenizer.pad_token = '[PAD]'
         else:
             log.warning('The operator is initialized without specified model.')
             pass
@@ -103,8 +105,9 @@ class AutoTransformers(NNOperator):
     def __call__(self, txt: str, return_sentence_emb: bool = False) -> numpy.ndarray:
         try:
             inputs = self.tokenizer(txt, padding=True, truncation=True, return_tensors="pt").to(self.device)
-        except Exception:
-            inputs = self.tokenizer(txt, truncation=True, return_tensors='pt').to(self.device)
+        except Exception as e:
+            log.error(f'Fail to tokenize inputs: {e}')
+            raise e
         try:
             outs = self.model(**inputs)
         except Exception as e:
@@ -143,10 +146,7 @@ class AutoTransformers(NNOperator):
             raise AttributeError('Unsupported model_type.')
 
         dummy_input = '[CLS]'
-        try:
-            inputs = self.tokenizer(dummy_input, padding=True, truncation=True, return_tensors='pt') # a dictionary
-        except Exception:
-            inputs = self.tokenizer(dummy_input, truncation=True, return_tensors='pt')
+        inputs = self.tokenizer(dummy_input, padding=True, truncation=True, return_tensors='pt') # a dictionary
         if model_type == 'pytorch':
             torch.save(self._model, output_file)
         elif model_type == 'torchscript':
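
Note on the pad_token fallback: padding=True only works when the tokenizer defines a pad token, and some tokenizers (GPT-2-style ones, for example) ship without one. The removed try/except retries were working around that by re-tokenizing without padding; guaranteeing a pad token in __init__ is what lets the other two hunks drop those fallback paths and fail loudly instead. A minimal sketch of the behavior, assuming the Hugging Face transformers AutoTokenizer API; the 'gpt2' checkpoint and the sample sentences are only illustrative:

    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained('gpt2')  # this checkpoint defines no pad token
    if not tokenizer.pad_token:
        tokenizer.pad_token = '[PAD]'  # same fallback as the __init__ change above

    # With a pad token set, a batch can be padded to a common length; without one,
    # padding=True raises "Asking to pad but the tokenizer does not have a padding token".
    inputs = tokenizer(['hello world', 'a longer example sentence'],
                       padding=True, truncation=True, return_tensors='pt')
    print(inputs['input_ids'].shape)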