logo
Browse Source

Update tokenizer

Signed-off-by: Jael Gu <mengjia.gu@zilliz.com>
main
Jael Gu 2 years ago
parent
commit
9ec9754464
  1. 2
      codebert.py

2
codebert.py

@@ -76,7 +76,7 @@ class CodeBert(NNOperator):
tokens = self.tokenizer.tokenize(txt) tokens = self.tokenizer.tokenize(txt)
tokens = [self.tokenizer.cls_token, '<encoder-only>', self.tokenizer.sep_token] + tokens + \ tokens = [self.tokenizer.cls_token, '<encoder-only>', self.tokenizer.sep_token] + tokens + \
[self.tokenizer.sep_token] [self.tokenizer.sep_token]
- tokens_ids = self.tokenizer.convert_tokens_to_ids(tokens, return_tensors='pt')
+ tokens_ids = self.tokenizer.convert_tokens_to_ids(tokens)
inputs = torch.tensor(tokens_ids).to(self.device) inputs = torch.tensor(tokens_ids).to(self.device)
except Exception as e: except Exception as e:
log.error(f'Invalid input for the tokenizer: {self.model_name}') log.error(f'Invalid input for the tokenizer: {self.model_name}')

Loading…
Cancel
Save