Browse Source
Update tokenizer
Signed-off-by: Jael Gu <mengjia.gu@zilliz.com>
main
1 changed file with
1 addition and
1 deletion
-
codebert.py
|
@@ -76,7 +76,7 @@ class CodeBert(NNOperator): |
|
|
tokens = self.tokenizer.tokenize(txt) |
|
|
tokens = self.tokenizer.tokenize(txt) |
|
|
tokens = [self.tokenizer.cls_token, '<encoder-only>', self.tokenizer.sep_token] + tokens + \ |
|
|
tokens = [self.tokenizer.cls_token, '<encoder-only>', self.tokenizer.sep_token] + tokens + \ |
|
|
[self.tokenizer.sep_token] |
|
|
[self.tokenizer.sep_token] |
|
|
tokens_ids = self.tokenizer.convert_tokens_to_ids(tokens, return_tensors='pt') |
|
|
|
|
|
|
|
|
tokens_ids = self.tokenizer.convert_tokens_to_ids(tokens) |
|
|
inputs = torch.tensor(tokens_ids).to(self.device) |
|
|
inputs = torch.tensor(tokens_ids).to(self.device) |
|
|
except Exception as e: |
|
|
except Exception as e: |
|
|
log.error(f'Invalid input for the tokenizer: {self.model_name}') |
|
|
log.error(f'Invalid input for the tokenizer: {self.model_name}') |
|
|