From 9ec97544640c7ddd9063533a84eb9eef6a9b7888 Mon Sep 17 00:00:00 2001 From: Jael Gu Date: Tue, 8 Nov 2022 16:52:18 +0800 Subject: [PATCH] Update tokenizer Signed-off-by: Jael Gu --- codebert.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/codebert.py b/codebert.py index 386bc00..274e1a2 100644 --- a/codebert.py +++ b/codebert.py @@ -76,7 +76,7 @@ class CodeBert(NNOperator): tokens = self.tokenizer.tokenize(txt) tokens = [self.tokenizer.cls_token, '', self.tokenizer.sep_token] + tokens + \ [self.tokenizer.sep_token] - tokens_ids = self.tokenizer.convert_tokens_to_ids(tokens, return_tensors='pt') + tokens_ids = self.tokenizer.convert_tokens_to_ids(tokens) inputs = torch.tensor(tokens_ids).to(self.device) except Exception as e: log.error(f'Invalid input for the tokenizer: {self.model_name}')