From e2d3c74824808cff1f25f7f180cb32277c071cef Mon Sep 17 00:00:00 2001 From: Jael Gu Date: Mon, 6 Feb 2023 18:14:21 +0800 Subject: [PATCH] Update model list Signed-off-by: Jael Gu --- s_bert.py | 54 ++++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 44 insertions(+), 10 deletions(-) diff --git a/s_bert.py b/s_bert.py index 57cb16e..429144a 100644 --- a/s_bert.py +++ b/s_bert.py @@ -193,15 +193,49 @@ class STransformers(NNOperator): @staticmethod def supported_model_names(format: str = None): - import requests - req = requests.get("https://www.sbert.net/_static/html/models_en_sentence_embeddings.html") - data = req.text - full_list = [] - for line in data.split('\r\n'): - line = line.replace(' ', '') - if line.startswith('"name":'): - name = line.split(':')[-1].replace('"', '').replace(',', '') - full_list.append(name) + full_list = [ + 'bert-base-nli-mean-tokens', + 'msmarco-distilbert-base-v4', + 'sentence-t5-xxl', + 'sentence-t5-xl', + 'sentence-t5-large', + 'all-distilroberta-v1', + 'gtr-t5-xxl', + 'gtr-t5-large', + 'gtr-t5-xl', + 'all-MiniLM-L12-v1', + 'all-MiniLM-L12-v2', + 'all-MiniLM-L6-v1', + 'all-MiniLM-L6-v2', + 'all-mpnet-base-v1', + 'all-mpnet-base-v2', + 'all-roberta-large-v1', + 'bert-base-nli-mean-tokens', + 'gtr-t5-base', + 'distiluse-base-multilingual-cased-v1', + 'distiluse-base-multilingual-cased-v2', + 'msmarco-bert-base-dot-v5', + 'msmarco-distilbert-base-tas-b', + 'msmarco-distilbert-base-v4', + 'msmarco-distilbert-dot-v5', + 'multi-qa-distilbert-cos-v1', + 'multi-qa-distilbert-dot-v1', + 'multi-qa-MiniLM-L6-cos-v1', + 'multi-qa-MiniLM-L6-dot-v1', + 'multi-qa-mpnet-base-cos-v1', + 'multi-qa-mpnet-base-dot-v1', + 'paraphrase-albert-small-v2', + 'paraphrase-distilroberta-base-v2', + 'average_word_embeddings_komninos', + 'paraphrase-MiniLM-L12-v2', + 'paraphrase-MiniLM-L3-v2', + 'average_word_embeddings_glove.6B.300d', + 'paraphrase-MiniLM-L6-v2', + 'paraphrase-mpnet-base-v2', + 'paraphrase-multilingual-MiniLM-L12-v2', + 'paraphrase-multilingual-mpnet-base-v2', + 'paraphrase-TinyBERT-L6-v2' + ] full_list.sort() if format is None: model_list = full_list @@ -210,7 +244,7 @@ class STransformers(NNOperator): assert set(to_remove).issubset(set(full_list)) model_list = list(set(full_list) - set(to_remove)) elif format == 'onnx': - to_remove = [] + to_remove = ['gtr-t5-xxl', 'sentence-t5-xxl'] assert set(to_remove).issubset(set(full_list)) model_list = list(set(full_list) - set(to_remove)) else: