from .openai_embedding import OpenaiEmbeding


def openai(*args, **kwargs):
    """Create an ``OpenaiEmbeding`` operator.

    All positional and keyword arguments are forwarded unchanged to the
    ``OpenaiEmbeding`` constructor.

    Returns:
        The newly constructed ``OpenaiEmbeding`` instance.
    """
    operator = OpenaiEmbeding(*args, **kwargs)
    return operator
from openai import Embedding
from tenacity import retry, stop_after_attempt, wait_random_exponential
from towhee.operator.base import PyOperator


class OpenaiEmbeding(PyOperator):
    """Operator that turns a piece of text into an embedding via OpenAI.

    Args:
        model_name: Name of the embedding model; it is sent to the API as the
            ``engine`` argument.
        api_key: API key sent with every request. It is forwarded as-is,
            including the default ``None``.
    """

    def __init__(self, model_name='text-embedding-ada-002', api_key=None):
        # Let the operator base class initialise its own state before this
        # subclass adds its attributes.
        super().__init__()
        self._engine = model_name
        self._api_key = api_key

    @retry(wait=wait_random_exponential(min=1, max=20), stop=stop_after_attempt(6))
    def _call(self, text):
        """Request the embedding for one already-normalised text.

        Only the remote request lives here, so the retry policy (randomised
        exponential wait, at most six attempts) applies to the API call alone
        and not to local input handling.

        Args:
            text: The text to embed; it is sent as a one-element ``input`` list.

        Returns:
            The ``"embedding"`` entry of the first item in the response's
            ``"data"`` list.
        """
        response = Embedding.create(
            input=[text],
            engine=self._engine,
            api_key=self._api_key,
        )
        return response["data"][0]["embedding"]

    def __call__(self, text):
        """Embed ``text`` and return the embedding reported by the API.

        Newlines are replaced by single spaces before the request is sent.
        This happens outside the retried call, so an invalid ``text`` (for
        example a non-string) fails immediately instead of being retried
        with back-off.

        Args:
            text: The string to embed.

        Returns:
            The embedding for ``text`` as returned by ``_call``.
        """
        flattened = text.replace("\n", " ")
        return self._call(flattened)

    @staticmethod
    def supported_model_names():
        """Return the supported model names as a new, alphabetically sorted list."""
        return sorted([
            'text-embedding-ada-002',
            'text-similarity-davinci-001',
            'text-similarity-curie-001',
            'text-similarity-babbage-001',
            'text-similarity-ada-001',
        ])