azure-openai
copied
4 changed files with 151 additions and 1 deletions
@ -1,2 +1,86 @@ |
|||||
# azure-openai |
|
||||
|
# Sentence Embedding with OpenAI |
||||
|
|
||||
|
*author: Junjie, Jael* |
||||
|
|
||||
|
<br /> |
||||
|
|
||||
|
## Description |
||||
|
|
||||
|
A sentence embedding operator generates one embedding vector in ndarray for each input text. |
||||
|
The embedding represents the semantic information of the whole input text as one vector. |
||||
|
This operator is implemented with embedding models from [OpenAI](https://platform.openai.com/docs/guides/embeddings). |
||||
|
Please note you need an [OpenAI API key](https://platform.openai.com/account/api-keys) to access OpenAI. |
||||
|
|
||||
|
<br /> |
||||
|
|
||||
|
## Code Example |
||||
|
|
||||
|
Use the pre-trained model 'text-embedding-ada-002' |
||||
|
to generate an embedding for the sentence "Hello, world.". |
||||
|
|
||||
|
*Write a pipeline with explicit inputs/outputs name specifications:* |
||||
|
|
||||
|
```python |
||||
|
from towhee import pipe, ops, DataCollection |
||||
|
|
||||
|
p = ( |
||||
|
pipe.input('text') |
||||
|
.map('text', 'vec', |
||||
|
ops.sentence_embedding.azure_openai(engine='text-embedding-ada-002', api_key=OPENAI_API_KEY)) |
||||
|
.output('text', 'vec') |
||||
|
) |
||||
|
|
||||
|
DataCollection(p('Hello, world.')).show() |
||||
|
``` |
||||
|
|
||||
|
<br /> |
||||
|
|
||||
|
## Factory Constructor |
||||
|
|
||||
|
Create the operator via the following factory method: |
||||
|
|
||||
|
***sentence_embedding.azure_openai(engine='text-embedding-ada-002', api_type='azure', api_version='2023-07-01-preview', api_key=None, api_base=None)*** |
||||
|
|
||||
|
**Parameters:** |
||||
|
|
||||
|
***engine***: *str* |
||||
|
|
||||
|
The model name in string, defaults to 'text-embedding-ada-002'. Supported model names: |
||||
|
- text-embedding-ada-002 |
||||
|
- text-similarity-davinci-001 |
||||
|
- text-similarity-curie-001 |
||||
|
- text-similarity-babbage-001 |
||||
|
- text-similarity-ada-001 |
||||
|
|
||||
|
***api_key***: *str=None* |
||||
|
|
||||
|
The OpenAI API key in string, defaults to None. |
||||
|
|
||||
|
<br /> |
||||
|
|
||||
|
## Interface |
||||
|
|
||||
|
The operator takes a piece of text in string as input. |
||||
|
It returns a text embedding in numpy.ndarray. |
||||
|
|
||||
|
***\_\_call\_\_(text)*** |
||||
|
|
||||
|
**Parameters:** |
||||
|
|
||||
|
***text***: *str* |
||||
|
|
||||
|
The text in string. |
||||
|
|
||||
|
**Returns**: |
||||
|
|
||||
|
*numpy.ndarray or list* |
||||
|
|
||||
|
The text embedding extracted by model. |
||||
|
|
||||
|
<br /> |
||||
|
|
||||
|
***supported_model_names()*** |
||||
|
|
||||
|
Get a list of supported model names. |
||||
|
|
||||
|
|
||||
|
@ -0,0 +1,4 @@ |
|||||
|
from .azure_openai_embedding import AzureOpenaiEmbeding |
||||
|
|
||||
|
def azure_openai(*args, **kwargs):
    """Factory entry point: build an ``AzureOpenaiEmbeding`` operator.

    Every positional and keyword argument is forwarded unchanged to the
    ``AzureOpenaiEmbeding`` constructor.

    Returns:
        The newly constructed ``AzureOpenaiEmbeding`` instance.
    """
    operator = AzureOpenaiEmbeding(*args, **kwargs)
    return operator
@ -0,0 +1,61 @@ |
|||||
|
# Copyright 2021 Zilliz. All rights reserved. |
||||
|
# |
||||
|
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
|
# you may not use this file except in compliance with the License. |
||||
|
# You may obtain a copy of the License at |
||||
|
# |
||||
|
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
|
# |
||||
|
# Unless required by applicable law or agreed to in writing, software |
||||
|
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
|
# See the License for the specific language governing permissions and |
||||
|
# limitations under the License. |
||||
|
|
||||
|
|
||||
|
from openai import Embedding |
||||
|
from tenacity import retry, stop_after_attempt, wait_random_exponential |
||||
|
from towhee.operator.base import PyOperator |
||||
|
|
||||
|
|
||||
|
class AzureOpenaiEmbeding(PyOperator):
    """Text-embedding operator backed by ``openai.Embedding.create``.

    The connection settings given at construction time are stored on the
    instance and forwarded with every embedding request.

    Args:
        engine: Value sent as ``engine`` with each request; defaults to
            'text-embedding-ada-002'.
        api_type: Value sent as ``api_type``; defaults to 'azure'.
        api_version: Value sent as ``api_version``; defaults to
            '2023-07-01-preview'.
        api_key: Value sent as ``api_key``; defaults to None.
        api_base: Value sent as ``api_base``; defaults to None.
    """

    def __init__(self,
                 engine='text-embedding-ada-002',
                 api_type: str = 'azure',
                 api_version: str = '2023-07-01-preview',
                 api_key=None,
                 api_base=None):
        # Run the base-class initializer first so whatever state PyOperator
        # sets up exists on this instance (the original skipped this call).
        super().__init__()
        self._engine = engine
        self._api_type = api_type
        self._api_version = api_version
        self._api_key = api_key
        self._api_base = api_base

    @retry(wait=wait_random_exponential(min=1, max=20), stop=stop_after_attempt(6))
    def _call(self, text):
        """Request the embedding of ``text`` (retried by the decorator).

        Any exception raised here triggers a retry with randomized
        exponential backoff, for at most 6 attempts in total.

        Args:
            text: The input string; newlines are replaced by spaces before
                the request is sent.

        Returns:
            The ``["data"][0]["embedding"]`` entry of the API response.
        """
        single_line = text.replace("\n", " ")
        response = Embedding.create(
            input=[single_line],
            engine=self._engine,
            api_key=self._api_key,
            api_type=self._api_type,
            api_version=self._api_version,
            api_base=self._api_base,
        )
        # Exactly one input is sent, so only the first result is read.
        return response["data"][0]["embedding"]

    def __call__(self, text):
        """Return the embedding for ``text`` by delegating to ``_call``."""
        return self._call(text)

    @staticmethod
    def supported_model_names():
        """Return the known model names as a new, alphabetically sorted list."""
        return sorted([
            'text-embedding-ada-002',
            'text-similarity-davinci-001',
            'text-similarity-curie-001',
            'text-similarity-babbage-001',
            'text-similarity-ada-001',
        ])
||||
|
|
@ -0,0 +1 @@ |
|||||
|
openai |
Loading…
Reference in new issue