azure-openai
copied
4 changed files with 151 additions and 1 deletions
@ -1,2 +1,86 @@ |
|||
# azure-openai |
|||
# Sentence Embedding with Azure OpenAI |
|||
|
|||
*author: Junjie, Jael* |
|||
|
|||
<br /> |
|||
|
|||
## Description |
|||
|
|||
A sentence embedding operator generates one embedding vector in ndarray for each input text. |
|||
The embedding represents the semantic information of the whole input text as one vector. |
|||
This operator is implemented with embedding models from [OpenAI](https://platform.openai.com/docs/guides/embeddings). |
|||
Please note you need an [OpenAI API key](https://platform.openai.com/account/api-keys) to access OpenAI. |
|||
|
|||
<br /> |
|||
|
|||
## Code Example |
|||
|
|||
Use the pre-trained model 'text-embedding-ada-002' |
|||
to generate an embedding for the sentence "Hello, world.". |
|||
|
|||
*Write a pipeline with explicit inputs/outputs name specifications:* |
|||
|
|||
```python |
|||
from towhee import pipe, ops, DataCollection

# Build a pipeline that maps each input text to its embedding vector.
# The operator is created through the `azure_openai` factory, whose first
# parameter is `engine`; it also accepts `api_type`, `api_version` and
# `api_base` keyword arguments.
p = (
    pipe.input('text')
        .map('text', 'vec',
             ops.sentence_embedding.azure_openai(engine='text-embedding-ada-002',
                                                 api_key=OPENAI_API_KEY))
        .output('text', 'vec')
)

# Run the pipeline on one sentence and display the result.
DataCollection(p('Hello, world.')).show()
|||
``` |
|||
|
|||
<br /> |
|||
|
|||
## Factory Constructor |
|||
|
|||
Create the operator via the following factory method: |
|||
|
|||
***sentence_embedding.azure_openai(engine='text-embedding-ada-002', api_type='azure', api_version='2023-07-01-preview', api_key=None, api_base=None)*** |
|||
|
|||
**Parameters:** |
|||
|
|||
***engine***: *str* |
|||
|
|||
The model name in string, defaults to 'text-embedding-ada-002'. Supported model names: |
|||
- text-embedding-ada-002 |
|||
- text-similarity-davinci-001 |
|||
- text-similarity-curie-001 |
|||
- text-similarity-babbage-001 |
|||
- text-similarity-ada-001 |
|||
|
|||
***api_key***: *str=None* |
|||
|
|||
The OpenAI API key in string, defaults to None. |
|||
|
|||
<br /> |
|||
|
|||
## Interface |
|||
|
|||
The operator takes a piece of text in string as input. |
|||
It returns a text embedding in numpy.ndarray. |
|||
|
|||
***\_\_call\_\_(text)*** |
|||
|
|||
**Parameters:** |
|||
|
|||
***text***: *str* |
|||
|
|||
The text in string. |
|||
|
|||
**Returns**: |
|||
|
|||
*numpy.ndarray or list* |
|||
|
|||
The text embedding extracted by model. |
|||
|
|||
<br /> |
|||
|
|||
***supported_model_names()*** |
|||
|
|||
Get a list of supported model names. |
|||
|
|||
|
|||
|
@ -0,0 +1,4 @@ |
|||
from .azure_openai_embedding import AzureOpenaiEmbeding |
|||
|
|||
def azure_openai(*args, **kwargs):
    """Build an ``AzureOpenaiEmbeding`` operator.

    Every positional and keyword argument is passed through unchanged to the
    ``AzureOpenaiEmbeding`` constructor.

    Returns:
        The newly constructed ``AzureOpenaiEmbeding`` instance.
    """
    operator = AzureOpenaiEmbeding(*args, **kwargs)
    return operator
@ -0,0 +1,61 @@ |
|||
# Copyright 2021 Zilliz. All rights reserved. |
|||
# |
|||
# Licensed under the Apache License, Version 2.0 (the "License"); |
|||
# you may not use this file except in compliance with the License. |
|||
# You may obtain a copy of the License at |
|||
# |
|||
# http://www.apache.org/licenses/LICENSE-2.0 |
|||
# |
|||
# Unless required by applicable law or agreed to in writing, software |
|||
# distributed under the License is distributed on an "AS IS" BASIS, |
|||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|||
# See the License for the specific language governing permissions and |
|||
# limitations under the License. |
|||
|
|||
|
|||
from openai import Embedding |
|||
from tenacity import retry, stop_after_attempt, wait_random_exponential |
|||
from towhee.operator.base import PyOperator |
|||
|
|||
|
|||
class AzureOpenaiEmbeding(PyOperator):
    """Sentence-embedding operator backed by ``openai.Embedding.create``.

    Each call sends one piece of text to the embedding endpoint and returns
    the embedding found in the first item of the response.

    Args:
        engine: Forwarded as ``engine`` to ``Embedding.create``; defaults to
            'text-embedding-ada-002'.
        api_type: Forwarded as ``api_type``; defaults to 'azure'.
        api_version: Forwarded as ``api_version``; defaults to
            '2023-07-01-preview'.
        api_key: Forwarded as ``api_key``; defaults to None.
        api_base: Forwarded as ``api_base``; defaults to None.
    """

    def __init__(self,
                 engine='text-embedding-ada-002',
                 api_type: str = 'azure',
                 api_version: str = '2023-07-01-preview',
                 api_key=None,
                 api_base=None):
        # Run the base-class initialiser so that whatever state the operator
        # base class sets up is present on this instance as well.
        super().__init__()
        self._engine = engine
        self._api_type = api_type
        self._api_version = api_version
        self._api_key = api_key
        self._api_base = api_base

    @retry(wait=wait_random_exponential(min=1, max=20), stop=stop_after_attempt(6))
    def _call(self, text):
        """Request the embedding of ``text``.

        The call is wrapped by ``tenacity.retry`` with a random exponential
        wait (min=1, max=20) and stops after 6 attempts.

        Args:
            text: The input string; newlines are replaced by spaces before
                the request is sent.

        Returns:
            The ``embedding`` field of the first entry in the response's
            ``data`` list.
        """
        text = text.replace("\n", " ")
        response = Embedding.create(
            input=[text],
            engine=self._engine,
            api_key=self._api_key,
            api_type=self._api_type,
            api_version=self._api_version,
            api_base=self._api_base,
        )
        return response["data"][0]["embedding"]

    def __call__(self, text):
        """Return the embedding of ``text`` (see ``_call``)."""
        return self._call(text)

    @staticmethod
    def supported_model_names():
        """Return the model names listed by this operator, sorted ascending."""
        return sorted([
            'text-embedding-ada-002',
            'text-similarity-davinci-001',
            'text-similarity-curie-001',
            'text-similarity-babbage-001',
            'text-similarity-ada-001',
        ])
|||
|
@ -0,0 +1 @@ |
|||
openai |
Loading…
Reference in new issue