4 changed files with 151 additions and 1 deletions
@ -1,2 +1,83 @@ |
|||||
# Dolly |
|
||||
|
# Dolly Generation |
||||
|
|
||||
|
*author: Jael* |
||||
|
|
||||
|
<br /> |
||||
|
|
||||
|
## Description |
||||
|
|
||||
|
An LLM operator generates an answer for the prompt in the given messages using a large language model or service. |
||||
|
This operator uses a pretrained [Dolly](https://github.com/databrickslabs/dolly) to generate response. |
||||
|
It will download model from [HuggingFace Models](https://huggingface.co/models). |
||||
|
|
||||
|
<br /> |
||||
|
|
||||
|
## Code Example |
||||
|
|
||||
|
Use the default model to continue the conversation from given messages. |
||||
|
|
||||
|
*Write a pipeline with explicit inputs/outputs name specifications:* |
||||
|
|
||||
|
```python |
||||
|
from towhee import pipe, ops |
||||
|
|
||||
|
p = ( |
||||
|
pipe.input('messages') |
||||
|
.map('messages', 'answer', ops.LLM.Dolly()) |
||||
|
.output('messages', 'answer') |
||||
|
) |
||||
|
|
||||
|
messages=[ |
||||
|
{'question': 'Who won the world series in 2020?', 'answer': 'The Los Angeles Dodgers won the World Series in 2020.'}, |
||||
|
{'question': 'Where was it played?'} |
||||
|
] |
||||
|
answer = p(messages) |
||||
|
``` |
||||
|
|
||||
|
<br /> |
||||
|
|
||||
|
## Factory Constructor |
||||
|
|
||||
|
Create the operator via the following factory method: |
||||
|
|
||||
|
***LLM.Dolly(model_name: str)*** |
||||
|
|
||||
|
**Parameters:** |
||||
|
|
||||
|
***model_name***: *str* |
||||
|
|
||||
|
The model name in string, defaults to 'databricks/dolly-v2-12b'. Supported model names: |
||||
|
- databricks/dolly-v2-12b |
||||
|
- databricks/dolly-v2-7b |
||||
|
- databricks/dolly-v2-3b |
||||
|
- databricks/dolly-v1-6b |
||||
|
|
||||
|
***\*\*kwargs*** |
||||
|
|
||||
|
Other Dolly model parameters such as device_map. |
||||
|
|
||||
|
<br /> |
||||
|
|
||||
|
## Interface |
||||
|
|
||||
|
The operator takes a list of message dictionaries as input. |
||||
|
It returns the generated answer as a string. |
||||
|
|
||||
|
***\_\_call\_\_(messages)*** |
||||
|
|
||||
|
**Parameters:** |
||||
|
|
||||
|
***messages***: *list* |
||||
|
|
||||
|
A list of messages to set up chat. |
||||
|
Must be a list of dictionaries with key value from "system", "question", "answer". For example, [{"question": "a past question?", "answer": "a past answer."}, {"question": "current question?"}] |
||||
|
|
||||
|
**Returns**: |
||||
|
|
||||
|
*answer: str* |
||||
|
|
||||
|
The answer generated. |
||||
|
|
||||
|
<br /> |
||||
|
|
||||
|
|
||||
|
@ -0,0 +1,5 @@ |
|||||
|
from .hf_dolly import HuggingfaceDolly |
||||
|
|
||||
|
|
||||
|
def Dolly(*args, **kwargs):
    """Factory method: build and return a HuggingfaceDolly operator.

    All positional and keyword arguments are forwarded unchanged to
    the HuggingfaceDolly constructor.
    """
    operator = HuggingfaceDolly(*args, **kwargs)
    return operator
@ -0,0 +1,62 @@ |
|||||
|
# Copyright 2021 Zilliz. All rights reserved. |
||||
|
# |
||||
|
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
|
# you may not use this file except in compliance with the License. |
||||
|
# You may obtain a copy of the License at |
||||
|
# |
||||
|
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
|
# |
||||
|
# Unless required by applicable law or agreed to in writing, software |
||||
|
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
|
# See the License for the specific language governing permissions and |
||||
|
# limitations under the License. |
||||
|
|
||||
|
from typing import List |
||||
|
|
||||
|
import torch |
||||
|
from transformers import pipeline |
||||
|
|
||||
|
from towhee.operator.base import PyOperator |
||||
|
|
||||
|
|
||||
|
class HuggingfaceDolly(PyOperator):
    '''Chat-completion operator backed by a pretrained Dolly model.

    Wraps a HuggingFace `transformers.pipeline` around one of the
    databricks/dolly checkpoints (see `supported_model_names`).
    (Original docstring said "Wrapper of OpenAI Chat API" — a copy-paste
    error from the OpenAI operator.)

    Args:
        model_name (str): HuggingFace model id, defaults to
            'databricks/dolly-v2-12b'.
        **kwargs: extra keyword arguments forwarded to
            `transformers.pipeline` (e.g. torch_dtype, trust_remote_code,
            device_map).
    '''
    def __init__(self,
                 model_name: str = 'databricks/dolly-v2-12b',
                 **kwargs
                 ):
        # Pop the keys we provide defaults for, then forward everything
        # else to `pipeline` — the original extracted only these three
        # with .get() and silently dropped all other kwargs, contradicting
        # the documented "**kwargs: Other Dolly model parameters".
        torch_dtype = kwargs.pop('torch_dtype', torch.bfloat16)
        trust_remote_code = kwargs.pop('trust_remote_code', True)
        device_map = kwargs.pop('device_map', 'auto')

        self.pipeline = pipeline(model=model_name,
                                 torch_dtype=torch_dtype,
                                 trust_remote_code=trust_remote_code,
                                 device_map=device_map,
                                 **kwargs)

    def __call__(self, messages: List[dict]):
        '''Generate an answer for the last question in `messages`.

        Args:
            messages (List[dict]): chat history; each dict uses keys from
                {"system", "question", "answer"}; the last entry carries
                the current question.

        Returns:
            The raw pipeline output. NOTE(review): the README promises a
            plain string, but a transformers pipeline typically returns a
            list of dicts — confirm the Dolly pipeline's output shape and
            unwrap here if needed.
        '''
        prompt = self.parse_inputs(messages)
        ans = self.pipeline(prompt)
        return ans

    def parse_inputs(self, messages: List[dict]):
        '''Flatten the chat history into a single prompt string.

        Past turns are rendered as "key: value" lines and placed BEFORE
        the current question. (The original returned
        `prompt + '\\n' + history`, which put the history after the
        question and inverted the conversation order.)
        '''
        assert isinstance(messages, list), \
            'Inputs must be a list of dictionaries with keys from ["system", "question", "answer"].'
        prompt = messages[-1]['question']
        history = ''
        for m in messages[:-1]:
            for k, v in m.items():
                history += k + ': ' + v + '\n'
        # History first (each line already newline-terminated), current
        # question last.
        return history + prompt

    @staticmethod
    def supported_model_names():
        '''Return the supported Dolly checkpoint names, sorted.'''
        model_list = [
            'databricks/dolly-v2-12b',
            'databricks/dolly-v2-7b',
            'databricks/dolly-v2-3b',
            'databricks/dolly-v1-6b'
        ]
        model_list.sort()
        return model_list
||||
|
|
@ -0,0 +1,2 @@ |
|||||
|
transformers[torch]>=4.28.1,<5 |
||||
|
torch>=1.13.1,<2 |
Loading…
Reference in new issue