Jael Gu
1 year ago
4 changed files with 236 additions and 1 deletion
@@ -1,2 +1,140 @@
# Llama-2 Chat

*author: Jael*

<br />

## Description

An LLM operator generates an answer from a prompt carried in messages, using a large language model or service.
This operator uses a pretrained [Llama-2](https://ai.meta.com/llama) to generate responses.
By default, it downloads the model file from [HuggingFace](https://huggingface.co/TheBloke)
and then runs the model with [Llama-cpp](https://github.com/ggerganov/llama.cpp).

The operator installs and runs the model with llama-cpp automatically.
If the automatic installation fails in your environment, please refer to [llama-cpp-python](https://github.com/abetlen/llama-cpp-python) for manual installation instructions.
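In most environments the manual install amounts to a single pip command (the package's standard install; platform-specific build flags are covered in its README):

```bash
pip install llama-cpp-python
```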

<br />

## Code Example

Use the default model to continue the conversation from the given messages.

*Write a pipeline with explicit input/output name specifications:*

```python
from towhee import pipe, ops

p = (
    pipe.input('question', 'docs', 'history')
    .map(('question', 'docs', 'history'), 'prompt', ops.prompt.question_answer())
    .map('prompt', 'answer', ops.LLM.Llama_2('llama-2-7b-chat'))
    .output('answer')
)

history = [('Who won the world series in 2020?', 'The Los Angeles Dodgers won the World Series in 2020.')]
question = 'Where was it played?'
answer = p(question, [], history).get()[0]
```

*Write a [retrieval-augmented generation pipeline](https://towhee.io/tasks/detail/pipeline/retrieval-augmented-generation) with explicit input/output name specifications:*

```python
from towhee import pipe, ops

temp = '''Use the following pieces of context to answer the question at the end.

{context}

Question: {question}
'''

system_msg = 'Your name is TowheeChat.'

q1 = 'Who are you?'
q2 = 'What is Towhee?'

p = (
    pipe.input('question', 'docs', 'history')
    .map(('question', 'docs', 'history'),
         'prompt',
         ops.prompt.template(temp, ['question', 'context'], system_msg))
    .map('prompt', 'answer',
         ops.LLM.Llama_2(max_tokens=200))
    .output('answer')
)

history = []
docs = []
ans1 = p(q1, docs, history).get()[0]
print(q1, ans1)

history.append((q1, ans1))
docs.append('Towhee is a cutting-edge framework designed to streamline the processing of unstructured data through the use of Large Language Model (LLM) based pipeline orchestration.')
ans2 = p(q2, docs, history).get()[0]

print(q2, ans2)
```

<br />

## Factory Constructor

Create the operator via the following factory method:

***LLM.Llama_2(model_name_or_file: str)***

**Parameters:**

***model_name_or_file***: *str*

The model name or the path to a model file, defaults to 'llama-2-7b-chat'.
If the model name is in `supported_model_names`, the corresponding model file will be downloaded from HuggingFace.
You can also pass the local path of a model file that can be run by llama-cpp-python.

***\*\*kwargs***

Other model parameters such as `temperature` and `max_tokens`.
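For example, the operator can be constructed from a local GGML file with generation parameters passed through as keyword arguments (the file path below is a placeholder):

```python
from towhee import ops

# The path is hypothetical; any model file runnable by llama-cpp-python works here.
op = ops.LLM.Llama_2('/path/to/llama-2-7b-chat.ggmlv3.q4_0.bin',
                     temperature=0.8, max_tokens=200)
```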

<br />

## Interface

The operator takes a list of messages as input and returns the generated answer as a string.

***\_\_call\_\_(messages)***

**Parameters:**

***messages***: *list*

A list of messages to set up the chat.
Must be a list of dictionaries whose keys are among "system", "question", and "answer". For example: `[{"question": "a past question?", "answer": "a past answer."}, {"question": "current question?"}]`
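
As an illustration, a message list like the one below is rendered internally into Llama-2's `[INST]`/`<<SYS>>` prompt format; the comments sketch the output of `parse_inputs` in the operator code further down:

```python
messages = [
    {'system': 'Your name is TowheeChat.'},
    {'question': 'Who are you?', 'answer': 'I am TowheeChat.'},
    {'question': 'What can you do?'},
]
# Roughly renders to:
# <s> [INST] <<SYS>> Your name is TowheeChat. <</SYS>> [/INST]
# [INST] Who are you? [/INST]
# I am TowheeChat.
#  </s><s> [INST] What can you do? [/INST]
```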

**Returns**:

*answer: str*

The generated answer.

<br />

***supported_model_names()***

**Returns**:

A dictionary of supported models, with the model name as key and the HuggingFace hub id & model filename as value:
```python
{
    'llama-2-7b-chat': {
        'hf_id': 'TheBloke/Llama-2-7B-GGML',
        'filename': 'llama-2-7b.ggmlv3.q4_0.bin'
    },
    'llama-2-13b-chat': {
        'hf_id': 'TheBloke/Llama-2-13B-GGML',
        'filename': 'llama-2-13b-chat.ggmlv3.q4_0.bin'
    }
}
```
@@ -0,0 +1,5 @@

from .llama2 import LlamaCpp


def llama_2(*args, **kwargs):
    return LlamaCpp(*args, **kwargs)
@@ -0,0 +1,90 @@

# Copyright 2021 Zilliz. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
from typing import List

from huggingface_hub import hf_hub_download
from llama_cpp import Llama

from towhee.operator.base import PyOperator, SharedType
class LlamaCpp(PyOperator):
    '''Wrapper of Llama-2 inference with llama-cpp-python.'''
    def __init__(self,
                 model_name_or_file: str = 'llama-2-7b-chat',
                 **kwargs
                 ):
        super().__init__()
        self.kwargs = kwargs
        supported_model_names = self.supported_model_names()

        if model_name_or_file in supported_model_names:
            # Known model name: download the corresponding file from HuggingFace.
            model_info = supported_model_names[model_name_or_file]
            hf_id = model_info['hf_id']
            model_filename = model_info['filename']
            self.model_path = hf_hub_download(repo_id=hf_id, filename=model_filename)
        else:
            # Otherwise treat the argument as a local path to a model file.
            self.model_path = model_name_or_file
            assert os.path.isfile(self.model_path), f'Invalid model path: {self.model_path}'

        self.model = Llama(model_path=self.model_path)

    def __call__(self, messages: List[dict]):
        prompt = self.parse_inputs(messages)
        resp = self.model(prompt, **self.kwargs)
        answer = self.parse_outputs(resp)
        return answer

    def parse_inputs(self, messages: List[dict]):
        '''Convert a list of message dicts into a Llama-2 chat prompt.'''
        assert isinstance(messages, list), \
            'Inputs must be a list of dictionaries with keys from ["system", "question", "answer"].'
        prompt = ''
        # The last message must hold only the current question.
        question = messages.pop(-1)
        assert len(question) == 1 and 'question' in question.keys()
        question = question['question']
        for m in messages:
            for k, v in m.items():
                if k == 'system':
                    prompt += f'''[INST] <<SYS>> {v} <</SYS>> [/INST]\n'''
                elif k == 'question':
                    prompt += f'''[INST] {v} [/INST]\n'''
                elif k == 'answer':
                    prompt += f'''{v}\n'''
                else:
                    raise KeyError(f'Invalid key of message: {k}')
        prompt = '<s> ' + prompt + ' </s>' + f'<s> [INST] {question} [/INST]'
        return prompt

    def parse_outputs(self, response):
        return response['choices'][0]['text']

    @staticmethod
    def supported_model_names():
        models = {
            'llama-2-7b-chat': {
                'hf_id': 'TheBloke/Llama-2-7B-GGML',
                'filename': 'llama-2-7b.ggmlv3.q4_0.bin'
            },
            'llama-2-13b-chat': {
                'hf_id': 'TheBloke/Llama-2-13B-GGML',
                'filename': 'llama-2-13b-chat.ggmlv3.q4_0.bin'
            }
        }
        return models

    @property
    def shared_type(self):
        return SharedType.Shareable
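
# Minimal usage sketch: with network access, this downloads the default model
# on first use and answers a single question (the question is an arbitrary example).
if __name__ == '__main__':
    op = LlamaCpp('llama-2-7b-chat', max_tokens=64)
    print(op([{'question': 'What is Towhee?'}]))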
@@ -0,0 +1,2 @@

llama-cpp-python
huggingface-hub