From c1a9544ef72114c62b187ee12139d9ed8bba6672 Mon Sep 17 00:00:00 2001
From: Jael Gu
Date: Fri, 28 Jul 2023 18:17:59 +0800
Subject: [PATCH] Update prompt template

Signed-off-by: Jael Gu
---
 README.md | 55 +++++++++++++------------------------------------------
 llama2.py |  5 ++++-
 2 files changed, 17 insertions(+), 43 deletions(-)

diff --git a/README.md b/README.md
index 46bd385..c5dd9b4 100644
--- a/README.md
+++ b/README.md
@@ -20,61 +20,32 @@ If the automatic installation fails in your environment, please refer to [llama-
 
 Use the default model to continue the conversation from given messages.
 
-*Write a pipeline with explicit inputs/outputs name specifications:*
+*Use operator:*
 
 ```python
-from towhee import pipe, ops
+from towhee import ops
 
-p = (
-    pipe.input('question', 'docs', 'history')
-        .map(('question', 'docs', 'history'), 'prompt', ops.prompt.question_answer())
-        .map('prompt', 'answer', ops.LLM.Llama_2('llama-2-7b-chat'))
-        .output('answer')
-)
+chat = ops.LLM.Llama_2('path/to/model_file.bin', max_tokens=2048, echo=True)
 
-history=[('Who won the world series in 2020?', 'The Los Angeles Dodgers won the World Series in 2020.')]
-question = 'Where was it played?'
-answer = p(question, [], history).get()[0]
+message = [{"question": "Building a website can be done in 10 simple steps:"}]
+answer = chat(message)
 ```
 
-*Write a [retrieval-augmented generation pipeline](https://towhee.io/tasks/detail/pipeline/retrieval-augmented-generation) with explicit inputs/outputs name specifications:*
+*Write a pipeline with explicit inputs/outputs name specifications:*
 
 ```python
 from towhee import pipe, ops
-
-temp = '''Use the following pieces of context to answer the question at the end.
-
-{context}
-
-Question: {question}
-'''
-
-system_msg = 'Your name is TowheeChat.'
-
-q1 = 'Who are you?'
-q2 = 'What is Towhee?'
-
 p = (
     pipe.input('question', 'docs', 'history')
-        .map(('question', 'docs', 'history'),
-             'prompt',
-             ops.prompt.template(temp, ['question', 'context'], system_msg))
-        .map('prompt', 'answer',
-             ops.LLM.Llama_2(temperature=0))
+        .map(('question', 'docs', 'history'), 'prompt', ops.prompt.question_answer())
+        .map('prompt', 'answer', ops.LLM.Llama_2('llama-2-7b-chat'))
         .output('answer')
 )
 
-history = []
-docs = []
-ans1 = p(q1, docs, history).get()[0]
-print(q1, ans1)
-
-history.append((q1, ans1))
-docs.append('Towhee is a cutting-edge framework designed to streamline the processing of unstructured data through the use of Large Language Model (LLM) based pipeline orchestration.')
-ans2 = p(q2, docs, history).get()[0]
-
-print(q2, ans2)
+history=[('Who won the world series in 2020?', 'The Los Angeles Dodgers won the World Series in 2020.')]
+question = 'Where was it played?'
+answer = p(question, [], history).get()[0]
 ```
@@ -132,8 +103,8 @@ A dictionary of supported models with model name as key and huggingface hub id &
         'hf_id': 'TheBloke/Llama-2-7B-GGML',
         'filename': 'llama-2-7b.ggmlv3.q4_0.bin'
     },
-    'llama-2-13-b-chat': {
-        'hf_id': 'TheBloke/Llama-2-13B-GGML',
+    'llama-2-13b-chat': {
+        'hf_id': 'TheBloke/Llama-2-13B-chat-GGML',
         'filename': 'llama-2-13b-chat.ggmlv3.q4_0.bin'
     }
 }
diff --git a/llama2.py b/llama2.py
index 9cc4483..32e8995 100644
--- a/llama2.py
+++ b/llama2.py
@@ -64,7 +64,10 @@ class LlamaCpp(PyOperator):
                     prompt += f'''{v}\n'''
                 else:
                     raise KeyError(f'Invalid key of message: {k}')
-        prompt = '<s> ' + prompt + ' </s>' + f'<s> [INST] {question} [/INST]'
+        if len(prompt) > 0:
+            prompt = '<s> ' + prompt + ' </s>' + f'<s> {question}'
+        else:
+            prompt = question
         return prompt
 
     def parse_outputs(self, response):
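
For sanity-checking the new template outside the operator, here is a minimal, self-contained sketch of the prompt assembly this patch implies. The `build_prompt` name and the sample message are hypothetical; the `system` and `question` branches of the loop sit outside this hunk, so their `[INST]`/`<<SYS>>` formatting is assumed from the standard Llama-2 chat format. Only the `answer` branch, the `KeyError`, and the final `if len(prompt) > 0` block mirror the diff itself.

```python
# Hypothetical sketch of the patched prompt assembly; not the exact code
# of llama2.py. Only the 'answer' branch, the KeyError, and the final
# if/else mirror the hunk above.
def build_prompt(messages):
    # By the operator's convention, the last message holds the new question.
    question = messages.pop(-1)['question']
    prompt = ''
    for m in messages:
        for k, v in m.items():
            if k == 'system':      # assumed Llama-2 system block
                prompt += f'[INST] <<SYS>> {v} <</SYS>> [/INST]\n'
            elif k == 'question':  # assumed wrapping for past turns
                prompt += f'[INST] {v} [/INST]\n'
            elif k == 'answer':
                prompt += f'{v}\n'
            else:
                raise KeyError(f'Invalid key of message: {k}')
    # The updated template: wrap accumulated history in <s> ... </s> and
    # open a fresh <s> segment for the new question; with no history,
    # pass the question through untouched so completion-style prompts
    # reach the model verbatim.
    if len(prompt) > 0:
        prompt = '<s> ' + prompt + ' </s>' + f'<s> {question}'
    else:
        prompt = question
    return prompt

# With no history the question survives unchanged, which is what lets the
# README's new "Use operator" example run in plain completion mode.
print(build_prompt([{'question': 'Building a website can be done in 10 simple steps:'}]))
```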
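
The README hunk above fixes a real lookup problem rather than a cosmetic one: `llama-2-13-b-chat` was a misspelled key, and `TheBloke/Llama-2-13B-GGML` points at the base (non-chat) weights, which do not include `llama-2-13b-chat.ggmlv3.q4_0.bin`. A quick way to confirm the corrected entry resolves is sketched below with `huggingface_hub` and `llama-cpp-python`; the `SUPPORTED_MODELS` name is illustrative rather than the exact variable in llama2.py, and a GGML-era release of `llama-cpp-python` (mid-2023) is assumed, since later versions read GGUF files instead.

```python
# Illustrative check that the corrected 13B-chat entry resolves to a real
# file; the dict name and this flow are assumptions, not llama2.py code.
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

SUPPORTED_MODELS = {
    'llama-2-13b-chat': {
        'hf_id': 'TheBloke/Llama-2-13B-chat-GGML',
        'filename': 'llama-2-13b-chat.ggmlv3.q4_0.bin',
    },
}

info = SUPPORTED_MODELS['llama-2-13b-chat']
# Downloads the quantized weights, or reuses a locally cached copy.
model_path = hf_hub_download(repo_id=info['hf_id'], filename=info['filename'])
llm = Llama(model_path=model_path)  # requires a GGML-compatible llama-cpp-python
out = llm('Q: Where were the 2020 World Series games played? A:', max_tokens=32)
print(out['choices'][0]['text'])
```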