diff --git a/README.md b/README.md
index 46bd385..c5dd9b4 100644
--- a/README.md
+++ b/README.md
@@ -20,61 +20,32 @@ If the automatic installation fails in your environment, please refer to [llama-
Use the default model to continue the conversation from given messages.
-*Write a pipeline with explicit inputs/outputs name specifications:*
+*Use the operator directly:*
```python
-from towhee import pipe, ops
+from towhee import ops
-p = (
- pipe.input('question', 'docs', 'history')
- .map(('question', 'docs', 'history'), 'prompt', ops.prompt.question_answer())
- .map('prompt', 'answer', ops.LLM.Llama_2('llama-2-7b-chat'))
- .output('answer')
-)
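+# max_tokens and echo are llama.cpp generation options: max_tokens caps the
+# length of the generated answer, echo=True also returns the prompt text.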
+chat = ops.LLM.Llama_2('path/to/model_file.bin', max_tokens=2048, echo=True)
-history=[('Who won the world series in 2020?', 'The Los Angeles Dodgers won the World Series in 2020.')]
-question = 'Where was it played?'
-answer = p(question, [], history).get()[0]
+message = [{"question": "Building a website can be done in 10 simple steps:"}]
+answer = chat(message)
```
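+
+*A model name from the supported list (see below) can also be passed instead of a local GGML file; the corresponding weights are then expected to be fetched from the Hugging Face Hub. A minimal sketch:*
+```python
+from towhee import ops
+
+# 'llama-2-7b-chat' is one of the built-in model names listed below
+chat = ops.LLM.Llama_2('llama-2-7b-chat', max_tokens=2048)
+answer = chat([{"question": "Building a website can be done in 10 simple steps:"}])
+```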
-*Write a [retrieval-augmented generation pipeline](https://towhee.io/tasks/detail/pipeline/retrieval-augmented-generation) with explicit inputs/outputs name specifications:*
+*Write a pipeline with explicit inputs/outputs name specifications:*
```python
from towhee import pipe, ops
-
-temp = '''Use the following pieces of context to answer the question at the end.
-
-{context}
-
-Question: {question}
-'''
-
-system_msg = 'Your name is TowheeChat.'
-
-q1 = 'Who are you?'
-q2 = 'What is Towhee?'
-
p = (
pipe.input('question', 'docs', 'history')
- .map(('question', 'docs', 'history'),
- 'prompt',
- ops.prompt.template(temp, ['question', 'context'], system_msg))
- .map('prompt', 'answer',
- ops.LLM.Llama_2(temperature=0))
+ .map(('question', 'docs', 'history'), 'prompt', ops.prompt.question_answer())
+ .map('prompt', 'answer', ops.LLM.Llama_2('llama-2-7b-chat'))
.output('answer')
)
-history = []
-docs = []
-ans1 = p(q1, docs, history).get()[0]
-print(q1, ans1)
-
-history.append((q1, ans1))
-docs.append('Towhee is a cutting-edge framework designed to streamline the processing of unstructured data through the use of Large Language Model (LLM) based pipeline orchestration.')
-ans2 = p(q2, docs, history).get()[0]
-
-print(q2, ans2)
+history = [('Who won the world series in 2020?', 'The Los Angeles Dodgers won the World Series in 2020.')]
+question = 'Where was it played?'
+answer = p(question, [], history).get()[0]
```
@@ -132,8 +103,8 @@ A dictionary of supported models with model name as key and huggingface hub id &
'hf_id': 'TheBloke/Llama-2-7B-GGML',
'filename': 'llama-2-7b.ggmlv3.q4_0.bin'
},
- 'llama-2-13-b-chat': {
- 'hf_id': 'TheBloke/Llama-2-13B-GGML',
+ 'llama-2-13b-chat': {
+ 'hf_id': 'TheBloke/Llama-2-13B-chat-GGML',
'filename': 'llama-2-13b-chat.ggmlv3.q4_0.bin'
}
}
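+
+# A sketch of using one of these names directly (assuming the corresponding
+# file can be downloaded from the Hugging Face Hub):
+#
+#   chat = ops.LLM.Llama_2('llama-2-13b-chat')
+#   answer = chat([{"question": "Building a website can be done in 10 simple steps:"}])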
diff --git a/llama2.py b/llama2.py
index 9cc4483..32e8995 100644
--- a/llama2.py
+++ b/llama2.py
@@ -64,7 +64,10 @@ class LlamaCpp(PyOperator):
prompt += f'''{v}\n'''
else:
raise KeyError(f'Invalid key of message: {k}')
- prompt = ' ' + prompt + ' ' + f' [INST] {question} [/INST]'
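+ # Prepend the prompt text built from earlier messages (if any) to the new
+ # question; otherwise send the question on its own.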
+ if len(prompt) > 0:
+ prompt = ' ' + prompt + ' ' + f' {question}'
+ else:
+ prompt = question
return prompt
def parse_outputs(self, response):