diff --git a/README.md b/README.md
index c5dd9b4..085a282 100644
--- a/README.md
+++ b/README.md
@@ -25,7 +25,7 @@ Use the default model to continue the conversation from given messages.
```python
from towhee import ops
-chat = ops.LLM.Llama_2('path/to/model_file.bin', max_tokens=2048, echo=True)
+chat = ops.LLM.Llama_2('path/to/model_file.bin', max_tokens=2048)
message = [{"question": "Building a website can be done in 10 simple steps:"}]
answer = chat(message)
@@ -39,7 +39,7 @@ from towhee import pipe, ops
p = (
pipe.input('question', 'docs', 'history')
.map(('question', 'docs', 'history'), 'prompt', ops.prompt.question_answer())
- .map('prompt', 'answer', ops.LLM.Llama_2('llama-2-7b-chat'))
+ .map('prompt', 'answer', ops.LLM.Llama_2('llama-2-7b-chat', stop=''))
.output('answer')
)
diff --git a/llama2.py b/llama2.py
index 32e8995..5a64e1e 100644
--- a/llama2.py
+++ b/llama2.py
@@ -57,15 +57,15 @@ class LlamaCpp(PyOperator):
for m in messages:
for k, v in m.items():
if k == 'system':
- prompt += f'''[INST] <> {v} <> [/INST]\n'''
+ prompt += f'''[INST] <> {v} <>\n'''
elif k == 'question':
- prompt += f'''[INST] {v} [/INST]\n'''
+ prompt += f''' {v} [/INST]\n'''
elif k == 'answer':
- prompt += f'''{v}\n'''
+ prompt += f''' {v} '''
else:
raise KeyError(f'Invalid key of message: {k}')
if len(prompt) > 0:
- prompt = ' ' + prompt + ' ' + f' {question}'
+ prompt = ' ' + prompt + ' ' + f' [INST] {question} [/INST]'
else:
prompt = question
return prompt