diff --git a/README.md b/README.md
index c5dd9b4..085a282 100644
--- a/README.md
+++ b/README.md
@@ -25,7 +25,7 @@ Use the default model to continue the conversation from given messages.
 ```python
 from towhee import ops
 
-chat = ops.LLM.Llama_2('path/to/model_file.bin', max_tokens=2048, echo=True)
+chat = ops.LLM.Llama_2('path/to/model_file.bin', max_tokens=2048)
 
 message = [{"question": "Building a website can be done in 10 simple steps:"}]
 answer = chat(message)
@@ -39,7 +39,7 @@ from towhee import pipe, ops
 p = (
     pipe.input('question', 'docs', 'history')
         .map(('question', 'docs', 'history'), 'prompt', ops.prompt.question_answer())
-        .map('prompt', 'answer', ops.LLM.Llama_2('llama-2-7b-chat'))
+        .map('prompt', 'answer', ops.LLM.Llama_2('llama-2-7b-chat', stop='</s>'))
         .output('answer')
 )
 
diff --git a/llama2.py b/llama2.py
index 32e8995..5a64e1e 100644
--- a/llama2.py
+++ b/llama2.py
@@ -57,15 +57,15 @@ class LlamaCpp(PyOperator):
         for m in messages:
             for k, v in m.items():
                 if k == 'system':
-                    prompt += f'''[INST] <<SYS>> {v} <</SYS>> [/INST]\n'''
+                    prompt += f'''[INST] <<SYS>> {v} <</SYS>>\n'''
                 elif k == 'question':
-                    prompt += f'''[INST] {v} [/INST]\n'''
+                    prompt += f''' {v} [/INST]\n'''
                 elif k == 'answer':
-                    prompt += f'''{v}\n'''
+                    prompt += f''' {v} '''
                 else:
                     raise KeyError(f'Invalid key of message: {k}')
         if len(prompt) > 0:
-            prompt = '<s> ' + prompt + ' </s>' + f' {question}'
+            prompt = '<s> ' + prompt + ' </s>' + f'<s> [INST] {question} [/INST]'
         else:
             prompt = question
         return prompt