diff --git a/README.md b/README.md
index 3dba710..da6af99 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,140 @@
-# Llama-2
+# Llama-2 Chat
+
+*author: Jael*
+
+<br />
+
+## Description
+
+An LLM operator generates an answer from the given prompt messages using a large language model or service.
+This operator uses a pretrained [Llama-2](https://ai.meta.com/llama) model to generate responses.
+By default, it downloads the model file from [HuggingFace](https://huggingface.co/TheBloke)
+and then runs the model with [Llama-cpp](https://github.com/ggerganov/llama.cpp).
+
+This operator automatically installs and runs the model with llama-cpp.
+If the automatic installation fails in your environment, please refer to [llama-cpp-python](https://github.com/abetlen/llama-cpp-python) for instructions on manual installation.
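+
+Under the hood, the operator downloads a GGML model file from the HuggingFace Hub and loads it with llama-cpp-python.
+The snippet below is a rough sketch of that flow outside of Towhee, which can be handy for checking that llama-cpp works in your environment;
+the repo id and filename are the defaults listed under `supported_model_names()` below, while the prompt and `max_tokens` value are only illustrative.
+
+```python
+from huggingface_hub import hf_hub_download
+from llama_cpp import Llama
+
+# Fetch the default GGML weights used by this operator.
+model_path = hf_hub_download(repo_id='TheBloke/Llama-2-7B-GGML',
+                             filename='llama-2-7b.ggmlv3.q4_0.bin')
+
+# Load the model with llama-cpp and run a single completion.
+llm = Llama(model_path=model_path)
+resp = llm('[INST] What is Towhee? [/INST]', max_tokens=64)
+print(resp['choices'][0]['text'])
+```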
+
+<br />
+
+## Code Example
+
+Use the default model to continue the conversation from the given messages.
+
+*Write a pipeline with explicit input/output name specifications:*
+
+```python
+from towhee import pipe, ops
+
+p = (
+    pipe.input('question', 'docs', 'history')
+    .map(('question', 'docs', 'history'), 'prompt', ops.prompt.question_answer())
+    .map('prompt', 'answer', ops.LLM.Llama_2('llama-2-7b-chat'))
+    .output('answer')
+)
+
+history = [('Who won the world series in 2020?', 'The Los Angeles Dodgers won the World Series in 2020.')]
+question = 'Where was it played?'
+answer = p(question, [], history).get()[0]
+```
+
+*Write a [retrieval-augmented generation pipeline](https://towhee.io/tasks/detail/pipeline/retrieval-augmented-generation) with explicit input/output name specifications:*
+
+```python
+from towhee import pipe, ops
+
+
+temp = '''Use the following pieces of context to answer the question at the end.
+
+{context}
+
+Question: {question}
+'''
+
+system_msg = 'Your name is TowheeChat.'
+
+q1 = 'Who are you?'
+q2 = 'What is Towhee?'
+
+p = (
+    pipe.input('question', 'docs', 'history')
+    .map(('question', 'docs', 'history'),
+         'prompt',
+         ops.prompt.template(temp, ['question', 'context'], system_msg))
+    .map('prompt', 'answer',
+         ops.LLM.Llama_2(max_tokens=200))
+    .output('answer')
+)
+
+history = []
+docs = []
+ans1 = p(q1, docs, history).get()[0]
+print(q1, ans1)
+
+history.append((q1, ans1))
+docs.append('Towhee is a cutting-edge framework designed to streamline the processing of unstructured data through the use of Large Language Model (LLM) based pipeline orchestration.')
+ans2 = p(q2, docs, history).get()[0]
+print(q2, ans2)
+```
+
+<br />
+
+## Factory Constructor
+
+Create the operator via the following factory method:
+
+***LLM.Llama_2(model_name_or_file: str)***
+
+**Parameters:**
+
+***model_name_or_file***: *str*
+
+The model name, or the path to a model file, as a string; defaults to 'llama-2-7b-chat'.
+If the model name is in `supported_model_names`, the corresponding model file will be downloaded from HuggingFace.
+You can also pass the local path of a model file that can be run by llama-cpp-python.
+
+***\*\*kwargs***
+
+Other model parameters such as `temperature` and `max_tokens`.
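+
+For example, the operator can be constructed from a supported model name or from a local model file.
+The sketch below is illustrative only: the local path is a hypothetical placeholder and the generation parameters are arbitrary.
+
+```python
+from towhee import ops
+
+# Download and use the default chat model, limiting generation length.
+op = ops.LLM.Llama_2('llama-2-7b-chat', temperature=0.8, max_tokens=200)
+
+# Alternatively, point to a GGML model file that is already on disk
+# (hypothetical path, for illustration only).
+# op = ops.LLM.Llama_2('/path/to/llama-2-7b-chat.ggmlv3.q4_0.bin')
+```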
+
+<br />
+
+## Interface
+
+The operator takes a list of messages as input and returns the generated answer as a string.
+
+***\_\_call\_\_(messages)***
+
+**Parameters:**
+
+***messages***: *list*
+
+A list of messages to set up the chat.
+It must be a list of dictionaries whose keys are chosen from "system", "question", and "answer",
+for example, [{"question": "a past question?", "answer": "a past answer."}, {"question": "current question?"}].
+
+**Returns**:
+
+*answer: str*
+
+The generated answer.
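+
+Below is a minimal sketch of calling the operator on a message list.
+It assumes your Towhee version supports invoking an operator obtained from `towhee.ops` directly; otherwise, wrap it in a pipeline as shown in the code examples above.
+The conversation content is made up for illustration.
+
+```python
+from towhee import ops
+
+op = ops.LLM.Llama_2('llama-2-7b-chat', max_tokens=200)
+
+messages = [
+    {'system': 'Your name is TowheeChat.'},
+    {'question': 'Who won the world series in 2020?',
+     'answer': 'The Los Angeles Dodgers won the World Series in 2020.'},
+    {'question': 'Where was it played?'},
+]
+answer = op(messages)
+print(answer)
+```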
+
+<br />
+
+***supported_model_names()***
+
+**Returns**:
+
+A dictionary of supported models, with the model name as key and the HuggingFace Hub id and model filename as value.
+
+    {
+        'llama-2-7b-chat': {
+            'hf_id': 'TheBloke/Llama-2-7B-GGML',
+            'filename': 'llama-2-7b.ggmlv3.q4_0.bin'
+        },
+        'llama-2-13-b-chat': {
+            'hf_id': 'TheBloke/Llama-2-13B-GGML',
+            'filename': 'llama-2-13b-chat.ggmlv3.q4_0.bin'
+        }
+    }
diff --git a/__init__.py b/__init__.py
new file mode 100644
index 0000000..38e9485
--- /dev/null
+++ b/__init__.py
@@ -0,0 +1,5 @@
+from .llama2 import LlamaCpp
+
+
+def llama_2(*args, **kwargs):
+    return LlamaCpp(*args, **kwargs)
diff --git a/llama2.py b/llama2.py
new file mode 100644
index 0000000..86db3a0
--- /dev/null
+++ b/llama2.py
@@ -0,0 +1,90 @@
+# Copyright 2021 Zilliz. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+from typing import List
+
+from huggingface_hub import hf_hub_download
+from llama_cpp import Llama
+
+from towhee.operator.base import PyOperator, SharedType
+
+
+class LlamaCpp(PyOperator):
+    '''Wrapper of Llama-2 inference with llama-cpp.'''
+    def __init__(self,
+                 model_name_or_file: str = 'llama-2-7b-chat',
+                 **kwargs
+                 ):
+        self.kwargs = kwargs
+        supported_model_names = self.supported_model_names()
+
+        # Resolve the model: download a known model from the HuggingFace Hub,
+        # or treat the argument as a local model file path.
+        if model_name_or_file in supported_model_names:
+            model_info = supported_model_names[model_name_or_file]
+            hf_id = model_info['hf_id']
+            model_filename = model_info['filename']
+            self.model_path = hf_hub_download(repo_id=hf_id, filename=model_filename)
+        else:
+            self.model_path = model_name_or_file
+            assert os.path.isfile(self.model_path), f'Invalid model path: {self.model_path}'
+
+        self.model = Llama(model_path=self.model_path)
+
+    def __call__(self, messages: List[dict]):
+        prompt = self.parse_inputs(messages)
+        resp = self.model(prompt, **self.kwargs)
+        answer = self.parse_outputs(resp)
+        return answer
+
+    def parse_inputs(self, messages: List[dict]):
+        assert isinstance(messages, list), \
+            'Inputs must be a list of dictionaries with keys from ["system", "question", "answer"].'
+        prompt = ''
+        question = messages.pop(-1)
+        assert len(question) == 1 and 'question' in question.keys()
+        question = question['question']
+        # Build a Llama-2 chat prompt: system messages go inside <<SYS>> tags,
+        # past questions inside [INST] blocks, and past answers as plain text.
+        for m in messages:
+            for k, v in m.items():
+                if k == 'system':
+                    prompt += f'''[INST] <<SYS>> {v} <</SYS>> [/INST]\n'''
+                elif k == 'question':
+                    prompt += f'''[INST] {v} [/INST]\n'''
+                elif k == 'answer':
+                    prompt += f'''{v}\n'''
+                else:
+                    raise KeyError(f'Invalid key of message: {k}')
+        # Close the chat history and append the current question as the final [INST] turn.
+        prompt = '<s> ' + prompt + ' </s>' + f'<s> [INST] {question} [/INST]'
+        return prompt
+
+    def parse_outputs(self, response):
+        return response['choices'][0]['text']
+
+    @staticmethod
+    def supported_model_names():
+        models = {
+            'llama-2-7b-chat': {
+                'hf_id': 'TheBloke/Llama-2-7B-GGML',
+                'filename': 'llama-2-7b.ggmlv3.q4_0.bin'
+            },
+            'llama-2-13-b-chat': {
+                'hf_id': 'TheBloke/Llama-2-13B-GGML',
+                'filename': 'llama-2-13b-chat.ggmlv3.q4_0.bin'
+            }
+        }
+        return models
+
+    @property
+    def shared_type(self):
+        return SharedType.Shareable
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..de714be
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,2 @@
+llama-cpp-python
+huggingface-hub