diff --git a/README.md b/README.md index b496a0e..3718aab 100644 --- a/README.md +++ b/README.md @@ -69,16 +69,14 @@ Create the operator via the following factory method ## Interface -An image-text embedding operator takes a [towhee image](link/to/towhee/image/api/doc) as input and generate the correspoing caption. +An image captioning operator takes a [towhee image](link/to/towhee/image/api/doc) as input and generates the corresponding caption. **Parameters:** ​ ***data:*** *towhee.types.Image (a sub-class of numpy.ndarray)* -​ The image to generate embedding. - - +​ The image to generate a caption for. **Returns:** *str* diff --git a/clipcap.py b/clipcap.py index 7c7c5f9..268be67 100644 --- a/clipcap.py +++ b/clipcap.py @@ -33,7 +33,7 @@ class ClipCap(NNOperator): def __init__(self, model_name: str): super().__init__() sys.path.append(str(Path(__file__).parent)) - from models.clipcap import ClipCaptionModel, generate_beam + from clipcap_model.clipcap import ClipCaptionModel, generate_beam self.device = "cuda" if torch.cuda.is_available() else "cpu" self.generate_beam = generate_beam self.tokenizer = GPT2Tokenizer.from_pretrained("gpt2") diff --git a/clipcap_model/__init__.py b/clipcap_model/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/models/clipcap.py b/clipcap_model/clipcap.py similarity index 100% rename from models/clipcap.py rename to clipcap_model/clipcap.py