diff --git a/README.md b/README.md
index d9403e2..60ceffe 100644
--- a/README.md
+++ b/README.md
@@ -28,11 +28,11 @@ import towhee
 
 towhee.glob('./teddy.jpg') \
       .image_decode() \
-      .towhee.clip(model_name='ViT-B/32', modality='image') \
+      .towhee.clip(model_name='clip_vit_b32', modality='image') \
       .show()
 
 towhee.dc(["A teddybear on a skateboard in Times Square."]) \
-      .towhee.clip(model_name='ViT-B/32', modality='text') \
+      .towhee.clip(model_name='clip_vit_b32', modality='text') \
       .show()
 ```
 <img src="https://towhee.io/towhee/clip/raw/branch/main/vec1.png" alt="result1" style="height:20px;"/>
@@ -45,12 +45,12 @@ import towhee
 
 towhee.glob['path']('./teddy.jpg') \
       .image_decode['path', 'img']() \
-      .towhee.clip['img', 'vec'](model_name='ViT-B/32', modality='image') \
+      .towhee.clip['img', 'vec'](model_name='clip_vit_b32', modality='image') \
       .select['img', 'vec']() \
       .show()
 
 towhee.dc['text'](["A teddybear on a skateboard in Times Square."]) \
-      .towhee.clip['text','vec'](model_name='ViT-B/32', modality='text') \
+      .towhee.clip['text','vec'](model_name='clip_vit_b32', modality='text') \
       .select['text', 'vec']() \
       .show()
 ```
diff --git a/clip.py b/clip.py
index a21ef59..a52aa9b 100644
--- a/clip.py
+++ b/clip.py
@@ -49,7 +49,7 @@ class Clip(NNOperator):
             vec = self._inference_from_text(data)
         else:
             raise ValueError("modality[{}] not implemented.".format(self._modality))
-        return vec
+        return vec.detach().cpu().numpy()
 
     def _inference_from_text(self, text):
         text = self.tokenize(text).to(self.device)