diff --git a/README.md b/README.md index d9403e2..60ceffe 100644 --- a/README.md +++ b/README.md @@ -28,11 +28,11 @@ import towhee towhee.glob('./teddy.jpg') \ .image_decode() \ - .towhee.clip(model_name='ViT-B/32', modality='image') \ + .towhee.clip(model_name='clip_vit_b32', modality='image') \ .show() towhee.dc(["A teddybear on a skateboard in Times Square."]) \ - .towhee.clip(model_name='ViT-B/32', modality='text') \ + .towhee.clip(model_name='clip_vit_b32', modality='text') \ .show() ``` result1 @@ -45,12 +45,12 @@ import towhee towhee.glob['path']('./teddy.jpg') \ .image_decode['path', 'img']() \ - .towhee.clip['img', 'vec'](model_name='ViT-B/32', modality='image') \ + .towhee.clip['img', 'vec'](model_name='clip_vit_b32', modality='image') \ .select['img', 'vec']() \ .show() towhee.dc['text'](["A teddybear on a skateboard in Times Square."]) \ - .towhee.clip['text','vec'](model_name='ViT-B/32', modality='text') \ + .towhee.clip['text','vec'](model_name='clip_vit_b32', modality='text') \ .select['text', 'vec']() \ .show() ``` diff --git a/clip.py b/clip.py index a21ef59..a52aa9b 100644 --- a/clip.py +++ b/clip.py @@ -49,7 +49,7 @@ class Clip(NNOperator): vec = self._inference_from_text(data) else: raise ValueError("modality[{}] not implemented.".format(self._modality)) - return vec + return vec.detach().cpu().numpy() def _inference_from_text(self, text): text = self.tokenize(text).to(self.device)