Browse Source
Update the README.
Signed-off-by: wxywb <xy.wang@zilliz.com>
main
wxywb
2 years ago
2 changed files with
11 additions and
19 deletions
-
README.md
-
clipcap.py
|
|
@ -19,28 +19,20 @@ This operator generates the caption with [ClipCap](https://arxiv.org/abs/2111.09 |
|
|
|
|
|
|
|
Load an image from path './hulk.jpg' to generate the caption. |
|
|
|
|
|
|
|
*Write the pipeline in simplified style*: |
|
|
|
*Write a pipeline with explicit input/output name specifications:* |
|
|
|
|
|
|
|
```python |
|
|
|
import towhee |
|
|
|
from towhee.dc2 import pipe, ops, DataCollection |
|
|
|
|
|
|
|
towhee.glob('./hulk.jpg') \ |
|
|
|
.image_decode() \ |
|
|
|
.image_captioning.clipcap(model_name='clipcap_coco') \ |
|
|
|
.show() |
|
|
|
``` |
|
|
|
<img src="./cap.png" alt="result1" style="height:20px;"/> |
|
|
|
|
|
|
|
*Write the same pipeline with explicit input/output name specifications:* |
|
|
|
p = ( |
|
|
|
pipe.input('url') |
|
|
|
.map('url', 'img', ops.image_decode.cv2_rgb()) |
|
|
|
.map('img', 'text', ops.image_captioning.clipcap(model_name='clipcap_coco')) |
|
|
|
.output('img', 'text') |
|
|
|
) |
|
|
|
|
|
|
|
```python |
|
|
|
import towhee |
|
|
|
DataCollection(p('./image.jpg')).show() |
|
|
|
|
|
|
|
towhee.glob['path']('./hulk.jpg') \ |
|
|
|
.image_decode['path', 'img']() \ |
|
|
|
.image_captioning.clipcap['img', 'text'](model_name='clipcap_coco') \ |
|
|
|
.select['img', 'text']() \ |
|
|
|
.show() |
|
|
|
``` |
|
|
|
<img src="./tabular.png" alt="result2" style="height:60px;"/> |
|
|
|
|
|
|
|
|
|
@ -55,8 +55,8 @@ class ClipCap(NNOperator): |
|
|
|
self.model = ClipCaptionModel(self.prefix_length) |
|
|
|
model_path = os.path.dirname(__file__) + '/weights/' + config['weights'] |
|
|
|
self.model.load_state_dict(torch.load(model_path, map_location=torch.device('cpu'))) |
|
|
|
self.model.to(self.device) |
|
|
|
self.model = self.model.eval() |
|
|
|
|
|
|
|
|
|
|
|
@arg(1, to_image_color('RGB')) |
|
|
|
def inference_single_data(self, data): |
|
|
@ -85,7 +85,7 @@ class ClipCap(NNOperator): |
|
|
|
@arg(1, to_image_color('RGB')) |
|
|
|
def _inference_from_image(self, img): |
|
|
|
img = self._preprocess(img) |
|
|
|
clip_feat = self.clip_model.encode_image(img) |
|
|
|
clip_feat = self.clip_model.encode_image(img).float() |
|
|
|
|
|
|
|
self.prefix_length = 10 |
|
|
|
prefix_embed = self.model.clip_project(clip_feat).reshape(1, self.prefix_length, -1) |
|
|
|