
update the readme.

Signed-off-by: wxywb <xy.wang@zilliz.com>
Branch: main
Author: wxywb, 2 years ago
Commit: dd32a1fe77
2 changed files: README.md (26 lines changed), clipcap.py (4 lines changed)

--- a/README.md
+++ b/README.md
@@ -19,28 +19,20 @@ This operator generates the caption with [ClipCap](https://arxiv.org/abs/2111.09734).
 Load an image from path './hulk.jpg' to generate the caption.
 
-*Write the pipeline in simplified style*:
+*Write a pipeline with explicit inputs/outputs name specifications:*
 
 ```python
-import towhee
+from towhee.dc2 import pipe, ops, DataCollection
 
-towhee.glob('./hulk.jpg') \
-      .image_decode() \
-      .image_captioning.clipcap(model_name='clipcap_coco') \
-      .show()
-```
-<img src="./cap.png" alt="result1" style="height:20px;"/>
-
-*Write a same pipeline with explicit inputs/outputs name specifications:*
-
-```python
-import towhee
-
-towhee.glob['path']('./hulk.jpg') \
-      .image_decode['path', 'img']() \
-      .image_captioning.clipcap['img', 'text'](model_name='clipcap_coco') \
-      .select['img', 'text']() \
-      .show()
+p = (
+    pipe.input('url')
+    .map('url', 'img', ops.image_decode.cv2_rgb())
+    .map('img', 'text', ops.image_captioning.clipcap(model_name='clipcap_coco'))
+    .output('img', 'text')
+)
+
+DataCollection(p('./image.jpg')).show()
 ```
 <img src="./tabular.png" alt="result2" style="height:60px;"/>
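
Note on the README change: the new `towhee.dc2` pipe is built once and can then be invoked per input. A minimal usage sketch based on the code above (the image paths here are hypothetical):

```python
from towhee.dc2 import pipe, ops, DataCollection

# Build the captioning pipeline once; 'url' is the declared input name,
# 'img' and 'text' are the intermediate/output column names.
p = (
    pipe.input('url')
    .map('url', 'img', ops.image_decode.cv2_rgb())
    .map('img', 'text', ops.image_captioning.clipcap(model_name='clipcap_coco'))
    .output('img', 'text')
)

# Hypothetical paths; each call runs the whole pipeline and
# DataCollection renders the result as a table.
for path in ['./hulk.jpg', './image.jpg']:
    DataCollection(p(path)).show()
```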

--- a/clipcap.py
+++ b/clipcap.py
@@ -55,9 +55,9 @@ class ClipCap(NNOperator):
         self.model = ClipCaptionModel(self.prefix_length)
         model_path = os.path.dirname(__file__) + '/weights/' + config['weights']
         self.model.load_state_dict(torch.load(model_path, map_location=torch.device('cpu')))
-
+        self.model.to(self.device)
         self.model = self.model.eval()
 
     @arg(1, to_image_color('RGB'))
     def inference_single_data(self, data):
         text = self._inference_from_image(data)
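
Note: the added `self.model.to(self.device)` moves the weights (loaded onto CPU via `map_location`) to the operator's configured device before inference. A minimal sketch of the same pattern, with a stand-in model and checkpoint path (both hypothetical, not the operator's real code):

```python
import torch

device = 'cuda' if torch.cuda.is_available() else 'cpu'

model = torch.nn.Linear(512, 512)  # stand-in for ClipCaptionModel
state = torch.load('weights.pt', map_location=torch.device('cpu'))  # hypothetical checkpoint
model.load_state_dict(state)
model.to(device)  # without this, GPU inputs would hit a device-mismatch RuntimeError
model.eval()
```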
@@ -85,7 +85,7 @@ class ClipCap(NNOperator):
     @arg(1, to_image_color('RGB'))
     def _inference_from_image(self, img):
         img = self._preprocess(img)
-        clip_feat = self.clip_model.encode_image(img)
+        clip_feat = self.clip_model.encode_image(img).float()
 
         self.prefix_length = 10
         prefix_embed = self.model.clip_project(clip_feat).reshape(1, self.prefix_length, -1)
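
Note: the `.float()` cast guards against a dtype mismatch. OpenAI's CLIP runs in half precision on GPU, so `encode_image` can return float16 features, while `clip_project` holds float32 weights. A minimal sketch of the failure mode and the fix (layer sizes are illustrative):

```python
import torch

prefix_length = 10
proj = torch.nn.Linear(512, prefix_length * 768)  # float32, like clip_project

# Half-precision features, as CLIP produces on GPU.
clip_feat = torch.randn(1, 512, dtype=torch.float16)

# proj(clip_feat) would raise "expected ... Float but found Half";
# casting to float32 first keeps the projection consistent.
prefix_embed = proj(clip_feat.float()).reshape(1, prefix_length, -1)
print(prefix_embed.shape, prefix_embed.dtype)  # torch.Size([1, 10, 768]) torch.float32
```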
