diff --git a/README.md b/README.md
index 120fe66..245309b 100644
--- a/README.md
+++ b/README.md
@@ -24,40 +24,47 @@ Read the text 'kids feeding and playing with the horse' to generate a text embedding
 
 *Write the pipeline in simplified style*:
 
 ```python
-import towhee
+from towhee.dc2 import pipe, ops, DataCollection
 
-towhee.dc(['./demo_video.mp4']) \
-      .video_decode.ffmpeg(sample_type='uniform_temporal_subsample', args={'num_samples': 12}) \
-      .runas_op(func=lambda x: [y for y in x]) \
-      .clip4clip(model_name='clip_vit_b32', modality='video', device='cpu') \
-      .show()
+p = (
+    pipe.input('text')
+    .map('text', 'vec', ops.video_text_embedding.clip4clip(model_name='clip_vit_b32', modality='text', device='cuda:1'))
+    .output('text', 'vec')
+)
 
-towhee.dc(['kids feeding and playing with the horse']) \
-      .clip4clip(model_name='clip_vit_b32', modality='text', device='cpu') \
-      .show()
-```
-![](vect_simplified_video.png)
-![](vect_simplified_text.png)
+DataCollection(p('kids feeding and playing with the horse')).show()
 
-*Write a same pipeline with explicit inputs/outputs name specifications:*
+```
+![](text_emb_output.png)
 
 ```python
-import towhee
-
-towhee.dc['path'](['./demo_video.mp4']) \
-      .video_decode.ffmpeg['path', 'frames'](sample_type='uniform_temporal_subsample', args={'num_samples': 12}) \
-      .runas_op['frames', 'frames'](func=lambda x: [y for y in x]) \
-      .clip4clip['frames', 'vec'](model_name='clip_vit_b32', modality='video', device='cpu') \
-      .show()
-
-towhee.dc['text'](["kids feeding and playing with the horse"]) \
-      .clip4clip['text','vec'](model_name='clip_vit_b32', modality='text', device='cpu') \
-      .select['text', 'vec']() \
-      .show()
-```
+from towhee.dc2 import pipe, ops, DataCollection
 
-![](vect_explicit_video.png)
-![](vect_explicit_text.png)
+p = (
+    pipe.input('video_path')
+    .map('video_path', 'frame_gen', ops.video_decode.ffmpeg(sample_type='uniform_temporal_subsample', args={'num_samples': 12}))
+    .map('frame_gen', 'frame_list', lambda x: [y for y in x])
+    .map('frame_list', 'vec', ops.video_text_embedding.clip4clip(model_name='clip_vit_b32', modality='video', device='cuda:2'))
+    .output('video_path', 'frame_list', 'vec')
+)
+
+DataCollection(p('./demo_video.mp4')).show()
+```
+![](video_emb_output.png)
 
 
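+As a quick follow-up, the two vectors above can be compared directly for video-text retrieval. The snippet below is a minimal sketch, not part of the clip4clip operator itself: it assumes you have pulled `text_vec` and `video_vec` out of the two pipelines above (for example via `p(...).get()`) and uses plain NumPy for cosine similarity.
+
+```python
+import numpy as np
+
+def cosine_sim(a, b):
+    # Flatten both embeddings and compute their cosine similarity.
+    a, b = np.asarray(a).ravel(), np.asarray(b).ravel()
+    return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))
+
+# Hypothetical usage, with `text_vec` and `video_vec` taken from the
+# text and video pipelines above:
+# score = cosine_sim(text_vec, video_vec)
+```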
diff --git a/text_emb_output.png b/text_emb_output.png
new file mode 100644
index 0000000..458831d
Binary files /dev/null and b/text_emb_output.png differ
diff --git a/vect_explicit_text.png b/vect_explicit_text.png
deleted file mode 100644
index 9569f6f..0000000
Binary files a/vect_explicit_text.png and /dev/null differ
diff --git a/vect_explicit_video.png b/vect_explicit_video.png
deleted file mode 100644
index c534085..0000000
Binary files a/vect_explicit_video.png and /dev/null differ
diff --git a/vect_simplified_text.png b/vect_simplified_text.png
deleted file mode 100644
index 7ce111d..0000000
Binary files a/vect_simplified_text.png and /dev/null differ
diff --git a/vect_simplified_video.png b/vect_simplified_video.png
deleted file mode 100644
index a4a3235..0000000
Binary files a/vect_simplified_video.png and /dev/null differ
diff --git a/video_emb_output.png b/video_emb_output.png
new file mode 100644
index 0000000..5b41263
Binary files /dev/null and b/video_emb_output.png differ