
update readme with dc2

main · ChengZi, 2 years ago · parent commit df99d701aa
8 changed files:

1. README.md (47 lines changed)
2. drl.py (2 lines changed)
3. text_emb_result.png (BIN)
4. vect_explicit_text.png (BIN)
5. vect_explicit_video.png (BIN)
6. vect_simplified_text.png (BIN)
7. vect_simplified_video.png (BIN)
8. video_emb_result.png (BIN)

README.md (47 lines changed)
````diff
@@ -25,42 +25,35 @@ Read the text 'kids feeding and playing with the horse' to generate a text embed
 *Write the pipeline in simplified style*:
 
 ```python
-import towhee
-
-towhee.dc(['./demo_video.mp4']) \
-      .video_decode.ffmpeg(sample_type='uniform_temporal_subsample', args={'num_samples': 12}) \
-      .runas_op(func=lambda x: [y for y in x]) \
-      .video_text_embedding.drl(base_encoder='clip_vit_b32', modality='video', device='cpu') \
-      .show()
-
-towhee.dc(['kids feeding and playing with the horse']) \
-      .video_text_embedding.drl(base_encoder='clip_vit_b32', modality='text', device='cpu') \
-      .show()
+from towhee.dc2 import pipe, ops, DataCollection
+
+p = (
+    pipe.input('text') \
+        .map('text', 'vec', ops.video_text_embedding.drl(base_encoder='clip_vit_b32', modality='text', device='cuda:0')) \
+        .output('text', 'vec')
+)
+
+DataCollection(p('kids feeding and playing with the horse')).show()
 ```
 
-![](vect_simplified_video.png)
-![](vect_simplified_text.png)
+![](text_emb_result.png)
 
 *Write a same pipeline with explicit inputs/outputs name specifications:*
 
 ```python
-import towhee
-
-towhee.dc['path'](['./demo_video.mp4']) \
-      .video_decode.ffmpeg['path', 'frames'](sample_type='uniform_temporal_subsample', args={'num_samples': 12}) \
-      .runas_op['frames', 'frames'](func=lambda x: [y for y in x]) \
-      .video_text_embedding.drl['frames', 'vec'](base_encoder='clip_vit_b32', modality='video', device='cpu') \
-      .show(formatter={'path': 'video_path'})
-
-towhee.dc['text'](['kids feeding and playing with the horse']) \
-      .video_text_embedding.drl['text','vec'](base_encoder='clip_vit_b32', modality='text', device='cpu') \
-      .select['text', 'vec']() \
-      .show()
+from towhee.dc2 import pipe, ops, DataCollection
+
+p = (
+    pipe.input('video_path') \
+        .map('video_path', 'flame_gen', ops.video_decode.ffmpeg(sample_type='uniform_temporal_subsample', args={'num_samples': 12})) \
+        .map('flame_gen', 'flame_list', lambda x: [y for y in x]) \
+        .map('flame_list', 'vec', ops.video_text_embedding.drl(base_encoder='clip_vit_b32', modality='video', device='cuda:0')) \
+        .output('video_path', 'flame_list', 'vec')
+)
+
+DataCollection(p('./demo_video.mp4')).show()
 ```
 
-![](vect_explicit_video.png)
-![](vect_explicit_text.png)
+![](video_emb_result.png)
 
 <br />
````
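The hunk above migrates the README from the chained `towhee.dc` API to the `towhee.dc2` `pipe`/`ops` graph API. As a minimal sketch of the migrated usage, the snippet below wires up both branches shown in the diff and runs them end to end. It assumes a towhee build that ships `towhee.dc2` together with the `video_text_embedding.drl` and `video_decode.ffmpeg` operators; the column names `frames`/`vec` are illustrative renamings of the diff's `flame_*` columns, and `device='cpu'` stands in for the diff's `cuda:0`.

```python
# Sketch of both migrated dc2 pipelines, assuming towhee.dc2 plus the
# video_text_embedding.drl and video_decode.ffmpeg operators are available.
# Column names 'frames'/'vec' are illustrative; device='cpu' replaces 'cuda:0'.
from towhee.dc2 import pipe, ops, DataCollection

# Text branch: raw text in, DRL text embedding out.
text_pipe = (
    pipe.input('text')
        .map('text', 'vec', ops.video_text_embedding.drl(
            base_encoder='clip_vit_b32', modality='text', device='cpu'))
        .output('text', 'vec')
)

# Video branch: decode, uniformly subsample 12 frames, embed the clip.
video_pipe = (
    pipe.input('video_path')
        .map('video_path', 'frames', ops.video_decode.ffmpeg(
            sample_type='uniform_temporal_subsample', args={'num_samples': 12}))
        .map('frames', 'frames', lambda x: [y for y in x])  # drain the frame generator
        .map('frames', 'vec', ops.video_text_embedding.drl(
            base_encoder='clip_vit_b32', modality='video', device='cpu'))
        .output('video_path', 'vec')
)

DataCollection(text_pipe('kids feeding and playing with the horse')).show()
DataCollection(video_pipe('./demo_video.mp4')).show()
```

Since both branches use the same `clip_vit_b32` base encoder, the text and video vectors land in a shared embedding space, which is what makes text-to-video retrieval by vector similarity possible.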

drl.py (2 lines changed)
````diff
@@ -41,7 +41,7 @@ class DRL(NNOperator):
             self.device = "cuda" if torch.cuda.is_available() else "cpu"
         else:
             self.device = device
-        self.model = drl.create_model(base_encoder=base_encoder, pretrained=True, cdcr=0, weights_path=weight_path)
+        self.model = drl.create_model(base_encoder=base_encoder, pretrained=True, cdcr=0, weights_path=weight_path, device=device)
         self.tokenize = clip4clip.SimpleTokenizer()
         self.tfms = transforms.Compose([
````
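The drl.py change is a one-line fix: the operator already resolves a target device in its constructor, but `drl.create_model` previously received no `device` argument, so the model could be created on a different device than the one the operator intends to use. Below is a minimal sketch of the resolve-then-forward pattern; only `resolve_device` is runnable code here, and the `create_model` call is quoted from the hunk as a comment since its internals are outside this diff.

```python
# Sketch of the device handling the fix completes. The fallback mirrors the
# hunk's context: prefer CUDA when available, otherwise fall back to CPU.
import torch

def resolve_device(device=None):
    if device is None:
        return "cuda" if torch.cuda.is_available() else "cpu"
    return device

device = resolve_device()  # 'cuda' on a GPU machine, else 'cpu'
# Forwarding the device keeps model weights and operator inputs on the same
# device, avoiding cross-device tensor errors at inference time:
# self.model = drl.create_model(base_encoder=base_encoder, pretrained=True,
#                               cdcr=0, weights_path=weight_path, device=device)
```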

Binary files (contents not shown):

| File                      | Change  | Size    |
|---------------------------|---------|---------|
| text_emb_result.png       | added   | 15 KiB  |
| vect_explicit_text.png    | removed | 15 KiB  |
| vect_explicit_video.png   | removed | 585 KiB |
| vect_simplified_text.png  | removed | 18 KiB  |
| vect_simplified_video.png | removed | 16 KiB  |
| video_emb_result.png      | added   | 30 KiB  |
