This operator extracts features for video or text with MDMMT: Multidomain Multimodal Transformer for Video Retrieval.

## Code Example

Load video embeddings extracted from different upstream expert networks, such as video, RGB, and audio experts.

Read the text to generate a text embedding.

*Write the pipeline code*:
```python
from towhee.dc2 import pipe, ops, DataCollection

p = (
    pipe.input('text') \
        .map('text', 'vec', ops.video_text_embedding.mdmmt(modality='text', device='cuda:0')) \
        .output('text', 'vec')
)

DataCollection(p('Hello world.')).show()
```
![](text_emb_result.png)

Load video embeddings extracted from different upstream expert networks, such as video, RGB, and audio experts.
```python
import torch
from towhee.dc2 import pipe, ops, DataCollection

torch.manual_seed(42)

# `features`, `features_t` and `features_ind` are dicts keyed by expert name
# (e.g. video, RGB and audio experts) holding the extracted feature tensors,
# their timestamps and their validity masks; their construction is omitted here.
video_input_dict = {"features": features, "features_t": features_t, "features_ind": features_ind}

p = (
    pipe.input('video_input_dict') \
        .map('video_input_dict', 'vec', ops.video_text_embedding.mdmmt(modality='video', device='cuda:0')) \
        .output('video_input_dict', 'vec')
)

DataCollection(p(video_input_dict)).show()
```
![](video_emb_result.png)
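Both pipelines embed their input into the same joint space, so a text query can be scored against a video by comparing the two vectors, for example with cosine similarity. The sketch below is illustrative only: `text_vec` and `video_vec` are random placeholders standing in for the embeddings returned by the pipelines above, and the 1024-dimensional shape is an assumption rather than the operator's documented output size.

```python
import numpy as np

def cosine_similarity(a: np.ndarray, b: np.ndarray) -> float:
    """Cosine similarity between two 1-D embedding vectors."""
    return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))

# Placeholders standing in for the text/video embeddings produced above;
# in practice they would be taken from the pipeline outputs.
rng = np.random.default_rng(0)
text_vec = rng.random(1024)
video_vec = rng.random(1024)

# A higher score means the video is a better match for the text query.
print(cosine_similarity(text_vec, video_vec))
```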
<br />