logo
Browse Source

l

Signed-off-by: xujinling <jinling.xu@zilliz.com>
main
xujinling 3 years ago
parent
commit
c051de2314
  1. 3
      README.md
  2. 4
      bridge_former.py
  3. BIN
      result1.png
  4. BIN
      result2.png
  5. BIN
      result3.png
  6. BIN
      result4.png

3
README.md

@@ -47,8 +47,7 @@ towhee.dc(['kids feeding and playing with the horse']) \
 import towhee
 towhee.dc['path'](['./demo_video.mp4']) \
-.video_decode.ffmpeg['path', 'frames'](sample_type='uniform_temporal_subsample', args={'num_samples': 4}) \
-.runas_op['frames', 'frames'](func=lambda x: [y for y in x]) \
+.video_decode.ffmpeg['path', 'frames']() \
 .video_text_embedding.bridge_former['frames', 'vec'](model_name='frozen_model', modality='video') \
 .select['path', 'vec']() \
 .show(formatter={'path': 'video_path'})

4
bridge_former.py

@@ -82,12 +82,12 @@ class BridgeFormer(NNOperator):
         return vec
     def _inference_from_text(self, text: List[str]):
-        text_data = self.tokenizer(text, return_tensors='pt')
-        text_data = text_data.to(self.device)
         if self.model_name == "clip_initialized_model":
+            text_data = self.tokenizer(text, return_tensors='pt', padding='max_length', max_length=77).to(self.device)
             text_features = self.model.encode_text(text_data["input_ids"])
         else:
+            text_data = self.tokenizer(text, return_tensors='pt').to(self.device)
             text_features = self.model.compute_text(text_data)
         return text_features.squeeze(0).detach().flatten().cpu().numpy()

BIN
result1.png

Binary file not shown.

After

Width:  |  Height:  |  Size: 12 KiB

BIN
result2.png

Binary file not shown.

After

Width:  |  Height:  |  Size: 12 KiB

BIN
result3.png

Binary file not shown.

After

Width:  |  Height:  |  Size: 115 KiB

BIN
result4.png

Binary file not shown.

After

Width:  |  Height:  |  Size: 6.7 KiB

Loading…
Cancel
Save