
add UNIFORM_TEMPORAL_SUBSAMPLE

Signed-off-by: junjie.jiang <junjie.jiang@zilliz.com>
Branch: main
Commit: 31e0e5d491 by junjie.jiang
3 changed files:
- README.md (119)
- requirements.txt (1)
- video_decoder.py (37)

README.md

@@ -1,2 +1,119 @@
-# VPF
+# Operator: video-decoder
Author: JunJie Jiang
## Overview
Decodes video using https://github.com/NVIDIA/VideoProcessingFramework (VPF).
- Users need to install the VPF package themselves.
- GPU decoding only supports H.264, H.265 and VP9; other formats fall back to CPU decoding (see the sketch below).
- GPU-decoded output differs from CPU-decoded output by roughly 4%.
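
As a rough illustration of the codec rule above (a sketch only, assuming PyAV; `can_gpu_decode` is an illustrative helper, not part of this operator):

```python
import av

# Codecs the GPU path accepts, per the note above; PyAV names H.265 'hevc'.
GPU_CODECS = {'h264', 'hevc', 'vp9'}

def can_gpu_decode(video_path: str) -> bool:
    # Probe the container and check the first video stream's codec name.
    with av.open(video_path) as container:
        return container.streams.video[0].codec_context.name in GPU_CODECS
```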
## Interface
```python
__init__(self, gpu_id, start_time=None, end_time=None, sample_type=None, args=None)

Args:
- gpu_id: int
    id of the GPU to use, >= 0
- start_time: float
    start of the clip, in seconds
- end_time: float
    end of the clip, in seconds; frames are decoded from start_time to end_time
- sample_type: str
    one of `uniform_temporal_subsample` or `time_step_sample`
- args: dict
    if sample_type is `uniform_temporal_subsample`:
        num_samples: int
    if sample_type is `time_step_sample`:
        time_step: int
```
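
For intuition, the two modes select frames roughly as follows (a standalone sketch of the index math; `fps`, `start` and `end` are made-up example values):

```python
import numpy as np

fps, start, end = 30.0, 10.0, 20.0              # assumed 10 s clip at 30 fps
total_frames = int(round((end - start) * fps))  # 300

# uniform_temporal_subsample: num_samples evenly spaced frame indices
uniform_idx = np.linspace(0, total_frames - 1, 10).astype(int)
print(uniform_idx)  # [  0  33  66  99 132 166 199 232 265 299]

# time_step_sample: one frame every time_step seconds
step_times = np.arange(start, end, 1.0)
print(step_times)   # [10. 11. 12. 13. 14. 15. 16. 17. 18. 19.]
```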
```python
__call__(self, video_path: str)
```
Args:
- video_path: str
    supports local paths and http/https URLs

Returns:
- a generator of towhee.types.VideoFrame
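
Each yielded `VideoFrame` behaves like an ndarray with extra metadata attached; a minimal consumption sketch (the `mode`, `timestamp` and `key_frame` attribute names are inferred from the sample output in the next section):

```python
from towhee import ops

d = ops.video_decode.ffmpeg(gpu_id=0)
for frame in d('./video.mp4'):
    # shape/mode/timestamp/key_frame, as printed in the sample output below
    print(frame.shape, frame.mode, frame.timestamp, frame.key_frame)
```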
## Requirements
https://github.com/NVIDIA/VideoProcessingFramework
## How it works
```python
from towhee import pipe, ops, DataCollection

p = (
    pipe.input('video_file')
        .flat_map('video_file', 'frame',
                  ops.video_decode.ffmpeg(gpu_id=0, start_time=10.0, end_time=15.0,
                                          sample_type='time_step_sample', args={'time_step': 1}))
        .output('frame')
)

DataCollection(p('./video.mp4')).show(limit=1)
```
```python
from towhee import ops

video_path = './video.mp4'  # local path or http/https URL

d = ops.video_decode.ffmpeg(gpu_id=0, start_time=10.0, end_time=20.0,
                            sample_type='uniform_temporal_subsample', args={'num_samples': 10})
for frame in d(video_path):
    print(frame)

print('#' * 50)

d = ops.video_decode.ffmpeg(gpu_id=0, start_time=10.0, end_time=20.0,
                            sample_type='time_step_sample', args={'time_step': 1})
for frame in d(video_path):
    print(frame)
```

Result:

```
VideoFrame shape: (360, 480, 3), mode: RGB, timestamp: 10010, key_frame: 1
VideoFrame shape: (360, 480, 3), mode: RGB, timestamp: 11078, key_frame: 0
VideoFrame shape: (360, 480, 3), mode: RGB, timestamp: 12145, key_frame: 0
VideoFrame shape: (360, 480, 3), mode: RGB, timestamp: 13280, key_frame: 0
VideoFrame shape: (360, 480, 3), mode: RGB, timestamp: 14348, key_frame: 0
VideoFrame shape: (360, 480, 3), mode: RGB, timestamp: 15482, key_frame: 0
VideoFrame shape: (360, 480, 3), mode: RGB, timestamp: 16550, key_frame: 0
VideoFrame shape: (360, 480, 3), mode: RGB, timestamp: 17684, key_frame: 0
VideoFrame shape: (360, 480, 3), mode: RGB, timestamp: 18752, key_frame: 0
VideoFrame shape: (360, 480, 3), mode: RGB, timestamp: 19887, key_frame: 0
##################################################
VideoFrame shape: (360, 480, 3), mode: RGB, timestamp: 10010, key_frame: 1
VideoFrame shape: (360, 480, 3), mode: RGB, timestamp: 11011, key_frame: 0
VideoFrame shape: (360, 480, 3), mode: RGB, timestamp: 12012, key_frame: 0
VideoFrame shape: (360, 480, 3), mode: RGB, timestamp: 13013, key_frame: 0
VideoFrame shape: (360, 480, 3), mode: RGB, timestamp: 14014, key_frame: 0
VideoFrame shape: (360, 480, 3), mode: RGB, timestamp: 15015, key_frame: 1
VideoFrame shape: (360, 480, 3), mode: RGB, timestamp: 16015, key_frame: 0
VideoFrame shape: (360, 480, 3), mode: RGB, timestamp: 17017, key_frame: 0
VideoFrame shape: (360, 480, 3), mode: RGB, timestamp: 18018, key_frame: 0
VideoFrame shape: (360, 480, 3), mode: RGB, timestamp: 19019, key_frame: 0
```

requirements.txt

@@ -1 +1,2 @@
av
+numpy

video_decoder.py

@@ -1,5 +1,6 @@
import logging
import av
import numpy as np
from towhee.operator.base import PyOperator
from cpu_decode import PyAVDecode
@@ -31,7 +32,8 @@ class VideoDecoder(PyOperator):
        super().__init__()
        self._gpu_id = gpu_id
        self._start_time = start_time if start_time is not None else 0
-        self._end_time = end_time * 1000 if end_time is not None else None
+        self._end_time = end_time if end_time is not None else None
+        self._end_time_ms = end_time * 1000 if end_time is not None else None
        self._sample_type = sample_type.lower() if sample_type else None
        self._args = args if args is not None else {}
@@ -61,12 +63,34 @@ class VideoDecoder(PyOperator):
            logger.warn('GPU decode failed, only supports [h264,h265,vp9] format, will use CPU')
            yield from self._cpu_time_step_decode(video_path, time_step)
    def _uniform_temporal_subsample(self, frames, num_samples, total_frames):
        # num_samples evenly spaced frame indices over the whole clip;
        # indices repeat when num_samples > total_frames.
        indexs = np.linspace(0, total_frames - 1, num_samples).astype('int')
        cur_index = 0
        count = 0
        for frame in frames:
            if cur_index >= len(indexs):
                return
            while cur_index < len(indexs) and indexs[cur_index] <= count:
                cur_index += 1
                yield frame
            count += 1
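
A quick standalone check of the selection walk above (illustrative only, with made-up frame counts): it picks exactly `num_samples` evenly spaced frame numbers, and repeats frames when `num_samples` exceeds `total_frames`.

```python
import numpy as np

def pick(total_frames, num_samples):
    # Same walk as _uniform_temporal_subsample, recording picked frame
    # numbers instead of yielding decoded frames.
    indexs = np.linspace(0, total_frames - 1, num_samples).astype('int')
    cur_index, picked = 0, []
    for count in range(total_frames):
        if cur_index >= len(indexs):
            break
        while cur_index < len(indexs) and indexs[cur_index] <= count:
            cur_index += 1
            picked.append(count)
    return picked

print(pick(300, 10))  # [0, 33, 66, 99, 132, 166, 199, 232, 265, 299]
print(pick(3, 6))     # [0, 0, 0, 1, 1, 2] -- frames repeat
```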
    def _filter(self, frames):
        for f in frames:
-            if self._end_time and f.timestamp > self._end_time:
+            if self._end_time_ms and f.timestamp > self._end_time_ms:
                break
            yield f
    def frame_nums(self, video_path):
        with av.open(video_path) as c:
            video = c.streams.video[0]
            start = self._start_time if self._start_time is not None else 0
            # container duration is reported in microseconds
            duration = c.duration / 1000000
            end = self._end_time if self._end_time and self._end_time <= duration else duration
            return int(round((end - start) * video.average_rate))
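
For instance, assuming a 30 fps stream and the README's 10 s window (made-up numbers), the estimate works out as:

```python
start, end, fps = 10.0, 20.0, 30.0       # assumed example values
print(int(round((end - start) * fps)))   # 300 -- frames fed to the subsampler
```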
    def __call__(self, video_path: str):
        if self._sample_type is None:
            yield from self._filter(self.decode(video_path))
@@ -75,7 +99,10 @@ class VideoDecoder(PyOperator):
            if time_step is None:
                raise RuntimeError('time_step_sample sample lost args time_step')
            yield from self._filter(self.time_step_decode(video_path, time_step))
-        elif self._sample_type == SAMPLE_TYPE.TIME_STEP_SAMPLE:
-            pass
+        elif self._sample_type == SAMPLE_TYPE.UNIFORM_TEMPORAL_SUBSAMPLE:
+            num_samples = self._args.get('num_samples')
+            if num_samples is None:
+                raise RuntimeError('uniform_temporal_subsample lost args num_samples')
+            yield from self._uniform_temporal_subsample(self.decode(video_path), num_samples, self.frame_nums(video_path))
        else:
            raise RuntimeError('Unknown sample type, only supports: [%s|%s]' % (SAMPLE_TYPE.TIME_STEP_SAMPLE, SAMPLE_TYPE.UNIFORM_TEMPORAL_SUBSAMPLE))
