logo
Browse Source

update

main
video-decode 2 years ago
parent
commit
9c2ed27ce0
  1. 50
      README.md
  2. 38
      video_decoder.py

50
README.md

@ -23,10 +23,15 @@ Args:
- sample_type: str
uniform_temporal_subsample
time_step_sample
- args: dict
sample_type is `uniform_temporal_subsample`
num_samples: int
sample_type is `time_step_sample`
time_step: int
```
@ -41,7 +46,7 @@ Args:
Returns:
- Image
- towhee.types.VideoImage
## Requirements
av
@ -56,18 +61,37 @@ d = ops.video_decode.ffmpeg(start_time=10.0, end_time=20.0, sample_type='uniform
for frame in d(video_path):
print(frame)
print('#' * 50)
d = ops.video_decode.ffmpeg(start_time=10.0, end_time=20.0, sample_type='time_step_sample', args={'time_step': 1})
for frame in d(video_path):
print(frame)
result:
Outputs(image=<towhee.types.image.Image object at 0x7fa444776310>, TIMESTAMP=10010)
Outputs(image=<towhee.types.image.Image object at 0x7fa444776700>, TIMESTAMP=11078)
Outputs(image=<towhee.types.image.Image object at 0x7fa444776310>, TIMESTAMP=12145)
Outputs(image=<towhee.types.image.Image object at 0x7fa444776700>, TIMESTAMP=13280)
Outputs(image=<towhee.types.image.Image object at 0x7fa444776310>, TIMESTAMP=14348)
Outputs(image=<towhee.types.image.Image object at 0x7fa444776700>, TIMESTAMP=15482)
Outputs(image=<towhee.types.image.Image object at 0x7fa444776310>, TIMESTAMP=16550)
Outputs(image=<towhee.types.image.Image object at 0x7fa444776700>, TIMESTAMP=17684)
Outputs(image=<towhee.types.image.Image object at 0x7fa444776310>, TIMESTAMP=18752)
Outputs(image=<towhee.types.image.Image object at 0x7fa444776700>, TIMESTAMP=19887)
```
VideoFrame shape: (360, 480, 3), mode: RGB, timestamp: 10010, key_frame: 1
VideoFrame shape: (360, 480, 3), mode: RGB, timestamp: 11078, key_frame: 0
VideoFrame shape: (360, 480, 3), mode: RGB, timestamp: 12145, key_frame: 0
VideoFrame shape: (360, 480, 3), mode: RGB, timestamp: 13280, key_frame: 0
VideoFrame shape: (360, 480, 3), mode: RGB, timestamp: 14348, key_frame: 0
VideoFrame shape: (360, 480, 3), mode: RGB, timestamp: 15482, key_frame: 0
VideoFrame shape: (360, 480, 3), mode: RGB, timestamp: 16550, key_frame: 0
VideoFrame shape: (360, 480, 3), mode: RGB, timestamp: 17684, key_frame: 0
VideoFrame shape: (360, 480, 3), mode: RGB, timestamp: 18752, key_frame: 0
VideoFrame shape: (360, 480, 3), mode: RGB, timestamp: 19887, key_frame: 0
##################################################
VideoFrame shape: (360, 480, 3), mode: RGB, timestamp: 10010, key_frame: 1
VideoFrame shape: (360, 480, 3), mode: RGB, timestamp: 11011, key_frame: 0
VideoFrame shape: (360, 480, 3), mode: RGB, timestamp: 12012, key_frame: 0
VideoFrame shape: (360, 480, 3), mode: RGB, timestamp: 13013, key_frame: 0
VideoFrame shape: (360, 480, 3), mode: RGB, timestamp: 14014, key_frame: 0
VideoFrame shape: (360, 480, 3), mode: RGB, timestamp: 15015, key_frame: 1
VideoFrame shape: (360, 480, 3), mode: RGB, timestamp: 16015, key_frame: 0
VideoFrame shape: (360, 480, 3), mode: RGB, timestamp: 17017, key_frame: 0
VideoFrame shape: (360, 480, 3), mode: RGB, timestamp: 18018, key_frame: 0
VideoFrame shape: (360, 480, 3), mode: RGB, timestamp: 19019, key_frame: 0
## Reference

38
video_decoder.py

@ -8,16 +8,16 @@ import logging
import av
import numpy as np
from towhee.types.image import Image
from towhee.types.video_frame import VideoFrame
from towhee.operator.base import PyOperator
VideoOutput = NamedTuple("Outputs", [("image", Image), ("TIMESTAMP", int)])
logger = logging.getLogger()
# String names of the frame-sampling strategies; get_sample compares the
# lower-cased sample_type against these values.
class SAMPLE_TYPE:
# Selects _uniform_temporal_subsample in get_sample.
UNIFORM_TEMPORAL_SUBSAMPLE = 'uniform_temporal_subsample'
# Selects _time_step_sample in get_sample.
TIME_STEP_SAMPLE = 'time_step_sample'
class VideoDecoder(PyOperator):
@ -35,17 +35,22 @@ class VideoDecoder(PyOperator):
def get_sample(self, stream):
    """Return the callable that samples frames for ``self._sample_type``.

    Args:
        stream: the video stream being decoded; its duration (via
            ``VideoDecoder.get_video_duration``) and ``rate`` are used to
            resolve the sampling window.

    Returns:
        A callable taking a frame iterator and yielding the selected frames:
        ``self._no_sample`` when no sample type is configured, otherwise a
        ``partial`` of the matching sampling method.

    Raises:
        RuntimeError: if ``self._sample_type`` is not a known sample type.
    """
    if self._sample_type is None:
        return self._no_sample

    sample_type = self._sample_type.lower()
    known_types = (
        SAMPLE_TYPE.UNIFORM_TEMPORAL_SUBSAMPLE,
        SAMPLE_TYPE.TIME_STEP_SAMPLE,
    )
    if sample_type not in known_types:
        raise RuntimeError('Unkown sample type: %s' % self._sample_type)

    # Both strategies sample inside the same window: start defaults to 0 and
    # end is clamped to the stream duration when unset or past the end.
    duration = VideoDecoder.get_video_duration(stream)
    start_time = self._start_time if self._start_time is not None else 0
    if self._end_time is not None and self._end_time <= duration:
        end_time = self._end_time
    else:
        end_time = duration

    if sample_type == SAMPLE_TYPE.UNIFORM_TEMPORAL_SUBSAMPLE:
        # Frame count in the window, derived from the stream rate.
        nums = int(stream.rate * (end_time - start_time))
        return partial(self._uniform_temporal_subsample, total_frames=nums)

    return partial(self._time_step_sample, start_time=start_time, end_time=end_time)
def _no_smaple(self, frame_iter):
def _no_sample(self, frame_iter):
if self._end_time is None:
yield from frame_iter
else:
@ -53,6 +58,20 @@ class VideoDecoder(PyOperator):
frame.time < self._end_time
yield frame
def _time_step_sample(self, frame_iter, start_time, end_time):
    """Yield one frame per ``time_step`` seconds between start and end.

    Args:
        frame_iter: iterable of decoded frames exposing a ``time`` attribute
            (seconds), in increasing time order.
        start_time: time (seconds) of the first sampling point.
        end_time: time (seconds) at which sampling stops; sampling points at
            or beyond it are not emitted.

    Yields:
        The first frame whose ``time`` reaches each successive sampling point
        ``start_time``, ``start_time + time_step``, ...

    Raises:
        RuntimeError: if ``self._args`` has no ``time_step`` entry.
    """
    time_step = self._args.get('time_step')
    if time_step is None:
        raise RuntimeError('time_step_sample sample lost args time_step')

    time_index = start_time
    for frame in frame_iter:
        # Compare against the resolved ``end_time`` argument rather than
        # ``self._end_time``: the attribute may be None (no end configured),
        # which cannot be compared with a number.
        if time_index >= end_time:
            break
        if frame.time >= time_index:
            time_index += time_step
            yield frame
def _uniform_temporal_subsample(self, frame_iter, total_frames):
num_samples = self._args.get('num_samples')
if num_samples is None:
@ -101,9 +120,6 @@ class VideoDecoder(PyOperator):
def __call__(self, video_path: str) -> Generator:
with av.open(video_path) as container:
stream = container.streams.video[0]
width = stream.width
height = stream.height
channel = 3
image_format = 'RGB'
frame_gen = VideoDecoder._decdoe(stream, container, self._start_time)
@ -111,8 +127,8 @@ class VideoDecoder(PyOperator):
for frame in sample_function(frame_gen):
timestamp = int(frame.time * 1000)
ndarray = frame.to_ndarray(format='rgb24')
img = Image(ndarray.tobytes(), width, height, channel, image_format, None, key_frame=frame.key_frame)
yield VideoOutput(img, timestamp)
img = VideoFrame(ndarray, image_format, timestamp, frame.key_frame)
yield img
@ -123,10 +139,10 @@ class VideoDecoder(PyOperator):
# video_path3 = "/home/zhangchen/zhangchen_workspace/dataset/MSRVTT/msrvtt_data/MSRVTT_Videos/video9991.mp4"
# def d(video_path):
# d = VideoDecoder(10, 11, 'uniform_temporal_subsample', {'num_samples': 30})
# d = VideoDecoder(10, 17, 'time_step_sample', {'time_step': 1})
# fs = d(video_path)
# for f in fs:
# print(f.TIMESTAMP)
# print(f.mode, f.key_frame, f.timestamp)
# d(video_path1)
# print('#' * 100)

Loading…
Cancel
Save