|
@@ -8,16 +8,16 @@ import logging
 import av
 import numpy as np
 
-from towhee.types.image import Image
+from towhee.types.video_frame import VideoFrame
 from towhee.operator.base import PyOperator
 
-VideoOutput = NamedTuple("Outputs", [("image", Image), ("TIMESTAMP", int)])
 
 logger = logging.getLogger()
 
 
 class SAMPLE_TYPE:
     UNIFORM_TEMPORAL_SUBSAMPLE = 'uniform_temporal_subsample'
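+    # sample frames at a fixed time interval (in seconds) rather than a fixed count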
+    TIME_STEP_SAMPLE = 'time_step_sample'
 
 
 class VideoDecoder(PyOperator):
@@ -35,17 +35,22 @@ class VideoDecoder(PyOperator):
     def get_sample(self, stream):
         if self._sample_type is None:
-            return self._no_smaple
+            return self._no_sample
         elif self._sample_type.lower() == SAMPLE_TYPE.UNIFORM_TEMPORAL_SUBSAMPLE:
             duration = VideoDecoder.get_video_duration(stream)
             end_time = self._end_time if self._end_time is not None and self._end_time <= duration else duration
             start_time = self._start_time if self._start_time is not None else 0
             nums = int(stream.rate * (end_time - start_time))
             return partial(self._uniform_temporal_subsample, total_frames=nums)
+        elif self._sample_type.lower() == SAMPLE_TYPE.TIME_STEP_SAMPLE:
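+            # clamp the requested [start_time, end_time] window to the clip duration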
+            duration = VideoDecoder.get_video_duration(stream)
+            start_time = self._start_time if self._start_time is not None else 0
+            end_time = self._end_time if self._end_time is not None and self._end_time <= duration else duration
+            return partial(self._time_step_sample, start_time=start_time, end_time=end_time)
         else:
             raise RuntimeError('Unkown sample type: %s' % self._sample_type)
 
-    def _no_smaple(self, frame_iter):
+    def _no_sample(self, frame_iter):
         if self._end_time is None:
             yield from frame_iter
         else:
@@ -53,6 +58,20 @@ class VideoDecoder(PyOperator):
                 frame.time < self._end_time
                 yield frame
 
+    def _time_step_sample(self, frame_iter, start_time, end_time):
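+        # yields the first frame at or past each step, i.e. roughly one frame
+        # every time_step seconds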
+        time_step = self._args.get('time_step')
+        if time_step is None:
+            raise RuntimeError('time_step_sample requires the time_step arg')
+
+        time_index = start_time
+        for frame in frame_iter:
+            if time_index >= end_time:
+                break
+
+            if frame.time >= time_index:
+                time_index += time_step
+                yield frame
+
     def _uniform_temporal_subsample(self, frame_iter, total_frames):
         num_samples = self._args.get('num_samples')
         if num_samples is None:
@@ -101,9 +120,6 @@ class VideoDecoder(PyOperator):
     def __call__(self, video_path: str) -> Generator:
         with av.open(video_path) as container:
             stream = container.streams.video[0]
-            width = stream.width
-            height = stream.height
-            channel = 3
             image_format = 'RGB'
 
             frame_gen = VideoDecoder._decdoe(stream, container, self._start_time)
@@ -111,8 +127,8 @@ class VideoDecoder(PyOperator):
             for frame in sample_function(frame_gen):
                 timestamp = int(frame.time * 1000)
                 ndarray = frame.to_ndarray(format='rgb24')
-                img = Image(ndarray.tobytes(), width, height, channel, image_format, None, key_frame=frame.key_frame)
-                yield VideoOutput(img, timestamp)
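+                # VideoFrame packs the RGB ndarray with its mode, timestamp (ms) and key_frame flag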
+                img = VideoFrame(ndarray, image_format, timestamp, frame.key_frame)
+                yield img
 
 
@@ -123,10 +139,10 @@ class VideoDecoder(PyOperator):
 # video_path3 = "/home/zhangchen/zhangchen_workspace/dataset/MSRVTT/msrvtt_data/MSRVTT_Videos/video9991.mp4"
 
 # def d(video_path):
-#     d = VideoDecoder(10, 11, 'uniform_temporal_subsample', {'num_samples': 30})
+#     d = VideoDecoder(10, 17, 'time_step_sample', {'time_step': 1})
 #     fs = d(video_path)
 #     for f in fs:
-#         print(f.TIMESTAMP)
+#         print(f.mode, f.key_frame, f.timestamp)
 
 # d(video_path1)
 # print('#' * 100)