diff --git a/video_decoder.py b/video_decoder.py index 2ee17de..08717d1 100644 --- a/video_decoder.py +++ b/video_decoder.py @@ -33,17 +33,15 @@ class VideoDecoder(PyOperator): self._sample_type = sample_type self._args = args if args is not None else {} - def get_sample(self, stream): + def get_sample(self, stream, duration): if self._sample_type is None: return self._no_sample elif self._sample_type.lower() == SAMPLE_TYPE.UNIFORM_TEMPORAL_SUBSAMPLE: - duration = VideoDecoder.get_video_duration(stream) end_time = self._end_time if self._end_time is not None and self._end_time <= duration else duration start_time = self._start_time if self._start_time is not None else 0 nums = int(stream.rate * (end_time - start_time)) return partial(self._uniform_temporal_subsample, total_frames=nums) elif self._sample_type.lower() == SAMPLE_TYPE.TIME_STEP_SAMPLE: - duration = VideoDecoder.get_video_duration(stream) start_time = self._start_time if self._start_time is not None else 0 end_time = self._end_time if self._end_time is not None and self._end_time <= duration else duration return partial(self._time_step_sample, start_time=start_time, end_time=end_time) @@ -108,22 +106,25 @@ class VideoDecoder(PyOperator): continue yield frame - def get_video_duration(video): - if video.duration is not None: - return float(video.duration * video.time_base) - elif video.metadata.get('DURATION') is not None: - time_str = video.metadata['DURATION'] - return reduce(lambda x, y: float(x) * 60 + float(y), time_str.split(':')) - else: - return None + # @staticmethod + # def get_video_duration(video): + # print(video) + # if video.duration is not None: + # return float(video.duration * video.time_base) + # elif video.metadata.get('DURATION') is not None: + # time_str = video.metadata['DURATION'] + # return reduce(lambda x, y: float(x) * 60 + float(y), time_str.split(':')) + # else: + # return None def __call__(self, video_path: str) -> Generator: with av.open(video_path) as container: stream = container.streams.video[0] + duration = float(container.duration) / 1000000 image_format = 'RGB' frame_gen = VideoDecoder._decdoe(stream, container, self._start_time) - sample_function = self.get_sample(stream) + sample_function = self.get_sample(stream, duration) for frame in sample_function(frame_gen): timestamp = int(frame.time * 1000) ndarray = frame.to_ndarray(format='rgb24') @@ -136,7 +137,8 @@ class VideoDecoder(PyOperator): # video_path = "/home/junjie.jiangjjj/workspace/video/[The Rock] [1996] [Trailer] [#2]-16-l-rO5B64.mkv" # video_path1 = "/home/junjie.jiangjjj/workspace/video/'Eagle Eye' Trailer (2008)-_wkqo_Rd3_Q.mp4" # video_path2 = "/home/junjie.jiangjjj/workspace/video/2001 - A Space Odyssey - Trailer [1968] HD-Z2UWOeBcsJI.webm" -# video_path3 = "/home/zhangchen/zhangchen_workspace/dataset/MSRVTT/msrvtt_data/MSRVTT_Videos/video9991.mp4" +# # video_path3 = "/home/zhangchen/zhangchen_workspace/dataset/MSRVTT/msrvtt_data/MSRVTT_Videos/video9991.mp4" +# video_path3 = "/home/junjie.jiangjjj/e2adc784b83446ae775f698b9d17c9fd392b2f75.flv" # def d(video_path): # d = VideoDecoder(10, 17, 'time_step_sample', {'time_step': 1}) @@ -144,7 +146,12 @@ class VideoDecoder(PyOperator): # for f in fs: # print(f.mode, f.key_frame, f.timestamp) -# d(video_path1) -# print('#' * 100) -# d(video_path2) +# d(video_path) +# # print('#' * 100) + +# # with av.open(video_path) as container: +# # print(container.duration) +# # stream = container.streams.video[0] +# # print(stream.time_base) +