logo
Browse Source

update

main
video-decode 2 years ago
parent
commit
c26bca59c5
  1. 44
      README.md
  2. 5
      __init__.py
  3. 1
      requirements.txt
  4. 134
      video_decoder.py
  5. 10
      video_decoder.yaml

44
README.md

@ -1,2 +1,44 @@
# ffmpeg
# Operator: video-decoder
Author: JunJie Jiang
## Overview
## Interface
```python
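__init__(self, start_time=None, end_time=None, sample_type=None, args=None)
```
Args:
- start_time:
Time in seconds at which decoding starts; `None` starts from the beginning of the video.
- end_time:
Time in seconds at which decoding stops; `None` decodes to the end of the video.
- sample_type:
`None`: Return all image frames between start_time and end_time.
`'uniform_temporal_subsample'`: Return `num_samples` frames spread evenly between start_time and end_time.
- args:
Dict of extra arguments for the sampler; `uniform_temporal_subsample` requires `{'num_samples': int}`.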
```python
__call__(self, video_path: str)
```
Args:
- video_path:
Path of the video to decode; local paths and http/https URLs are supported.
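Returns:
- A generator of `(image, TIMESTAMP)` pairs, where `image` is an RGB Image and `TIMESTAMP` is the frame time in milliseconds.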
## Requirements
av
## How it works
## Reference

5
__init__.py

@ -0,0 +1,5 @@
from .video_decoder import VideoDecoder
def ffmpeg(start_time=None, end_time=None, sample_type=None, args=None):
    '''
    Build a VideoDecoder operator.

    All four arguments are forwarded positionally, unchanged, to the
    VideoDecoder constructor.
    '''
    decoder = VideoDecoder(start_time, end_time, sample_type, args)
    return decoder

1
requirements.txt

@ -0,0 +1 @@
av

134
video_decoder.py

@ -0,0 +1,134 @@
from typing import Generator, NamedTuple
from functools import partial, reduce
import math
import logging
import av
import numpy as np
from towhee.types.image import Image
from towhee.operator.base import PyOperator
# Record yielded for every decoded frame: the frame as an Image plus an
# integer TIMESTAMP.
VideoOutput = NamedTuple("Outputs", [("image", Image), ("TIMESTAMP", int)])

# Use a module-scoped logger instead of the root logger so that records are
# attributed to this module and can be filtered or configured on their own;
# they still propagate to handlers installed on the root logger.
logger = logging.getLogger(__name__)
class SAMPLE_TYPE:
    '''
    Name constants for the frame sampling strategies.
    '''
    UNIFORM_TEMPORAL_SUBSAMPLE = "uniform_temporal_subsample"
class VideoDecoder(PyOperator):
    '''
    VideoDecoder

    Decode a video and yield its frames as images with RGB format.

    Args:
        start_time: time in seconds at which decoding starts, None means
            the beginning of the video.
        end_time: time in seconds at which decoding stops, None means the
            end of the video.
        sample_type: None to return every frame, or
            'uniform_temporal_subsample' to return a fixed number of
            evenly spread frames.
        args: dict of sampler arguments, 'uniform_temporal_subsample'
            reads the key 'num_samples' from it.
    '''

    def __init__(self, start_time=None, end_time=None, sample_type=None, args=None) -> None:
        super().__init__()
        self._start_time = start_time
        self._end_time = end_time
        self._sample_type = sample_type
        # Never share a mutable default between instances.
        self._args = args if args is not None else {}

    def get_sample(self, stream):
        '''
        Return the callable that filters the decoded frame iterator.

        Args:
            stream: the video stream that is being decoded.

        Returns:
            A callable taking a frame iterator and returning a frame generator.

        Raises:
            RuntimeError: the sample type is unknown, or the sampling window
                can not be determined because neither end_time nor the
                video duration is available.
        '''
        if self._sample_type is None:
            return self._no_smaple

        if self._sample_type.lower() == SAMPLE_TYPE.UNIFORM_TEMPORAL_SUBSAMPLE:
            duration = VideoDecoder.get_video_duration(stream)
            end_time = self._end_time
            # Clamp end_time to the video duration when the duration is known;
            # an unknown duration (None) must not be compared with a number.
            if duration is not None and (end_time is None or end_time > duration):
                end_time = duration
            if end_time is None:
                raise RuntimeError('Can not get the duration of the video, please set end_time')
            start_time = self._start_time if self._start_time is not None else 0
            nums = int(stream.rate * (end_time - start_time))
            return partial(self._uniform_temporal_subsample, total_frames=nums)

        raise RuntimeError('Unkown sample type: %s' % self._sample_type)

    def _no_smaple(self, frame_iter):
        '''
        Yield every frame, stopping at end_time when one is set.
        '''
        if self._end_time is None:
            yield from frame_iter
            return

        for frame in frame_iter:
            # Assumes frames arrive in increasing time order, so the first
            # frame at or past end_time ends the stream.
            if frame.time >= self._end_time:
                break
            yield frame

    def _uniform_temporal_subsample(self, frame_iter, total_frames):
        '''
        Yield args['num_samples'] frames spread evenly over total_frames.

        A frame is yielded once for every sample index it covers, so the same
        frame is repeated when num_samples is larger than total_frames.

        Raises:
            RuntimeError: 'num_samples' is missing from args.
        '''
        num_samples = self._args.get('num_samples')
        if num_samples is None:
            raise RuntimeError('uniform_temporal_subsample lost args num_samples')

        indices = np.linspace(0, total_frames - 1, num_samples).astype('int')
        cur_index = 0
        for count, frame in enumerate(frame_iter):
            if cur_index >= len(indices):
                return
            while cur_index < len(indices) and indices[cur_index] <= count:
                cur_index += 1
                yield frame

    @staticmethod
    def _decdoe(video, container, start_time):
        '''
        Seek to start_time and yield the decoded frames from there on.

        Args:
            video: the video stream to decode.
            container: the opened container that owns the stream.
            start_time: start time in seconds, None decodes from the start.

        Raises:
            RuntimeError: seeking to the start offset failed.
        '''
        if start_time is not None:
            # Convert seconds into the stream's time_base units.
            start_offset = int(math.floor(start_time * (1 / video.time_base)))
        else:
            start_offset = 0
        seek_offset = max(start_offset - 1, 0)

        try:
            container.seek(seek_offset, any_frame=False, backward=True, stream=video)
        except av.AVError as e:
            logger.error('Seek to start_time: %s sec failed, the offset is %s, errors: %s', start_time, seek_offset, str(e))
            raise RuntimeError('Seek to start_time: %s sec failed' % start_time) from e

        for frame in container.decode(video):
            # The seek may land before start_time, drop the leading frames.
            # Without a start_time there is nothing to drop, and comparing a
            # frame time with None would raise TypeError.
            if start_time is not None and frame.time < start_time:
                continue
            yield frame

    @staticmethod
    def get_video_duration(video):
        '''
        Return the duration of the stream in seconds, or None when unknown.

        Uses the stream duration when it is set, otherwise the 'DURATION'
        metadata entry, parsed as ':' separated components where each
        component is worth 60 times the one after it.
        '''
        if video.duration is not None:
            return float(video.duration * video.time_base)

        time_str = video.metadata.get('DURATION')
        if time_str is not None:
            # The 0.0 initializer makes a single component value come back as
            # a float too, instead of the raw string.
            return reduce(lambda total, part: total * 60 + float(part), time_str.split(':'), 0.0)

        return None

    def __call__(self, video_path: str) -> Generator:
        '''
        Decode the first video stream of video_path.

        Args:
            video_path: the location of the video, passed to av.open.

        Returns:
            A generator of VideoOutput(image, TIMESTAMP), TIMESTAMP is the
            frame time in milliseconds.
        '''
        with av.open(video_path) as container:
            stream = container.streams.video[0]
            width = stream.width
            height = stream.height
            channel = 3
            image_format = 'RGB'

            frame_gen = VideoDecoder._decdoe(stream, container, self._start_time)
            sample_function = self.get_sample(stream)

            for frame in sample_function(frame_gen):
                timestamp = int(frame.time * 1000)
                ndarray = frame.to_ndarray(format='rgb24')
                img = Image(ndarray.tobytes(), width, height, channel, image_format, None, key_frame=frame.key_frame)
                yield VideoOutput(img, timestamp)
# if __name__ == '__main__':
# video_path = "/home/junjie.jiangjjj/workspace/video/[The Rock] [1996] [Trailer] [#2]-16-l-rO5B64.mkv"
# video_path1 = "/home/junjie.jiangjjj/workspace/video/'Eagle Eye' Trailer (2008)-_wkqo_Rd3_Q.mp4"
# video_path2 = "/home/junjie.jiangjjj/workspace/video/2001 - A Space Odyssey - Trailer [1968] HD-Z2UWOeBcsJI.webm"
# video_path3 = "/home/zhangchen/zhangchen_workspace/dataset/MSRVTT/msrvtt_data/MSRVTT_Videos/video9991.mp4"
# def d(video_path):
# d = VideoDecoder(10, 11, 'uniform_temporal_subsample', {'num_samples': 30})
# fs = d(video_path)
# for f in fs:
# print(f.TIMESTAMP)
# d(video_path1)
# print('#' * 100)
# d(video_path2)

10
video_decoder.yaml

@ -0,0 +1,10 @@
# Declares the operator's name, labels, and its init/call signature.
name: 'video-decoder'
labels:
operator: 'towhee/video-decoder'
# NOTE(review): VideoDecoder.__init__ in video_decoder.py takes start_time,
# end_time, sample_type and args, not key_frame -- confirm and update.
init:
key_frame: bool
call:
input:
video_path: str
# NOTE(review): VideoDecoder.__call__ yields (image, TIMESTAMP) tuples, but
# only image is listed here -- confirm whether TIMESTAMP should be declared.
output:
image: Image
Loading…
Cancel
Save