VPF
5 changed files with 201 additions and 0 deletions
__init__.py
@@ -0,0 +1,5 @@
from .video_decoder import VideoDecoder


def VPF(gpu_id=0, start_time=None, end_time=None, sample_type=None, args=None):
    return VideoDecoder(gpu_id, start_time, end_time, sample_type, args)
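A minimal usage sketch of the factory (not part of the change set; 'example.mp4' is a placeholder path, and the printed attributes assume towhee's VideoFrame carries a millisecond timestamp and a mode string):

    # Hypothetical sketch: decode one frame per second from t=1s to t=5s.
    decoder = VPF(gpu_id=0, start_time=1, end_time=5,
                  sample_type='time_step_sample', args={'time_step': 1})
    for frame in decoder('example.mp4'):
        print(frame.timestamp, frame.mode)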
cpu_decode.py
@@ -0,0 +1,50 @@
import math
import logging

import av

from towhee.types.video_frame import VideoFrame


logger = logging.getLogger()


class PyAVDecode:
    def __init__(self, video_path, start_time=None, time_step=None) -> None:
        self._container = av.open(video_path)
        self._stream = self._container.streams.video[0]
        self._start_time = start_time if start_time is not None else 0
        self._time_step = time_step

    def close(self):
        self._container.close()

    def time_step_decode(self):
        ts = self._start_time
        is_end = False
        while not is_end:
            is_end = True
            # convert seconds into the stream's time_base units before seeking
            offset = int(math.floor(ts / self._stream.time_base))
            self._container.seek(offset, stream=self._stream)
            for f in self._container.decode(self._stream):
                if f.time < ts:
                    continue
                yield self.av_frame_to_video_frame(f)
                is_end = False
                break
            ts += self._time_step

    def av_frame_to_video_frame(self, frame):
        timestamp = int(round(frame.time * 1000))  # milliseconds
        ndarray = frame.to_ndarray(format='rgb24')
        img = VideoFrame(ndarray, 'RGB', timestamp, frame.key_frame)
        return img

    def decode(self):
        if self._start_time > 0:
            offset = int(math.floor(self._start_time / self._stream.time_base))
            self._container.seek(offset, any_frame=False, backward=True, stream=self._stream)

        for frame in self._container.decode(self._stream):
            if frame.time < self._start_time:
                continue
            yield self.av_frame_to_video_frame(frame)
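The seek offset math above scales a timestamp in seconds by 1/time_base; for a stream with time_base = 1/90000, ts = 2.5 s seeks to offset 225000. A standalone sketch, assuming a local placeholder file:

    # Hypothetical sketch: sample one frame every 2 seconds from the start.
    dec = PyAVDecode('example.mp4', start_time=0, time_step=2)
    try:
        for frame in dec.time_step_decode():
            print(frame.timestamp)  # milliseconds
    finally:
        dec.close()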
gpu_decode.py
@@ -0,0 +1,64 @@
import PyNvCodec as nvc
import numpy as np
from towhee.types import VideoFrame


class VPFDecode:
    def __init__(self, video_path: str, gpu_id: int, start_time: int = None, time_step=None):
        self._gpu_id = gpu_id
        self._nv_dec = nvc.PyNvDecoder(video_path, gpu_id)
        self._start_time = start_time
        self._time_step = time_step
        self._target_w = self._nv_dec.Width()
        self._target_h = self._nv_dec.Height()
        self._time_base = self._nv_dec.Timebase()
        self._avg_rate = self._nv_dec.AvgFramerate()
        self._to_rgb = nvc.PySurfaceConverter(
            self._target_w, self._target_h, nvc.PixelFormat.NV12, nvc.PixelFormat.RGB, self._gpu_id
        )
        self._cc = nvc.ColorspaceConversionContext(nvc.ColorSpace.BT_709, nvc.ColorRange.JPEG)
        self._pdata = nvc.PacketData()

    def timestamp(self, pts) -> int:
        return int(round(pts * self._time_base * 1000))  # milliseconds

    def surface_to_videoframe(self, nv12_surface):
        if nv12_surface.Empty():
            return None

        # convert NV12 -> RGB on the GPU, then download the surface to host memory
        nv_dwn = nvc.PySurfaceDownloader(self._target_w, self._target_h, nvc.PixelFormat.RGB, self._gpu_id)
        rgb24_small = self._to_rgb.Execute(nv12_surface, self._cc)
        if rgb24_small.Empty():
            raise RuntimeError('Convert to rgb failed')

        raw_frame_rgb = np.ndarray(shape=(self._target_h, self._target_w, 3), dtype=np.uint8)
        if not nv_dwn.DownloadSingleSurface(rgb24_small, raw_frame_rgb):
            raise RuntimeError('Download image from gpu failed')
        return VideoFrame(raw_frame_rgb, 'RGB', self.timestamp(self._pdata.pts), self._pdata.key)

    def time_step_decode(self):
        ts = self._start_time if self._start_time is not None and self._start_time > 0 else 0
        while True:
            seek_ctx = nvc.SeekContext(ts, nvc.SeekMode.PREV_KEY_FRAME, nvc.SeekCriteria.BY_TIMESTAMP)
            nv12_surface = self._nv_dec.DecodeSingleSurface(seek_ctx, self._pdata)
            frame = self.surface_to_videoframe(nv12_surface)
            if frame is None:
                break
            yield frame
            ts += self._time_step

    def decode(self):
        if self._start_time is not None and self._start_time > 0:
            seek_ctx = nvc.SeekContext(self._start_time, nvc.SeekMode.PREV_KEY_FRAME, nvc.SeekCriteria.BY_TIMESTAMP)
            nv12_surface = self._nv_dec.DecodeSingleSurface(seek_ctx, self._pdata)
            frame = self.surface_to_videoframe(nv12_surface)
            if frame is None:
                return
            yield frame

        while True:
            nv12_surface = self._nv_dec.DecodeSingleSurface(self._pdata)
            frame = self.surface_to_videoframe(nv12_surface)
            if frame is None:
                break
            yield frame
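Each decoded NV12 surface stays on the GPU until the RGB conversion finishes; only the final RGB24 buffer is copied to host memory. A usage sketch, assuming NVIDIA's VideoProcessingFramework (PyNvCodec) is installed with CUDA support and the path is a placeholder:

    # Hypothetical sketch: sequential decode starting at t=2s.
    dec = VPFDecode('example.mp4', gpu_id=0, start_time=2)
    for frame in dec.decode():
        print(frame.timestamp, frame.key_frame)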
requirements.txt
@@ -0,0 +1 @@
av
video_decoder.py
@@ -0,0 +1,81 @@
import logging

from towhee.operator.base import PyOperator

from cpu_decode import PyAVDecode

logger = logging.getLogger()

try:
    from gpu_decode import VPFDecode
except Exception:
    logger.error('Import VPFDecode failed, will use CPU decode')
    VPFDecode = PyAVDecode


class SAMPLE_TYPE:
    UNIFORM_TEMPORAL_SUBSAMPLE = 'uniform_temporal_subsample'
    TIME_STEP_SAMPLE = 'time_step_sample'


class VideoDecoder(PyOperator):
    '''
    VideoDecoder
    Returns frames in RGB format.
    '''

    def __init__(self, gpu_id=0, start_time=None, end_time=None, sample_type=None, args=None) -> None:
        super().__init__()
        self._gpu_id = gpu_id
        self._start_time = start_time if start_time is not None else 0
        self._end_time = end_time * 1000 if end_time is not None else None  # milliseconds
        self._sample_type = sample_type.lower() if sample_type else None
        self._args = args if args is not None else {}

    def _gpu_decode(self, video_path):
        yield from VPFDecode(video_path, self._gpu_id, self._start_time).decode()

    def _cpu_decode(self, video_path):
        yield from PyAVDecode(video_path, self._start_time).decode()

    def _gpu_time_step_decode(self, video_path, time_step):
        yield from VPFDecode(video_path, self._gpu_id, self._start_time, time_step).time_step_decode()

    def _cpu_time_step_decode(self, video_path, time_step):
        yield from PyAVDecode(video_path, self._start_time, time_step).time_step_decode()

    def decode(self, video_path: str):
        try:
            yield from self._gpu_decode(video_path)
        except RuntimeError:
            logger.warning('GPU decode failed, only supports [h264|h265|vp9] formats, will use CPU')
            yield from self._cpu_decode(video_path)

    def time_step_decode(self, video_path, time_step):
        try:
            yield from self._gpu_time_step_decode(video_path, time_step)
        except RuntimeError:
            logger.warning('GPU decode failed, only supports [h264|h265|vp9] formats, will use CPU')
            yield from self._cpu_time_step_decode(video_path, time_step)

    def _filter(self, frames):
        for f in frames:
            if self._end_time and f.timestamp > self._end_time:
                break
            yield f

    def __call__(self, video_path: str):
        if self._sample_type is None:
            yield from self._filter(self.decode(video_path))
        elif self._sample_type == SAMPLE_TYPE.TIME_STEP_SAMPLE:
            time_step = self._args.get('time_step')
            if time_step is None:
                raise RuntimeError('time_step_sample requires a time_step arg')
            yield from self._filter(self.time_step_decode(video_path, time_step))
        elif self._sample_type == SAMPLE_TYPE.UNIFORM_TEMPORAL_SUBSAMPLE:
            raise NotImplementedError('uniform_temporal_subsample is not supported yet')
        else:
            raise RuntimeError('Unknown sample type, only supports: [%s|%s]' % (SAMPLE_TYPE.TIME_STEP_SAMPLE, SAMPLE_TYPE.UNIFORM_TEMPORAL_SUBSAMPLE))
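A sketch of the two dispatch paths through __call__, assuming a local placeholder file; note that end_time is given in seconds and compared against millisecond frame timestamps inside _filter:

    # Hypothetical sketch, not part of the change set.
    op = VideoDecoder(start_time=0, end_time=3)    # frames up to t = 3000 ms
    all_frames = list(op('example.mp4'))

    op = VideoDecoder(sample_type='time_step_sample', args={'time_step': 5})
    sampled = list(op('example.mp4'))              # one frame every 5 seconds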