|
|
@ -11,15 +11,30 @@ class AudioDecoderFFmpeg(PyOperator): |
|
|
|
""" |
|
|
|
""" |
|
|
|
|
|
|
|
def __init__(self, batch_size: int = -1) -> None:
    """Create the decoder operator.

    Args:
        batch_size: Number of decoded frames grouped into each yielded
            list. Any value ``<= 0`` (the default) disables batching, so
            frames are yielded one at a time.
    """
    super().__init__()
    self._batch_size = batch_size
|
|
|
|
|
|
|
def __call__(self, audio_path: str):
    """Decode the first audio stream of ``audio_path`` and yield its frames.

    Args:
        audio_path: Location of the media, passed unchanged to ``av.open``.

    Yields:
        If ``self._batch_size <= 0``: one ``AudioFrame`` per decoded frame.
        Otherwise: lists of ``AudioFrame`` containing ``self._batch_size``
        frames each; the final list may be shorter when the stream length
        is not a multiple of the batch size.

    Raises:
        IndexError: If the container has no audio stream (raised by the
            ``[0]`` lookup on the stream selection).
    """
    batch_size = self._batch_size
    batch = []
    # The context manager closes the container when decoding finishes,
    # when an error is raised, or when the consumer abandons the generator.
    with av.open(audio_path) as in_container:
        stream = in_container.streams.get(audio=0)[0]
        for frame in in_container.decode(stream):
            # Stored as an integer: frame.time scaled by 1000.
            timestamp = int(frame.time * 1000)
            sample_rate = frame.sample_rate
            layout = frame.layout.name
            ndarray = frame.to_ndarray()
            audio_frame = AudioFrame(ndarray, sample_rate, timestamp, layout)

            if batch_size <= 0:
                # Unbatched mode: hand each frame to the consumer directly.
                yield audio_frame
                continue

            batch.append(audio_frame)
            if len(batch) == batch_size:
                yield batch
                # Start a new list so the one just yielded is not mutated.
                batch = []

    # Flush the trailing partial batch (always empty in unbatched mode).
    if batch:
        yield batch
|
|
|