towhee
/
audio-decoder
copied
You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
Readme
Files and versions
32 lines
890 B
32 lines
890 B
from typing import Generator, NamedTuple
|
|
|
|
import av
|
|
|
|
from towhee.operator.base import Operator
|
|
|
|
# Record produced for each decoded frame. Field order is
# (audio_frame, simple_rate, TIMESTAMP); the runtime type name is "Outputs".
# NOTE(review): 'simple_rate' looks like a misspelling of 'sample_rate', but it
# is a caller-visible field name, so it is kept unchanged here.
AudioOutput = NamedTuple(
    "Outputs",
    [
        ("audio_frame", 'ndarray'),
        ('simple_rate', 'int'),
        ("TIMESTAMP", 'int'),
    ],
)
|
|
|
|
|
|
class AudioDecoder(Operator):
    """
    Operator that decodes an audio file frame by frame.

    Calling an instance with a path opens it via ``av.open`` and lazily
    yields one ``AudioOutput`` per frame decoded from the first audio stream.
    """

    def __init__(self) -> None:
        super().__init__()

    def __call__(self, audio_path: str):
        """
        Yield the decoded frames of the first audio stream in ``audio_path``.

        Args:
            audio_path: Location passed straight to ``av.open``.

        Yields:
            AudioOutput: ``frame.to_ndarray()``, ``frame.sample_rate`` and
            ``frame.time * 1000`` for each decoded frame, in that order.
        """
        # Open the container as a context manager so it is closed once the
        # generator is exhausted, fails, or is closed early by the consumer;
        # previously the container was never closed.
        with av.open(audio_path) as in_container:
            stream = in_container.streams.get(audio=0)[0]
            for frame in in_container.decode(stream):
                # Presumably milliseconds, assuming frame.time is in seconds
                # -- TODO confirm against the PyAV documentation.
                timestamp = frame.time * 1000
                sample_rate = frame.sample_rate
                ndarray = frame.to_ndarray()
                yield AudioOutput(ndarray, sample_rate, timestamp)
|
|
|
|
|
|
if __name__ == '__main__':
    # Manual smoke test: decode one audio file and print every frame record.
    import sys

    d = AudioDecoder()
    # An optional command-line argument overrides the input file; without it
    # the original hard-coded sample path is used, so existing runs behave
    # exactly as before.
    if len(sys.argv) > 1:
        audio_path = sys.argv[1]
    else:
        audio_path = '/Users/jiangjunjie/WorkSpace/audio_data/stereo.wav'
    it = d(audio_path)
    for data in it:
        print(data._asdict())
|