from typing import Generator, NamedTuple

import av

from towhee.operator.base import Operator

# Record yielded for every decoded audio frame.
# NOTE(review): the field name 'simple_rate' looks like a typo for
# 'sample_rate', and the typename "Outputs" differs from the variable name;
# both are kept because they are visible to consumers (e.g. via `_asdict()`).
AudioOutput = NamedTuple("Outputs", [("audio_frame", 'ndarray'), ('simple_rate', 'int'), ("TIMESTAMP", 'int')])


class AudioDecoder(Operator):
    """Decode an audio file frame by frame using PyAV.

    Calling an instance with a path returns a generator that yields one
    ``AudioOutput`` per frame decoded from the first audio stream of the file.
    """

    def __init__(self) -> None:
        super().__init__()

    def __call__(self, audio_path: str) -> Generator[AudioOutput, None, None]:
        """Lazily decode the first audio stream of ``audio_path``.

        Args:
            audio_path: Path (or anything ``av.open`` accepts) of the media
                file to decode.

        Yields:
            AudioOutput: ``audio_frame`` is ``frame.to_ndarray()``,
            ``simple_rate`` is ``frame.sample_rate`` and ``TIMESTAMP`` is
            ``frame.time * 1000`` (presentation time converted from seconds
            to milliseconds).
        """
        # The context manager guarantees the container is closed when the
        # generator is exhausted, closed early by the consumer, or when
        # decoding raises; previously the container was never closed.
        with av.open(audio_path) as in_container:
            stream = in_container.streams.get(audio=0)[0]
            for frame in in_container.decode(stream):
                # NOTE(review): this value is a float although the
                # AudioOutput schema declares TIMESTAMP as int; left unchanged
                # to preserve what existing consumers receive.
                timestamp = frame.time * 1000
                sample_rate = frame.sample_rate
                ndarray = frame.to_ndarray()
                yield AudioOutput(ndarray, sample_rate, timestamp)


if __name__ == '__main__':
    import sys

    d = AudioDecoder()
    # An optional first command-line argument overrides the default demo file.
    audio_path = sys.argv[1] if len(sys.argv) > 1 else '/Users/jiangjunjie/WorkSpace/audio_data/stereo.wav'
    it = d(audio_path)
    for data in it:
        print(data._asdict())