diff --git a/audio_decoder.py b/audio_decoder.py index ce6be33..48fa49d 100644 --- a/audio_decoder.py +++ b/audio_decoder.py @@ -1,10 +1,15 @@ +import soundfile as sf +import numpy as np from typing import Generator, NamedTuple import av from towhee.operator.base import Operator +from torch_vggish import TorchVggish -AudioOutput = NamedTuple("Outputs", [("audio_frame", 'ndarray'), ('simple_rate', 'int'), ("TIMESTAMP", 'int')]) +# from torch_vggish import TorchVggish + +AudioOutput = NamedTuple("Outputs", [("audio_frame", 'ndarray'), ('sample_rate', 'int'), ("TIMESTAMP", 'int')]) class AudioDecoder(Operator): @@ -18,15 +23,7 @@ class AudioDecoder(Operator): in_container = av.open(audio_path) stream = in_container.streams.get(audio=0)[0] for frame in in_container.decode(stream): - timestamp = frame.time * 1000 + timestamp = int(frame.time * 1000) sample_rate = frame.sample_rate ndarray = frame.to_ndarray() yield AudioOutput(ndarray, sample_rate, timestamp) - - -if __name__ == '__main__': - d = AudioDecoder() - audio_path = '/Users/jiangjunjie/WorkSpace/audio_data/stereo.wav' - it = d(audio_path) - for data in it: - print(data._asdict())