@@ -1,10 +1,15 @@
 import soundfile as sf
 import numpy as np
 from typing import Generator, NamedTuple
 
 import av
 
 from towhee.operator.base import Operator
-from torch_vggish import TorchVggish
-
-AudioOutput = NamedTuple("Outputs", [("audio_frame", 'ndarray'), ('simple_rate', 'int'), ("TIMESTAMP", 'int')])
+# from torch_vggish import TorchVggish
+
+AudioOutput = NamedTuple("Outputs", [("audio_frame", 'ndarray'), ('sample_rate', 'int'), ("TIMESTAMP", 'int')])
 
 
 class AudioDecoder(Operator):
@@ -18,15 +23,7 @@ class AudioDecoder(Operator):
         in_container = av.open(audio_path)
         stream = in_container.streams.get(audio=0)[0]
         for frame in in_container.decode(stream):
-            timestamp = frame.time * 1000
+            timestamp = int(frame.time * 1000)
             sample_rate = frame.sample_rate
             ndarray = frame.to_ndarray()
             yield AudioOutput(ndarray, sample_rate, timestamp)
-
-
-if __name__ == '__main__':
-    d = AudioDecoder()
-    audio_path = '/Users/jiangjunjie/WorkSpace/audio_data/stereo.wav'
-    it = d(audio_path)
-    for data in it:
-        print(data._asdict())
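
Since this change drops the inline __main__ demo, here is a minimal usage sketch of the updated operator. It assumes only what the diff shows: an AudioDecoder instance is called with an audio file path and yields AudioOutput tuples. The "audio_decoder" module name and the "your_audio.wav" path are placeholders, not part of the diff.

# Minimal sketch, not part of the diff: iterate the decoded frames and
# inspect the fields declared by the AudioOutput NamedTuple.
# The module name "audio_decoder" and the file path are placeholders.
from audio_decoder import AudioDecoder

decoder = AudioDecoder()
for out in decoder('your_audio.wav'):
    # audio_frame is the ndarray returned by frame.to_ndarray(),
    # sample_rate is in Hz, TIMESTAMP is the frame time in milliseconds.
    print(out.audio_frame.shape, out.sample_rate, out.TIMESTAMP)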