logo
Browse Source

update

main
junjiejiangjjj 3 years ago
parent
commit
c6a176a8d5
  1. 17
      audio_decoder.py

17
audio_decoder.py

@@ -1,10 +1,15 @@
import soundfile as sf
import numpy as np
from typing import Generator, NamedTuple
import av
from towhee.operator.base import Operator
from torch_vggish import TorchVggish
AudioOutput = NamedTuple("Outputs", [("audio_frame", 'ndarray'), ('simple_rate', 'int'), ("TIMESTAMP", 'int')])
# from torch_vggish import TorchVggish
AudioOutput = NamedTuple("Outputs", [("audio_frame", 'ndarray'), ('sample_rate', 'int'), ("TIMESTAMP", 'int')])
class AudioDecoder(Operator):
@@ -18,15 +23,7 @@ class AudioDecoder(Operator):
in_container = av.open(audio_path)
stream = in_container.streams.get(audio=0)[0]
for frame in in_container.decode(stream):
timestamp = frame.time * 1000
timestamp = int(frame.time * 1000)
sample_rate = frame.sample_rate
ndarray = frame.to_ndarray()
yield AudioOutput(ndarray, sample_rate, timestamp)
if __name__ == '__main__':
d = AudioDecoder()
audio_path = '/Users/jiangjunjie/WorkSpace/audio_data/stereo.wav'
it = d(audio_path)
for data in it:
print(data._asdict())

Loading…
Cancel
Save