|
@ -60,13 +60,15 @@ class Vggish(NNOperator): |
|
|
|
|
|
|
|
|
def preprocess(self, frames: List[AudioFrame]): |
|
|
def preprocess(self, frames: List[AudioFrame]): |
|
|
sr = frames[0].sample_rate |
|
|
sr = frames[0].sample_rate |
|
|
layout = frames[0].lay_out |
|
|
|
|
|
audio = numpy.hstack(frames) |
|
|
|
|
|
|
|
|
layout = frames[0].layout |
|
|
if layout == 'stereo': |
|
|
if layout == 'stereo': |
|
|
audio = audio.reshape(-1, 2) |
|
|
|
|
|
|
|
|
frames = [frame.reshape(-1, 2) for frame in frames] |
|
|
|
|
|
audio = numpy.vstack(frames) |
|
|
|
|
|
else: |
|
|
|
|
|
audio = numpy.hstack(frames) |
|
|
|
|
|
audio = audio.transpose() |
|
|
audio = self.int2float(audio) |
|
|
audio = self.int2float(audio) |
|
|
try: |
|
|
try: |
|
|
audio = audio.transpose() |
|
|
|
|
|
audio_tensors = vggish_input.waveform_to_examples(audio, sr, return_tensor=True) |
|
|
audio_tensors = vggish_input.waveform_to_examples(audio, sr, return_tensor=True) |
|
|
return audio_tensors |
|
|
return audio_tensors |
|
|
except Exception as e: |
|
|
except Exception as e: |
|
@ -81,11 +83,14 @@ class Vggish(NNOperator): |
|
|
|
|
|
|
|
|
The code is inspired by https://github.com/mgeier/python-audio/blob/master/audio-files/utility.py |
|
|
The code is inspired by https://github.com/mgeier/python-audio/blob/master/audio-files/utility.py |
|
|
""" |
|
|
""" |
|
|
assert wav.dtype.kind in 'iu' |
|
|
|
|
|
dtype = numpy.dtype(dtype) |
|
|
dtype = numpy.dtype(dtype) |
|
|
assert dtype.kind == 'f' |
|
|
assert dtype.kind == 'f' |
|
|
|
|
|
|
|
|
ii = numpy.iinfo(wav.dtype) |
|
|
|
|
|
abs_max = 2 ** (ii.bits - 1) |
|
|
|
|
|
offset = ii.min + abs_max |
|
|
|
|
|
return (wav.astype(dtype) - offset) / abs_max |
|
|
|
|
|
|
|
|
if wav.dtype.kind in 'iu': |
|
|
|
|
|
ii = numpy.iinfo(wav.dtype) |
|
|
|
|
|
abs_max = 2 ** (ii.bits - 1) |
|
|
|
|
|
offset = ii.min + abs_max |
|
|
|
|
|
return (wav.astype(dtype) - offset) / abs_max |
|
|
|
|
|
else: |
|
|
|
|
|
log.warning('Converting float dtype from %s to %s.', wav.dtype, dtype) |
|
|
|
|
|
return wav.astype(dtype) |
|
|