Browse Source
Support all int types of audio data
Signed-off-by: Jael Gu <mengjia.gu@zilliz.com>
main
1 changed file with
2 additions and
4 deletions
-
vggish.py
|
@@ -61,10 +61,8 @@ class Vggish(NNOperator): |
|
|
def preprocess(self, frames: List[AudioFrame]): |
|
|
def preprocess(self, frames: List[AudioFrame]): |
|
|
sr = frames[0].sample_rate |
|
|
sr = frames[0].sample_rate |
|
|
audio = numpy.hstack(frames) |
|
|
audio = numpy.hstack(frames) |
|
|
if audio.dtype == numpy.int32: |
|
|
|
|
|
audio = audio / 2147483648.0 |
|
|
|
|
|
elif audio.dtype == numpy.int16: |
|
|
|
|
|
audio = audio / 32768.0 |
|
|
|
|
|
|
|
|
ii = numpy.iinfo(audio.dtype) |
|
|
|
|
|
audio = 2 * audio / (ii.max - ii.min + 1) |
|
|
try: |
|
|
try: |
|
|
audio = audio.transpose() |
|
|
audio = audio.transpose() |
|
|
audio_tensors = vggish_input.waveform_to_examples(audio, sr, return_tensor=True) |
|
|
audio_tensors = vggish_input.waveform_to_examples(audio, sr, return_tensor=True) |
|
|