From 531fcae57ba918f8cd3aa0d429df87eded64202b Mon Sep 17 00:00:00 2001
From: Jael Gu
Date: Tue, 29 Mar 2022 14:24:55 +0800
Subject: [PATCH] Update

Signed-off-by: Jael Gu
---
 README.md | 2 +-
 vggish.py | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 885cb6c..59cf692 100644
--- a/README.md
+++ b/README.md
@@ -21,7 +21,7 @@ audio_encoder = ops.audio_embedding.vggish()
 audio_embedding = audio_encoder("/audio/path/or/url/")
 
 # Audio data as input
-audio_data = np.zeros((441344, 2))
+audio_data = np.zeros((2, 441344))
 sample_rate = 44100
 audio_embedding = audio_encoder(audio_data, sample_rate)
 ```
diff --git a/vggish.py b/vggish.py
index 3983341..69c3e3f 100644
--- a/vggish.py
+++ b/vggish.py
@@ -62,6 +62,7 @@ class Vggish(NNOperator):
             audio_tensors = vggish_input.wavfile_to_examples(audio)
         elif isinstance(audio, numpy.ndarray):
             try:
+                audio = audio.transpose()
                 audio_tensors = vggish_input.waveform_to_examples(audio, sr, return_tensor=True)
             except Exception as e:
                 log.error("Fail to load audio data.")
@@ -77,7 +78,7 @@ class Vggish(NNOperator):
 # # audio_path = '/path/to/audio'
 # # vec = encoder(audio_path)
 #
-# audio_data = numpy.zeros((441344, 2))
+# audio_data = numpy.zeros((2, 441344))
 # sample_rate = 44100
 # vec = encoder(audio_data, sample_rate)
 # print(vec)