diff --git a/README.md b/README.md index a3e36d2..885cb6c 100644 --- a/README.md +++ b/README.md @@ -12,10 +12,18 @@ The model is pre-trained with a large scale of audio dataset [AudioSet](https:// As suggested, it is suitable to extract features at high level or warm up a larger model. ```python +import numpy as np from towhee import ops audio_encoder = ops.audio_embedding.vggish() -audio_embedding = audio_encoder("/path/to/audio") + +# Path or URL as input +audio_embedding = audio_encoder("/audio/path/or/url/") + +# Audio data as input +audio_data = np.zeros((441344, 2)) +sample_rate = 44100 +audio_embedding = audio_encoder(audio_data, sample_rate) ``` ## Factory Constructor diff --git a/vggish.py b/vggish.py index 86d327b..dfc3de2 100644 --- a/vggish.py +++ b/vggish.py @@ -71,6 +71,11 @@ class Vggish(NNOperator): # if __name__ == '__main__': # encoder = Vggish() -# audio_path = '/path/to/audio' -# vec = encoder(audio_path) +# +# # audio_path = '/path/to/audio' +# # vec = encoder(audio_path) +# +# audio_data = numpy.zeros((441344, 2)) +# sample_rate = 44100 +# vec = encoder(audio_data, sample_rate) # print(vec)