Update README to the towhee.dc2 pipeline API and replace resampy with torchaudio

Signed-off-by: Jael Gu <mengjia.gu@zilliz.com>
3 years ago · b251960776
3 changed files with 14 additions and 27 deletions
--- a/README.md
+++ b/README.md
@@ -15,33 +15,19 @@ The pre-trained model used here is from the paper **PANNs: Large-Scale Pretraine

 Predict labels and generate embeddings given the audio path "test.wav".

- *Write the pipeline in simplified style*:
+*Write a pipeline with explicitly named inputs and outputs:*

 ```python
-import towhee
-
-(
-    towhee.glob('test.wav')
-          .audio_decode.ffmpeg()
-          .runas_op(func=lambda x:[y[0] for y in x])
-          .audio_classification.panns()
-          .show()
-)
-```
-
-*Write a same pipeline with explicit inputs/outputs name specifications:*
+from towhee.dc2 import pipe, ops, DataCollection

-```python
-import towhee
-
-(
-    towhee.glob['path']('test.wav')
-          .audio_decode.ffmpeg['path', 'frames']()
-          .runas_op['frames', 'frames'](func=lambda x:[y[0] for y in x])
-          .audio_classification.panns['frames', ('labels', 'scores', 'vec')]()
-          .select['path', 'labels', 'scores', 'vec']()
-          .show()
+p = (
+    pipe.input('path')
+        .map('path', 'frame', ops.audio_decode.ffmpeg())
+        .map('frame', ('labels', 'scores', 'vec'), ops.audio_classification.panns())
+        .output('path', 'labels', 'scores', 'vec')
 )
+
+DataCollection(p('./test.wav')).show()
 ```
 <img src="./result.png" width="800px"/>

--- a/panns.py
+++ b/panns.py
@@ -17,10 +17,10 @@ import warnings

 import os
 import numpy
-import resampy
 from typing import List

 import torch
+import torchaudio

 from panns_inference import AudioTagging, labels

@@ -67,9 +67,10 @@ class Panns(NNOperator):

        audio = self.int2float(audio).astype('float32')
        if sr != self.sample_rate:
-            audio = resampy.resample(audio, sr, self.sample_rate)
+            resampler = torchaudio.transforms.Resample(sr, self.sample_rate, dtype=audio.dtype)
+            audio = resampler(audio)

-        audio = torch.from_numpy(audio)[None, :]
+        audio = audio[None, :]
        clipwise_output, embedding = self.tagger.inference(audio)

        sorted_indexes = numpy.argsort(clipwise_output[0])[::-1]
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,4 @@
 panns_inference
-resampy
 torch
+torchaudio
 towhee>=0.7.0