|
|
@@ -17,7 +17,7 @@ import warnings
 
 import os
 from pathlib import Path
-from typing import List
+from typing import List, Union
 
 import torch
 import torchaudio
|
@@ -92,7 +92,7 @@ class NNFingerprint(NNOperator):
         self.model.eval()
         log.info('Model is loaded.')
 
-    def __call__(self, data: List[AudioFrame]) -> numpy.ndarray:
+    def __call__(self, data: Union[str, List[AudioFrame]]) -> numpy.ndarray:
         audio_tensors = self.preprocess(data)
         if audio_tensors.device != self.device:
             audio_tensors = audio_tensors.to(self.device)
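With this signature change, the operator accepts either decoded frames or a plain file path. A minimal usage sketch of the two call styles; the module path and the no-argument constructor are assumptions for illustration, not confirmed by this diff:

```python
# Usage sketch -- class name comes from this diff; the import path and
# constructor arguments below are assumed, not part of the change.
from nn_fingerprint import NNFingerprint

op = NNFingerprint()

# New in this change: pass an audio file path; preprocess() loads it
# via torchaudio.load().
emb = op('/data/query.wav')

# Unchanged behavior: pass decoded frames as before.
# emb = op(frames)  # frames: List[AudioFrame]
```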
|
|
@@ -107,19 +107,22 @@ class NNFingerprint(NNOperator):
         outs = features.detach().cpu().numpy()
         return outs
 
-    def preprocess(self, frames: List[AudioFrame]):
-        sr = frames[0].sample_rate
-        layout = frames[0].layout
-        if layout == 'stereo':
-            frames = [frame.reshape(-1, 2) for frame in frames]
-            audio = numpy.vstack(frames).transpose()
+    def preprocess(self, frames: Union[str, List[AudioFrame]]):
+        if isinstance(frames, str):
+            audio, sr = torchaudio.load(frames)
         else:
-            audio = numpy.hstack(frames)
-            if len(audio.shape) == 1:
-                audio = audio[None, :]
-        audio = self.int2float(audio)
-        audio = torch.from_numpy(audio)
+            sr = frames[0].sample_rate
+            layout = frames[0].layout
+            if layout == 'stereo':
+                frames = [frame.reshape(-1, 2) for frame in frames]
+                audio = numpy.vstack(frames).transpose()
+            else:
+                audio = numpy.hstack(frames)
+                if len(audio.shape) == 1:
+                    audio = audio[None, :]
+            audio = self.int2float(audio)
+            audio = torch.from_numpy(audio)
+        assert len(audio.shape) == 2
 
         if sr != self.params['sample_rate']:
             resampler = torchaudio.transforms.Resample(sr, self.params['sample_rate'], dtype=audio.dtype)
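For reference, the new file-path branch plus the existing resampling step reduce to the standalone sequence below. A sketch under assumptions: `target_sr` stands in for `self.params['sample_rate']`, and the 8000 Hz value and file path are only illustrative.

```python
# Standalone sketch of the load -> (optional) resample path added above.
# target_sr mirrors self.params['sample_rate']; 8000 is an assumed value.
import torchaudio

target_sr = 8000
audio, sr = torchaudio.load('/data/query.wav')  # float tensor, shape (channels, samples)
assert len(audio.shape) == 2
if sr != target_sr:
    # dtype is forwarded so the resampling kernel matches the audio dtype,
    # matching the call in the hunk above.
    resampler = torchaudio.transforms.Resample(sr, target_sr, dtype=audio.dtype)
    audio = resampler(audio)
```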
|
|
|