Update test

Signed-off-by: Jael Gu <mengjia.gu@zilliz.com>
3 years ago · 9a81b6d19d
4 changed files with 130 additions and 35 deletions
--- a/README.md
+++ b/README.md
@ -56,7 +56,7 @@ import towhee

 Create the operator via the following factory method

-***audio_embedding.nnfp(params=None, checkpoint_path=None, framework='pytorch')***
+***audio_embedding.nnfp(params=None, model_path=None, framework='pytorch')***

 **Parameters:**

@ -64,9 +64,10 @@ Create the operator via the following factory method

 A dictionary of model parameters. If None, it will use default parameters to create model.

-*checkpoint_path: str*
+*model_path: str*

-The path to model weights. If None, it will load default model weights.
+The path to the model file. If None, the default model weights are loaded.
+When the path ends with '.onnx', the operator runs inference with ONNX Runtime.

 *framework: str*

@ -79,6 +80,8 @@ Default value is "pytorch" since the model is implemented in Pytorch.

 An audio embedding operator generates vectors in numpy.ndarray given towhee audio frames.

+***\_\_call\_\_(data)***
+
 **Parameters:**

 *data: List[towhee.types.audio_frame.AudioFrame]*
@ -93,3 +96,18 @@ The audio input should be at least 1s.

 Audio embeddings in shape (num_clips, 128).
 Each embedding stands for features of an audio clip with length of 1s.
+
+
+***save_model(format='pytorch', path='default')***
+
+**Parameters:**
+
+*format: str*
+
+Format used to save model, defaults to 'pytorch'.
+Accepted formats: 'pytorch', 'torchscript', 'onnx', 'tensorrt' (in progress)
+
+*path: str*
+
+Path to save the model to, defaults to 'default'.
+With the default, the model is saved under 'saved/<format>' in the operator's own directory (inside the operator cache).
--- a/nn_fingerprint.py
+++ b/nn_fingerprint.py
@ -22,6 +22,7 @@ from typing import List
 import torch
 import numpy
 import resampy
+import onnxruntime

 from towhee.operator.base import NNOperator
 from towhee import register
@ -43,8 +44,9 @@ class NNFingerprint(NNOperator):

    def __init__(self,
                 params: dict = None,
-                 checkpoint_path: str = None,
-                 framework: str = 'pytorch'):
+                 model_path: str = None,
+                 framework: str = 'pytorch',
+                 ):
        super().__init__(framework=framework)
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
        if params is None:
@ -52,29 +54,31 @@ class NNFingerprint(NNOperator):
        else:
            self.params = params

+        log.info('Loading model...')
+        if model_path is None:
+            path = str(Path(__file__).parent)
+            model_path = os.path.join(path, 'saved_model', 'pfann_fma_m.pt')
+        if model_path.endswith('.onnx'):
+            log.warning('Using onnx.')
+            self.model = onnxruntime.InferenceSession(model_path)
+        else:
+            state_dict = torch.load(model_path, map_location=self.device)
+            if isinstance(state_dict, torch.nn.Module):
+                self.model = state_dict
+            else:
                dim = self.params['dim']
                h = self.params['h']
                u = self.params['u']
                f_bin = self.params['n_mels']
                n_seg = int(self.params['segment_size'] * self.params['sample_rate'])
                t = (n_seg + self.params['hop_length'] - 1) // self.params['hop_length']
-
-        log.info('Creating model...')
+                log.info('Creating model with parameters...')
                self.model = NNFp(
                    dim=dim, h=h, u=u,
                    in_f=f_bin, in_t=t,
                    fuller=self.params['fuller'],
                    activation=self.params['activation']
                ).to(self.device)
-
-        log.info('Loading weights...')
-        if checkpoint_path is None:
-            path = str(Path(__file__).parent)
-            checkpoint_path = os.path.join(path, 'saved_model', 'pfann_fma_m.pt')
-        state_dict = torch.load(checkpoint_path, map_location=self.device)
-        if isinstance(state_dict, torch.nn.Module):
-            self.model = state_dict
-        else:
                self.model.load_state_dict(state_dict)
            self.model.eval()
        log.info('Model is loaded.')
@ -82,8 +86,15 @@ class NNFingerprint(NNOperator):
    def __call__(self, data: List[AudioFrame]) -> numpy.ndarray:
        audio_tensors = self.preprocess(data).to(self.device)
        # print(audio_tensors.shape)
+        if isinstance(self.model, onnxruntime.InferenceSession):
+            audio_numpy = audio_tensors.detach().cpu().numpy() if audio_tensors.requires_grad \
+                else audio_tensors.cpu().numpy()
+            ort_inputs = {self.model.get_inputs()[0].name: audio_numpy}
+            outs = self.model.run(None, ort_inputs)[0]
+        else:
            features = self.model(audio_tensors)
-        return features.detach().cpu().numpy()
+            outs = features.detach().cpu().numpy()
+        return outs

    def preprocess(self, frames: List[AudioFrame]):
        sr = frames[0].sample_rate
@ -137,7 +148,7 @@ class NNFingerprint(NNOperator):
            log.warning('Converting float dtype from %s to %s.', wav.dtype, dtype)
            return wav.astype(dtype)

-    def save_model(self, format: str='pytorch', path: str = 'default'):
+    def save_model(self, format: str = 'pytorch', path: str = 'default'):
        if path == 'default':
            path = str(Path(__file__).parent)
            path = os.path.join(path, 'saved', format)
@ -156,6 +167,9 @@ class NNFingerprint(NNOperator):
                try:
                    jit_model = torch.jit.script(self.model)
                except Exception:
+                    log.warning(
+                        'Failed to directly export as torchscript.'
+                        'Using dummy input in shape of %s now.', dummy_input.shape)
                    jit_model = torch.jit.trace(self.model, dummy_input, strict=False)
                torch.jit.save(jit_model, path)
            except Exception as e:
@ -181,3 +195,9 @@ class NNFingerprint(NNOperator):
            # todo: elif format == 'tensorrt':
        else:
            log.error(f'Unsupported format "{format}".')
+
+    def input_schema(self):
+        return [(AudioFrame, (1024,))]
+
+    def output_schema(self):
+        return [(numpy.ndarray, (-1, self.params['dim']))]
--- a/performance.md
+++ b/performance.md
@ -0,0 +1,46 @@
+# Inference Performance
+
+## Test Scripts
+
+```python
+from towhee import ops
+import time
+
+decode = ops.audio_decode.ffmpeg()
+audio = [x[0] for x in decode('path/to/test.wav')]
+
+op = ops.audio_embedding.nnfp()
+# op = ops.audio_embedding.nnfp(
+#		    model_path='path/to/torchscript/model.pt')
+# op = ops.audio_embedding.nnfp(
+#		    model_path='path/to/model.onnx')
+
+
+start = time.time()
+for _ in range(100):
+	embs = op(audio)
+	assert(embs.shape == (10, 128))
+end = time.time()
+
+print((end-start) / 100)
+```
+
+## Results
+
+- Device: MacOS, 2.3 GHz Quad-Core Intel Core i7, 8 CPUs
+- Input: 10s audio, averaged over 100 runs
+
+| inference method | mem usage | avg time |
+| -- | -- | -- |
+| pytorch | 0.3G | 0.451s |
+| torchscript | 0.3G | 0.470s |
+| onnx | 0.3G | 0.378s |
+
+- Device: MacOS, 2.3 GHz Quad-Core Intel Core i7, 8 CPUs
+- Input: 188s audio, averaged over 100 runs
+
+| inference method | mem usage | avg time |
+| -- | -- | -- |
+| pytorch | 2.6G | 8.162s |
+| torchscript | 2.8G | 7.507s |
+| onnx | 1.7G | 6.769s |
--- a/test.py
+++ b/test.py
@ -1,4 +1,7 @@
 from towhee import ops
+
+import warnings
+
 import torch
 import numpy
 import onnx
@ -17,11 +20,19 @@ op = ops.audio_embedding.nnfp()
 out0 = op.get_op().model(audio)
 # print(out0)

+# Test Pytorch
 op.get_op().save_model(format='pytorch')
 op = ops.audio_embedding.nnfp(checkpoint_path='./saved/pytorch/nnfp.pt')
 out1 = op.get_op().model(audio)
-assert((out0 == out1).all())
+assert ((out0 == out1).all())
+
+# Test Torchscript
+op.get_op().save_model(format='torchscript')
+op = ops.audio_embedding.nnfp(checkpoint_path='./saved/torchscript/nnfp.pt')
+out2 = op.get_op().model(audio)
+assert ((out0 == out2).all())

+# Test ONNX
 op.get_op().save_model(format='onnx')
 op = ops.audio_embedding.nnfp()
 onnx_model = onnx.load('./saved/onnx/nnfp.onnx')
@ -30,6 +41,6 @@ onnx.checker.check_model(onnx_model)
 ort_session = onnxruntime.InferenceSession('./saved/onnx/nnfp.onnx')
 ort_inputs = {ort_session.get_inputs()[0].name: to_numpy(audio)}
 ort_outs = ort_session.run(None, ort_inputs)
-out2 = ort_outs[0]
-# print(out2)
-assert(numpy.allclose(to_numpy(out0), out2, rtol=1e-03, atol=1e-05))
+out3 = ort_outs[0]
+# print(out3)
+assert (numpy.allclose(to_numpy(out0), out3, rtol=1e-03, atol=1e-05))