logo
Browse Source

Update test

Signed-off-by: Jael Gu <mengjia.gu@zilliz.com>
main
Jael Gu 2 years ago
parent
commit
9a81b6d19d
  1. 24
      README.md
  2. 48
      nn_fingerprint.py
  3. 46
      performance.md
  4. 17
      test.py

24
README.md

@ -56,7 +56,7 @@ import towhee
Create the operator via the following factory method
***audio_embedding.nnfp(params=None, checkpoint_path=None, framework='pytorch')***
***audio_embedding.nnfp(params=None, model_path=None, framework='pytorch')***
**Parameters:**
@ -64,9 +64,10 @@ Create the operator via the following factory method
A dictionary of model parameters. If None, default parameters are used to create the model.
*checkpoint_path: str*
*model_path: str*
The path to model weights. If None, it will load default model weights.
The path to the model file. If None, the default model weights are loaded.
When the path ends with '.onnx', the operator runs inference with ONNX Runtime.
*framework: str*
@ -79,6 +80,8 @@ Default value is "pytorch" since the model is implemented in Pytorch.
An audio embedding operator that generates vectors as numpy.ndarray from towhee audio frames.
***\_\_call\_\_(data)***
**Parameters:**
*data: List[towhee.types.audio_frame.AudioFrame]*
@ -93,3 +96,18 @@ The audio input should be at least 1s.
Audio embeddings in shape (num_clips, 128).
Each embedding stands for features of an audio clip with length of 1s.
***save_model(format='pytorch', path='default')***
**Parameters:**
*format: str*
Format used to save model, defaults to 'pytorch'.
Accepted formats: 'pytorch', 'torchscript', 'onnx', 'tensorrt' (in progress)
*path: str*
Path to save model, defaults to 'default'.
By default, the model is saved under a 'saved' folder in the same directory as the operator cache.

48
nn_fingerprint.py

@ -22,6 +22,7 @@ from typing import List
import torch
import numpy
import resampy
import onnxruntime
from towhee.operator.base import NNOperator
from towhee import register
@ -43,8 +44,9 @@ class NNFingerprint(NNOperator):
def __init__(self,
params: dict = None,
checkpoint_path: str = None,
framework: str = 'pytorch'):
model_path: str = None,
framework: str = 'pytorch',
):
super().__init__(framework=framework)
self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
if params is None:
@ -52,29 +54,31 @@ class NNFingerprint(NNOperator):
else:
self.params = params
log.info('Loading model...')
if model_path is None:
path = str(Path(__file__).parent)
model_path = os.path.join(path, 'saved_model', 'pfann_fma_m.pt')
if model_path.endswith('.onnx'):
log.warning('Using onnx.')
self.model = onnxruntime.InferenceSession(model_path)
else:
state_dict = torch.load(model_path, map_location=self.device)
if isinstance(state_dict, torch.nn.Module):
self.model = state_dict
else:
dim = self.params['dim']
h = self.params['h']
u = self.params['u']
f_bin = self.params['n_mels']
n_seg = int(self.params['segment_size'] * self.params['sample_rate'])
t = (n_seg + self.params['hop_length'] - 1) // self.params['hop_length']
log.info('Creating model...')
log.info('Creating model with parameters...')
self.model = NNFp(
dim=dim, h=h, u=u,
in_f=f_bin, in_t=t,
fuller=self.params['fuller'],
activation=self.params['activation']
).to(self.device)
log.info('Loading weights...')
if checkpoint_path is None:
path = str(Path(__file__).parent)
checkpoint_path = os.path.join(path, 'saved_model', 'pfann_fma_m.pt')
state_dict = torch.load(checkpoint_path, map_location=self.device)
if isinstance(state_dict, torch.nn.Module):
self.model = state_dict
else:
self.model.load_state_dict(state_dict)
self.model.eval()
log.info('Model is loaded.')
@ -82,8 +86,15 @@ class NNFingerprint(NNOperator):
def __call__(self, data: List[AudioFrame]) -> numpy.ndarray:
audio_tensors = self.preprocess(data).to(self.device)
# print(audio_tensors.shape)
if isinstance(self.model, onnxruntime.InferenceSession):
audio_numpy = audio_tensors.detach().cpu().numpy() if audio_tensors.requires_grad \
else audio_tensors.cpu().numpy()
ort_inputs = {self.model.get_inputs()[0].name: audio_numpy}
outs = self.model.run(None, ort_inputs)[0]
else:
features = self.model(audio_tensors)
return features.detach().cpu().numpy()
outs = features.detach().cpu().numpy()
return outs
def preprocess(self, frames: List[AudioFrame]):
sr = frames[0].sample_rate
@ -156,6 +167,9 @@ class NNFingerprint(NNOperator):
try:
jit_model = torch.jit.script(self.model)
except Exception:
log.warning(
'Failed to directly export as torchscript.'
'Using dummy input in shape of %s now.', dummy_input.shape)
jit_model = torch.jit.trace(self.model, dummy_input, strict=False)
torch.jit.save(jit_model, path)
except Exception as e:
@ -181,3 +195,9 @@ class NNFingerprint(NNOperator):
# todo: elif format == 'tensorrt':
else:
log.error(f'Unsupported format "{format}".')
# Describe the operator input: a one-element list pairing the AudioFrame type
# with the shape tuple (1024,).
# NOTE(review): 1024 is presumably the expected per-frame length — confirm.
def input_schema(self):
return [(AudioFrame, (1024,))]
# Describe the operator output: a one-element list pairing numpy.ndarray with
# the shape (-1, self.params['dim']).
# NOTE(review): -1 presumably stands for a variable number of rows (one
# embedding per audio clip) — confirm.
def output_schema(self):
return [(numpy.ndarray, (-1, self.params['dim']))]

46
performance.md

@ -0,0 +1,46 @@
# Inference Performance
## Test Scripts
```python
from towhee import ops
import time
decode = ops.audio_decode.ffmpeg()
audio = [x[0] for x in decode('path/to/test.wav')]
op = ops.audio_embedding.nnfp()
# op = ops.audio_embedding.nnfp(
# model_path='path/to/torchscript/model.pt')
# op = ops.audio_embedding.nnfp(
# model_path='path/to/model.onnx')
start = time.time()
for _ in range(100):
embs = op(audio)
assert(embs.shape == (10, 128))
end = time.time()
print((end-start) / 100)
```
## Results
- Device: macOS, 2.3 GHz Quad-Core Intel Core i7, 8 CPUs
- Input: 10s audio, looped 100 times
| inference method | mem usage | avg time |
| -- | -- | -- |
| pytorch | 0.3G | 0.451s |
| torchscript | 0.3G | 0.470s |
| onnx | 0.3G | 0.378s |
- Device: macOS, 2.3 GHz Quad-Core Intel Core i7, 8 CPUs
- Input: 188s audio, looped 100 times
| inference method | mem usage | avg time |
| -- | -- | -- |
| pytorch | 2.6G | 8.162s |
| torchscript | 2.8G | 7.507s |
| onnx | 1.7G | 6.769s |

17
test.py

@ -1,4 +1,7 @@
from towhee import ops
import warnings
import torch
import numpy
import onnx
@ -17,11 +20,19 @@ op = ops.audio_embedding.nnfp()
# Reference output from the default operator; the outputs of the re-loaded
# models below are compared against it.
out0 = op.get_op().model(audio)
# print(out0)

# Test Pytorch: save the model in 'pytorch' format, reload it through the
# operator and require an output identical to the reference.
# The operator keyword for the model file is `model_path`; the former
# `checkpoint_path` keyword is no longer part of the constructor signature.
op.get_op().save_model(format='pytorch')
op = ops.audio_embedding.nnfp(model_path='./saved/pytorch/nnfp.pt')
out1 = op.get_op().model(audio)
assert ((out0 == out1).all())

# Test Torchscript: same round trip with the 'torchscript' export.
op.get_op().save_model(format='torchscript')
op = ops.audio_embedding.nnfp(model_path='./saved/torchscript/nnfp.pt')
out2 = op.get_op().model(audio)
assert ((out0 == out2).all())

# Test ONNX: export to ONNX, switch back to the default operator, and load
# the exported graph for the checks that follow.
op.get_op().save_model(format='onnx')
op = ops.audio_embedding.nnfp()
onnx_model = onnx.load('./saved/onnx/nnfp.onnx')
@ -30,6 +41,6 @@ onnx.checker.check_model(onnx_model)
ort_session = onnxruntime.InferenceSession('./saved/onnx/nnfp.onnx')
ort_inputs = {ort_session.get_inputs()[0].name: to_numpy(audio)}
ort_outs = ort_session.run(None, ort_inputs)
out2 = ort_outs[0]
# print(out2)
assert(numpy.allclose(to_numpy(out0), out2, rtol=1e-03, atol=1e-05))
out3 = ort_outs[0]
# print(out3)
assert (numpy.allclose(to_numpy(out0), out3, rtol=1e-03, atol=1e-05))

Loading…
Cancel
Save