
Update test

Signed-off-by: Jael Gu <mengjia.gu@zilliz.com>
Jael Gu, 2 years ago · branch: main · commit 9a81b6d19d
4 changed files:

1. README.md (24 lines changed)
2. nn_fingerprint.py (76 lines changed)
3. performance.md (46 lines added)
4. test.py (19 lines changed)

README.md (24 lines changed)

@@ -56,7 +56,7 @@ import towhee
 Create the operator via the following factory method

-***audio_embedding.nnfp(params=None, checkpoint_path=None, framework='pytorch')***
+***audio_embedding.nnfp(params=None, model_path=None, framework='pytorch')***

 **Parameters:**
@@ -64,9 +64,10 @@ Create the operator via the following factory method
 A dictionary of model parameters. If None, it will use default parameters to create the model.

-*checkpoint_path: str*
+*model_path: str*

-The path to model weights. If None, it will load default model weights.
+The path to the model. If None, it will load default model weights.
+When the path ends with '.onnx', the operator will use onnx inference.

 *framework: str*
@@ -79,6 +80,8 @@ Default value is "pytorch" since the model is implemented in Pytorch.
 An audio embedding operator generates vectors in numpy.ndarray given towhee audio frames.

+***\_\_call\_\_(data)***
+
 **Parameters:**

 *data: List[towhee.types.audio_frame.AudioFrame]*
@@ -93,3 +96,18 @@ The audio input should be at least 1s.
 Audio embeddings in shape (num_clips, 128).
 Each embedding stands for the features of an audio clip with a length of 1s.
+
+***save_model(format='pytorch', path='default')***
+
+**Parameters:**
+
+*format: str*
+
+Format used to save the model, defaults to 'pytorch'.
+Accepted formats: 'pytorch', 'torchscript', 'onnx', 'tensorrt' (in progress).
+
+*path: str*
+
+Path to save the model, defaults to 'default'.
+The default path is under 'saved' in the operator's cache directory.
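Taken together, the documented factory method and save_model can be exercised as follows. This is a minimal sketch based on the signatures above and the usage in test.py below; the '.onnx' path is a placeholder:

```python
from towhee import ops

# Create the operator with the default pytorch weights.
op = ops.audio_embedding.nnfp()

# Or point it at an exported model; a path ending in '.onnx'
# switches the operator to onnx inference, per the docs above.
# op = ops.audio_embedding.nnfp(model_path='path/to/model.onnx')

# Export the underlying model; with path='default' this writes
# under 'saved/onnx' in the operator's cache directory.
op.get_op().save_model(format='onnx')
```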

nn_fingerprint.py (76 lines changed)

@@ -22,6 +22,7 @@ from typing import List
 import torch
 import numpy
 import resampy
+import onnxruntime

 from towhee.operator.base import NNOperator
 from towhee import register
@@ -43,8 +44,9 @@ class NNFingerprint(NNOperator):
     def __init__(self,
                  params: dict = None,
-                 checkpoint_path: str = None,
-                 framework: str = 'pytorch'):
+                 model_path: str = None,
+                 framework: str = 'pytorch',
+                 ):
         super().__init__(framework=framework)
         self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
         if params is None:
@@ -52,38 +54,47 @@ class NNFingerprint(NNOperator):
         else:
             self.params = params
-        dim = self.params['dim']
-        h = self.params['h']
-        u = self.params['u']
-        f_bin = self.params['n_mels']
-        n_seg = int(self.params['segment_size'] * self.params['sample_rate'])
-        t = (n_seg + self.params['hop_length'] - 1) // self.params['hop_length']
-        log.info('Creating model...')
-        self.model = NNFp(
-            dim=dim, h=h, u=u,
-            in_f=f_bin, in_t=t,
-            fuller=self.params['fuller'],
-            activation=self.params['activation']
-        ).to(self.device)
-        log.info('Loading weights...')
-        if checkpoint_path is None:
+        log.info('Loading model...')
+        if model_path is None:
             path = str(Path(__file__).parent)
-            checkpoint_path = os.path.join(path, 'saved_model', 'pfann_fma_m.pt')
-        state_dict = torch.load(checkpoint_path, map_location=self.device)
-        if isinstance(state_dict, torch.nn.Module):
-            self.model = state_dict
+            model_path = os.path.join(path, 'saved_model', 'pfann_fma_m.pt')
+        if model_path.endswith('.onnx'):
+            log.warning('Using onnx.')
+            self.model = onnxruntime.InferenceSession(model_path)
         else:
-            self.model.load_state_dict(state_dict)
-        self.model.eval()
+            state_dict = torch.load(model_path, map_location=self.device)
+            if isinstance(state_dict, torch.nn.Module):
+                self.model = state_dict
+            else:
+                dim = self.params['dim']
+                h = self.params['h']
+                u = self.params['u']
+                f_bin = self.params['n_mels']
+                n_seg = int(self.params['segment_size'] * self.params['sample_rate'])
+                t = (n_seg + self.params['hop_length'] - 1) // self.params['hop_length']
+                log.info('Creating model with parameters...')
+                self.model = NNFp(
+                    dim=dim, h=h, u=u,
+                    in_f=f_bin, in_t=t,
+                    fuller=self.params['fuller'],
+                    activation=self.params['activation']
+                ).to(self.device)
+                self.model.load_state_dict(state_dict)
+            self.model.eval()
         log.info('Model is loaded.')

     def __call__(self, data: List[AudioFrame]) -> numpy.ndarray:
         audio_tensors = self.preprocess(data).to(self.device)
         # print(audio_tensors.shape)
-        features = self.model(audio_tensors)
-        return features.detach().cpu().numpy()
+        if isinstance(self.model, onnxruntime.InferenceSession):
+            audio_numpy = audio_tensors.detach().cpu().numpy() if audio_tensors.requires_grad \
+                else audio_tensors.cpu().numpy()
+            ort_inputs = {self.model.get_inputs()[0].name: audio_numpy}
+            outs = self.model.run(None, ort_inputs)[0]
+        else:
+            features = self.model(audio_tensors)
+            outs = features.detach().cpu().numpy()
+        return outs

     def preprocess(self, frames: List[AudioFrame]):
         sr = frames[0].sample_rate
@@ -137,7 +148,7 @@ class NNFingerprint(NNOperator):
             log.warning('Converting float dtype from %s to %s.', wav.dtype, dtype)
             return wav.astype(dtype)

-    def save_model(self, format: str='pytorch', path: str = 'default'):
+    def save_model(self, format: str = 'pytorch', path: str = 'default'):
         if path == 'default':
             path = str(Path(__file__).parent)
             path = os.path.join(path, 'saved', format)
@@ -156,6 +167,9 @@ class NNFingerprint(NNOperator):
             try:
                 jit_model = torch.jit.script(self.model)
             except Exception:
+                log.warning(
+                    'Failed to directly export as torchscript. '
+                    'Using dummy input in shape of %s now.', dummy_input.shape)
                 jit_model = torch.jit.trace(self.model, dummy_input, strict=False)
             torch.jit.save(jit_model, path)
         except Exception as e:
@@ -181,3 +195,9 @@ class NNFingerprint(NNOperator):
         # todo: elif format == 'tensorrt':
         else:
             log.error(f'Unsupported format "{format}".')
+
+    def input_schema(self):
+        return [(AudioFrame, (1024,))]
+
+    def output_schema(self):
+        return [(numpy.ndarray, (-1, self.params['dim']))]
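The onnx branch added to __call__ reduces to plain onnxruntime calls. The sketch below shows that flow in isolation, assuming a model already exported via save_model(format='onnx'); the path and the (1, 8000) input shape are illustrative assumptions, not values fixed by the operator:

```python
import numpy
import onnxruntime

# Load a previously exported model (path is an assumption).
session = onnxruntime.InferenceSession('./saved/onnx/nnfp.onnx')

# A dummy float32 batch standing in for preprocessed audio tensors.
dummy = numpy.random.rand(1, 8000).astype(numpy.float32)

# Feed the first graph input by name and take the first output,
# mirroring the InferenceSession branch in __call__ above.
ort_inputs = {session.get_inputs()[0].name: dummy}
embeddings = session.run(None, ort_inputs)[0]
print(embeddings.shape)  # the README documents (num_clips, 128)
```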

performance.md (46 lines added)

@@ -0,0 +1,46 @@ (new file)

# Inference Performance

## Test Scripts

```python
from towhee import ops
import time

decode = ops.audio_decode.ffmpeg()
audio = [x[0] for x in decode('path/to/test.wav')]

op = ops.audio_embedding.nnfp()
# op = ops.audio_embedding.nnfp(
#     model_path='path/to/torchscript/model.pt')
# op = ops.audio_embedding.nnfp(
#     model_path='path/to/model.onnx')

start = time.time()
for _ in range(100):
    embs = op(audio)
    assert embs.shape == (10, 128)  # for the 10s input
end = time.time()
print((end - start) / 100)
```

## Results

- Device: macOS, 2.3 GHz Quad-Core Intel Core i7, 8 CPUs
- Input: 10s audio, looped 100 times

| inference method | mem usage | avg time |
| -- | -- | -- |
| pytorch | 0.3 GB | 0.451 s |
| torchscript | 0.3 GB | 0.470 s |
| onnx | 0.3 GB | 0.378 s |

- Device: macOS, 2.3 GHz Quad-Core Intel Core i7, 8 CPUs
- Input: 188s audio, looped 100 times

| inference method | mem usage | avg time |
| -- | -- | -- |
| pytorch | 2.6 GB | 8.162 s |
| torchscript | 2.8 GB | 7.507 s |
| onnx | 1.7 GB | 6.769 s |

test.py (19 lines changed)

@@ -1,4 +1,7 @@
 from towhee import ops
+import warnings
 import torch
 import numpy
 import onnx
@@ -17,11 +20,19 @@ op = ops.audio_embedding.nnfp()
 out0 = op.get_op().model(audio)
 # print(out0)

+# Test Pytorch
 op.get_op().save_model(format='pytorch')
 op = ops.audio_embedding.nnfp(model_path='./saved/pytorch/nnfp.pt')
 out1 = op.get_op().model(audio)
-assert((out0 == out1).all())
+assert ((out0 == out1).all())

+# Test Torchscript
+op.get_op().save_model(format='torchscript')
+op = ops.audio_embedding.nnfp(model_path='./saved/torchscript/nnfp.pt')
+out2 = op.get_op().model(audio)
+assert ((out0 == out2).all())
+
+# Test ONNX
 op.get_op().save_model(format='onnx')
 op = ops.audio_embedding.nnfp()
 onnx_model = onnx.load('./saved/onnx/nnfp.onnx')
@@ -30,6 +41,6 @@ onnx.checker.check_model(onnx_model)
 ort_session = onnxruntime.InferenceSession('./saved/onnx/nnfp.onnx')
 ort_inputs = {ort_session.get_inputs()[0].name: to_numpy(audio)}
 ort_outs = ort_session.run(None, ort_inputs)
-out2 = ort_outs[0]
-# print(out2)
-assert(numpy.allclose(to_numpy(out0), out2, rtol=1e-03, atol=1e-05))
+out3 = ort_outs[0]
+# print(out3)
+assert (numpy.allclose(to_numpy(out0), out3, rtol=1e-03, atol=1e-05))
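The script relies on a to_numpy helper defined earlier in test.py, outside the hunks shown here. A plausible definition, mirroring the tensor conversion used in the operator's __call__:

```python
def to_numpy(tensor):
    # Detach first when the tensor tracks gradients,
    # as in the onnx branch of NNFingerprint.__call__.
    return tensor.detach().cpu().numpy() if tensor.requires_grad \
        else tensor.cpu().numpy()
```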
