From 24e4837ac7fca888e49b30465b93d82ba6ade38c Mon Sep 17 00:00:00 2001
From: Jael Gu
Date: Tue, 31 Jan 2023 10:26:00 +0800
Subject: [PATCH] Add benchmark with qps test

Signed-off-by: Jael Gu
---
 benchmark/README.md                  |  54 +++++++++--
 benchmark/{run.py => performance.py} |   0
 benchmark/{run.sh => performance.sh} |   4 +-
 benchmark/qps_test.py                | 131 +++++++++++++++++++++++++++
 4 files changed, 179 insertions(+), 10 deletions(-)
 rename benchmark/{run.py => performance.py} (100%)
 rename benchmark/{run.sh => performance.sh} (93%)
 create mode 100644 benchmark/qps_test.py

diff --git a/benchmark/README.md b/benchmark/README.md
index ac4a3d1..7f0e472 100644
--- a/benchmark/README.md
+++ b/benchmark/README.md
@@ -1,10 +1,13 @@
-# Evaluate with Similarity Search
+# Evaluation Method
 
-## Introduction
+- Model performance (WIP)
+- Pipeline speed
 
-Build a image classification system based on similarity search across embeddings.
+## Model Performance
 
-The core ideas in `run.py`:
+Build an image classification system based on similarity search across embeddings.
+
+The core ideas in `performance.py` (sketched after the usage example below):
 1. create a new Milvus collection each time
 2. extract embeddings using a pretrained model with model name specified by `--model`
 3. specify inference method with `--format` in value of `pytorch` or `onnx`
@@ -14,9 +17,44 @@ The core ideas in `run.py`:
 2. compare final prediction with ground truth
 3. calculate percent of correct predictions over all queries
 
-## Example Usage
+### Example Usage
+
+```bash
+# Option 1:
+python performance.py --model MODEL_NAME --format pytorch
+python performance.py --model MODEL_NAME --format onnx
+
+# Option 2:
+chmod +x performance.sh
+./performance.sh
+```
+
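+A minimal sketch of the evaluation idea, with the Milvus search replaced by a
+brute-force numpy search purely for illustration (the function and variable
+names are hypothetical, and the final prediction is assumed to be a majority
+vote over the topk neighbors):
+
+```python
+import numpy as np
+
+def knn_accuracy(train_vecs, train_labels, query_vecs, query_labels, topk=10):
+    # L2-normalize embeddings so inner product equals cosine similarity.
+    train = train_vecs / np.linalg.norm(train_vecs, axis=1, keepdims=True)
+    query = query_vecs / np.linalg.norm(query_vecs, axis=1, keepdims=True)
+    correct = 0
+    for vec, truth in zip(query, query_labels):
+        topk_ids = np.argsort(vec @ train.T)[::-1][:topk]    # topk nearest neighbors
+        pred = np.bincount(train_labels[topk_ids]).argmax()  # majority vote (integer labels)
+        correct += int(pred == truth)
+    return correct / len(query_labels)  # percent of correct predictions
+```
+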
+## Pipeline Speed
+
+A QPS test of the embedding pipeline, which includes the steps below:
+ 1. load image from path (pipe.input)
+ 2. decode image into arrays (ops.image_decode)
+ 3. generate image embedding (preprocess, model inference, post-process)
+
+Three methods with different pipeline speeds are compared:
+- Towhee pipe (the regular method; see the snippet below)
+- ONNX Runtime (local model inference with the exported onnx model)
+- Triton Inference Server with onnx enabled (requests sent as a client)
+
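+For reference, the pipe timed by the regular method is built as follows,
+mirroring the definition in `qps_test.py` (`'resnet50'` and `'cpu'` stand in
+for the `--model` and `--device` arguments):
+
+```python
+from towhee.dc2 import pipe, ops
+
+p = (
+    pipe.input('url')                                # 1. load image from path
+    .map('url', 'img', ops.image_decode.cv2_rgb())   # 2. decode image into arrays
+    .map('img', 'vec', ops.image_embedding.timm(model_name='resnet50', device='cpu'))
+    .output('vec')                                   # 3. image embedding out
+)
+```
+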
+### Example Usage
+
+Please note that `qps_test.py` hard-codes:
+- `localhost:8000`: the address used to connect the Triton client
+- `../towhee.jpeg`: the test image path
 
 ```bash
-python evaluate.py --model MODEL_NAME --format pytorch
-python evaluate.py --model MODEL_NAME --format onnx
-```
\ No newline at end of file
+python qps_test.py --model 'resnet50' --pipe --onnx --triton --num 100 --device cuda:0
+```
+
+**Args:**
+- `--model`: mandatory, string, model name
+- `--pipe`: optional, on/off flag to enable the qps test for the Towhee pipe
+- `--onnx`: optional, on/off flag to enable the qps test for onnx
+- `--triton`: optional, on/off flag to enable the qps test for triton (please make sure that the Triton server is up at `localhost:8000`)
+- `--atol`: optional, float, defaults to 1e-3, absolute tolerance used in the accuracy checks
+- `--num`: optional, integer, defaults to 100, batch size in each loop (10 loops in total)
+- `--device`: optional, string, defaults to 'cpu'
\ No newline at end of file
diff --git a/benchmark/run.py b/benchmark/performance.py
similarity index 100%
rename from benchmark/run.py
rename to benchmark/performance.py
diff --git a/benchmark/run.sh b/benchmark/performance.sh
similarity index 93%
rename from benchmark/run.sh
rename to benchmark/performance.sh
index bdf4b65..6d65f14 100755
--- a/benchmark/run.sh
+++ b/benchmark/performance.sh
@@ -3,6 +3,6 @@
 for name in beit_base_patch16_224 beit_base_patch16_224_in22k beit_base_patch16_384 beit_large_patch16_224 beit_large_patch16_224_in22k beit_large_patch16_384 beit_large_patch16_512 beitv2_base_patch16_224 beitv2_base_patch16_224_in22k beitv2_large_patch16_224 beitv2_large_patch16_224_in22k cait_m36_384 cait_m48_448 cait_s24_224 cait_xs24_384 convnext_large_in22ft1k convnext_small_384_in22ft1k convnext_tiny_in22k convnext_xlarge_in22ft1k convnext_xlarge_in22k deit3_medium_patch16_224 deit3_small_patch16_384 deit_base_distilled_patch16_384 mixer_b16_224 mixer_b16_224_in21k mixer_b16_224_miil mixer_b16_224_miil_in21k mixer_l16_224 mixer_l16_224_in21k mobilevitv2_175_384_in22ft1k mobilevitv2_200_384_in22ft1k repvgg_b2g4 res2net50_26w_8s resmlp_big_24_distilled_224 seresnextaa101d_32x8d vit_base_patch16_224_in21k vit_base_patch16_384 vit_base_patch8_224 vit_base_patch8_224_in21k vit_giant_patch14_224_clip_laion2b vit_large_patch16_224 vit_large_patch16_224_in21k vit_large_patch16_384 vit_large_patch32_384 vit_large_r50_s32_224 vit_large_r50_s32_384 vit_relpos_base_patch16_clsgap_224 vit_relpos_medium_patch16_224 vit_relpos_small_patch16_224 vit_small_patch32_224 vit_small_patch32_224_in21k vit_small_r26_s32_384 xcit_large_24_p8_224 xcit_large_24_p8_224_dist xcit_large_24_p8_384_dist xcit_nano_12_p16_384_dist xcit_nano_12_p8_224 xcit_nano_12_p8_224_dist xcit_nano_12_p8_384_dist xcit_small_24_p8_224 xcit_tiny_12_p8_224 xcit_tiny_12_p8_384_dist xcit_tiny_24_p8_224 xcit_tiny_24_p8_384_dist
 do
 	echo ***${name}***
-	python run.py --model ${name} --format pytorch
-	python run.py --model ${name} --format onnx
+	python performance.py --model ${name} --format pytorch
+	python performance.py --model ${name} --format onnx
 done
diff --git a/benchmark/qps_test.py b/benchmark/qps_test.py
new file mode 100644
index 0000000..57a6c3e
--- /dev/null
+++ b/benchmark/qps_test.py
@@ -0,0 +1,131 @@
+import towhee
+from towhee.dc2 import pipe, ops
+from towhee import triton_client
+
+import onnxruntime
+import numpy
+import torch
+from statistics import mean
+
+import time
+import argparse
+
+import os
+import warnings
+from transformers import logging as t_logging
+
+# Silence TensorFlow and transformers logging so the qps output stays readable.
+os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
+warnings.filterwarnings("ignore")
+t_logging.set_verbosity_error()
+
+parser = argparse.ArgumentParser()
+parser.add_argument('--model', required=True, type=str)
+parser.add_argument('--pipe', action='store_true')
+parser.add_argument('--triton', action='store_true')
+parser.add_argument('--onnx', action='store_true')
+parser.add_argument('--atol', type=float, default=1e-3)
+parser.add_argument('--num', type=int, default=100)
+parser.add_argument('--device', type=str, default='cpu')
+args = parser.parse_args()
+
+model_name = args.model
+# Example model names:
+# model_name = 'resnet50'
+# model_name = 'vgg16'
+# model_name = 'deit3_base_patch16_224'
+# model_name = 'deit_tiny_patch16_224'
+# model_name = 'deit_base_distilled_patch16_224'
+# model_name = 'convnext_base'
+# model_name = 'vit_base_patch16_224'
+# model_name = 'tf_efficientnet_b5'
+
+
+# Regular method: a Towhee pipe that decodes the image and embeds it with a timm model.
+p = (
+    pipe.input('url')
+    .map('url', 'img', ops.image_decode.cv2_rgb())
+    .map('img', 'vec', ops.image_embedding.timm(model_name=model_name, device=args.device))
+    .output('vec')
+)
+
+data = '../towhee.jpeg'
+# Warm up and sanity-check the pipe with a single image.
+out1 = p(data).get()[0]
+print('Pipe: OK')
+
+if args.num and args.pipe:
+    # 10 timed loops; each loop pushes a batch of `--num` images through the pipe.
+    qps = []
+    for _ in range(10):
+        start = time.time()
+        p.batch([data] * args.num)
+        # for _ in range(args.num):
+        #     p(data)
+        end = time.time()
+        q = args.num / (end - start)
+        qps.append(q)
+    print('Pipe qps:', mean(qps))
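+
+# Triton method: send requests as a client to a Triton Inference Server that is
+# assumed to be already serving this model at localhost:8000, check its output
+# against the local pipe result, then measure qps with batched requests.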
+if args.triton:
+    client = triton_client.Client(url='localhost:8000')
+    out2 = client(data)[0][0][0]
+    print('Triton: OK')
+
+    if numpy.allclose(out1, out2, atol=args.atol):
+        print('Check accuracy: OK')
+    else:
+        max_diff = numpy.abs(out1 - out2).max()
+        min_diff = numpy.abs(out1 - out2).min()
+        mean_diff = numpy.abs(out1 - out2).mean()
+        print(f'Check accuracy: outputs differ by more than atol={args.atol}.')
+        print(f'Maximum absolute difference is {max_diff}.')
+        print(f'Minimum absolute difference is {min_diff}.')
+        print(f'Mean absolute difference is {mean_diff}.')
+
+    if args.num:
+        qps = []
+        for _ in range(10):
+            start = time.time()
+            client.batch([data] * args.num)
+            end = time.time()
+            q = args.num / (end - start)
+            qps.append(q)
+        print('Triton qps:', mean(qps))
+
+if args.onnx:
+    # Onnx method: export the timm op to ONNX, then run preprocess and
+    # post-process locally around an onnxruntime session.
+    op = ops.image_embedding.timm(model_name=model_name, device='cpu').get_op()
+    decoder = ops.image_decode.cv2_rgb().get_op()
+    # Always re-export so test.onnx matches the requested model.
+    op.save_model('onnx', 'test.onnx')
+    sess = onnxruntime.InferenceSession('test.onnx',
+                                        providers=['CUDAExecutionProvider'])
+    inputs = decoder(data)
+    inputs = op.convert_img(inputs)
+    inputs = op.tfms(inputs).unsqueeze(0)
+    out3 = sess.run(None, input_feed={'input_0': inputs.cpu().detach().numpy()})[0]
+    op.device = 'cuda' if args.device != 'cpu' else 'cpu'
+    out3 = op.post_proc(torch.from_numpy(out3)).cpu().detach().numpy()
+    print('Onnx: OK')
+    if numpy.allclose(out1, out3, atol=args.atol):
+        print('Check accuracy: OK')
+    else:
+        max_diff = numpy.abs(out1 - out3).max()
+        min_diff = numpy.abs(out1 - out3).min()
+        mean_diff = numpy.abs(out1 - out3).mean()
+        print(f'Check accuracy: outputs differ by more than atol={args.atol}.')
+        print(f'Maximum absolute difference is {max_diff}.')
+        print(f'Minimum absolute difference is {min_diff}.')
+        print(f'Mean absolute difference is {mean_diff}.')
+
+    if args.num:
+        # Time the full local pipeline (decode, preprocess, inference, post-process)
+        # one image at a time, 10 loops of `--num` images each.
+        qps = []
+        for _ in range(10):
+            start = time.time()
+            for _ in range(args.num):
+                inputs = decoder(data)
+                inputs = op.convert_img(inputs)
+                inputs = op.tfms(inputs).unsqueeze(0)
+                outs = sess.run(None, input_feed={'input_0': inputs.cpu().detach().numpy()})[0]
+                outs = op.post_proc(torch.from_numpy(outs))
+            end = time.time()
+            q = args.num / (end - start)
+            qps.append(q)
+        print('Onnx qps:', mean(qps))