diff --git a/benchmark/client_test.py b/benchmark/client_test.py
new file mode 100644
index 0000000..01e3539
--- /dev/null
+++ b/benchmark/client_test.py
@@ -0,0 +1,21 @@
+
+from towhee import triton_client
+import sys
+import time
+
+num = int(sys.argv[-1])
+data = '../towhee.jpeg'
+client = triton_client.Client('localhost:8000')
+
+# warm up
+client.batch([data])
+print('client: ok')
+
+time.sleep(5)
+
+print('test...')
+start = time.time()
+client.batch([data] * num, batch_size=8)
+end = time.time()
+print(f'duration: {end - start}')
+print(f'qps: {num / (end - start)}')
diff --git a/benchmark/qps_test.py b/benchmark/qps_test.py
index 367a5df..17c143b 100644
--- a/benchmark/qps_test.py
+++ b/benchmark/qps_test.py
@@ -30,6 +30,7 @@
 parser.add_argument('--num', type=int, default=100)
 parser.add_argument('--device', type=str, default='cpu')
 args = parser.parse_args()
+device_id = 0 if args.device in ['cpu', 'cuda'] else int(args.device[-1])
 model_name = args.model
 # model_name = 'resnet50'
 # model_name = 'vgg16'
@@ -96,7 +97,8 @@ if args.onnx:
     # if not os.path.exists('test.onnx'):
     op.save_model('onnx', 'test.onnx')
     sess = onnxruntime.InferenceSession('test.onnx',
-                                        providers=['CUDAExecutionProvider'])
+                                        providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
+    sess.set_providers(['CUDAExecutionProvider'], [{'device_id': device_id}])
     inputs = decoder(data)
     inputs = op.convert_img(inputs)
     inputs = op.tfms(inputs).unsqueeze(0)