From 66113926bfbb051c67bd741dbebf9ae16e700c83 Mon Sep 17 00:00:00 2001 From: Kazuki Kyakuno Date: Fri, 27 Jan 2023 16:55:53 +0900 Subject: [PATCH 1/6] Added quantized model of yolox --- object_detection/yolox/yolox.py | 34 ++++++++++++++++++++++++++------- 1 file changed, 27 insertions(+), 7 deletions(-) diff --git a/object_detection/yolox/yolox.py b/object_detection/yolox/yolox.py index 49114728f..94b0bdf4d 100644 --- a/object_detection/yolox/yolox.py +++ b/object_detection/yolox/yolox.py @@ -101,12 +101,24 @@ default=-1, type=int, help='The detection height and height for yolo. (default: auto)' ) +parser.add_argument( + '-qt', '--quantize', + action='store_true', + help='Use quantized model (require ONNX Runtime).' +) + args = update_parser(parser) MODEL_NAME = args.model_name WEIGHT_PATH = MODEL_NAME + ".opt.onnx" MODEL_PATH = MODEL_NAME + ".opt.onnx.prototxt" +if args.quantize: + import onnxruntime + MODEL_NAME = "yolox_tiny" + WEIGHT_PATH = MODEL_NAME + "_quantized.onnx" + MODEL_PATH = None + HEIGHT = MODEL_PARAMS[MODEL_NAME]['input_shape'][0] WIDTH = MODEL_PARAMS[MODEL_NAME]['input_shape'][1] @@ -128,7 +140,11 @@ def compute(): detector.compute(raw_img, args.threshold, args.iou) return None else: - return detector.run(img[None, :, :, :]) + if args.quantize: + input_name = detector.get_inputs()[0].name + return detector.run([], {input_name:img[None, :, :, :]}) + else: + return detector.run(img[None, :, :, :]) # inference logger.info('Start inference...') @@ -242,12 +258,16 @@ def main(): if args.detection_width!=-1 or args.detection_height!=-1: detector.set_input_shape(args.detection_width,args.detection_height) else: - detector = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=env_id) - if args.detection_width!=-1 or args.detection_height!=-1: - global WIDTH,HEIGHT - WIDTH=args.detection_width - HEIGHT=args.detection_height - detector.set_input_shape((1,3,HEIGHT,WIDTH)) + if args.quantize: + detector = onnxruntime.InferenceSession(WEIGHT_PATH) + else: + + detector 
= ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=env_id) + if args.detection_width!=-1 or args.detection_height!=-1: + global WIDTH,HEIGHT + WIDTH=args.detection_width + HEIGHT=args.detection_height + detector.set_input_shape((1,3,HEIGHT,WIDTH)) if args.video is not None: # video mode From 5977177bf3dc15dc10ed1b44d3d037582ef8bfff Mon Sep 17 00:00:00 2001 From: Kazuki Kyakuno Date: Fri, 27 Jan 2023 17:18:16 +0900 Subject: [PATCH 2/6] Add model to list --- object_detection/yolox/README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/object_detection/yolox/README.md b/object_detection/yolox/README.md index 23f9e5a78..b5ef93f9b 100644 --- a/object_detection/yolox/README.md +++ b/object_detection/yolox/README.md @@ -66,4 +66,6 @@ ONNX opset = 11 [yolox_darknet.opt.onnx.prototxt](https://netron.app/?url=https://storage.googleapis.com/ailia-models/yolox/yolox_darknet.opt.onnx.prototxt) -[yolox_x.opt.onnx.prototxt](https://netron.app/?url=https://storage.googleapis.com/ailia-models/yolox/yolox_x.opt.onnx.prototxt) \ No newline at end of file +[yolox_x.opt.onnx.prototxt](https://netron.app/?url=https://storage.googleapis.com/ailia-models/yolox/yolox_x.opt.onnx.prototxt) + +[yolox_tiny_quantized.onnx.prototxt](https://netron.app/?url=https://storage.googleapis.com/ailia-models/yolox/yolox_tiny_quantized.onnx.prototxt) From b702cbd30505d7719c35f7cfa60514ebd77d970e Mon Sep 17 00:00:00 2001 From: Kazuki Kyakuno Date: Wed, 1 Feb 2023 13:15:19 +0900 Subject: [PATCH 3/6] Move quantized model to model list --- object_detection/yolox/README.md | 4 +++- object_detection/yolox/yolox.py | 20 ++++++++------------ 2 files changed, 11 insertions(+), 13 deletions(-) diff --git a/object_detection/yolox/README.md b/object_detection/yolox/README.md index b5ef93f9b..40494a5af 100644 --- a/object_detection/yolox/README.md +++ b/object_detection/yolox/README.md @@ -68,4 +68,6 @@ ONNX opset = 11 
[yolox_x.opt.onnx.prototxt](https://netron.app/?url=https://storage.googleapis.com/ailia-models/yolox/yolox_x.opt.onnx.prototxt) -[yolox_tiny_quantized.onnx.prototxt](https://netron.app/?url=https://storage.googleapis.com/ailia-models/yolox/yolox_tiny_quantized.onnx.prototxt) +[yolox_tiny_int8_per_tensor.opt.onnx.prototxt](https://netron.app/?url=https://storage.googleapis.com/ailia-models/yolox/yolox_tiny_int8_per_tensor.opt.onnx.prototxt) + +[yolox_tiny_int8_per_channel.opt.onnx.prototxt](https://netron.app/?url=https://storage.googleapis.com/ailia-models/yolox/yolox_tiny_int8_per_channel.opt.onnx.prototxt) diff --git a/object_detection/yolox/yolox.py b/object_detection/yolox/yolox.py index 94b0bdf4d..defb531e1 100644 --- a/object_detection/yolox/yolox.py +++ b/object_detection/yolox/yolox.py @@ -35,7 +35,9 @@ 'yolox_m': {'input_shape': [640, 640]}, 'yolox_l': {'input_shape': [640, 640]}, 'yolox_darknet': {'input_shape': [640, 640]}, - 'yolox_x': {'input_shape': [640, 640]}} + 'yolox_x': {'input_shape': [640, 640]}, + 'yolox_tiny_int8_per_tensor': {'input_shape': [416, 416]}, + 'yolox_tiny_int8_per_channel': {'input_shape': [416, 416]}} REMOTE_PATH = 'https://storage.googleapis.com/ailia-models/yolox/' @@ -101,11 +103,6 @@ default=-1, type=int, help='The detection height and height for yolo. (default: auto)' ) -parser.add_argument( - '-qt', '--quantize', - action='store_true', - help='Use quantized model (require ONNX Runtime).' 
-) args = update_parser(parser) @@ -113,11 +110,10 @@ WEIGHT_PATH = MODEL_NAME + ".opt.onnx" MODEL_PATH = MODEL_NAME + ".opt.onnx.prototxt" -if args.quantize: +QUANTIZED = False +if "int8" in MODEL_NAME: import onnxruntime - MODEL_NAME = "yolox_tiny" - WEIGHT_PATH = MODEL_NAME + "_quantized.onnx" - MODEL_PATH = None + QUANTIZED = True HEIGHT = MODEL_PARAMS[MODEL_NAME]['input_shape'][0] WIDTH = MODEL_PARAMS[MODEL_NAME]['input_shape'][1] @@ -140,7 +136,7 @@ def compute(): detector.compute(raw_img, args.threshold, args.iou) return None else: - if args.quantize: + if QUANTIZED: input_name = detector.get_inputs()[0].name return detector.run([], {input_name:img[None, :, :, :]}) else: @@ -258,7 +254,7 @@ def main(): if args.detection_width!=-1 or args.detection_height!=-1: detector.set_input_shape(args.detection_width,args.detection_height) else: - if args.quantize: + if QUANTIZED: detector = onnxruntime.InferenceSession(WEIGHT_PATH) else: From 744e01405bdfd1c2f631f0cffba1f3f901d78866 Mon Sep 17 00:00:00 2001 From: Kazuki Kyakuno Date: Wed, 1 Feb 2023 21:35:31 +0900 Subject: [PATCH 4/6] Implement quantized version of yolov3 --- object_detection/yolov3-tiny/README.md | 2 + object_detection/yolov3-tiny/yolov3-tiny.py | 163 ++++++++++++++------ 2 files changed, 120 insertions(+), 45 deletions(-) diff --git a/object_detection/yolov3-tiny/README.md b/object_detection/yolov3-tiny/README.md index 94c585d91..8e8a62a71 100644 --- a/object_detection/yolov3-tiny/README.md +++ b/object_detection/yolov3-tiny/README.md @@ -54,3 +54,5 @@ ONNX opset=10 ## Netron [yolov3-tiny.opt.onnx.prototxt](https://netron.app/?url=https://storage.googleapis.com/ailia-models/yolov3-tiny/yolov3-tiny.opt.onnx.prototxt) + +[yolov3-tiny_int8_per_tensor.opt.onnx.prototxt](https://netron.app/?url=https://storage.googleapis.com/ailia-models/yolov3-tiny/yolov3-tiny_int8_per_tensor.opt.onnx.prototxt) diff --git a/object_detection/yolov3-tiny/yolov3-tiny.py b/object_detection/yolov3-tiny/yolov3-tiny.py index 
e677a03bd..d8d0ff2e8 100644 --- a/object_detection/yolov3-tiny/yolov3-tiny.py +++ b/object_detection/yolov3-tiny/yolov3-tiny.py @@ -2,6 +2,7 @@ import sys import time import math +import numpy as np import cv2 @@ -22,8 +23,6 @@ # ====================== # Parameters # ====================== -WEIGHT_PATH = 'yolov3-tiny.opt.onnx' -MODEL_PATH = 'yolov3-tiny.opt.onnx.prototxt' REMOTE_PATH = 'https://storage.googleapis.com/ailia-models/yolov3-tiny/' IMAGE_PATH = 'input.jpg' @@ -78,31 +77,88 @@ default=DETECTION_SIZE, type=int, help='The detection height and height for yolo. (default: 416)' ) +parser.add_argument( + '--quantize', + action='store_true', + help='Use quantized model.' +) args = update_parser(parser) +if args.quantize: + import onnxruntime + WEIGHT_PATH = 'yolov3-tiny_int8_per_tensor.opt.onnx' + MODEL_PATH = 'yolov3-tiny_int8_per_tensor.opt.onnx.prototxt' + #WEIGHT_PATH = 'yolov3-tiny.opt.onnx' + #MODEL_PATH = 'yolov3-tiny.opt.onnx.prototxt' +else: + WEIGHT_PATH = 'yolov3-tiny.opt.onnx' + MODEL_PATH = 'yolov3-tiny.opt.onnx.prototxt' # ====================== -# Main functions +# Quantized model functions # ====================== -def recognize_from_image(): - # net initialize - detector = ailia.Detector( - MODEL_PATH, - WEIGHT_PATH, - len(COCO_CATEGORY), - format=ailia.NETWORK_IMAGE_FORMAT_RGB, - channel=ailia.NETWORK_IMAGE_CHANNEL_FIRST, - range=ailia.NETWORK_IMAGE_RANGE_U_FP32, - algorithm=ailia.DETECTOR_ALGORITHM_YOLOV3, - env_id=args.env_id, - ) - if args.detection_width != DETECTION_SIZE or args.detection_height != DETECTION_SIZE: - detector.set_input_shape( - args.detection_width, args.detection_height - ) - if args.profile: - detector.set_profile_mode(True) +def letterbox_image(image, size): + '''resize image with unchanged aspect ratio using padding''' + ih, iw, c = image.shape + w, h = size + scale = min(w/iw, h/ih) + nw = int(iw*scale) + nh = int(ih*scale) + + image = cv2.resize(image, (nw,nh)) + new_image = np.zeros((size[0], size[1], 3)) + 
new_image[0:nh,0:nw,0:3] = image[0:nh,0:nw,0:3] + new_image = new_image[:,:,::-1] # bgr to rgb + return new_image, nw, nh + +def detect_quantized_model(detector, image): + model_image_size = [args.detection_width, args.detection_height] + boxed_image, nw, nh = letterbox_image(image, model_image_size) + + image_data = np.array(boxed_image, dtype='float32') + image_data /= 255. + image_data = np.transpose(image_data, [2, 0, 1]) + + image_data = np.expand_dims(image_data, 0) # Add batch dimension. + feed_f = dict(zip(['input_1', 'image_shape', 'iou_threshold', 'layer.score_threshold'], + (image_data, np.array([args.detection_height, args.detection_width],dtype='float32').reshape(1, 2), + np.array([args.iou], dtype='float32').reshape(1), + np.array([args.threshold], dtype='float32').reshape(1)))) + all_boxes, all_scores, indices = detector.run(None, input_feed=feed_f) + + out_boxes, out_scores, out_classes = [], [], [] + for idx_ in indices: + out_classes.append(idx_[1]) + out_scores.append(all_scores[tuple(idx_)]) + idx_1 = (idx_[0], idx_[2]) + out_boxes.append(all_boxes[idx_1]) + + detections = [] + for i, c in reversed(list(enumerate(out_classes))): + box = out_boxes[i] + score = out_scores[i] + top, left, bottom, right = box + top = top / nh + left = left / nw + bottom = bottom / nh + right = right / nw + + obj = ailia.DetectorObject( + category=c, + prob=score, + x=left, + y=top, + w=right - left, + h=bottom - top) + detections.append(obj) + + return detections + +# ====================== +# Main functions +# ====================== +def recognize_from_image(detector): # input image loop for image_path in args.input: # prepare input data @@ -117,17 +173,25 @@ def recognize_from_image(): total_time = 0 for i in range(args.benchmark_count): start = int(round(time.time() * 1000)) - detector.compute(img, args.threshold, args.iou) + if args.quantize: + detections = detect_quantized_model(detector, img) + else: + detector.compute(img, args.threshold, args.iou) + 
detections = detector end = int(round(time.time() * 1000)) if i != 0: total_time = total_time + (end - start) logger.info(f'\tailia processing time {end - start} ms') logger.info(f'\taverage time {total_time / (args.benchmark_count-1)} ms') else: - detector.compute(img, args.threshold, args.iou) + if args.quantize: + detections = detect_quantized_model(detector, img) + else: + detector.compute(img, args.threshold, args.iou) + detections = detector # plot result - res_img = plot_results(detector, img, COCO_CATEGORY) + res_img = plot_results(detections, img, COCO_CATEGORY) savepath = get_savepath(args.savepath, image_path) logger.info(f'saved at : {savepath}') cv2.imwrite(savepath, res_img) @@ -143,23 +207,7 @@ def recognize_from_image(): logger.info('Script finished successfully.') -def recognize_from_video(): - # net initialize - detector = ailia.Detector( - MODEL_PATH, - WEIGHT_PATH, - len(COCO_CATEGORY), - format=ailia.NETWORK_IMAGE_FORMAT_RGB, - channel=ailia.NETWORK_IMAGE_CHANNEL_FIRST, - range=ailia.NETWORK_IMAGE_RANGE_U_FP32, - algorithm=ailia.DETECTOR_ALGORITHM_YOLOV3, - env_id=args.env_id, - ) - if args.detection_width != DETECTION_SIZE or args.detection_height != DETECTION_SIZE: - detector.set_input_shape( - args.detection_width, args.detection_height - ) - +def recognize_from_video(detector): capture = webcamera_utils.get_capture(args.video) # create video writer if savepath is specified as video format @@ -184,7 +232,11 @@ def recognize_from_video(): break img = cv2.cvtColor(frame, cv2.COLOR_BGR2BGRA) - detector.compute(img, args.threshold, args.iou) + if args.quantize: + detections = detect_quantized_model(detector, img) + else: + detector.compute(img, args.threshold, args.iou) + detections = detector res_img = plot_results(detector, frame, COCO_CATEGORY, False) cv2.imshow('frame', res_img) frame_shown = True @@ -197,7 +249,7 @@ def recognize_from_video(): if args.write_prediction: savepath = get_savepath(args.savepath, video_name, post_fix = '_%s' % 
(str(frame_count).zfill(frame_digit) + '_res'), ext='.png') pred_file = '%s.txt' % savepath.rsplit('.', 1)[0] - write_predictions(pred_file, detector, frame, COCO_CATEGORY) + write_predictions(pred_file, detections, frame, COCO_CATEGORY) frame_count += 1 capture.release() @@ -211,12 +263,33 @@ def main(): # model files check and download check_and_download_models(WEIGHT_PATH, MODEL_PATH, REMOTE_PATH) + # net initialize + if args.quantize: + detector = onnxruntime.InferenceSession(WEIGHT_PATH) + else: + detector = ailia.Detector( + MODEL_PATH, + WEIGHT_PATH, + len(COCO_CATEGORY), + format=ailia.NETWORK_IMAGE_FORMAT_RGB, + channel=ailia.NETWORK_IMAGE_CHANNEL_FIRST, + range=ailia.NETWORK_IMAGE_RANGE_U_FP32, + algorithm=ailia.DETECTOR_ALGORITHM_YOLOV3, + env_id=args.env_id, + ) + if args.detection_width != DETECTION_SIZE or args.detection_height != DETECTION_SIZE: + detector.set_input_shape( + args.detection_width, args.detection_height + ) + if args.profile: + detector.set_profile_mode(True) + if args.video is not None: # video mode - recognize_from_video() + recognize_from_video(detector) else: # image mode - recognize_from_image() + recognize_from_image(detector) if __name__ == '__main__': From d7848c2c97dce51da4501dd543de3d95ce207df3 Mon Sep 17 00:00:00 2001 From: Kazuki Kyakuno Date: Wed, 1 Feb 2023 21:39:27 +0900 Subject: [PATCH 5/6] Write predictions --- object_detection/yolov3-tiny/yolov3-tiny.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/object_detection/yolov3-tiny/yolov3-tiny.py b/object_detection/yolov3-tiny/yolov3-tiny.py index d8d0ff2e8..ae0946d33 100644 --- a/object_detection/yolov3-tiny/yolov3-tiny.py +++ b/object_detection/yolov3-tiny/yolov3-tiny.py @@ -199,7 +199,7 @@ def recognize_from_image(detector): # write prediction if args.write_prediction: pred_file = '%s.txt' % savepath.rsplit('.', 1)[0] - write_predictions(pred_file, detector, img, COCO_CATEGORY) + write_predictions(pred_file, detections, img, COCO_CATEGORY) if 
args.profile: print(detector.get_summary()) @@ -237,7 +237,7 @@ def recognize_from_video(detector): else: detector.compute(img, args.threshold, args.iou) detections = detector - res_img = plot_results(detector, frame, COCO_CATEGORY, False) + res_img = plot_results(detections, frame, COCO_CATEGORY, False) cv2.imshow('frame', res_img) frame_shown = True From c30791119c682bac2f8442326c7a56838eed2b28 Mon Sep 17 00:00:00 2001 From: Kazuki Kyakuno Date: Thu, 2 Feb 2023 13:42:42 +0900 Subject: [PATCH 6/6] Center crop --- object_detection/yolov3-tiny/yolov3-tiny.py | 35 ++++++++++++--------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/object_detection/yolov3-tiny/yolov3-tiny.py b/object_detection/yolov3-tiny/yolov3-tiny.py index ae0946d33..435aa30a6 100644 --- a/object_detection/yolov3-tiny/yolov3-tiny.py +++ b/object_detection/yolov3-tiny/yolov3-tiny.py @@ -77,6 +77,11 @@ default=DETECTION_SIZE, type=int, help='The detection height and height for yolo. (default: 416)' ) +parser.add_argument( + '--onnx', + action='store_true', + help='Use onnx runtime.' 
+) parser.add_argument( '--quantize', action='store_true', @@ -84,12 +89,12 @@ ) args = update_parser(parser) -if args.quantize: +if args.onnx or args.quantize: import onnxruntime + +if args.quantize: WEIGHT_PATH = 'yolov3-tiny_int8_per_tensor.opt.onnx' MODEL_PATH = 'yolov3-tiny_int8_per_tensor.opt.onnx.prototxt' - #WEIGHT_PATH = 'yolov3-tiny.opt.onnx' - #MODEL_PATH = 'yolov3-tiny.opt.onnx.prototxt' else: WEIGHT_PATH = 'yolov3-tiny.opt.onnx' MODEL_PATH = 'yolov3-tiny.opt.onnx.prototxt' @@ -107,14 +112,14 @@ def letterbox_image(image, size): nh = int(ih*scale) image = cv2.resize(image, (nw,nh)) - new_image = np.zeros((size[0], size[1], 3)) - new_image[0:nh,0:nw,0:3] = image[0:nh,0:nw,0:3] + new_image = np.zeros((w, h, 3)) + new_image[(h-nh)//2:(h-nh)//2+nh,(w-nw)//2:(w-nw)//2+nw,0:3] = image[0:nh,0:nw,0:3] new_image = new_image[:,:,::-1] # bgr to rgb - return new_image, nw, nh + return new_image, nw, nh, (w - nw)//2, (h - nh) //2 def detect_quantized_model(detector, image): model_image_size = [args.detection_width, args.detection_height] - boxed_image, nw, nh = letterbox_image(image, model_image_size) + boxed_image, nw, nh, ow, oh = letterbox_image(image, model_image_size) image_data = np.array(boxed_image, dtype='float32') image_data /= 255. 
@@ -139,10 +144,10 @@ def detect_quantized_model(detector, image): box = out_boxes[i] score = out_scores[i] top, left, bottom, right = box - top = top / nh - left = left / nw - bottom = bottom / nh - right = right / nw + top = (top - oh) / nh + left = (left - ow) / nw + bottom = (bottom - oh) / nh + right = (right - ow) / nw obj = ailia.DetectorObject( category=c, @@ -173,7 +178,7 @@ def recognize_from_image(detector): total_time = 0 for i in range(args.benchmark_count): start = int(round(time.time() * 1000)) - if args.quantize: + if args.quantize or args.onnx: detections = detect_quantized_model(detector, img) else: detector.compute(img, args.threshold, args.iou) @@ -184,7 +189,7 @@ def recognize_from_image(detector): logger.info(f'\tailia processing time {end - start} ms') logger.info(f'\taverage time {total_time / (args.benchmark_count-1)} ms') else: - if args.quantize: + if args.quantize or args.onnx: detections = detect_quantized_model(detector, img) else: detector.compute(img, args.threshold, args.iou) @@ -232,7 +237,7 @@ def recognize_from_video(detector): break img = cv2.cvtColor(frame, cv2.COLOR_BGR2BGRA) - if args.quantize: + if args.quantize or args.onnx: detections = detect_quantized_model(detector, img) else: detector.compute(img, args.threshold, args.iou) @@ -264,7 +269,7 @@ def main(): check_and_download_models(WEIGHT_PATH, MODEL_PATH, REMOTE_PATH) # net initialize - if args.quantize: + if args.quantize or args.onnx: detector = onnxruntime.InferenceSession(WEIGHT_PATH) else: detector = ailia.Detector(