Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions object_detection/yolov3-tiny/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -54,3 +54,5 @@ ONNX opset=10
## Netron

[yolov3-tiny.opt.onnx.prototxt](https://netron.app/?url=https://storage.googleapis.com/ailia-models/yolov3-tiny/yolov3-tiny.opt.onnx.prototxt)

[yolov3-tiny_int8_per_tensor.opt.onnx.prototxt](https://netron.app/?url=https://storage.googleapis.com/ailia-models/yolov3-tiny/yolov3-tiny_int8_per_tensor.opt.onnx.prototxt)
172 changes: 125 additions & 47 deletions object_detection/yolov3-tiny/yolov3-tiny.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import sys
import time
import math
import numpy as np

import cv2

Expand All @@ -22,8 +23,6 @@
# ======================
# Parameters
# ======================
WEIGHT_PATH = 'yolov3-tiny.opt.onnx'
MODEL_PATH = 'yolov3-tiny.opt.onnx.prototxt'
REMOTE_PATH = 'https://storage.googleapis.com/ailia-models/yolov3-tiny/'

IMAGE_PATH = 'input.jpg'
Expand Down Expand Up @@ -78,31 +77,93 @@
default=DETECTION_SIZE, type=int,
help='The detection height and height for yolo. (default: 416)'
)
parser.add_argument(
'--onnx',
action='store_true',
help='Use onnx runtime.'
)
parser.add_argument(
'--quantize',
action='store_true',
help='Use quantized model.'
)
args = update_parser(parser)

if args.onnx or args.quantize:
import onnxruntime

if args.quantize:
WEIGHT_PATH = 'yolov3-tiny_int8_per_tensor.opt.onnx'
MODEL_PATH = 'yolov3-tiny_int8_per_tensor.opt.onnx.prototxt'
else:
WEIGHT_PATH = 'yolov3-tiny.opt.onnx'
MODEL_PATH = 'yolov3-tiny.opt.onnx.prototxt'

# ======================
# Main functions
# Quantized model functions
# ======================
def recognize_from_image():
# net initialize
detector = ailia.Detector(
MODEL_PATH,
WEIGHT_PATH,
len(COCO_CATEGORY),
format=ailia.NETWORK_IMAGE_FORMAT_RGB,
channel=ailia.NETWORK_IMAGE_CHANNEL_FIRST,
range=ailia.NETWORK_IMAGE_RANGE_U_FP32,
algorithm=ailia.DETECTOR_ALGORITHM_YOLOV3,
env_id=args.env_id,
)
if args.detection_width != DETECTION_SIZE or args.detection_height != DETECTION_SIZE:
detector.set_input_shape(
args.detection_width, args.detection_height
)
if args.profile:
detector.set_profile_mode(True)

def letterbox_image(image, size):
    '''Resize an image to fit `size` with unchanged aspect ratio, padding with black.

    Parameters
    ----------
    image : np.ndarray
        BGR image of shape (height, width, 3) — as loaded by OpenCV.
    size : (int, int)
        Target (width, height) of the model input.

    Returns
    -------
    tuple
        (new_image, nw, nh, ox, oy) where `new_image` is an RGB float array of
        shape (height, width, 3) with the resized image centered on a black
        canvas, `nw`/`nh` are the resized content width/height, and `ox`/`oy`
        are the left/top padding offsets in pixels.
    '''
    ih, iw, c = image.shape
    w, h = size
    scale = min(w/iw, h/ih)
    nw = int(iw*scale)
    nh = int(ih*scale)

    image = cv2.resize(image, (nw, nh))
    # BUGFIX: the canvas must be allocated as (rows, cols, ch) = (h, w, 3).
    # The original np.zeros((w, h, 3)) only worked because the detection
    # size is square (w == h); a non-square size would break the slice
    # assignment below, which indexes rows by h and columns by w.
    new_image = np.zeros((h, w, 3))
    new_image[(h-nh)//2:(h-nh)//2+nh, (w-nw)//2:(w-nw)//2+nw, 0:3] = image[0:nh, 0:nw, 0:3]
    new_image = new_image[:, :, ::-1]  # BGR -> RGB channel reversal
    return new_image, nw, nh, (w - nw)//2, (h - nh)//2

def detect_quantized_model(detector, image):
    '''Run the ONNX Runtime YOLOv3-tiny session on one BGR image.

    Parameters
    ----------
    detector : onnxruntime.InferenceSession
        Session for the (possibly quantized) YOLOv3-tiny model.
    image : np.ndarray
        BGR image of shape (height, width, 3).

    Returns
    -------
    list[ailia.DetectorObject]
        Detections with x/y/w/h normalized to the letterboxed content area
        (i.e. relative to the original image), in reverse index order.
    '''
    target_size = [args.detection_width, args.detection_height]
    boxed, nw, nh, pad_x, pad_y = letterbox_image(image, target_size)

    # NCHW float blob scaled to [0, 1] with a leading batch dimension.
    blob = np.array(boxed, dtype='float32')
    blob /= 255.
    blob = np.transpose(blob, [2, 0, 1])
    blob = np.expand_dims(blob, 0)

    # Input names match the exported YOLOv3-tiny ONNX graph.
    feed = {
        'input_1': blob,
        'image_shape': np.array(
            [args.detection_height, args.detection_width],
            dtype='float32').reshape(1, 2),
        'iou_threshold': np.array([args.iou], dtype='float32').reshape(1),
        'layer.score_threshold': np.array(
            [args.threshold], dtype='float32').reshape(1),
    }
    all_boxes, all_scores, indices = detector.run(None, input_feed=feed)

    # Each row of `indices` is (batch, class, box); walk them in reverse so
    # the output order matches the original implementation.
    detections = []
    for sel in indices[::-1]:
        cls = sel[1]
        score = all_scores[tuple(sel)]
        top, left, bottom, right = all_boxes[(sel[0], sel[2])]

        # Undo the letterbox padding and normalize to the content area.
        detections.append(ailia.DetectorObject(
            category=cls,
            prob=score,
            x=(left - pad_x) / nw,
            y=(top - pad_y) / nh,
            w=(right - left) / nw,
            h=(bottom - top) / nh))

    return detections

# ======================
# Main functions
# ======================
def recognize_from_image(detector):
# input image loop
for image_path in args.input:
# prepare input data
Expand All @@ -117,49 +178,41 @@ def recognize_from_image():
total_time = 0
for i in range(args.benchmark_count):
start = int(round(time.time() * 1000))
detector.compute(img, args.threshold, args.iou)
if args.quantize or args.onnx:
detections = detect_quantized_model(detector, img)
else:
detector.compute(img, args.threshold, args.iou)
detections = detector
end = int(round(time.time() * 1000))
if i != 0:
total_time = total_time + (end - start)
logger.info(f'\tailia processing time {end - start} ms')
logger.info(f'\taverage time {total_time / (args.benchmark_count-1)} ms')
else:
detector.compute(img, args.threshold, args.iou)
if args.quantize or args.onnx:
detections = detect_quantized_model(detector, img)
else:
detector.compute(img, args.threshold, args.iou)
detections = detector

# plot result
res_img = plot_results(detector, img, COCO_CATEGORY)
res_img = plot_results(detections, img, COCO_CATEGORY)
savepath = get_savepath(args.savepath, image_path)
logger.info(f'saved at : {savepath}')
cv2.imwrite(savepath, res_img)

# write prediction
if args.write_prediction:
pred_file = '%s.txt' % savepath.rsplit('.', 1)[0]
write_predictions(pred_file, detector, img, COCO_CATEGORY)
write_predictions(pred_file, detections, img, COCO_CATEGORY)

if args.profile:
print(detector.get_summary())

logger.info('Script finished successfully.')


def recognize_from_video():
# net initialize
detector = ailia.Detector(
MODEL_PATH,
WEIGHT_PATH,
len(COCO_CATEGORY),
format=ailia.NETWORK_IMAGE_FORMAT_RGB,
channel=ailia.NETWORK_IMAGE_CHANNEL_FIRST,
range=ailia.NETWORK_IMAGE_RANGE_U_FP32,
algorithm=ailia.DETECTOR_ALGORITHM_YOLOV3,
env_id=args.env_id,
)
if args.detection_width != DETECTION_SIZE or args.detection_height != DETECTION_SIZE:
detector.set_input_shape(
args.detection_width, args.detection_height
)

def recognize_from_video(detector):
capture = webcamera_utils.get_capture(args.video)

# create video writer if savepath is specified as video format
Expand All @@ -184,8 +237,12 @@ def recognize_from_video():
break

img = cv2.cvtColor(frame, cv2.COLOR_BGR2BGRA)
detector.compute(img, args.threshold, args.iou)
res_img = plot_results(detector, frame, COCO_CATEGORY, False)
if args.quantize or args.onnx:
detections = detect_quantized_model(detector, img)
else:
detector.compute(img, args.threshold, args.iou)
detections = detector
res_img = plot_results(detections, frame, COCO_CATEGORY, False)
cv2.imshow('frame', res_img)
frame_shown = True

Expand All @@ -197,7 +254,7 @@ def recognize_from_video():
if args.write_prediction:
savepath = get_savepath(args.savepath, video_name, post_fix = '_%s' % (str(frame_count).zfill(frame_digit) + '_res'), ext='.png')
pred_file = '%s.txt' % savepath.rsplit('.', 1)[0]
write_predictions(pred_file, detector, frame, COCO_CATEGORY)
write_predictions(pred_file, detections, frame, COCO_CATEGORY)
frame_count += 1

capture.release()
Expand All @@ -211,12 +268,33 @@ def main():
# model files check and download
check_and_download_models(WEIGHT_PATH, MODEL_PATH, REMOTE_PATH)

# net initialize
if args.quantize or args.onnx:
detector = onnxruntime.InferenceSession(WEIGHT_PATH)
else:
detector = ailia.Detector(
MODEL_PATH,
WEIGHT_PATH,
len(COCO_CATEGORY),
format=ailia.NETWORK_IMAGE_FORMAT_RGB,
channel=ailia.NETWORK_IMAGE_CHANNEL_FIRST,
range=ailia.NETWORK_IMAGE_RANGE_U_FP32,
algorithm=ailia.DETECTOR_ALGORITHM_YOLOV3,
env_id=args.env_id,
)
if args.detection_width != DETECTION_SIZE or args.detection_height != DETECTION_SIZE:
detector.set_input_shape(
args.detection_width, args.detection_height
)
if args.profile:
detector.set_profile_mode(True)

if args.video is not None:
# video mode
recognize_from_video()
recognize_from_video(detector)
else:
# image mode
recognize_from_image()
recognize_from_image(detector)


if __name__ == '__main__':
Expand Down
6 changes: 5 additions & 1 deletion object_detection/yolox/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -66,4 +66,8 @@ ONNX opset = 11

[yolox_darknet.opt.onnx.prototxt](https://netron.app/?url=https://storage.googleapis.com/ailia-models/yolox/yolox_darknet.opt.onnx.prototxt)

[yolox_x.opt.onnx.prototxt](https://netron.app/?url=https://storage.googleapis.com/ailia-models/yolox/yolox_x.opt.onnx.prototxt)
[yolox_x.opt.onnx.prototxt](https://netron.app/?url=https://storage.googleapis.com/ailia-models/yolox/yolox_x.opt.onnx.prototxt)

[yolox_tiny_int8_per_tensor.opt.onnx.prototxt](https://netron.app/?url=https://storage.googleapis.com/ailia-models/yolox/yolox_tiny_int8_per_tensor.opt.onnx.prototxt)

[yolox_tiny_int8_per_channel.opt.onnx.prototxt](https://netron.app/?url=https://storage.googleapis.com/ailia-models/yolox/yolox_tiny_int8_per_channel.opt.onnx.prototxt)
32 changes: 24 additions & 8 deletions object_detection/yolox/yolox.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,9 @@
'yolox_m': {'input_shape': [640, 640]},
'yolox_l': {'input_shape': [640, 640]},
'yolox_darknet': {'input_shape': [640, 640]},
'yolox_x': {'input_shape': [640, 640]}}
'yolox_x': {'input_shape': [640, 640]},
'yolox_tiny_int8_per_tensor': {'input_shape': [416, 416]},
'yolox_tiny_int8_per_channel': {'input_shape': [416, 416]}}

REMOTE_PATH = 'https://storage.googleapis.com/ailia-models/yolox/'

Expand Down Expand Up @@ -101,12 +103,18 @@
default=-1, type=int,
help='The detection height and height for yolo. (default: auto)'
)

args = update_parser(parser)

MODEL_NAME = args.model_name
WEIGHT_PATH = MODEL_NAME + ".opt.onnx"
MODEL_PATH = MODEL_NAME + ".opt.onnx.prototxt"

QUANTIZED = False
if "int8" in MODEL_NAME:
import onnxruntime
QUANTIZED = True

HEIGHT = MODEL_PARAMS[MODEL_NAME]['input_shape'][0]
WIDTH = MODEL_PARAMS[MODEL_NAME]['input_shape'][1]

Expand All @@ -128,7 +136,11 @@ def compute():
detector.compute(raw_img, args.threshold, args.iou)
return None
else:
return detector.run(img[None, :, :, :])
if QUANTIZED:
input_name = detector.get_inputs()[0].name
return detector.run([], {input_name:img[None, :, :, :]})
else:
return detector.run(img[None, :, :, :])

# inference
logger.info('Start inference...')
Expand Down Expand Up @@ -242,12 +254,16 @@ def main():
if args.detection_width!=-1 or args.detection_height!=-1:
detector.set_input_shape(args.detection_width,args.detection_height)
else:
detector = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=env_id)
if args.detection_width!=-1 or args.detection_height!=-1:
global WIDTH,HEIGHT
WIDTH=args.detection_width
HEIGHT=args.detection_height
detector.set_input_shape((1,3,HEIGHT,WIDTH))
if QUANTIZED:
detector = onnxruntime.InferenceSession(WEIGHT_PATH)
else:

detector = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=env_id)
if args.detection_width!=-1 or args.detection_height!=-1:
global WIDTH,HEIGHT
WIDTH=args.detection_width
HEIGHT=args.detection_height
detector.set_input_shape((1,3,HEIGHT,WIDTH))

if args.video is not None:
# video mode
Expand Down