Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions object_detection/yolov3-tiny/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -54,3 +54,5 @@ ONNX opset=10
## Netron

[yolov3-tiny.opt.onnx.prototxt](https://netron.app/?url=https://storage.googleapis.com/ailia-models/yolov3-tiny/yolov3-tiny.opt.onnx.prototxt)

[yolov3-tiny_int8_per_tensor.opt.onnx.prototxt](https://netron.app/?url=https://storage.googleapis.com/ailia-models/yolov3-tiny/yolov3-tiny_int8_per_tensor.opt.onnx.prototxt)
172 changes: 125 additions & 47 deletions object_detection/yolov3-tiny/yolov3-tiny.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import sys
import time
import math
import numpy as np

import cv2

Expand All @@ -22,8 +23,6 @@
# ======================
# Parameters
# ======================
WEIGHT_PATH = 'yolov3-tiny.opt.onnx'
MODEL_PATH = 'yolov3-tiny.opt.onnx.prototxt'
REMOTE_PATH = 'https://storage.googleapis.com/ailia-models/yolov3-tiny/'

IMAGE_PATH = 'input.jpg'
Expand Down Expand Up @@ -78,31 +77,93 @@
default=DETECTION_SIZE, type=int,
help='The detection height and height for yolo. (default: 416)'
)
parser.add_argument(
'--onnx',
action='store_true',
help='Use onnx runtime.'
)
parser.add_argument(
'--quantize',
action='store_true',
help='Use quantized model.'
)
args = update_parser(parser)

if args.onnx or args.quantize:
import onnxruntime

if args.quantize:
WEIGHT_PATH = 'yolov3-tiny_int8_per_tensor.opt.onnx'
MODEL_PATH = 'yolov3-tiny_int8_per_tensor.opt.onnx.prototxt'
else:
WEIGHT_PATH = 'yolov3-tiny.opt.onnx'
MODEL_PATH = 'yolov3-tiny.opt.onnx.prototxt'

# ======================
# Main functions
# Quantized model functions
# ======================
def recognize_from_image():
# net initialize
detector = ailia.Detector(
MODEL_PATH,
WEIGHT_PATH,
len(COCO_CATEGORY),
format=ailia.NETWORK_IMAGE_FORMAT_RGB,
channel=ailia.NETWORK_IMAGE_CHANNEL_FIRST,
range=ailia.NETWORK_IMAGE_RANGE_U_FP32,
algorithm=ailia.DETECTOR_ALGORITHM_YOLOV3,
env_id=args.env_id,
)
if args.detection_width != DETECTION_SIZE or args.detection_height != DETECTION_SIZE:
detector.set_input_shape(
args.detection_width, args.detection_height
)
if args.profile:
detector.set_profile_mode(True)

def letterbox_image(image, size):
    '''Resize an image to fit `size` with unchanged aspect ratio, padding with black.

    Parameters
    ----------
    image : np.ndarray
        BGR image of shape (height, width, 3) — as loaded by OpenCV.
    size : (int, int)
        Target (width, height) of the model input.

    Returns
    -------
    tuple
        (new_image, nw, nh, ox, oy) where `new_image` is an RGB float array of
        shape (height, width, 3) with the resized image centered on a black
        canvas, `nw`/`nh` are the resized content width/height, and `ox`/`oy`
        are the left/top padding offsets in pixels.
    '''
    ih, iw, c = image.shape
    w, h = size
    scale = min(w/iw, h/ih)
    nw = int(iw*scale)
    nh = int(ih*scale)

    image = cv2.resize(image, (nw, nh))
    # BUGFIX: the canvas must be allocated as (rows, cols, ch) = (h, w, 3).
    # The original np.zeros((w, h, 3)) only worked because the detection
    # size is square (w == h); a non-square size would break the slice
    # assignment below, which indexes rows by h and columns by w.
    new_image = np.zeros((h, w, 3))
    new_image[(h-nh)//2:(h-nh)//2+nh, (w-nw)//2:(w-nw)//2+nw, 0:3] = image[0:nh, 0:nw, 0:3]
    new_image = new_image[:, :, ::-1]  # BGR -> RGB channel reversal
    return new_image, nw, nh, (w - nw)//2, (h - nh)//2

def detect_quantized_model(detector, image):
    '''Run the ONNX Runtime YOLOv3-tiny session on one BGR image.

    Parameters
    ----------
    detector : onnxruntime.InferenceSession
        Session for the (possibly quantized) YOLOv3-tiny model.
    image : np.ndarray
        BGR image of shape (height, width, 3).

    Returns
    -------
    list[ailia.DetectorObject]
        Detections with x/y/w/h normalized to the letterboxed content area
        (i.e. relative to the original image), in reverse index order.
    '''
    target_size = [args.detection_width, args.detection_height]
    boxed, nw, nh, pad_x, pad_y = letterbox_image(image, target_size)

    # NCHW float blob scaled to [0, 1] with a leading batch dimension.
    blob = np.array(boxed, dtype='float32')
    blob /= 255.
    blob = np.transpose(blob, [2, 0, 1])
    blob = np.expand_dims(blob, 0)

    # Input names match the exported YOLOv3-tiny ONNX graph.
    feed = {
        'input_1': blob,
        'image_shape': np.array(
            [args.detection_height, args.detection_width],
            dtype='float32').reshape(1, 2),
        'iou_threshold': np.array([args.iou], dtype='float32').reshape(1),
        'layer.score_threshold': np.array(
            [args.threshold], dtype='float32').reshape(1),
    }
    all_boxes, all_scores, indices = detector.run(None, input_feed=feed)

    # Each row of `indices` is (batch, class, box); walk them in reverse so
    # the output order matches the original implementation.
    detections = []
    for sel in indices[::-1]:
        cls = sel[1]
        score = all_scores[tuple(sel)]
        top, left, bottom, right = all_boxes[(sel[0], sel[2])]

        # Undo the letterbox padding and normalize to the content area.
        detections.append(ailia.DetectorObject(
            category=cls,
            prob=score,
            x=(left - pad_x) / nw,
            y=(top - pad_y) / nh,
            w=(right - left) / nw,
            h=(bottom - top) / nh))

    return detections

# ======================
# Main functions
# ======================
def recognize_from_image(detector):
# input image loop
for image_path in args.input:
# prepare input data
Expand All @@ -117,49 +178,41 @@ def recognize_from_image():
total_time = 0
for i in range(args.benchmark_count):
start = int(round(time.time() * 1000))
detector.compute(img, args.threshold, args.iou)
if args.quantize or args.onnx:
detections = detect_quantized_model(detector, img)
else:
detector.compute(img, args.threshold, args.iou)
detections = detector
end = int(round(time.time() * 1000))
if i != 0:
total_time = total_time + (end - start)
logger.info(f'\tailia processing time {end - start} ms')
logger.info(f'\taverage time {total_time / (args.benchmark_count-1)} ms')
else:
detector.compute(img, args.threshold, args.iou)
if args.quantize or args.onnx:
detections = detect_quantized_model(detector, img)
else:
detector.compute(img, args.threshold, args.iou)
detections = detector

# plot result
res_img = plot_results(detector, img, COCO_CATEGORY)
res_img = plot_results(detections, img, COCO_CATEGORY)
savepath = get_savepath(args.savepath, image_path)
logger.info(f'saved at : {savepath}')
cv2.imwrite(savepath, res_img)

# write prediction
if args.write_prediction:
pred_file = '%s.txt' % savepath.rsplit('.', 1)[0]
write_predictions(pred_file, detector, img, COCO_CATEGORY)
write_predictions(pred_file, detections, img, COCO_CATEGORY)

if args.profile:
print(detector.get_summary())

logger.info('Script finished successfully.')


def recognize_from_video():
# net initialize
detector = ailia.Detector(
MODEL_PATH,
WEIGHT_PATH,
len(COCO_CATEGORY),
format=ailia.NETWORK_IMAGE_FORMAT_RGB,
channel=ailia.NETWORK_IMAGE_CHANNEL_FIRST,
range=ailia.NETWORK_IMAGE_RANGE_U_FP32,
algorithm=ailia.DETECTOR_ALGORITHM_YOLOV3,
env_id=args.env_id,
)
if args.detection_width != DETECTION_SIZE or args.detection_height != DETECTION_SIZE:
detector.set_input_shape(
args.detection_width, args.detection_height
)

def recognize_from_video(detector):
capture = webcamera_utils.get_capture(args.video)

# create video writer if savepath is specified as video format
Expand All @@ -184,8 +237,12 @@ def recognize_from_video():
break

img = cv2.cvtColor(frame, cv2.COLOR_BGR2BGRA)
detector.compute(img, args.threshold, args.iou)
res_img = plot_results(detector, frame, COCO_CATEGORY, False)
if args.quantize or args.onnx:
detections = detect_quantized_model(detector, img)
else:
detector.compute(img, args.threshold, args.iou)
detections = detector
res_img = plot_results(detections, frame, COCO_CATEGORY, False)
cv2.imshow('frame', res_img)
frame_shown = True

Expand All @@ -197,7 +254,7 @@ def recognize_from_video():
if args.write_prediction:
savepath = get_savepath(args.savepath, video_name, post_fix = '_%s' % (str(frame_count).zfill(frame_digit) + '_res'), ext='.png')
pred_file = '%s.txt' % savepath.rsplit('.', 1)[0]
write_predictions(pred_file, detector, frame, COCO_CATEGORY)
write_predictions(pred_file, detections, frame, COCO_CATEGORY)
frame_count += 1

capture.release()
Expand All @@ -211,12 +268,33 @@ def main():
# model files check and download
check_and_download_models(WEIGHT_PATH, MODEL_PATH, REMOTE_PATH)

# net initialize
if args.quantize or args.onnx:
detector = onnxruntime.InferenceSession(WEIGHT_PATH)
else:
detector = ailia.Detector(
MODEL_PATH,
WEIGHT_PATH,
len(COCO_CATEGORY),
format=ailia.NETWORK_IMAGE_FORMAT_RGB,
channel=ailia.NETWORK_IMAGE_CHANNEL_FIRST,
range=ailia.NETWORK_IMAGE_RANGE_U_FP32,
algorithm=ailia.DETECTOR_ALGORITHM_YOLOV3,
env_id=args.env_id,
)
if args.detection_width != DETECTION_SIZE or args.detection_height != DETECTION_SIZE:
detector.set_input_shape(
args.detection_width, args.detection_height
)
if args.profile:
detector.set_profile_mode(True)

if args.video is not None:
# video mode
recognize_from_video()
recognize_from_video(detector)
else:
# image mode
recognize_from_image()
recognize_from_image(detector)


if __name__ == '__main__':
Expand Down
6 changes: 5 additions & 1 deletion object_detection/yolox/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -66,4 +66,8 @@ ONNX opset = 11

[yolox_darknet.opt.onnx.prototxt](https://netron.app/?url=https://storage.googleapis.com/ailia-models/yolox/yolox_darknet.opt.onnx.prototxt)

[yolox_x.opt.onnx.prototxt](https://netron.app/?url=https://storage.googleapis.com/ailia-models/yolox/yolox_x.opt.onnx.prototxt)
[yolox_x.opt.onnx.prototxt](https://netron.app/?url=https://storage.googleapis.com/ailia-models/yolox/yolox_x.opt.onnx.prototxt)

[yolox_tiny_int8_per_tensor.opt.onnx.prototxt](https://netron.app/?url=https://storage.googleapis.com/ailia-models/yolox/yolox_tiny_int8_per_tensor.opt.onnx.prototxt)

[yolox_tiny_int8_per_channel.opt.onnx.prototxt](https://netron.app/?url=https://storage.googleapis.com/ailia-models/yolox/yolox_tiny_int8_per_channel.opt.onnx.prototxt)
32 changes: 24 additions & 8 deletions object_detection/yolox/yolox.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,9 @@
'yolox_m': {'input_shape': [640, 640]},
'yolox_l': {'input_shape': [640, 640]},
'yolox_darknet': {'input_shape': [640, 640]},
'yolox_x': {'input_shape': [640, 640]}}
'yolox_x': {'input_shape': [640, 640]},
'yolox_tiny_int8_per_tensor': {'input_shape': [416, 416]},
'yolox_tiny_int8_per_channel': {'input_shape': [416, 416]}}

REMOTE_PATH = 'https://storage.googleapis.com/ailia-models/yolox/'

Expand Down Expand Up @@ -101,12 +103,18 @@
default=-1, type=int,
help='The detection height and height for yolo. (default: auto)'
)

args = update_parser(parser)

MODEL_NAME = args.model_name
WEIGHT_PATH = MODEL_NAME + ".opt.onnx"
MODEL_PATH = MODEL_NAME + ".opt.onnx.prototxt"

QUANTIZED = False
if "int8" in MODEL_NAME:
import onnxruntime
QUANTIZED = True

HEIGHT = MODEL_PARAMS[MODEL_NAME]['input_shape'][0]
WIDTH = MODEL_PARAMS[MODEL_NAME]['input_shape'][1]

Expand All @@ -128,7 +136,11 @@ def compute():
detector.compute(raw_img, args.threshold, args.iou)
return None
else:
return detector.run(img[None, :, :, :])
if QUANTIZED:
input_name = detector.get_inputs()[0].name
return detector.run([], {input_name:img[None, :, :, :]})
else:
return detector.run(img[None, :, :, :])

# inference
logger.info('Start inference...')
Expand Down Expand Up @@ -242,12 +254,16 @@ def main():
if args.detection_width!=-1 or args.detection_height!=-1:
detector.set_input_shape(args.detection_width,args.detection_height)
else:
detector = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=env_id)
if args.detection_width!=-1 or args.detection_height!=-1:
global WIDTH,HEIGHT
WIDTH=args.detection_width
HEIGHT=args.detection_height
detector.set_input_shape((1,3,HEIGHT,WIDTH))
if QUANTIZED:
detector = onnxruntime.InferenceSession(WEIGHT_PATH)
else:

detector = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=env_id)
if args.detection_width!=-1 or args.detection_height!=-1:
global WIDTH,HEIGHT
WIDTH=args.detection_width
HEIGHT=args.detection_height
detector.set_input_shape((1,3,HEIGHT,WIDTH))

if args.video is not None:
# video mode
Expand Down