diff --git a/python/OpenVINO_EP/tiny_yolo_v2_object_detection/README.md b/python/OpenVINO_EP/tiny_yolo_v2_object_detection/README.md
index 276b21fa0..30e40b71f 100644
--- a/python/OpenVINO_EP/tiny_yolo_v2_object_detection/README.md
+++ b/python/OpenVINO_EP/tiny_yolo_v2_object_detection/README.md
@@ -9,13 +9,16 @@ The source code for this sample is available [here](https://github.com/microsoft
# How to build
## Prerequisites
-1. [The Intel® Distribution of OpenVINO toolkit](https://docs.openvinotoolkit.org/latest/index.html)
+1. [The Intel® Distribution of OpenVINO toolkit](https://docs.openvino.ai/latest/openvino_docs_install_guides_install_runtime.html)
+ On that page, choose the option to install the OpenVINO Runtime using an installer.
+2. Also see the documentation for the [installer](https://docs.openvino.ai/latest/openvino_docs_install_guides_installing_openvino_linux.html#doxid-openvino-docs-install-guides-installing-openvino-linux).
-2. Download the latest tinyYOLOv2 model from the ONNX Model Zoo.
-   This model was adapted from [ONNX Model Zoo](https://github.com/onnx/models).Download the latest version of the [tinyYOLOv2](https://github.com/onnx/models/tree/master/vision/object_detection_segmentation/tiny-yolov2) model from here.
+3. Download the latest tinyYOLOv2 model from the ONNX Model Zoo.
+   This model was adapted from the [ONNX Model Zoo](https://github.com/onnx/models). Download the latest version of the [tinyYOLOv2](https://github.com/onnx/models/tree/master/vision/object_detection_segmentation/tiny-yolov2) model.
## Install ONNX Runtime for OpenVINO Execution Provider
+Install the onnxruntime-openvino Python package from the [Intel ONNX Runtime releases page](https://github.com/intel/onnxruntime/releases/).
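+For example, after downloading a wheel from that releases page, install it with pip (the filename below is a placeholder; the actual name varies by release and Python version):
+```bash
+pip3 install onnxruntime_openvino-<version>-cp38-cp38-linux_x86_64.whl
+```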
-## Build steps
+## Optional Build steps for ONNX Runtime
[build instructions](https://onnxruntime.ai/docs/build/eps.html#openvino)
## Reference Documentation
@@ -26,18 +29,21 @@ The source code for this sample is available [here](https://github.com/microsoft
* numpy version 1.19.5+
* opencv 4.5.1+
* python 3+
-* use any sample video with objects as test input to this sample
-* Download the tinyYOLOv2 model from the ONNX Model Zoo
+* Use any sample video containing objects as test input to this sample. [Download sample videos](https://github.com/intel-iot-devkit/sample-videos)
+* Download the tinyYOLOv2 model from the [ONNX Model Zoo](https://github.com/onnx/models/tree/master/vision/object_detection_segmentation/tiny-yolov2)
Note: For all the python package dependencies requirements, check 'requirements.txt' file in the sample directory. You may also install these dependencies with:
```bash
pip3 install -r requirements.txt
```
-
+### See the command-line options for the sample
+```bash
+python3 tiny_yolov2_obj_detection_sample.py --h
+```
## Running the ONNXRuntime OpenVINO Execution Provider sample
```bash
-python3 tiny_yolov2_obj_detection_sample.py
+python3 tiny_yolov2_obj_detection_sample.py --video face-demographics-walking-and-pause.mp4 --model tinyyolov2.onnx --device CPU_FP32
```
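+### Run the sample on the default CPU EP (MLAS)
+To run the same sample on the default CPU Execution Provider instead of OpenVINO, pass `--device cpu`:
+```bash
+python3 tiny_yolov2_obj_detection_sample.py --video face-demographics-walking-and-pause.mp4 --model tinyyolov2.onnx --device cpu
+```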
## To stop the sample from running
diff --git a/python/OpenVINO_EP/tiny_yolo_v2_object_detection/tiny_yolov2_obj_detection_sample.py b/python/OpenVINO_EP/tiny_yolo_v2_object_detection/tiny_yolov2_obj_detection_sample.py
index e4dbbe291..ef978462d 100644
--- a/python/OpenVINO_EP/tiny_yolo_v2_object_detection/tiny_yolov2_obj_detection_sample.py
+++ b/python/OpenVINO_EP/tiny_yolo_v2_object_detection/tiny_yolov2_obj_detection_sample.py
@@ -1,5 +1,5 @@
'''
-Copyright (C) 2021, Intel Corporation
+Copyright (C) 2021-2022, Intel Corporation
SPDX-License-Identifier: Apache-2.0
'''
@@ -8,15 +8,36 @@
import cv2
import time
import os
+import argparse
+
+# color look up table for different classes for object detection sample
+clut = [(0,0,0),(255,0,0),(255,0,255),(0,0,255),(0,255,0),(0,255,128),
+ (128,255,0),(128,128,0),(0,128,255),(128,0,128),
+ (255,0,128),(128,0,255),(255,128,128),(128,255,128),(255,255,0),
+ (255,128,128),(128,128,255),(255,128,128),(128,255,128),(128,255,128)]
+
+# 20 labels that the tiny-yolov2 model can do the object_detection on
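+# Each label's index matches the corresponding color in clut above.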
+label = ["aeroplane","bicycle","bird","boat","bottle",
+ "bus","car","cat","chair","cow","diningtable",
+ "dog","horse","motorbike","person","pottedplant",
+ "sheep","sofa","train","tvmonitor"]
+
+def parse_arguments():
+    parser = argparse.ArgumentParser(description='Object detection using tiny YOLOv2 and OpenCV with the OpenVINO Execution Provider for ONNXRuntime')
+ parser.add_argument('--device', default='CPU_FP32', help="Device to perform inference on 'cpu (MLAS)' or on devices supported by OpenVINO-EP [CPU_FP32, GPU_FP32, GPU_FP16, MYRIAD_FP16, VAD-M_FP16].")
+ parser.add_argument('--video', help='Path to video file.')
+ parser.add_argument('--model', help='Path to model.')
+ args = parser.parse_args()
+ return args
def sigmoid(x, derivative=False):
return x*(1-x) if derivative else 1/(1+np.exp(-x))
def softmax(x):
- scoreMatExp = np.exp(np.asarray(x))
- return scoreMatExp / scoreMatExp.sum(0)
+ score_mat_exp = np.exp(np.asarray(x))
+ return score_mat_exp / score_mat_exp.sum(0)
-def checkModelExtension(fp):
+def check_model_extension(fp):
# Split the extension from the path and normalise it to lowercase.
ext = os.path.splitext(fp)[-1].lower()
@@ -27,7 +48,7 @@ def checkModelExtension(fp):
if not os.path.exists(fp):
raise Exception("[ ERROR ] Path of the onnx model file is Invalid")
-def checkVideoFileExtension(fp):
+def check_video_file_extension(fp):
# Split the extension from the path and normalise it to lowercase.
ext = os.path.splitext(fp)[-1].lower()
# Now we can simply use != to check for inequality, no need for wildcards.
@@ -40,154 +61,157 @@ def checkVideoFileExtension(fp):
if not os.path.exists(fp):
raise Exception("[ ERROR ] Path of the video file is Invalid")
-# color look up table for different classes for object detection sample
-clut = [(0,0,0),(255,0,0),(255,0,255),(0,0,255),(0,255,0),(0,255,128),
- (128,255,0),(128,128,0),(0,128,255),(128,0,128),
- (255,0,128),(128,0,255),(255,128,128),(128,255,128),(255,255,0),
- (255,128,128),(128,128,255),(255,128,128),(128,255,128),(128,255,128)]
-
-# 20 labels that the tiny-yolov2 model can do the object_detection on
-label = ["aeroplane","bicycle","bird","boat","bottle",
- "bus","car","cat","chair","cow","diningtable",
- "dog","horse","motorbike","person","pottedplant",
- "sheep","sofa","train","tvmonitor"]
-
-model_file_path = "tiny_yolo_v2_zoo_model.onnx"
-# TODO: You need to modify the path to the input onnx model based on where it is located on your device after downloading it from ONNX Model zoo.
-
-# Validate model file path
-checkModelExtension(model_file_path)
-
-device = 'CPU_FP32'
-# Set OpenVINO as the Execution provider to infer this model and load the model
-sess = rt.InferenceSession(model_file_path, providers=['OpenVINOExecutionProvider'], provider_options=[{'device_type' : device}])
-
-# Get the input name of the model
-input_name = sess.get_inputs()[0].name
-
-'''
-other 'device_type' options are: (Any hardware target can be assigned if you have the access to it)
-
-'CPU_FP32', 'GPU_FP32', 'GPU_FP16', 'MYRIAD_FP16', 'VAD-M_FP16', 'VAD-F_FP32',
-'HETERO:MYRIAD,CPU', 'MULTI:MYRIAD,GPU,CPU'
+def image_preprocess(frame):
+ in_frame = cv2.resize(frame, (416, 416))
+ preprocessed_image = np.asarray(in_frame)
+ preprocessed_image = preprocessed_image.astype(np.float32)
+ preprocessed_image = preprocessed_image.transpose(2,0,1)
+ #Reshaping the input array to align with the input shape of the model
+ preprocessed_image = preprocessed_image.reshape(1,3,416,416)
+ return preprocessed_image
+
+def postprocess_output(out, frame, x_scale, y_scale, i):
+ out = out[0][0]
+ num_classes = 20
+ anchors = [1.08, 1.19, 3.42, 4.41, 6.63, 11.38, 9.42, 5.11, 16.62, 10.52]
+ existing_labels = {l: [] for l in label}
+
+ #Inside this loop we compute the bounding box b for grid cell (cy, cx)
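+    # The 13x13 grid comes from the 416x416 input divided by the network stride of 32; each cell predicts 5 anchor boxes with (num_classes + 5) channels each.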
+ for cy in range(0,13):
+ for cx in range(0,13):
+ for b in range(0,5):
+ # First we read the tx, ty, width(tw), and height(th) for the bounding box from the out array, as well as the confidence score
+ channel = b*(num_classes+5)
+ tx = out[channel ][cy][cx]
+ ty = out[channel+1][cy][cx]
+ tw = out[channel+2][cy][cx]
+ th = out[channel+3][cy][cx]
+ tc = out[channel+4][cy][cx]
+
+ x = (float(cx) + sigmoid(tx))*32
+ y = (float(cy) + sigmoid(ty))*32
+ w = np.exp(tw) * 32 * anchors[2*b]
+ h = np.exp(th) * 32 * anchors[2*b+1]
+
+ #calculating the confidence score
+ confidence = sigmoid(tc) # The confidence value for the bounding box is given by tc
+ classes = np.zeros(num_classes)
+ for c in range(0,num_classes):
+ classes[c] = out[channel + 5 +c][cy][cx]
+ # we take the softmax to turn the array into a probability distribution. And then we pick the class with the largest score as the winner.
+ classes = softmax(classes)
+ detected_class = classes.argmax()
+ # Now we can compute the final score for this bounding box and we only want to keep the ones whose combined score is over a certain threshold
+ if 0.60 < classes[detected_class]*confidence:
+ color =clut[detected_class]
+ x = (x - w/2)*x_scale
+ y = (y - h/2)*y_scale
+ w *= x_scale
+ h *= y_scale
+
+ labelX = int((x+x+w)/2)
+ labelY = int((y+y+h)/2)
+ addLabel = True
+ lab_threshold = 100
+ for point in existing_labels[label[detected_class]]:
+ if labelX < point[0] + lab_threshold and labelX > point[0] - lab_threshold and \
+ labelY < point[1] + lab_threshold and labelY > point[1] - lab_threshold:
+ addLabel = False
+ #Adding class labels to the output of the frame and also drawing a rectangular bounding box around the object detected.
+ if addLabel:
+ cv2.rectangle(frame, (int(x),int(y)),(int(x+w),int(y+h)),color,2)
+ cv2.rectangle(frame, (int(x),int(y-13)),(int(x)+9*len(label[detected_class]),int(y)),color,-1)
+ cv2.putText(frame,label[detected_class],(int(x)+2,int(y)-3),cv2.FONT_HERSHEY_COMPLEX,0.4,(255,255,255),1)
+ existing_labels[label[detected_class]].append((labelX,labelY))
+ print('{} detected in frame {}'.format(label[detected_class],i))
+
-'''
+def show_bbox(device, frame, inference_time):
+ cv2.putText(frame,device,(10,20),cv2.FONT_HERSHEY_COMPLEX,0.5,(255,255,255),1)
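+    # The FPS value reflects inference time only, not the full per-frame loop time.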
+ cv2.putText(frame,'FPS: {}'.format(1.0/inference_time),(10,40),cv2.FONT_HERSHEY_COMPLEX,0.5,(255,255,255),1)
+ frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+ cv2.imshow('frame',frame)
-#Path to video file has to be provided
-video_file_path = "sample_demo_video.mp4"
-# TODO: You need to specify the path to your own sample video based on where it is located on your device.
-
-#validate video file input path
-checkVideoFileExtension(video_file_path)
-
-#Path to video file has to be provided
-cap = cv2.VideoCapture(video_file_path)
-
-# capturing different metrics of the image from the video
-fps = cap.get(cv2.CAP_PROP_FPS)
-width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
-height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
-x_scale = float(width)/416.0 #In the document of tino-yolo-v2, input shape of this network is (1,3,416,416).
-y_scale = float(height)/416.0
-
-# writing the inferencing output as a video to the local disk
-fourcc = cv2.VideoWriter_fourcc(*'XVID')
-output_video_name = device + "_output.avi"
-output_video = cv2.VideoWriter(output_video_name,fourcc, float(17.0), (640,360))
-
-# capturing one frame at a time from the video feed and performing the inference
-i = 0
-while cap.isOpened():
- l_start = time.time()
- ret, frame = cap.read()
- if not ret:
- break
- initial_w = cap.get(3)
- initial_h = cap.get(4)
+def main():
+
+ # Process arguments
+ args = parse_arguments()
+
+ # Validate model file path
+ check_model_extension(args.model)
+ so = rt.SessionOptions()
+ so.log_severity_level = 3
+ if (args.device == 'cpu'):
+ print("Device type selected is 'cpu' which is the default CPU Execution Provider (MLAS)")
+ #Specify the path to the ONNX model on your machine and register the CPU EP
+ sess = rt.InferenceSession(args.model, so, providers=['CPUExecutionProvider'])
+    elif (args.device in ['CPU_FP32', 'GPU_FP32', 'GPU_FP16', 'MYRIAD_FP16', 'VAD-M_FP16']):
+ #Specify the path to the ONNX model on your machine and register the OpenVINO EP
+ sess = rt.InferenceSession(args.model, so, providers=['OpenVINOExecutionProvider'], provider_options=[{'device_type' : args.device}])
+ print("Device type selected is: " + args.device + " using the OpenVINO Execution Provider")
+ '''
+ other 'device_type' options are: (Any hardware target can be assigned if you have the access to it)
+ 'CPU_FP32', 'GPU_FP32', 'GPU_FP16', 'MYRIAD_FP16', 'VAD-M_FP16'
+ '''
+    else:
+        raise Exception("Invalid device type: " + args.device + ". Choose from [cpu, CPU_FP32, GPU_FP32, GPU_FP16, MYRIAD_FP16, VAD-M_FP16]")
+
+ # Get the input name of the model
+ input_name = sess.get_inputs()[0].name
+
+ #validate video file input path
+ check_video_file_extension(args.video)
+
+ #Path to video file has to be provided
+ cap = cv2.VideoCapture(args.video)
+
+ # capturing different metrics of the image from the video
+ fps = cap.get(cv2.CAP_PROP_FPS)
+ width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+ height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+    x_scale = float(width)/416.0 # The tiny-yolo-v2 network input shape is (1,3,416,416), so detections are scaled back to the original frame size.
+ y_scale = float(height)/416.0
+
+ # writing the inferencing output as a video to the local disk
+ fourcc = cv2.VideoWriter_fourcc(*'XVID')
+ output_video_name = args.device + "_output.avi"
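+    # Note: cv2.VideoWriter expects frames of exactly this (640,360) size; frames of a different size may not be written to the output file.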
+ output_video = cv2.VideoWriter(output_video_name,fourcc, float(17.0), (640,360))
+
+ # capturing one frame at a time from the video feed and performing the inference
+ i = 0
+ while cv2.waitKey(1) < 0:
+ l_start = time.time()
+ ret, frame = cap.read()
+ if not ret:
+ break
+ initial_w = cap.get(3)
+ initial_h = cap.get(4)
- # preprocessing the input frame and reshaping it.
- #In the document of tino-yolo-v2, input shape of this network is (1,3,416,416). so we resize the model frame w.r.t that size.
- in_frame = cv2.resize(frame, (416, 416))
- X = np.asarray(in_frame)
- X = X.astype(np.float32)
- X = X.transpose(2,0,1)
- # Reshaping the input array to align with the input shape of the model
- X = X.reshape(1,3,416,416)
+        # Preprocess the input frame and reshape it.
+        # The tiny-yolo-v2 network input shape is (1,3,416,416), so the frame is resized accordingly.
+ preprocessed_image = image_preprocess(frame)
+
+ start = time.time()
+ #Running the session by passing in the input data of the model
+ out = sess.run(None, {input_name: preprocessed_image})
+ end = time.time()
+ inference_time = end - start
+
+ #Get the output
+ postprocess_output(out, frame, x_scale, y_scale, i)
+
+ #Show the Output
+ output_video.write(frame)
+ show_bbox(args.device, frame, inference_time)
- start = time.time()
- #Running the session by passing in the input data of the model
- out = sess.run(None, {input_name: X})
- end = time.time()
- inference_time = end - start
- out = out[0][0]
-
- numClasses = 20
- anchors = [1.08, 1.19, 3.42, 4.41, 6.63, 11.38, 9.42, 5.11, 16.62, 10.52]
-
- existingLabels = {l: [] for l in label}
-
- #Inside this loop we compute the bounding box b for grid cell (cy, cx)
- for cy in range(0,13):
- for cx in range(0,13):
- for b in range(0,5):
- # First we read the tx, ty, width(tw), and height(th) for the bounding box from the out array, as well as the confidence score
- channel = b*(numClasses+5)
- tx = out[channel ][cy][cx]
- ty = out[channel+1][cy][cx]
- tw = out[channel+2][cy][cx]
- th = out[channel+3][cy][cx]
- tc = out[channel+4][cy][cx]
-
- x = (float(cx) + sigmoid(tx))*32
- y = (float(cy) + sigmoid(ty))*32
-
- w = np.exp(tw) * 32 * anchors[2*b ]
- h = np.exp(th) * 32 * anchors[2*b+1]
-
- #calculating the confidence score
- confidence = sigmoid(tc) # The confidence value for the bounding box is given by tc
-
- classes = np.zeros(numClasses)
- for c in range(0,numClasses):
- classes[c] = out[channel + 5 +c][cy][cx]
- # we take the softmax to turn the array into a probability distribution. And then we pick the class with the largest score as the winner.
- classes = softmax(classes)
- detectedClass = classes.argmax()
-
- # Now we can compute the final score for this bounding box and we only want to keep the ones whose combined score is over a certain threshold
- if 0.45< classes[detectedClass]*confidence:
- color =clut[detectedClass]
- x = (x - w/2)*x_scale
- y = (y - h/2)*y_scale
- w *= x_scale
- h *= y_scale
-
- labelX = int((x+x+w)/2)
- labelY = int((y+y+h)/2)
- addLabel = True
- labThreshold = 40
- for point in existingLabels[label[detectedClass]]:
- if labelX < point[0] + labThreshold and labelX > point[0] - labThreshold and \
- labelY < point[1] + labThreshold and labelY > point[1] - labThreshold:
- addLabel = False
- #Adding class labels to the output of the frame and also drawing a rectangular bounding box around the object detected.
- if addLabel:
- cv2.rectangle(frame, (int(x),int(y)),(int(x+w),int(y+h)),color,2)
- cv2.rectangle(frame, (int(x),int(y-13)),(int(x)+9*len(label[detectedClass]),int(y)),color,-1)
- cv2.putText(frame,label[detectedClass],(int(x)+2,int(y)-3),cv2.FONT_HERSHEY_COMPLEX,0.4,(255,255,255),1)
- existingLabels[label[detectedClass]].append((labelX,labelY))
- print('{} detected in frame {}'.format(label[detectedClass],i))
- output_video.write(frame)
- cv2.putText(frame,device,(10,20),cv2.FONT_HERSHEY_COMPLEX,0.5,(255,255,255),1)
- cv2.putText(frame,'FPS: {}'.format(1.0/inference_time),(10,40),cv2.FONT_HERSHEY_COMPLEX,0.5,(255,255,255),1)
- cv2.imshow('frame',frame)
-
- #Press 'q' to quit the process
- if cv2.waitKey(1) & 0xFF == ord('q'):
- break
- print('Processed Frame {}'.format(i))
- i += 1
- l_end = time.time()
- print('Loop Time = {}'.format(l_end - l_start))
-output_video.release()
-cv2.destroyAllWindows()
\ No newline at end of file
+        # Press any key to stop processing (the while condition above checks cv2.waitKey).
+ print('Processed Frame {}'.format(i))
+ i += 1
+ l_end = time.time()
+ print('Loop Time = {}'.format(l_end - l_start))
+
+ output_video.release()
+ cv2.destroyAllWindows()
+
+if __name__ == "__main__":
+ main()
\ No newline at end of file
diff --git a/python/OpenVINO_EP/yolov4_object_detection/README.md b/python/OpenVINO_EP/yolov4_object_detection/README.md
index ffe9c1ca8..988e26d6e 100644
--- a/python/OpenVINO_EP/yolov4_object_detection/README.md
+++ b/python/OpenVINO_EP/yolov4_object_detection/README.md
@@ -16,13 +16,15 @@ The source code for this sample is available [here](https://github.com/microsoft
# How to build
## Prerequisites
-1. [The Intel® Distribution of OpenVINO toolkit](https://docs.openvinotoolkit.org/latest/index.html)
-2. Download the latest tinyYOLOv2 model from the ONNX Model Zoo.
- This model was adapted from [ONNX Model Zoo](https://github.com/onnx/models).Download the latest version of the [YOLOv4](https://github.com/onnx/models/tree/master/vision/object_detection_segmentation/yolov4) model from here.
+1. [The Intel® Distribution of OpenVINO toolkit](https://docs.openvino.ai/latest/openvino_docs_install_guides_install_runtime.html)
+ On that page, choose the option to install the OpenVINO Runtime using an installer.
+2. Also see the documentation for the [installer](https://docs.openvino.ai/latest/openvino_docs_install_guides_installing_openvino_linux.html#doxid-openvino-docs-install-guides-installing-openvino-linux).
+3. Download the latest version of the [YOLOv4](https://github.com/onnx/models/tree/master/vision/object_detection_segmentation/yolov4) model.
## Install ONNX Runtime for OpenVINO Execution Provider
+Install the onnxruntime-openvino Python package from the [Intel ONNX Runtime releases page](https://github.com/intel/onnxruntime/releases/).
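+After installation, you can confirm that the OpenVINO Execution Provider is available with a quick check:
+```bash
+python3 -c "import onnxruntime as rt; print(rt.get_available_providers())"
+```
+The printed list should include `OpenVINOExecutionProvider`.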
-## Build steps
+## Optional Build steps for ONNX Runtime
[build instructions](https://onnxruntime.ai/docs/build/eps.html#openvino)
## Reference Documentation
@@ -33,9 +35,8 @@ The source code for this sample is available [here](https://github.com/microsoft
* numpy version 1.19.5+
* opencv 4.5.1+
* python 3+
-* use any sample video with objects as test input to this sample
-* Download the tinyYOLOv2 model from the ONNX Model Zoo
-[Download Sample videos](https://github.com/intel-iot-devkit/sample-videos)
+* Use any sample video containing objects as test input to this sample. [Download sample videos](https://github.com/intel-iot-devkit/sample-videos)
+* Download the YOLOv4 model from the [ONNX Model Zoo](https://github.com/onnx/models/tree/main/vision/object_detection_segmentation/yolov4)
Note: For all the python package dependencies requirements, check 'requirements.txt' file in the sample directory. You may also install these dependencies with:
```bash
@@ -51,28 +52,28 @@ python3 yolov4.py --h
### Run the sample on OpenVINO EP
```bash
-python3 yolov4.py --device CPU_FP32 --video bottle-detection.mp4
+python3 yolov4.py --device CPU_FP32 --video classroom.mp4 --model yolov4.onnx
```
Note: You can pick different device options to run on OpenVINO EP like GPU_FP32, GPU_FP16 and MYRIAD_FP16.
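+For example, to run on an Intel GPU with FP16 precision (assuming a supported GPU and its drivers are available):
+```bash
+python3 yolov4.py --device GPU_FP16 --video classroom.mp4 --model yolov4.onnx
+```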
### Run the sample on default CPU EP (MLAS)
```bash
-python3 yolov4.py --device cpu --video bottle-detection.mp4
+python3 yolov4.py --device cpu --video classroom.mp4 --model yolov4.onnx
```
### Run the sample with video as Input
```bash
-python3 yolov4.py --device CPU_FP32 --video bottle-detection.mp4
+python3 yolov4.py --device CPU_FP32 --video classroom.mp4 --model yolov4.onnx
```
### Run the sample with Image as Input
```bash
-python3 yolov4.py --device CPU_FP32 --image cat.jpg
+python3 yolov4.py --device CPU_FP32 --image cat.jpg --model yolov4.onnx
```
### Run the sample with Live Input stream Like webcam
```bash
-python3 yolov4.py --device CPU_FP32
+python3 yolov4.py --device CPU_FP32 --model yolov4.onnx
```
## To stop the sample from running
diff --git a/python/OpenVINO_EP/yolov4_object_detection/yolov4.py b/python/OpenVINO_EP/yolov4_object_detection/yolov4.py
index c519e9cf9..526289381 100644
--- a/python/OpenVINO_EP/yolov4_object_detection/yolov4.py
+++ b/python/OpenVINO_EP/yolov4_object_detection/yolov4.py
@@ -1,17 +1,17 @@
'''
-Copyright (C) 2021, Intel Corporation
+Copyright (C) 2021-2022, Intel Corporation
SPDX-License-Identifier: Apache-2.0
Major Portions of this code are copyright of their respective authors and released under the Apache License Version 2.0:
-- onnx, Copyright 2021. For licensing see https://github.com/onnx/models/blob/master/LICENSE
+- onnx, Copyright 2021-2022. For licensing see https://github.com/onnx/models/blob/master/LICENSE
'''
import cv2
import numpy as np
from onnx import numpy_helper
import onnx
+import onnxruntime as rt
import os
from PIL import Image
-import onnxruntime as rt
from scipy import special
import colorsys
import random
@@ -19,12 +19,6 @@
import sys
import time
-parser = argparse.ArgumentParser(description='Object Detection using YOLOv4 in OPENCV using OpenVINO Execution Provider for ONNXRuntime')
-parser.add_argument('--device', default='cpu', help="Device to perform inference on 'cpu (MLAS)' or on devices supported by OpenVINO-EP [CPU_FP32, GPU_FP32, GPU_FP16, MYRIAD_FP16, VAD-M_FP16].")
-parser.add_argument('--image', help='Path to image file.')
-parser.add_argument('--video', help='Path to video file.')
-args = parser.parse_args()
-
def image_preprocess(image, target_size, gt_boxes=None):
ih, iw = target_size
@@ -47,60 +41,7 @@ def image_preprocess(image, target_size, gt_boxes=None):
gt_boxes[:, [1, 3]] = gt_boxes[:, [1, 3]] * scale + dh
return image_padded, gt_boxes
-# Process inputs
-winName = 'Object detection using ONNXRuntime OpenVINO Execution Provider using YoloV4 model'
-cv2.namedWindow(winName, cv2.WINDOW_NORMAL)
-
-outputFile = "yolo_out_py.avi"
-if (args.image):
- # Open the image file
- if not os.path.isfile(args.image):
- print("Input image file ", args.image, " doesn't exist")
- sys.exit(1)
- cap = cv2.VideoCapture(args.image)
- outputFile = args.image[:-4]+'_yolo_out_py.jpg'
-elif (args.video):
- # Open the video file
- if not os.path.isfile(args.video):
- print("Input video file ", args.video, " doesn't exist")
- sys.exit(1)
- cap = cv2.VideoCapture(args.video)
- outputFile = args.video[:-4]+'_yolo_out_py.avi'
-else:
- # Webcam input
- cap = cv2.VideoCapture(0)
-
-# Get the video writer initialized to save the output video
-if (not args.image):
- vid_writer = cv2.VideoWriter(outputFile, cv2.VideoWriter_fourcc('M','J','P','G'), 30, (round(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),round(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))))
-
-device = args.device
-
-if(args.device == 'cpu'):
- print("Device type selected is 'cpu' which is the default CPU Execution Provider (MLAS)")
- #Specify the path to the ONNX model on your machine and register the CPU EP
- sess = rt.InferenceSession("yolov4/yolov4.onnx", providers=['CPUExecutionProvider'])
-else:
- #Specify the path to the ONNX model on your machine and register the OpenVINO EP
- sess = rt.InferenceSession("yolov4/yolov4.onnx", providers=['OpenVINOExecutionProvider'], provider_options=[{'device_type' : device}])
- print("Device type selected is: " + device + " using the OpenVINO Execution Provider")
- '''
- other 'device_type' options are: (Any hardware target can be assigned if you have the access to it)
- 'CPU_FP32', 'GPU_FP32', 'GPU_FP16', 'MYRIAD_FP16', 'VAD-M_FP16'
- '''
-
-outputs = sess.get_outputs()
-output_names = list(map(lambda output: output.name, outputs))
-input_name = sess.get_inputs()[0].name
-
-def get_anchors(anchors_path, tiny=False):
- '''loads the anchors from a file'''
- with open(anchors_path) as f:
- anchors = f.readline()
- anchors = np.array(anchors.split(','), dtype=np.float32)
- return anchors.reshape(3, 3, 2)
-
-def postprocess_bbbox(pred_bbox, ANCHORS, STRIDES, XYSCALE=[1,1,1]):
+def postprocess_bbbox(pred_bbox):
'''define anchor boxes'''
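+    # Relies on the module-level ANCHORS, STRIDES and XYSCALE globals defined later in this file.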
for i, pred in enumerate(pred_bbox):
conv_shape = pred.shape
@@ -111,7 +52,7 @@ def postprocess_bbbox(pred_bbox, ANCHORS, STRIDES, XYSCALE=[1,1,1]):
xy_grid = np.expand_dims(np.stack(xy_grid, axis=-1), axis=2)
xy_grid = np.tile(np.expand_dims(xy_grid, axis=0), [1, 1, 1, 3, 1])
- xy_grid = xy_grid.astype(np.float)
+ xy_grid = xy_grid.astype(float)
pred_xy = ((special.expit(conv_raw_dxdy) * XYSCALE[i]) - 0.5 * (XYSCALE[i] - 1) + xy_grid) * STRIDES[i]
pred_wh = (np.exp(conv_raw_dwdh) * ANCHORS[i])
@@ -258,54 +199,139 @@ def draw_bbox(image, bboxes, classes=read_class_names("coco.names"), show_label=
return image
+def get_anchors(anchors_path, tiny=False):
+ '''loads the anchors from a file'''
+ with open(anchors_path) as f:
+ anchors = f.readline()
+ anchors = np.array(anchors.split(','), dtype=np.float32)
+ return anchors.reshape(3, 3, 2)
+
#Specify the path to anchors file on your machine
-ANCHORS = "./yolov4_anchors.txt"
+ANCHORS = "./yolov4_anchors.txt"
STRIDES = [8, 16, 32]
XYSCALE = [1.2, 1.1, 1.05]
-
ANCHORS = get_anchors(ANCHORS)
STRIDES = np.array(STRIDES)
-while cv2.waitKey(1) < 0:
-
- # get frame from the video
- hasFrame, frame = cap.read()
-
- # Stop the program if reached end of video
- if not hasFrame:
- print("Done processing !!!")
- print("Output file is stored as ", outputFile)
- cv2.waitKey(3000)
- # Release device
- cap.release()
- break
-
- input_size = 416
-
- original_image = frame
- original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)
- original_image_size = original_image.shape[:2]
-
- image_data = image_preprocess(np.copy(original_image), [input_size, input_size])
- image_data = image_data[np.newaxis, ...].astype(np.float32)
+def parse_arguments():
+    parser = argparse.ArgumentParser(description='Object detection using YOLOv4 and OpenCV with the OpenVINO Execution Provider for ONNXRuntime')
+ parser.add_argument('--device', default='CPU_FP32', help="Device to perform inference on 'cpu (MLAS)' or on devices supported by OpenVINO-EP [CPU_FP32, GPU_FP32, GPU_FP16, MYRIAD_FP16, VAD-M_FP16].")
+ parser.add_argument('--image', help='Path to image file.')
+ parser.add_argument('--video', help='Path to video file.')
+ parser.add_argument('--model', help='Path to model.')
+ args = parser.parse_args()
+ return args
- start = time.time()
- detections = sess.run(output_names, {input_name: image_data})
- end = time.time()
- inference_time = end - start
+def check_model_extension(fp):
+ # Split the extension from the path and normalise it to lowercase.
+ ext = os.path.splitext(fp)[-1].lower()
- pred_bbox = postprocess_bbbox(detections, ANCHORS, STRIDES, XYSCALE)
- bboxes = postprocess_boxes(pred_bbox, original_image_size, input_size, 0.25)
- bboxes = nms(bboxes, 0.213, method='nms')
- image = draw_bbox(original_image, bboxes)
+ # Now we can simply use != to check for inequality, no need for wildcards.
+ if(ext != ".onnx"):
+ raise Exception(fp, "is an unknown file format. Use the model ending with .onnx format")
+
+ if not os.path.exists(fp):
+ raise Exception("[ ERROR ] Path of the onnx model file is Invalid")
- cv2.putText(image,device,(10,20),cv2.FONT_HERSHEY_COMPLEX,0.5,(255,255,255),1)
- cv2.putText(image,'FPS: {}'.format(1.0/inference_time),(10,40),cv2.FONT_HERSHEY_COMPLEX,0.5,(255,255,255),1)
+def main():
- # Write the frame with the detection boxes
+ # Process arguments
+ args = parse_arguments()
+
+ # Validate model file path
+ check_model_extension(args.model)
+
+ # Process inputs
+ win_name = 'Object detection using ONNXRuntime OpenVINO Execution Provider using YoloV4 model'
+ cv2.namedWindow(win_name, cv2.WINDOW_NORMAL)
+
+ output_file = "yolo_out_py.avi"
if (args.image):
- cv2.imwrite(outputFile, frame.astype(np.uint8))
+ # Open the image file
+ if not os.path.isfile(args.image):
+ print("Input image file ", args.image, " doesn't exist")
+ sys.exit(1)
+ cap = cv2.VideoCapture(args.image)
+ output_file = args.image[:-4]+'_yolo_out_py.jpg'
+ elif (args.video):
+ # Open the video file
+ if not os.path.isfile(args.video):
+ print("Input video file ", args.video, " doesn't exist")
+ sys.exit(1)
+ cap = cv2.VideoCapture(args.video)
+ output_file = args.video[:-4]+'_yolo_out_py.avi'
else:
- vid_writer.write(frame.astype(np.uint8))
+ # Webcam input
+ cap = cv2.VideoCapture(0)
- cv2.imshow(winName, image)
\ No newline at end of file
+ # Get the video writer initialized to save the output video
+ if (not args.image):
+ vid_writer = cv2.VideoWriter(output_file, cv2.VideoWriter_fourcc('M','J','P','G'), 30, (round(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),round(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))))
+
+ # Check the device information and create a session
+ device = args.device
+ so = rt.SessionOptions()
+ so.log_severity_level = 3
+ if(args.device == 'cpu'):
+ print("Device type selected is 'cpu' which is the default CPU Execution Provider (MLAS)")
+ #Specify the path to the ONNX model on your machine and register the CPU EP
+ sess = rt.InferenceSession(args.model, so, providers=['CPUExecutionProvider'])
+ else:
+ #Specify the path to the ONNX model on your machine and register the OpenVINO EP
+ sess = rt.InferenceSession(args.model, so, providers=['OpenVINOExecutionProvider'], provider_options=[{'device_type' : device}])
+ print("Device type selected is: " + device + " using the OpenVINO Execution Provider")
+ '''
+ other 'device_type' options are: (Any hardware target can be assigned if you have the access to it)
+ 'CPU_FP32', 'GPU_FP32', 'GPU_FP16', 'MYRIAD_FP16', 'VAD-M_FP16'
+ '''
+
+ input_name = sess.get_inputs()[0].name
+
+ while cv2.waitKey(1) < 0:
+ # get frame from the video
+ has_frame, frame = cap.read()
+ # Stop the program if reached end of video
+ if not has_frame:
+ print("Done processing !!!")
+ print("Output file is stored as ", output_file)
+ has_frame=False
+ cv2.waitKey(3000)
+ # Release device
+ cap.release()
+ break
+
+ input_size = 416
+ original_image = frame
+ original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)
+ original_image_size = original_image.shape[:2]
+
+ image_data = image_preprocess(np.copy(original_image), [input_size, input_size])
+ image_data = image_data[np.newaxis, ...].astype(np.float32)
+
+ outputs = sess.get_outputs()
+ output_names = list(map(lambda output: output.name, outputs))
+
+ start = time.time()
+ detections = sess.run(output_names, {input_name: image_data})
+ end = time.time()
+ inference_time = end - start
+
+ pred_bbox = postprocess_bbbox(detections)
+ bboxes = postprocess_boxes(pred_bbox, original_image_size, input_size, 0.25)
+ bboxes = nms(bboxes, 0.213, method='nms')
+ image = draw_bbox(original_image, bboxes)
+
+ cv2.putText(image,device,(10,20),cv2.FONT_HERSHEY_COMPLEX,0.5,(255,255,255),1)
+ cv2.putText(image,'FPS: {}'.format(1.0/inference_time),(10,40),cv2.FONT_HERSHEY_COMPLEX,0.5,(255,255,255),1)
+
+ # Write the frame with the detection boxes
+ if (args.image):
+ cv2.imwrite(output_file, frame.astype(np.uint8))
+ else:
+ vid_writer.write(frame.astype(np.uint8))
+
+ image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+ cv2.imshow(win_name, image)
+
+if __name__ == "__main__":
+ main()