From 4e66bbc2555fd934b80890f427757bf5bc48c424 Mon Sep 17 00:00:00 2001
From: Basanta Patra <basantax.patra@intel.com>
Date: Fri, 28 Dec 2018 19:38:18 +0530
Subject: [PATCH 1/2] Python version of object detection module

---
 object-detection/ROIviewer.py | 127 +++++++++++++++++++
 object-detection/tutorial1.py | 225 ++++++++++++++++++++++++++++++++++
 2 files changed, 352 insertions(+)
 create mode 100644 object-detection/ROIviewer.py
 create mode 100644 object-detection/tutorial1.py

diff --git a/object-detection/ROIviewer.py b/object-detection/ROIviewer.py
new file mode 100644
index 00000000..afa3e180
--- /dev/null
+++ b/object-detection/ROIviewer.py
@@ -0,0 +1,127 @@
+#!/usr/bin/env python
+"""
+ Copyright (c) 2018 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+import sys
+import os
+from argparse import ArgumentParser
+import cv2
+import logging as log
+import struct
+import collections
+
+
+
+def build_argparser():
+    parser = ArgumentParser()
+    parser.add_argument("-i", "--input",
+                        help="Path to video file or image. 'cam' for capturing video stream from camera", required=True,
+                        type=str)    
+    parser.add_argument("-l", "--labels", help="Labels mapping file", required=True, type=str)
+    parser.add_argument("--ROIfile",help="Path to ROI file.",default="ROIs.txt",type=str)
+    parser.add_argument("-b", help="Batch size", default=0, type=int)
+
+    return parser
+
+class ROI_data_type:
+    framenum=""
+    labelnum=""
+    confidence=""
+    xmin=""
+    ymin=""
+    xmax=""
+    ymax=""
+    
+def main():
+    log.basicConfig(format="[ %(levelname)s ] %(message)s", level=log.INFO, stream=sys.stdout)
+    args = build_argparser().parse_args()
+    batch=args.b
+    ROIs = collections.deque()
+    assert os.path.isfile(args.ROIfile), "Specified ROIs.txt file doesn't exist"
+    
+    fin=open("ROIs.txt",'r')
+    for l in fin:
+        R=ROI_data_type()
+        batchnum,R.framenum,R.labelnum,R.confidence,R.xmin,R.ymin,R.xmax,R.ymax=l.split()
+        if int(batchnum)==batch:
+            ROIs.append(R)
+
+    if args.input == 'cam':
+        input_stream = 0
+    else:
+        input_stream = args.input
+        assert os.path.isfile(args.input), "Specified input file doesn't exist"
+        
+    print("opening", args.input," batchnum ",args.b,"\n")
+    
+    cap = cv2.VideoCapture(input_stream)
+    if not cap.isOpened():
+        print("could not open input video file")
+    framenum=0
+    if len(ROIs)>1:
+        R=ROIs[0]
+    else:
+        print("empty ROI file");
+    if args.labels:
+        with open(args.labels, 'r') as f:
+            labels_map = [x.strip() for x in f]
+    else:
+        labels_map = None
+
+    while True:
+        ret, frame = cap.read()
+        if not ret:
+            break
+        ncols=cap.get(3)
+        nrows=cap.get(4)
+        while int(R.framenum)<framenum:
+            if len(ROIs)>1:
+                ROIs.popleft()
+                R=ROIs[0];
+            else:
+                break
+        while int(R.framenum)==framenum:
+            xmin = int(float(R.xmin) * float(ncols))
+            ymin = int(float(R.ymin) * float(nrows))
+            xmax = int(float(R.xmax) * float(ncols))
+            ymax = int(float(R.ymax) * float(nrows))
+            
+            class_id=int(float(R.labelnum)+1)
+            cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (0, 255, 0),4,16,0)
+
+            if len(labels_map)==0:
+                templabel=int(float(R.labelnum))+":"+int(R.confidence*100.0)
+                print(templabel)
+            else:
+                templabel=str(labels_map[int(float(R.labelnum))])+":"+str(int(float(R.confidence)*100.0))
+                
+            cv2.rectangle(frame, (xmin, ymin+32), (xmax, ymin), (155, 155, 155),-1,0)
+            cv2.putText(frame, templabel, (xmin, ymin+24), cv2.FONT_HERSHEY_COMPLEX, 1.1, (0, 0, 0),3)
+            
+            if len(ROIs)>1:
+                ROIs.popleft()
+                R=ROIs[0]
+            else:
+                break
+            
+        cv2.imshow("Detection Results", frame)
+        if cv2.waitKey(30)>=0:
+            break
+        if len(ROIs)<=1:
+            break
+        framenum+=1
+            
+main()
diff --git a/object-detection/tutorial1.py b/object-detection/tutorial1.py
new file mode 100644
index 00000000..d05fe41b
--- /dev/null
+++ b/object-detection/tutorial1.py
@@ -0,0 +1,225 @@
+#!/usr/bin/env python
+"""
+ Copyright (c) 2018 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+from __future__ import print_function
+import sys
+import os
+from argparse import ArgumentParser
+import cv2
+import time
+import logging as log
+from openvino.inference_engine import IENetwork, IEPlugin
+from enum import Enum
+import collections
+import xml.etree.ElementTree as ET
+
+
+
+class output_mode_type(Enum):
+    CLASSIFICATION_MODE=1
+    SSD_MODE=2
+
+
+def build_argparser():
+    parser = ArgumentParser()
+    parser.add_argument("-m", "--model", help="Path to an .xml file with a trained model.", required=True, type=str)
+    parser.add_argument("-i", "--input",
+                        help="Path to video file or image. 'cam' for capturing video stream from camera", required=True,
+                        type=str)
+    parser.add_argument("-l", "--cpu_extension",
+                        help="MKLDNN (CPU)-targeted custom layers.Absolute path to a shared library with the kernels "
+                             "impl.", type=str, default=None)
+    parser.add_argument("-pp", "--plugin_dir", help="Path to a plugin folder", type=str, default=None)
+    parser.add_argument("-d", "--device",
+                        help="Specify the target device to infer on; CPU, GPU, FPGA or MYRIAD is acceptable. Demo "
+                             "will look for a suitable plugin for device specified (CPU by default)", default="CPU",
+                        type=str)
+    parser.add_argument("--labels", help="Labels mapping file", default=None, type=str)
+    parser.add_argument("-pt", "--prob_threshold", help="Probability threshold for detections filtering",
+                        default=0.5, type=float)
+    parser.add_argument("-fr", help="maximum frames to process", default=256, type=int)
+    parser.add_argument("-b", help="Batch size", default=1, type=int)
+    
+    return parser
+
+
+def main():
+    log.basicConfig(format="[ %(levelname)s ] %(message)s", level=log.INFO, stream=sys.stdout)
+    args = build_argparser().parse_args()
+    model_xml = args.model
+    model_bin = os.path.splitext(model_xml)[0] + ".bin"
+    args.cpu_extension="/opt/intel/computer_vision_sdk_2018.5.445/inference_engine/samples/build/intel64/Release/lib/libcpu_extension.so"
+    preprocess_times = collections.deque()
+    infer_times = collections.deque()
+    postprocess_times = collections.deque()
+    
+    ROIfile=open("ROIs.txt","w"); # output stored here, view with ROIviewer
+    
+    # Plugin initialization for specified device and load extensions library if specified
+    log.info("Initializing plugin for {} device...".format(args.device))
+    plugin = IEPlugin(device=args.device, plugin_dirs=args.plugin_dir)
+    if args.cpu_extension and 'CPU' in args.device:
+        plugin.add_cpu_extension(args.cpu_extension)
+
+        
+    # Read IR
+    log.info("Reading IR...")
+    net = IENetwork.from_ir(model=model_xml, weights=model_bin)
+
+    if plugin.device == "CPU":
+        supported_layers = plugin.get_supported_layers(net)
+        not_supported_layers = [l for l in net.layers.keys() if l not in supported_layers]
+        if len(not_supported_layers) != 0:
+            log.error("Following layers are not supported by the plugin for specified device {}:\n {}".
+                      format(plugin.device, ', '.join(not_supported_layers)))
+            log.error("Please try to specify cpu extensions library path in demo's command line parameters using -l "
+                      "or --cpu_extension command line argument")
+            sys.exit(1)
+
+    #Set Batch Size
+    batchSize = args.b
+    frameLimit = args.fr
+    assert len(net.inputs.keys()) == 1, "Demo supports only single input topologies"
+    assert len(net.outputs) == 1, "Demo supports only single output topologies"
+    input_blob = next(iter(net.inputs))
+    out_blob = next(iter(net.outputs))
+    log.info("Loading IR to the plugin...")
+    exec_net = plugin.load(network=net, num_requests=2)
+    tree = ET.parse(model_xml)
+    root = tree.getroot()
+    
+    # Read and pre-process input image
+    n, c, h, w = net.inputs[input_blob].shape
+    infer_width=w;
+    infer_height=h;
+    num_channels=c;
+    channel_size=infer_width*infer_height
+    full_image_size=channel_size*num_channels
+    
+    print("inputdims=",w,h,c,n)
+    print("outputdims=",root[0][120][2][0][3].text,root[0][120][2][0][2].text,root[0][120][2][0][1].text,root[0][120][2][0][0].text)
+    if int(root[0][120][2][0][3].text)>1 :
+        print("SSD Mode")
+        output_mode=output_mode_type.SSD_MODE
+    else:
+        print("Single Classification Mode")
+        output_mode=CLASSIFICATION_MODE
+        output_data_size=int(root[0][120][2][0][2].text)*int(root[0][120][2][0][1].text)*int(root[0][120][2][0][0].text)
+    del net
+    if args.input == 'cam':
+        input_stream = 0
+    else:
+        input_stream = args.input
+        assert os.path.isfile(args.input), "Specified input file doesn't exist"
+    if args.labels:
+        with open(args.labels, 'r') as f:
+            labels_map = [x.strip() for x in f]
+    else:
+        labels_map = None
+
+    cap = cv2.VideoCapture(input_stream)
+
+    cur_request_id = 0
+    next_request_id = 1
+
+    log.info("Starting inference in async mode...")
+    is_async_mode = True
+    render_time = 0
+
+    framenum = 0
+    process_more_frames=True
+    frames_in_output=batchSize
+    
+    while process_more_frames:
+        time1 = time.time()
+        for mb in range(0 , batchSize):
+            ret, frame = cap.read()
+            if not ret or (framenum >= frameLimit):
+                process_more_frames=False
+                frames_in_output=mb
+                break
+
+            # convert image to blob
+            # Fill input tensor with planes. First b channel, then g and r channels
+            in_frame = cv2.resize(frame, (w, h))
+            in_frame = in_frame.transpose((2, 0, 1))  # Change data layout from HWC to CHW
+            in_frame = in_frame.reshape((n, c, h, w))
+			
+        time2 = time.time()
+        diffPreProcess = time2 - time1
+        if process_more_frames:
+            preprocess_times.append(diffPreProcess*1000)
+            
+        # Main sync point:
+        # in the truly Async mode we start the NEXT infer request, while waiting for the CURRENT to complete
+        # in the regular mode we start the CURRENT request and immediately wait for it's completion
+        inf_start = time.time()
+        if is_async_mode:
+            exec_net.start_async(request_id=next_request_id, inputs={input_blob: in_frame})
+        else:
+            exec_net.start_async(request_id=cur_request_id, inputs={input_blob: in_frame})
+        if exec_net.requests[cur_request_id].wait(-1) == 0:
+            inf_end = time.time()
+            det_time = inf_end - inf_start
+            infer_times.append(det_time*1000)
+            time1 = time.time()
+
+            # Parse detection results of the current request
+            res = exec_net.requests[cur_request_id].outputs[out_blob]
+            for obj in res[0][0]:
+                # Write into ROIs.txt only objects when probability more than specified threshold
+            	if obj[2] > args.prob_threshold:
+                    confidence=obj[2]
+                    locallabel = obj[1] - 1
+                    print(str(0),str(framenum),str(locallabel),str(confidence),str(obj[3]),str(obj[4]),str(obj[5]),str(obj[6]), file=ROIfile)
+
+        
+        sys.stdout.write("\rframenum:"+str(framenum))
+        sys.stdout.flush()
+        render_start = time.time()
+        framenum = framenum+1   
+        time2 = time.time()
+        diffPostProcess = time2 - time1
+        postprocess_times.append(diffPostProcess*1000)
+
+        if is_async_mode:
+            cur_request_id, next_request_id = next_request_id, cur_request_id
+
+            
+    print("\n")
+    preprocesstime=0
+    inferencetime=0
+    postprocesstime=0
+    
+    for obj in preprocess_times:
+         preprocesstime+=obj
+    for obj in infer_times:
+        inferencetime+=obj
+    for obj in postprocess_times:
+        postprocesstime+=obj
+
+        
+    print("Preprocess: ",preprocesstime/(len(preprocess_times)*batchSize),"\tms/frame")
+    print("Inference:  ",inferencetime/(len(infer_times)*batchSize),"\tms/frame")
+    print("Postprocess:" ,postprocesstime/(len(postprocess_times)*batchSize),"\tms/frame")
+
+    del exec_net
+    del plugin
+
+
+if __name__ == '__main__':
+    sys.exit(main() or 0)

From 1be01b55b09877eff230f10acb8e86754a3e70a9 Mon Sep 17 00:00:00 2001
From: Basanta Patra <basantax.patra@intel.com>
Date: Tue, 29 Jan 2019 19:07:40 +0530
Subject: [PATCH 2/2] Python default API used instead of customized code

---
 object-detection/tutorial1.py | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/object-detection/tutorial1.py b/object-detection/tutorial1.py
index d05fe41b..70aa514c 100644
--- a/object-detection/tutorial1.py
+++ b/object-detection/tutorial1.py
@@ -25,7 +25,6 @@
 from openvino.inference_engine import IENetwork, IEPlugin
 from enum import Enum
 import collections
-import xml.etree.ElementTree as ET
 
 
 
@@ -62,7 +61,8 @@ def main():
     args = build_argparser().parse_args()
     model_xml = args.model
     model_bin = os.path.splitext(model_xml)[0] + ".bin"
-    args.cpu_extension="/opt/intel/computer_vision_sdk_2018.5.445/inference_engine/samples/build/intel64/Release/lib/libcpu_extension.so"
+    args.cpu_extension="/opt/intel/computer_vision_sdk/deployment_tools/inference_engine/samples/build/intel64/Release/lib/libcpu_extension.so"
+
     preprocess_times = collections.deque()
     infer_times = collections.deque()
     postprocess_times = collections.deque()
@@ -78,7 +78,7 @@ def main():
         
     # Read IR
     log.info("Reading IR...")
-    net = IENetwork.from_ir(model=model_xml, weights=model_bin)
+    net = IENetwork(model=model_xml, weights=model_bin)
 
     if plugin.device == "CPU":
         supported_layers = plugin.get_supported_layers(net)
@@ -99,11 +99,10 @@ def main():
     out_blob = next(iter(net.outputs))
     log.info("Loading IR to the plugin...")
     exec_net = plugin.load(network=net, num_requests=2)
-    tree = ET.parse(model_xml)
-    root = tree.getroot()
-    
+       
     # Read and pre-process input image
     n, c, h, w = net.inputs[input_blob].shape
+    output_dims=net.outputs[out_blob].shape
     infer_width=w;
     infer_height=h;
     num_channels=c;
@@ -111,14 +110,14 @@ def main():
     full_image_size=channel_size*num_channels
     
     print("inputdims=",w,h,c,n)
-    print("outputdims=",root[0][120][2][0][3].text,root[0][120][2][0][2].text,root[0][120][2][0][1].text,root[0][120][2][0][0].text)
-    if int(root[0][120][2][0][3].text)>1 :
+    print("outputdims=",output_dims[3],output_dims[2],output_dims[1],output_dims[0])
+    if int(output_dims[3])>1 :
         print("SSD Mode")
         output_mode=output_mode_type.SSD_MODE
     else:
         print("Single Classification Mode")
         output_mode=CLASSIFICATION_MODE
-        output_data_size=int(root[0][120][2][0][2].text)*int(root[0][120][2][0][1].text)*int(root[0][120][2][0][0].text)
+        output_data_size=int(output_dims[2])*int(output_dims[1])*int(output_dims[0])
     del net
     if args.input == 'cam':
         input_stream = 0