
Commit 39a470a

user manual update 1
1 parent 6c4c924 commit 39a470a

File tree

3 files changed: +75 −36 lines changed

content/hardware/04.pro/shields/portenta-vision-shield/tutorials/user-manual/content.md

Lines changed: 75 additions & 36 deletions
@@ -212,7 +212,7 @@ The Portenta Vision Shields's main feature is its onboard camera, based on the H

 ![Onboard camera sensor](assets/camera.png)

-**Main Camera Features**
+### HM01B0 Camera Features

 - Ultra-Low-Power Image Sensor designed for always-on vision devices and applications
 - High-sensitivity 3.6 μm BrightSense™ pixel technology; window, vertical flip and horizontal mirror readout
@@ -222,15 +222,35 @@ The Portenta Vision Shields's main feature is its onboard camera, based on the H

 **Supported Resolutions**

-- QQVGA (160x120) at 15, 30, 60 and 120 FPS
+- QQVGA (160x120) at 15, 30, and 60 FPS
 - QVGA (320x240) at 15, 30 and 60 FPS
 - B320X320 (320x320) at 15, 30 and 45 FPS

 **Power Consumption**
-- < 1.1 mW QQVGA resolution at 30 FPS,
+- < 1.1 mW QQVGA resolution at 30 FPS
 - < 2 mW QVGA resolution at 30 FPS
 - < 4 mW QVGA resolution at 60 FPS

+### HM0360 Camera Features
+
+- Ultra-Low-Power, high-sensitivity, low-noise VGA sensor
+- On-chip auto exposure/gain and zone detection
+- Automatic wake and sleep operation with programmable event interrupt to host processor
+- Pre-metered exposure provides a well-exposed first frame after an extended sleep (blanking) period
+- Embedded line provides metadata such as frame count, AE statistics, zone trigger, and other interrupt event information
+
+**Supported Resolutions**
+
+- QQVGA (160x120) at 15, 30, and 60 FPS
+- QVGA (320x240) at 15, 30 and 60 FPS
+- VGA (640x480) at 15, 30 and 60 FPS
+
+**Power Consumption**
+
+- 140 µA QVGA resolution at 2 FPS
+- 3.2 mA QVGA resolution at 60 FPS
+- 7.8 mA VGA resolution at 60 FPS
+
 The Vision Shield is primarily intended to be used with the OpenMV MicroPython ecosystem, so the OpenMV IDE is recommended for machine vision applications.

 ### Snapshot Example
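The resolutions listed above map directly onto `sensor.set_framesize()` constants in OpenMV. As a minimal sketch of how a reader might select one of them (assuming an OpenMV-compatible firmware where `sensor.QQVGA`, `sensor.QVGA`, `sensor.VGA`, and `sensor.B320X320` are defined):

```python
import sensor

sensor.reset()
sensor.set_pixformat(sensor.GRAYSCALE)

# Pick a frame size from the supported lists above:
# sensor.QQVGA -> 160x120, sensor.QVGA -> 320x240,
# sensor.VGA -> 640x480 (HM0360 only), sensor.B320X320 -> 320x320 (HM01B0 only).
sensor.set_framesize(sensor.QVGA)
sensor.skip_frames(time=2000)  # give the new settings time to take effect

print(sensor.width(), "x", sensor.height())  # should report 320 x 240
```

Higher frame sizes and rates trade directly against the power figures above, so always-on applications will generally prefer a small frame size at a low frame rate.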
@@ -244,7 +264,7 @@ import machine

 sensor.reset() # Reset and initialize the sensor.
 sensor.set_pixformat(sensor.GRAYSCALE) # Set pixel format to RGB565 (or GRAYSCALE)
-sensor.set_framesize(sensor.B320X320) # Set frame size to QVGA (320x240)
+sensor.set_framesize(sensor.QVGA) # Set frame size to QVGA (320x240)
 sensor.skip_frames(time=2000) # Wait for settings to take effect.

 led = machine.LED("LED_BLUE")
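As a usage note on the snapshot example this hunk fixes: once the sensor is configured, a captured frame can also be written to the board's storage with the image object's `save()` method. A minimal sketch, with an illustrative file name:

```python
import sensor

sensor.reset()
sensor.set_pixformat(sensor.GRAYSCALE)
sensor.set_framesize(sensor.QVGA)   # matches the corrected line above
sensor.skip_frames(time=2000)

img = sensor.snapshot()             # grab one frame
img.save("snapshot.jpg")            # illustrative path on the board's flash/SD card
```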
@@ -423,7 +443,7 @@ import time

 sensor.reset()
 sensor.set_pixformat(sensor.GRAYSCALE)
-sensor.set_framesize(sensor.B320X320)
+sensor.set_framesize(sensor.QVGA)
 sensor.skip_frames(time=2000)
 sensor.set_auto_gain(False) # must turn this off to prevent image washout...
 clock = time.clock()
@@ -450,8 +470,10 @@ This script will draw a circle on each detected face and will print their coordi
 ```python
 import sensor
 import time
-import tf
+import ml
+from ml.utils import NMS
 import math
+import image

 sensor.reset() # Reset and initialize the sensor.
 sensor.set_pixformat(sensor.GRAYSCALE) # Set pixel format to RGB565 (or GRAYSCALE)
@@ -460,12 +482,14 @@ sensor.set_windowing((240, 240)) # Set 240x240 window.
 sensor.skip_frames(time=2000) # Let the camera adjust.

 min_confidence = 0.4
+threshold_list = [(math.ceil(min_confidence * 255), 255)]

 # Load built-in FOMO face detection model
-labels, net = tf.load_builtin_model("fomo_face_detection")
+model = ml.Model("fomo_face_detection")
+print(model)

 # Alternatively, models can be loaded from the filesystem storage.
-# net = tf.load('<object_detection_network>', load_to_fb=True)
+# model = ml.Model('<object_detection_network>.tflite', load_to_fb=True)
 # labels = [line.rstrip('\n') for line in open("labels.txt")]

 colors = [ # Add more colors if you are detecting more than 7 types of classes at once.
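For context on the `tf` to `ml` migration in this hunk: built-in models are now loaded by name through `ml.Model`, while custom `.tflite` files are loaded from storage. A minimal sketch restating the updated calls (the custom model path and labels file below are illustrative):

```python
import ml

# Built-in FOMO face-detection model, loaded by name as in the updated code.
model = ml.Model("fomo_face_detection")
print(model)  # printing the model shows its input/output tensor shapes

# Illustrative: a custom model loaded from the board's filesystem instead,
# with labels read from an accompanying text file.
# model = ml.Model("my_model.tflite", load_to_fb=True)
# labels = [line.rstrip("\n") for line in open("labels.txt")]
```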
@@ -478,31 +502,50 @@ colors = [ # Add more colors if you are detecting more than 7 types of classes
     (255, 255, 255),
 ]

+
+# FOMO outputs an image per class where each pixel in the image is the centroid of the trained
+# object. So, we will get those output images and then run find_blobs() on them to extract the
+# centroids. We will also run get_statistics() on the detected blobs to determine their score.
+# The Non-Max-Suppression (NMS) object then filters out overlapping detections and maps their
+# position in the output image back to the original input image. The function then returns a
+# list per class, each containing a list of (rect, score) tuples representing the detected
+# objects.
+def fomo_post_process(model, inputs, outputs):
+    n, oh, ow, oc = model.output_shape[0]
+    nms = NMS(ow, oh, inputs[0].roi)
+    for i in range(oc):
+        img = image.Image(outputs[0][0, :, :, i] * 255)
+        blobs = img.find_blobs(
+            threshold_list, x_stride=1, area_threshold=1, pixels_threshold=1
+        )
+        for b in blobs:
+            rect = b.rect()
+            x, y, w, h = rect
+            score = (
+                img.get_statistics(thresholds=threshold_list, roi=rect).l_mean() / 255.0
+            )
+            nms.add_bounding_box(x, y, x + w, y + h, score, i)
+    return nms.get_bounding_boxes()
+
+
 clock = time.clock()
 while True:
     clock.tick()

     img = sensor.snapshot()

-    # detect() returns all objects found in the image (split out per class already)
-    # we skip class index 0, as that is the background, and then draw circles of the center
-    # of our objects
-
-    for i, detection_list in enumerate(
-        net.detect(img, thresholds=[(math.ceil(min_confidence * 255), 255)])
-    ):
+    for i, detection_list in enumerate(model.predict([img], callback=fomo_post_process)):
         if i == 0:
             continue  # background class
         if len(detection_list) == 0:
             continue  # no detections for this class?

-        print("********** %s **********" % labels[i])
-        for d in detection_list:
-            [x, y, w, h] = d.rect()
+        print("********** %s **********" % model.labels[i])
+        for (x, y, w, h), score in detection_list:
             center_x = math.floor(x + (w / 2))
             center_y = math.floor(y + (h / 2))
-            print(f"x {center_x}\ty {center_y}")
-            img.draw_circle((center_x, center_y, 12), color=colors[i], thickness=2)
+            print(f"x {center_x}\ty {center_y}\tscore {score}")
+            img.draw_circle((center_x, center_y, 12), color=colors[i])

     print(clock.fps(), "fps", end="\n")
 ```
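A small worked example of the confidence arithmetic in this script: FOMO's per-class output heatmaps are rescaled to 8-bit grayscale before blob extraction, so the `min_confidence` of 0.4 becomes a pixel threshold of `ceil(0.4 * 255) = 102`:

```python
import math

min_confidence = 0.4
# Scores in [0, 1] map to grayscale [0, 255], so thresholding at 102 keeps
# only heatmap pixels whose confidence is at least 0.4.
threshold_list = [(math.ceil(min_confidence * 255), 255)]
print(threshold_list)  # [(102, 255)]
```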
@@ -517,40 +560,36 @@ Use the following example script to run the **person detection** model.
 ```python
 import sensor
 import time
-import tf
-import math
-import uos, gc
+import ml

 sensor.reset() # Reset and initialize the sensor.
 sensor.set_pixformat(sensor.GRAYSCALE) # Set pixel format to RGB565 (or GRAYSCALE)
 sensor.set_framesize(sensor.QVGA) # Set frame size to QVGA (320x240)
 sensor.set_windowing((240, 240)) # Set 240x240 window.
 sensor.skip_frames(time=2000) # Let the camera adjust.

-net = tf.load('person_detection.tflite', load_to_fb=True)
+model = ml.Model('person_detection.tflite', load_to_fb=True)
 labels = [line.rstrip('\n') for line in open("person_detection.txt")]
-
+sorted_labels = sorted(labels, reverse=False)

 clock = time.clock()
 while True:
     clock.tick()

     img = sensor.snapshot()
-
-    for obj in net.classify(img, min_scale = 1.0, scale_mul= 0.8, x_overlap = 0.5, y_overlap = 0.5):
-        print("*********** \nDetections at [x=%d,y=%d, w=%d, h=%d]" % obj.rect())
-        img.draw_rectangle(obj.rect())
-        predictions_list = list(zip(labels,obj.output()))
-
-        for i in range(len(predictions_list)):
-            print ("%s = %f" % (predictions_list[i][0], predictions_list[i][1]))
-
-    print(clock.fps(), "fps", end="\n")
+
+    sorted_list = sorted(
+        zip(sorted_labels, model.predict([img])[0].flatten().tolist()), key=lambda x: x[1]
+    )
+    for i in range(len(sorted_labels)):
+        print("%s = %f" % (sorted_list[i][0], sorted_list[i][1]))
+
+    print(clock.fps(), "fps")
 ```

 When a person is in the field of view of the camera, you should see the inference result for `person` rise above 70% certainty.

-![Person detection example running](assets/person-detect.gif)
+![Person detection example running](assets/person-detect-4.gif)

 ## Microphone

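One note on the rewritten loop above: the (label, score) pairs are sorted in ascending score order, so the most confident class always prints last. A tiny standalone sketch of that step, using illustrative label names and scores in place of real model output:

```python
# Illustrative (label, score) pairs standing in for model.predict() output.
predictions = [("person", 0.88), ("no_person", 0.12)]

# Sort ascending by score, mirroring key=lambda x: x[1] in the tutorial code.
for name, score in sorted(predictions, key=lambda x: x[1]):
    print("%s = %f" % (name, score))  # "person = 0.880000" prints last
```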
