
Commit 39a470a

user manual update 1
1 parent 6c4c924 commit 39a470a

File tree

3 files changed: +75 −36 lines changed

content/hardware/04.pro/shields/portenta-vision-shield/tutorials/user-manual/content.md

Lines changed: 75 additions & 36 deletions
@@ -212,7 +212,7 @@ The Portenta Vision Shields's main feature is its onboard camera, based on the H

 ![Onboard camera sensor](assets/camera.png)

-**Main Camera Features**
+### HM01B0 Camera Features

 - Ultra-Low-Power Image Sensor designed for always-on vision devices and applications
 - High-sensitivity 3.6 μm BrightSense™ pixel technology; window, vertical flip and horizontal mirror readout
@@ -222,15 +222,35 @@ The Portenta Vision Shields's main feature is its onboard camera, based on the H

 **Supported Resolutions**

-- QQVGA (160x120) at 15, 30, 60 and 120 FPS
+- QQVGA (160x120) at 15, 30, and 60 FPS
 - QVGA (320x240) at 15, 30 and 60 FPS
 - B320X320 (320x320) at 15, 30 and 45 FPS

 **Power Consumption**
-- < 1.1 mW QQVGA resolution at 30 FPS,
+- < 1.1 mW QQVGA resolution at 30 FPS
 - < 2 mW QVGA resolution at 30 FPS
 - < 4 mW QVGA resolution at 60 FPS

+### HM0360 Camera Features
+
+- Ultra-Low-Power, high-sensitivity, low-noise VGA sensor
+- On-chip auto exposure/gain and zone detection
+- Automatic wake and sleep operation with programmable event interrupt to host processor
+- Pre-metered exposure provides a well-exposed first frame after an extended sleep (blanking) period
+- Embedded line provides metadata such as frame count, AE statistics, zone trigger, and other interrupt event information
+
+**Supported Resolutions**
+
+- QQVGA (160x120) at 15, 30, and 60 FPS
+- QVGA (320x240) at 15, 30 and 60 FPS
+- VGA (640x480) at 15, 30 and 60 FPS
+
+**Power Consumption**
+
+- 140 µA QVGA resolution at 2 FPS
+- 3.2 mA QVGA resolution at 60 FPS
+- 7.8 mA VGA resolution at 60 FPS
+
 The Vision Shield is primarily intended to be used with the OpenMV MicroPython ecosystem, so the OpenMV IDE is recommended for machine vision applications.

 ### Snapshot Example
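The resolutions listed above map directly onto `sensor.set_framesize()` constants in OpenMV. As a minimal sketch of how a reader might select one of them (assuming an OpenMV-compatible firmware where `sensor.QQVGA`, `sensor.QVGA`, `sensor.VGA`, and `sensor.B320X320` are defined):

```python
import sensor

sensor.reset()
sensor.set_pixformat(sensor.GRAYSCALE)

# Pick a frame size from the supported lists above:
# sensor.QQVGA -> 160x120, sensor.QVGA -> 320x240,
# sensor.VGA -> 640x480 (HM0360 only), sensor.B320X320 -> 320x320 (HM01B0 only).
sensor.set_framesize(sensor.QVGA)
sensor.skip_frames(time=2000)  # give the new settings time to take effect

print(sensor.width(), "x", sensor.height())  # should report 320 x 240
```

Higher frame sizes and rates trade directly against the power figures above, so always-on applications will generally prefer a small frame size at a low frame rate.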
@@ -244,7 +264,7 @@ import machine

 sensor.reset() # Reset and initialize the sensor.
 sensor.set_pixformat(sensor.GRAYSCALE) # Set pixel format to RGB565 (or GRAYSCALE)
-sensor.set_framesize(sensor.B320X320) # Set frame size to QVGA (320x240)
+sensor.set_framesize(sensor.QVGA) # Set frame size to QVGA (320x240)
 sensor.skip_frames(time=2000) # Wait for settings to take effect.

 led = machine.LED("LED_BLUE")
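As a usage note on the snapshot example this hunk fixes: once the sensor is configured, a captured frame can also be written to the board's storage with the image object's `save()` method. A minimal sketch, with an illustrative file name:

```python
import sensor

sensor.reset()
sensor.set_pixformat(sensor.GRAYSCALE)
sensor.set_framesize(sensor.QVGA)   # matches the corrected line above
sensor.skip_frames(time=2000)

img = sensor.snapshot()             # grab one frame
img.save("snapshot.jpg")            # illustrative path on the board's flash/SD card
```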
@@ -423,7 +443,7 @@ import time

 sensor.reset()
 sensor.set_pixformat(sensor.GRAYSCALE)
-sensor.set_framesize(sensor.B320X320)
+sensor.set_framesize(sensor.QVGA)
 sensor.skip_frames(time=2000)
 sensor.set_auto_gain(False) # must turn this off to prevent image washout...
 clock = time.clock()
@@ -450,8 +470,10 @@ This script will draw a circle on each detected face and will print their coordi
 ```python
 import sensor
 import time
-import tf
+import ml
+from ml.utils import NMS
 import math
+import image

 sensor.reset() # Reset and initialize the sensor.
 sensor.set_pixformat(sensor.GRAYSCALE) # Set pixel format to RGB565 (or GRAYSCALE)
@@ -460,12 +482,14 @@ sensor.set_windowing((240, 240)) # Set 240x240 window.
 sensor.skip_frames(time=2000) # Let the camera adjust.

 min_confidence = 0.4
+threshold_list = [(math.ceil(min_confidence * 255), 255)]

 # Load built-in FOMO face detection model
-labels, net = tf.load_builtin_model("fomo_face_detection")
+model = ml.Model("fomo_face_detection")
+print(model)

 # Alternatively, models can be loaded from the filesystem storage.
-# net = tf.load('<object_detection_network>', load_to_fb=True)
+# model = ml.Model('<object_detection_network>.tflite', load_to_fb=True)
 # labels = [line.rstrip('\n') for line in open("labels.txt")]

 colors = [ # Add more colors if you are detecting more than 7 types of classes at once.
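For context on the `tf` to `ml` migration in this hunk: built-in models are now loaded by name through `ml.Model`, while custom `.tflite` files are loaded from storage. A minimal sketch restating the updated calls (the custom model path and labels file below are illustrative):

```python
import ml

# Built-in FOMO face-detection model, loaded by name as in the updated code.
model = ml.Model("fomo_face_detection")
print(model)  # printing the model shows its input/output tensor shapes

# Illustrative: a custom model loaded from the board's filesystem instead,
# with labels read from an accompanying text file.
# model = ml.Model("my_model.tflite", load_to_fb=True)
# labels = [line.rstrip("\n") for line in open("labels.txt")]
```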
@@ -478,31 +502,50 @@ colors = [ # Add more colors if you are detecting more than 7 types of classes
     (255, 255, 255),
 ]

+
+# FOMO outputs an image per class where each pixel in the image is the centroid of the trained
+# object. So, we will get those output images and then run find_blobs() on them to extract the
+# centroids. We will also run get_statistics() on the detected blobs to determine their score.
+# The Non-Max-Suppression (NMS) object then filters out overlapping detections and maps their
+# position in the output image back to the original input image. The function then returns a
+# list per class, each containing a list of (rect, score) tuples representing the detected
+# objects.
+def fomo_post_process(model, inputs, outputs):
+    n, oh, ow, oc = model.output_shape[0]
+    nms = NMS(ow, oh, inputs[0].roi)
+    for i in range(oc):
+        img = image.Image(outputs[0][0, :, :, i] * 255)
+        blobs = img.find_blobs(
+            threshold_list, x_stride=1, area_threshold=1, pixels_threshold=1
+        )
+        for b in blobs:
+            rect = b.rect()
+            x, y, w, h = rect
+            score = (
+                img.get_statistics(thresholds=threshold_list, roi=rect).l_mean() / 255.0
+            )
+            nms.add_bounding_box(x, y, x + w, y + h, score, i)
+    return nms.get_bounding_boxes()
+
+
 clock = time.clock()
 while True:
     clock.tick()

     img = sensor.snapshot()

-    # detect() returns all objects found in the image (split out per class already)
-    # we skip class index 0, as that is the background, and then draw circles of the center
-    # of our objects
-
-    for i, detection_list in enumerate(
-        net.detect(img, thresholds=[(math.ceil(min_confidence * 255), 255)])
-    ):
+    for i, detection_list in enumerate(model.predict([img], callback=fomo_post_process)):
         if i == 0:
             continue  # background class
         if len(detection_list) == 0:
             continue  # no detections for this class?

-        print("********** %s **********" % labels[i])
-        for d in detection_list:
-            [x, y, w, h] = d.rect()
+        print("********** %s **********" % model.labels[i])
+        for (x, y, w, h), score in detection_list:
             center_x = math.floor(x + (w / 2))
             center_y = math.floor(y + (h / 2))
-            print(f"x {center_x}\ty {center_y}")
-            img.draw_circle((center_x, center_y, 12), color=colors[i], thickness=2)
+            print(f"x {center_x}\ty {center_y}\tscore {score}")
+            img.draw_circle((center_x, center_y, 12), color=colors[i])

     print(clock.fps(), "fps", end="\n")
 ```
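A small worked example of the confidence arithmetic in this script: FOMO's per-class output heatmaps are rescaled to 8-bit grayscale before blob extraction, so the `min_confidence` of 0.4 becomes a pixel threshold of `ceil(0.4 * 255) = 102`:

```python
import math

min_confidence = 0.4
# Scores in [0, 1] map to grayscale [0, 255], so thresholding at 102 keeps
# only heatmap pixels whose confidence is at least 0.4.
threshold_list = [(math.ceil(min_confidence * 255), 255)]
print(threshold_list)  # [(102, 255)]
```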
@@ -517,40 +560,36 @@ Use the following example script to run the **person detection** model.
 ```python
 import sensor
 import time
-import tf
-import math
-import uos, gc
+import ml

 sensor.reset() # Reset and initialize the sensor.
 sensor.set_pixformat(sensor.GRAYSCALE) # Set pixel format to RGB565 (or GRAYSCALE)
 sensor.set_framesize(sensor.QVGA) # Set frame size to QVGA (320x240)
 sensor.set_windowing((240, 240)) # Set 240x240 window.
 sensor.skip_frames(time=2000) # Let the camera adjust.

-net = tf.load('person_detection.tflite', load_to_fb=True)
+model = ml.Model('person_detection.tflite', load_to_fb=True)
 labels = [line.rstrip('\n') for line in open("person_detection.txt")]
-
+sorted_labels = sorted(labels, reverse=False)

 clock = time.clock()
 while True:
     clock.tick()

     img = sensor.snapshot()
-
-    for obj in net.classify(img, min_scale = 1.0, scale_mul= 0.8, x_overlap = 0.5, y_overlap = 0.5):
-        print("*********** \nDetections at [x=%d,y=%d, w=%d, h=%d]" % obj.rect())
-        img.draw_rectangle(obj.rect())
-        predictions_list = list(zip(labels,obj.output()))
-
-        for i in range(len(predictions_list)):
-            print ("%s = %f" % (predictions_list[i][0], predictions_list[i][1]))
-
-    print(clock.fps(), "fps", end="\n")
+
+    sorted_list = sorted(
+        zip(sorted_labels, model.predict([img])[0].flatten().tolist()), key=lambda x: x[1]
+    )
+    for i in range(len(sorted_labels)):
+        print("%s = %f" % (sorted_list[i][0], sorted_list[i][1]))
+
+    print(clock.fps(), "fps")
 ```

 When a person is in the field of view of the camera, you should see the inference result for `person` rise above 70% certainty.

-![Person detection example running](assets/person-detect.gif)
+![Person detection example running](assets/person-detect-4.gif)

 ## Microphone

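One note on the rewritten loop above: the (label, score) pairs are sorted in ascending score order, so the most confident class always prints last. A tiny standalone sketch of that step, using illustrative label names and scores in place of real model output:

```python
# Illustrative (label, score) pairs standing in for model.predict() output.
predictions = [("person", 0.88), ("no_person", 0.12)]

# Sort ascending by score, mirroring key=lambda x: x[1] in the tutorial code.
for name, score in sorted(predictions, key=lambda x: x[1]):
    print("%s = %f" % (name, score))  # "person = 0.880000" prints last
```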
