diff --git a/.gitignore b/.gitignore index 0b71a4043..b727e1822 100644 --- a/.gitignore +++ b/.gitignore @@ -161,3 +161,4 @@ cython_debug/ # and can be added to the global gitignore or merged into this file. For a more nuclear # option (not recommended) you can uncomment the following to ignore the entire idea folder. .idea/ +/sdk/sdk-class-saver-jpeg/data/ diff --git a/gen2-age-gender/MultiMsgSync.py b/gen2-age-gender/MultiMsgSync.py deleted file mode 100644 index c75fe307b..000000000 --- a/gen2-age-gender/MultiMsgSync.py +++ /dev/null @@ -1,56 +0,0 @@ -# Color frames (ImgFrame), object detection (ImgDetections) and recognition (NNData) -# messages arrive to the host all with some additional delay. -# For each ImgFrame there's one ImgDetections msg, which has multiple detections, and for each -# detection there's a NNData msg which contains recognition results. - -# How it works: -# Every ImgFrame, ImgDetections and NNData message has it's own sequence number, by which we can sync messages. - -class TwoStageHostSeqSync: - def __init__(self): - self.msgs = {} - # name: color, detection, or recognition - def add_msg(self, msg, name): - seq = str(msg.getSequenceNum()) - if seq not in self.msgs: - self.msgs[seq] = {} # Create directory for msgs - if "recognition" not in self.msgs[seq]: - self.msgs[seq]["recognition"] = [] # Create recognition array - - if name == "recognition": - # Append recognition msgs to an array - self.msgs[seq]["recognition"].append(msg) - # print(f'Added recognition seq {seq}, total len {len(self.msgs[seq]["recognition"])}') - - elif name == "detection": - # Save detection msg in the directory - self.msgs[seq][name] = msg - self.msgs[seq]["len"] = len(msg.detections) - # print(f'Added detection seq {seq}') - - elif name == "color": # color - # Save color frame in the directory - self.msgs[seq][name] = msg - # print(f'Added frame seq {seq}') - - - def get_msgs(self): - seq_remove = [] # Arr of sequence numbers to get deleted - - for seq, msgs in self.msgs.items(): - seq_remove.append(seq) # Will get removed from dict if we find synced msgs pair - - # Check if we have both detections and color frame with this sequence number - if "color" in msgs and "len" in msgs: - - # Check if all detected objects (faces) have finished recognition inference - if msgs["len"] == len(msgs["recognition"]): - # print(f"Synced msgs with sequence number {seq}", msgs) - - # We have synced msgs, remove previous msgs (memory cleaning) - for rm in seq_remove: - del self.msgs[rm] - - return msgs # Returned synced msgs - - return None # No synced msgs \ No newline at end of file diff --git a/gen2-age-gender/main.py b/gen2-age-gender/main.py index 62217f6e4..b44486e87 100644 --- a/gen2-age-gender/main.py +++ b/gen2-age-gender/main.py @@ -1,221 +1,37 @@ -from MultiMsgSync import TwoStageHostSeqSync -import blobconverter -import cv2 -import depthai as dai +from depthai_sdk import OakCamera, TwoStagePacket, AspectRatioResizeMode, Visualizer, TextPosition, BboxStyle import numpy as np +import cv2 -print("DepthAI version", dai.__version__) -def frame_norm(frame, bbox): - normVals = np.full(len(bbox), frame.shape[0]) - normVals[::2] = frame.shape[1] - return (np.clip(np.array(bbox), 0, 1) * normVals).astype(int) - -def create_pipeline(stereo): - pipeline = dai.Pipeline() - - print("Creating Color Camera...") - cam = pipeline.create(dai.node.ColorCamera) - cam.setPreviewSize(1080, 1080) - cam.setResolution(dai.ColorCameraProperties.SensorResolution.THE_1080_P) - cam.setInterleaved(False) - 
cam.setBoardSocket(dai.CameraBoardSocket.RGB) - - # Workaround: remove in 2.18, use `cam.setPreviewNumFramesPool(10)` - # This manip uses 15*3.5 MB => 52 MB of RAM. - copy_manip = pipeline.create(dai.node.ImageManip) - copy_manip.setNumFramesPool(15) - copy_manip.setMaxOutputFrameSize(3499200) - cam.preview.link(copy_manip.inputImage) - - cam_xout = pipeline.create(dai.node.XLinkOut) - cam_xout.setStreamName("color") - copy_manip.out.link(cam_xout.input) - - # ImageManip will resize the frame before sending it to the Face detection NN node - face_det_manip = pipeline.create(dai.node.ImageManip) - face_det_manip.initialConfig.setResize(300, 300) - face_det_manip.initialConfig.setFrameType(dai.RawImgFrame.Type.RGB888p) - copy_manip.out.link(face_det_manip.inputImage) - - if stereo: - monoLeft = pipeline.create(dai.node.MonoCamera) - monoLeft.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P) - monoLeft.setBoardSocket(dai.CameraBoardSocket.LEFT) - - monoRight = pipeline.create(dai.node.MonoCamera) - monoRight.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P) - monoRight.setBoardSocket(dai.CameraBoardSocket.RIGHT) - - stereo = pipeline.create(dai.node.StereoDepth) - stereo.setDefaultProfilePreset(dai.node.StereoDepth.PresetMode.HIGH_DENSITY) - stereo.setDepthAlign(dai.CameraBoardSocket.RGB) - monoLeft.out.link(stereo.left) - monoRight.out.link(stereo.right) - - # Spatial Detection network if OAK-D - print("OAK-D detected, app will display spatial coordiantes") - face_det_nn = pipeline.create(dai.node.MobileNetSpatialDetectionNetwork) - face_det_nn.setBoundingBoxScaleFactor(0.8) - face_det_nn.setDepthLowerThreshold(100) - face_det_nn.setDepthUpperThreshold(5000) - stereo.depth.link(face_det_nn.inputDepth) - else: # Detection network if OAK-1 - print("OAK-1 detected, app won't display spatial coordiantes") - face_det_nn = pipeline.create(dai.node.MobileNetDetectionNetwork) - - face_det_nn.setConfidenceThreshold(0.5) - face_det_nn.setBlobPath(blobconverter.from_zoo(name="face-detection-retail-0004", shaves=6)) - face_det_nn.input.setQueueSize(1) - face_det_manip.out.link(face_det_nn.input) - - # Send face detections to the host (for bounding boxes) - face_det_xout = pipeline.create(dai.node.XLinkOut) - face_det_xout.setStreamName("detection") - face_det_nn.out.link(face_det_xout.input) - - # Script node will take the output from the face detection NN as an input and set ImageManipConfig - # to the 'recognition_manip' to crop the initial frame - image_manip_script = pipeline.create(dai.node.Script) - face_det_nn.out.link(image_manip_script.inputs['face_det_in']) - - # Remove in 2.18 and use `imgFrame.getSequenceNum()` in Script node - face_det_nn.passthrough.link(image_manip_script.inputs['passthrough']) - - copy_manip.out.link(image_manip_script.inputs['preview']) - - image_manip_script.setScript(""" - import time - msgs = dict() - - def add_msg(msg, name, seq = None): - global msgs - if seq is None: - seq = msg.getSequenceNum() - seq = str(seq) - # node.warn(f"New msg {name}, seq {seq}") - - # Each seq number has it's own dict of msgs - if seq not in msgs: - msgs[seq] = dict() - msgs[seq][name] = msg - - # To avoid freezing (not necessary for this ObjDet model) - if 15 < len(msgs): - node.warn(f"Removing first element! 
len {len(msgs)}") - msgs.popitem() # Remove first element - - def get_msgs(): - global msgs - seq_remove = [] # Arr of sequence numbers to get deleted - for seq, syncMsgs in msgs.items(): - seq_remove.append(seq) # Will get removed from dict if we find synced msgs pair - # node.warn(f"Checking sync {seq}") - - # Check if we have both detections and color frame with this sequence number - if len(syncMsgs) == 2: # 1 frame, 1 detection - for rm in seq_remove: - del msgs[rm] - # node.warn(f"synced {seq}. Removed older sync values. len {len(msgs)}") - return syncMsgs # Returned synced msgs - return None - - def correct_bb(bb): - if bb.xmin < 0: bb.xmin = 0.001 - if bb.ymin < 0: bb.ymin = 0.001 - if bb.xmax > 1: bb.xmax = 0.999 - if bb.ymax > 1: bb.ymax = 0.999 - return bb - - while True: - time.sleep(0.001) # Avoid lazy looping - - preview = node.io['preview'].tryGet() - if preview is not None: - add_msg(preview, 'preview') - - face_dets = node.io['face_det_in'].tryGet() - if face_dets is not None: - # TODO: in 2.18.0.0 use face_dets.getSequenceNum() - passthrough = node.io['passthrough'].get() - seq = passthrough.getSequenceNum() - add_msg(face_dets, 'dets', seq) - - sync_msgs = get_msgs() - if sync_msgs is not None: - img = sync_msgs['preview'] - dets = sync_msgs['dets'] - for i, det in enumerate(dets.detections): - cfg = ImageManipConfig() - correct_bb(det) - cfg.setCropRect(det.xmin, det.ymin, det.xmax, det.ymax) - # node.warn(f"Sending {i + 1}. det. Seq {seq}. Det {det.xmin}, {det.ymin}, {det.xmax}, {det.ymax}") - cfg.setResize(62, 62) - cfg.setKeepAspectRatio(False) - node.io['manip_cfg'].send(cfg) - node.io['manip_img'].send(img) - """) - - recognition_manip = pipeline.create(dai.node.ImageManip) - recognition_manip.initialConfig.setResize(62, 62) - recognition_manip.setWaitForConfigInput(True) - image_manip_script.outputs['manip_cfg'].link(recognition_manip.inputConfig) - image_manip_script.outputs['manip_img'].link(recognition_manip.inputImage) - - # Second stange recognition NN - print("Creating recognition Neural Network...") - recognition_nn = pipeline.create(dai.node.NeuralNetwork) - recognition_nn.setBlobPath(blobconverter.from_zoo(name="age-gender-recognition-retail-0013", shaves=6)) - recognition_manip.out.link(recognition_nn.input) - - recognition_xout = pipeline.create(dai.node.XLinkOut) - recognition_xout.setStreamName("recognition") - recognition_nn.out.link(recognition_xout.input) +with OakCamera() as oak: + color = oak.create_camera('color') - return pipeline + det = oak.create_nn('face-detection-retail-0004', color) + # AspectRatioResizeMode has to be CROP for 2-stage pipelines at the moment + det.config_nn(aspect_ratio_resize_mode='stretch') -with dai.Device() as device: - stereo = 1 < len(device.getConnectedCameras()) - device.startPipeline(create_pipeline(stereo)) + age_gender = oak.create_nn('age-gender-recognition-retail-0013', input=det) + # age_gender.config_multistage_nn(show_cropped_frames=True) # For debugging - sync = TwoStageHostSeqSync() - queues = {} - # Create output queues - for name in ["color", "detection", "recognition"]: - queues[name] = device.getOutputQueue(name) + def cb(packet: TwoStagePacket, visualizer: Visualizer): + for det, rec in zip(packet.detections, packet.nnData): + age = int(float(np.squeeze(np.array(rec.getLayerFp16('age_conv3')))) * 100) + gender = np.squeeze(np.array(rec.getLayerFp16('prob'))) + gender_str = "woman" if gender[0] > gender[1] else "man" - while True: - for name, q in queues.items(): - # Add all msgs (color frames, 
object detections and recognitions) to the Sync class. - if q.has(): - sync.add_msg(q.get(), name) + visualizer.add_text(f'{gender_str}\nage: {age}', + bbox=(*det.top_left, *det.bottom_right), + position=TextPosition.BOTTOM_RIGHT) - msgs = sync.get_msgs() - if msgs is not None: - frame = msgs["color"].getCvFrame() - detections = msgs["detection"].detections - recognitions = msgs["recognition"] + frame = visualizer.draw(packet.frame) + cv2.imshow('Age-gender estimation', frame) - for i, detection in enumerate(detections): - bbox = frame_norm(frame, (detection.xmin, detection.ymin, detection.xmax, detection.ymax)) - # Decoding of recognition results - rec = recognitions[i] - age = int(float(np.squeeze(np.array(rec.getLayerFp16('age_conv3')))) * 100) - gender = np.squeeze(np.array(rec.getLayerFp16('prob'))) - gender_str = "female" if gender[0] > gender[1] else "male" + # Visualize detections on the frame. Don't show the frame but send the packet + # to the callback function (where it will be displayed) + oak.visualize(age_gender, callback=cb).detections(fill_transparency=0.1) + oak.visualize(det.out.passthrough) + oak.visualize(age_gender.out.twostage_crops, scale=3.0) - cv2.rectangle(frame, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (10, 245, 10), 2) - y = (bbox[1] + bbox[3]) // 2 - cv2.putText(frame, str(age), (bbox[0], y), cv2.FONT_HERSHEY_TRIPLEX, 1.5, (0, 0, 0), 8) - cv2.putText(frame, str(age), (bbox[0], y), cv2.FONT_HERSHEY_TRIPLEX, 1.5, (255, 255, 255), 2) - cv2.putText(frame, gender_str, (bbox[0], y + 30), cv2.FONT_HERSHEY_TRIPLEX, 1.5, (0, 0, 0), 8) - cv2.putText(frame, gender_str, (bbox[0], y + 30), cv2.FONT_HERSHEY_TRIPLEX, 1.5, (255, 255, 255), 2) - if stereo: - # You could also get detection.spatialCoordinates.x and detection.spatialCoordinates.y coordinates - coords = "Z: {:.2f} m".format(detection.spatialCoordinates.z/1000) - cv2.putText(frame, coords, (bbox[0], y + 60), cv2.FONT_HERSHEY_TRIPLEX, 1, (0, 0, 0), 8) - cv2.putText(frame, coords, (bbox[0], y + 60), cv2.FONT_HERSHEY_TRIPLEX, 1, (255, 255, 255), 2) - cv2.imshow("Camera", frame) - if cv2.waitKey(1) == ord('q'): - break + # oak.show_graph() # Show pipeline graph, no need for now + oak.start(blocking=True) # This call will block until the app is stopped (by pressing 'Q' button) diff --git a/gen2-age-gender/requirements.txt b/gen2-age-gender/requirements.txt index 637adc03d..ce7f5340f 100644 --- a/gen2-age-gender/requirements.txt +++ b/gen2-age-gender/requirements.txt @@ -1,3 +1 @@ -opencv-python==4.5.1.48 -blobconverter==1.2.8 -depthai==2.17.0.0 \ No newline at end of file +depthai-sdk==1.9.1 diff --git a/gen2-blur-faces/sdk/main.py b/gen2-blur-faces/sdk/main.py new file mode 100644 index 000000000..7bc08b76f --- /dev/null +++ b/gen2-blur-faces/sdk/main.py @@ -0,0 +1,49 @@ +import cv2 +import numpy as np + +from depthai_sdk import OakCamera + +NN_WIDTH, NN_HEIGHT = 300, 300 + + +def callback(packet, visualizer): + frame = packet.frame + + for det in packet.detections: + # Expand the bounding box + x1, y1, x2, y2 = det.top_left[0], det.top_left[1], det.bottom_right[0], det.bottom_right[1] + x1 *= 0.8 + y1 *= 0.8 + x2 *= 1.2 + y2 *= 1.2 + + bbox = np.int0([x1, y1, x2, y2]) + + face = frame[bbox[1]:bbox[3], bbox[0]:bbox[2]] + fh, fw, fc = face.shape + frame_h, frame_w, frame_c = frame.shape + + # Create blur mask around the face + mask = np.zeros((frame_h, frame_w), np.uint8) + polygon = cv2.ellipse2Poly((bbox[0] + int(fw / 2), bbox[1] + int(fh / 2)), (int(fw / 2), int(fh / 2)), 0, 0, + 360, delta=1) + 
cv2.fillConvexPoly(mask, polygon, 255) + + frame_copy = frame.copy() + frame_copy = cv2.blur(frame_copy, (80, 80)) + face_extracted = cv2.bitwise_and(frame_copy, frame_copy, mask=mask) + background_mask = cv2.bitwise_not(mask) + background = cv2.bitwise_and(frame, frame, mask=background_mask) + # Blur the face + frame = cv2.add(background, face_extracted) + + cv2.imshow('Face blurring', frame) + + +with OakCamera() as oak: + color = oak.create_camera('color', fps=30) + det_nn = oak.create_nn('face-detection-retail-0004', color) + det_nn.config_nn(aspect_ratio_resize_mode='letterbox') + + oak.callback(det_nn, callback=callback) + oak.start(blocking=True) diff --git a/gen2-blur-faces/sdk/requirements.txt b/gen2-blur-faces/sdk/requirements.txt new file mode 100644 index 000000000..0635858a0 --- /dev/null +++ b/gen2-blur-faces/sdk/requirements.txt @@ -0,0 +1,5 @@ +opencv-python +depthai +depthai-sdk +numpy +blobconverter \ No newline at end of file diff --git a/gen2-class-saver-jpeg/sdk/main.py b/gen2-class-saver-jpeg/sdk/main.py new file mode 100755 index 000000000..b9155d51e --- /dev/null +++ b/gen2-class-saver-jpeg/sdk/main.py @@ -0,0 +1,68 @@ +import csv +import time +from pathlib import Path + +import cv2 +from depthai_sdk import OakCamera + +labels = ['background', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', + 'diningtable', 'dog', 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor'] + +# Create data folder +Path('data').mkdir(parents=True, exist_ok=True) + + +def create_folders(): + Path(f'data/raw').mkdir(parents=True, exist_ok=True) + for text in labels: + Path(f'data/{text}').mkdir(parents=True, exist_ok=True) + + +def callback(packet, visualizer): + original_frame = packet.frame + + timestamp = int(time.time() * 10000) + raw_frame_path = f'data/raw/{timestamp}.jpg' + cv2.imwrite(raw_frame_path, original_frame) + + frame = visualizer.draw(original_frame.copy()) + + for detection in packet.detections: + bbox = (*detection.top_left, *detection.bottom_right) + + det_frame = original_frame[bbox[1]:bbox[3], bbox[0]:bbox[2]] + + cropped_path = f'data/{detection.label}/{timestamp}_cropped.jpg' + overlay_path = f'data/{detection.label}/{timestamp}_overlay.jpg' + + cv2.imwrite(cropped_path, det_frame) + cv2.imwrite(overlay_path, frame) + + data = { + 'timestamp': timestamp, + 'label': detection.label, + 'left': bbox[0], + 'top': bbox[1], + 'right': bbox[2], + 'bottom': bbox[3], + 'raw_frame': raw_frame_path, + 'overlay_frame': overlay_path, + 'cropped_frame': cropped_path, + } + dataset.writerow(data) + + cv2.imshow('Class saver', frame) + + +with OakCamera() as oak, open('data/dataset.csv', 'w') as csvfile: + create_folders() + + dataset = csv.DictWriter(csvfile, ['timestamp', 'label', 'left', 'top', 'right', 'bottom', + 'raw_frame', 'overlay_frame', 'cropped_frame']) + dataset.writeheader() + + color = oak.create_camera('color') + nn = oak.create_nn('mobilenet-ssd', color) + + oak.visualize(nn.out.main, callback=callback, fps=True) + oak.start(blocking=True) diff --git a/gen2-class-saver-jpeg/sdk/requirements.txt b/gen2-class-saver-jpeg/sdk/requirements.txt new file mode 100644 index 000000000..d5890e433 --- /dev/null +++ b/gen2-class-saver-jpeg/sdk/requirements.txt @@ -0,0 +1,3 @@ +opencv-python +depthai +blobconverter \ No newline at end of file diff --git a/gen2-container-encoding/sdk/main.py b/gen2-container-encoding/sdk/main.py new file mode 100644 index 000000000..44e96739e --- /dev/null +++ 
b/gen2-container-encoding/sdk/main.py @@ -0,0 +1,7 @@ +from depthai_sdk import OakCamera + +with OakCamera() as oak: + color = oak.create_camera('color', resolution='4K', encode='h264') + oak.record(color.out.main, path='records') + + oak.start(blocking=True) diff --git a/gen2-container-encoding/sdk/requirements.txt b/gen2-container-encoding/sdk/requirements.txt new file mode 100644 index 000000000..482463960 --- /dev/null +++ b/gen2-container-encoding/sdk/requirements.txt @@ -0,0 +1,3 @@ +av +depthai +depthai-sdk \ No newline at end of file diff --git a/gen2-crowdcounting/sdk/main.py b/gen2-crowdcounting/sdk/main.py new file mode 100644 index 000000000..811189923 --- /dev/null +++ b/gen2-crowdcounting/sdk/main.py @@ -0,0 +1,32 @@ +import cv2 +import numpy as np + +from depthai_sdk import OakCamera + +NN_WIDTH = 426 +NN_HEIGHT = 240 + + +def callback(packet, visualizer): + nn_data = packet.img_detections + lay1 = np.array(nn_data.getFirstLayerFp16()).reshape((30, 52)) # density map output is 1/8 of input size + count = np.sum(lay1) # predicted count is the sum of the density map + cv2.putText(packet.frame, f'Predicted count: {count:.2f}', (2, 30), + cv2.FONT_HERSHEY_SIMPLEX, 1.0, (255, 255, 255), 2) + + output = np.array(lay1) * 255 + output = output.astype(np.uint8) + output_colors = cv2.applyColorMap(output, cv2.COLORMAP_VIRIDIS) + output_colors = cv2.resize(output_colors, (packet.frame.shape[1], packet.frame.shape[0]), + interpolation=cv2.INTER_LINEAR) + + cv2.addWeighted(packet.frame, 1.0, output_colors, 0.5, 0, packet.frame) + cv2.imshow('Crowd counting', packet.frame) + + +with OakCamera(replay='../vids/vid1.mp4') as oak: + color = oak.create_camera('color', fps=5) + nn = oak.create_nn('../models/vgg_openvino_2021.4_6shave.blob', color) + + oak.callback(nn.out.main, callback=callback) + oak.start(blocking=True) diff --git a/gen2-crowdcounting/sdk/requirements.txt b/gen2-crowdcounting/sdk/requirements.txt new file mode 100644 index 000000000..f37bad8ef --- /dev/null +++ b/gen2-crowdcounting/sdk/requirements.txt @@ -0,0 +1,5 @@ +opencv-python +depthai +depthai-sdk +gdown +numpy \ No newline at end of file diff --git a/gen2-cumulative-object-counting/api/main-api.py b/gen2-cumulative-object-counting/api/main-api.py new file mode 100644 index 000000000..e9f3618f1 --- /dev/null +++ b/gen2-cumulative-object-counting/api/main-api.py @@ -0,0 +1,266 @@ +import blobconverter +import cv2 +import argparse +import numpy as np +import time +import depthai as dai + + +parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter) +parser.add_argument('-m', '--model', type=str, help='File path of .blob file.') +parser.add_argument('-v', '--video_path', type=str, default='', + help='Path to video. If empty OAK-RGB camera is used. (default=\'\')') +parser.add_argument('-roi', '--roi_position', type=float, + default=0.5, help='ROI Position (0-1)') +parser.add_argument('-a', '--axis', default=True, action='store_false', + help='Axis for cumulative counting (default=x axis)') +parser.add_argument('-sh', '--show', default=True, + action='store_false', help='Show output') +parser.add_argument('-sp', '--save_path', type=str, default='', + help='Path to save the output. 
If None output won\'t be saved') +parser.add_argument('-s', '--sync', action="store_true", + help="Sync RGB output with NN output", default=False) +args = parser.parse_args() + +if args.model is None: + args.model = blobconverter.from_zoo(name="mobilenet-ssd", shaves=7) + +# Create pipeline +pipeline = dai.Pipeline() + +# Define a neural network that will make predictions based on the source frames +nn = pipeline.create(dai.node.MobileNetDetectionNetwork) +nn.setConfidenceThreshold(0.5) +nn.setBlobPath(args.model) +nn.setNumInferenceThreads(2) +nn.input.setBlocking(False) + +# Define a source for the neural network input +if args.video_path != '': + # Create XLinkIn object as conduit for sending input video file frames + # to the neural network + xinFrame = pipeline.create(dai.node.XLinkIn) + xinFrame.setStreamName("inFrame") + # Connect (link) the video stream from the input queue to the + # neural network input + xinFrame.out.link(nn.input) +else: + # Create color camera node. + cam = pipeline.create(dai.node.ColorCamera) + cam.setPreviewSize(300, 300) + cam.setInterleaved(False) + # Connect (link) the camera preview output to the neural network input + cam.preview.link(nn.input) + + # Create XLinkOut object as conduit for passing camera frames to the host + xoutFrame = pipeline.create(dai.node.XLinkOut) + xoutFrame.setStreamName("outFrame") + cam.preview.link(xoutFrame.input) + +# Create neural network output (inference) stream +nnOut = pipeline.create(dai.node.XLinkOut) +nnOut.setStreamName("nn") +nn.out.link(nnOut.input) + +# Create and configure the object tracker +objectTracker = pipeline.create(dai.node.ObjectTracker) +# objectTracker.setDetectionLabelsToTrack([0]) # track only person +# possible tracking types: ZERO_TERM_COLOR_HISTOGRAM, ZERO_TERM_IMAGELESS, SHORT_TERM_IMAGELESS, SHORT_TERM_KCF +objectTracker.setTrackerType(dai.TrackerType.ZERO_TERM_COLOR_HISTOGRAM) +# take the smallest ID when new object is tracked, possible options: SMALLEST_ID, UNIQUE_ID +objectTracker.setTrackerIdAssignmentPolicy(dai.TrackerIdAssignmentPolicy.SMALLEST_ID) + +# Link detection networks outputs to the object tracker +nn.passthrough.link(objectTracker.inputTrackerFrame) +nn.passthrough.link(objectTracker.inputDetectionFrame) +nn.out.link(objectTracker.inputDetections) + +# Send tracklets to the host +trackerOut = pipeline.create(dai.node.XLinkOut) +trackerOut.setStreamName("tracklets") +objectTracker.out.link(trackerOut.input) + + +# from https://www.pyimagesearch.com/2018/08/13/opencv-people-counter/ +class TrackableObject: + def __init__(self, objectID, centroid): + # store the object ID, then initialize a list of centroids + # using the current centroid + self.objectID = objectID + self.centroids = [centroid] + + # initialize a boolean used to indicate if the object has + # already been counted or not + self.counted = False + +# Pipeline defined, now the device is connected to +with dai.Device(pipeline) as device: + + # Define queues for image frames + if args.video_path != '': + # Input queue for sending video frames to device + qIn_Frame = device.getInputQueue( + name="inFrame", maxSize=4, blocking=False) + else: + # Output queue for retrieving camera frames from device + qOut_Frame = device.getOutputQueue( + name="outFrame", maxSize=4, blocking=False) + + qDet = device.getOutputQueue(name="nn", maxSize=4, blocking=False) + tracklets = device.getOutputQueue("tracklets", 4, False) + + if args.video_path != '': + cap = cv2.VideoCapture(args.video_path) + + if args.save_path: + if 
args.video_path != '': + width = int(cap.get(3)) + height = int(cap.get(4)) + fps = cap.get(cv2.CAP_PROP_FPS) + else: + width = 300 + height = 300 + fps = 30 + + out = cv2.VideoWriter(args.save_path, cv2.VideoWriter_fourcc( + 'M', 'J', 'P', 'G'), fps, (width, height)) + + def should_run(): + return cap.isOpened() if args.video_path != '' else True + + def get_frame(): + if args.video_path != '': + return cap.read() + else: + in_Frame = qOut_Frame.get() + frame = in_Frame.getCvFrame() + return True, frame + + startTime = time.monotonic() + detections = [] + frame_count = 0 + counter = [0, 0, 0, 0] # left, right, up, down + + trackableObjects = {} + + def to_planar(arr: np.ndarray, shape: tuple) -> np.ndarray: + return cv2.resize(arr, shape).transpose(2, 0, 1).flatten() + + while should_run(): + # Get image frames from camera or video file + read_correctly, frame = get_frame() + if not read_correctly: + break + + if args.video_path != '': + # Prepare image frame from video for sending to device + img = dai.ImgFrame() + img.setType(dai.ImgFrame.Type.BGR888p) + img.setData(to_planar(frame, (300, 300))) + img.setTimestamp(time.monotonic()) + img.setWidth(300) + img.setHeight(300) + # Use input queue to send video frame to device + qIn_Frame.send(img) + else: + in_Frame = qOut_Frame.tryGet() + + if in_Frame is not None: + frame = in_Frame.getCvFrame() + cv2.putText(frame, "NN fps: {:.2f}".format(frame_count / (time.monotonic() - startTime)), + (2, frame.shape[0] - 4), cv2.FONT_HERSHEY_TRIPLEX, 0.4, color=(255, 255, 255)) + + inDet = qDet.tryGet() + if inDet is not None: + detections = inDet.detections + frame_count += 1 + + track = tracklets.tryGet() + + if frame is not None: + height = frame.shape[0] + width = frame.shape[1] + + if track: + trackletsData = track.tracklets + for t in trackletsData: + to = trackableObjects.get(t.id, None) + + # calculate centroid + roi = t.roi.denormalize(width, height) + x1 = int(roi.topLeft().x) + y1 = int(roi.topLeft().y) + x2 = int(roi.bottomRight().x) + y2 = int(roi.bottomRight().y) + centroid = (int((x2-x1)/2+x1), int((y2-y1)/2+y1)) + + # If new tracklet, save its centroid + if t.status == dai.Tracklet.TrackingStatus.NEW: + to = TrackableObject(t.id, centroid) + elif to is not None: + if args.axis and not to.counted: + x = [c[0] for c in to.centroids] + direction = centroid[0] - np.mean(x) + + if centroid[0] > args.roi_position*width and direction > 0 and np.mean(x) < args.roi_position*width: + counter[1] += 1 + to.counted = True + elif centroid[0] < args.roi_position*width and direction < 0 and np.mean(x) > args.roi_position*width: + counter[0] += 1 + to.counted = True + + elif not args.axis and not to.counted: + y = [c[1] for c in to.centroids] + direction = centroid[1] - np.mean(y) + + if centroid[1] > args.roi_position*height and direction > 0 and np.mean(y) < args.roi_position*height: + counter[3] += 1 + to.counted = True + elif centroid[1] < args.roi_position*height and direction < 0 and np.mean(y) > args.roi_position*height: + counter[2] += 1 + to.counted = True + + to.centroids.append(centroid) + + trackableObjects[t.id] = to + + if t.status != dai.Tracklet.TrackingStatus.LOST and t.status != dai.Tracklet.TrackingStatus.REMOVED: + text = "ID {}".format(t.id) + cv2.putText(frame, text, (centroid[0] - 10, centroid[1] - 10), + cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2) + cv2.circle( + frame, (centroid[0], centroid[1]), 4, (255, 255, 255), -1) + + # Draw ROI line + if args.axis: + cv2.line(frame, (int(args.roi_position*width), 0), + 
(int(args.roi_position*width), height), (0xFF, 0, 0), 5) + else: + cv2.line(frame, (0, int(args.roi_position*height)), + (width, int(args.roi_position*height)), (0xFF, 0, 0), 5) + + # display count and status + font = cv2.FONT_HERSHEY_SIMPLEX + if args.axis: + cv2.putText(frame, f'Left: {counter[0]}; Right: {counter[1]}', ( + 10, 35), font, 0.8, (0, 0xFF, 0xFF), 2, cv2.FONT_HERSHEY_SIMPLEX) + else: + cv2.putText(frame, f'Up: {counter[2]}; Down: {counter[3]}', ( + 10, 35), font, 0.8, (0, 0xFF, 0xFF), 2, cv2.FONT_HERSHEY_SIMPLEX) + + if args.show: + cv2.imshow('cumulative_object_counting', frame) + if cv2.waitKey(25) & 0xFF == ord('q'): + break + + if args.save_path: + out.write(frame) + + cv2.destroyAllWindows() + + if args.video_path != '': + cap.release() + + if args.save_path: + out.release() diff --git a/gen2-cumulative-object-counting/api/requirements.txt b/gen2-cumulative-object-counting/api/requirements.txt new file mode 100644 index 000000000..15b7886e9 --- /dev/null +++ b/gen2-cumulative-object-counting/api/requirements.txt @@ -0,0 +1,5 @@ +numpy>=1.21 +opencv_python==4.5.1.48 +depthai==2.16.0.0 +scipy==1.4.1 +blobconverter==1.2.9 diff --git a/gen2-cumulative-object-counting/main.py b/gen2-cumulative-object-counting/main.py index e9f3618f1..c9c031504 100644 --- a/gen2-cumulative-object-counting/main.py +++ b/gen2-cumulative-object-counting/main.py @@ -1,88 +1,15 @@ -import blobconverter import cv2 -import argparse import numpy as np -import time -import depthai as dai +from depthai import TrackerType, TrackerIdAssignmentPolicy, Tracklet +from depthai_sdk import OakCamera, BboxStyle, TextPosition +tracked_objects = {} +counter = {'up': 0, 'down': 0, 'left': 0, 'right': 0} -parser = argparse.ArgumentParser( - formatter_class=argparse.ArgumentDefaultsHelpFormatter) -parser.add_argument('-m', '--model', type=str, help='File path of .blob file.') -parser.add_argument('-v', '--video_path', type=str, default='', - help='Path to video. If empty OAK-RGB camera is used. (default=\'\')') -parser.add_argument('-roi', '--roi_position', type=float, - default=0.5, help='ROI Position (0-1)') -parser.add_argument('-a', '--axis', default=True, action='store_false', - help='Axis for cumulative counting (default=x axis)') -parser.add_argument('-sh', '--show', default=True, - action='store_false', help='Show output') -parser.add_argument('-sp', '--save_path', type=str, default='', - help='Path to save the output. If None output won\'t be saved') -parser.add_argument('-s', '--sync', action="store_true", - help="Sync RGB output with NN output", default=False) -args = parser.parse_args() +ROI_POS = 0.5 +AXIS = 0 # 0 - vertical, 1 - horizontal -if args.model is None: - args.model = blobconverter.from_zoo(name="mobilenet-ssd", shaves=7) -# Create pipeline -pipeline = dai.Pipeline() - -# Define a neural network that will make predictions based on the source frames -nn = pipeline.create(dai.node.MobileNetDetectionNetwork) -nn.setConfidenceThreshold(0.5) -nn.setBlobPath(args.model) -nn.setNumInferenceThreads(2) -nn.input.setBlocking(False) - -# Define a source for the neural network input -if args.video_path != '': - # Create XLinkIn object as conduit for sending input video file frames - # to the neural network - xinFrame = pipeline.create(dai.node.XLinkIn) - xinFrame.setStreamName("inFrame") - # Connect (link) the video stream from the input queue to the - # neural network input - xinFrame.out.link(nn.input) -else: - # Create color camera node. 
- cam = pipeline.create(dai.node.ColorCamera) - cam.setPreviewSize(300, 300) - cam.setInterleaved(False) - # Connect (link) the camera preview output to the neural network input - cam.preview.link(nn.input) - - # Create XLinkOut object as conduit for passing camera frames to the host - xoutFrame = pipeline.create(dai.node.XLinkOut) - xoutFrame.setStreamName("outFrame") - cam.preview.link(xoutFrame.input) - -# Create neural network output (inference) stream -nnOut = pipeline.create(dai.node.XLinkOut) -nnOut.setStreamName("nn") -nn.out.link(nnOut.input) - -# Create and configure the object tracker -objectTracker = pipeline.create(dai.node.ObjectTracker) -# objectTracker.setDetectionLabelsToTrack([0]) # track only person -# possible tracking types: ZERO_TERM_COLOR_HISTOGRAM, ZERO_TERM_IMAGELESS, SHORT_TERM_IMAGELESS, SHORT_TERM_KCF -objectTracker.setTrackerType(dai.TrackerType.ZERO_TERM_COLOR_HISTOGRAM) -# take the smallest ID when new object is tracked, possible options: SMALLEST_ID, UNIQUE_ID -objectTracker.setTrackerIdAssignmentPolicy(dai.TrackerIdAssignmentPolicy.SMALLEST_ID) - -# Link detection networks outputs to the object tracker -nn.passthrough.link(objectTracker.inputTrackerFrame) -nn.passthrough.link(objectTracker.inputDetectionFrame) -nn.out.link(objectTracker.inputDetections) - -# Send tracklets to the host -trackerOut = pipeline.create(dai.node.XLinkOut) -trackerOut.setStreamName("tracklets") -objectTracker.out.link(trackerOut.input) - - -# from https://www.pyimagesearch.com/2018/08/13/opencv-people-counter/ class TrackableObject: def __init__(self, objectID, centroid): # store the object ID, then initialize a list of centroids @@ -94,173 +21,86 @@ def __init__(self, objectID, centroid): # already been counted or not self.counted = False -# Pipeline defined, now the device is connected to -with dai.Device(pipeline) as device: - - # Define queues for image frames - if args.video_path != '': - # Input queue for sending video frames to device - qIn_Frame = device.getInputQueue( - name="inFrame", maxSize=4, blocking=False) - else: - # Output queue for retrieving camera frames from device - qOut_Frame = device.getOutputQueue( - name="outFrame", maxSize=4, blocking=False) - - qDet = device.getOutputQueue(name="nn", maxSize=4, blocking=False) - tracklets = device.getOutputQueue("tracklets", 4, False) - - if args.video_path != '': - cap = cv2.VideoCapture(args.video_path) - - if args.save_path: - if args.video_path != '': - width = int(cap.get(3)) - height = int(cap.get(4)) - fps = cap.get(cv2.CAP_PROP_FPS) - else: - width = 300 - height = 300 - fps = 30 - - out = cv2.VideoWriter(args.save_path, cv2.VideoWriter_fourcc( - 'M', 'J', 'P', 'G'), fps, (width, height)) - - def should_run(): - return cap.isOpened() if args.video_path != '' else True - - def get_frame(): - if args.video_path != '': - return cap.read() - else: - in_Frame = qOut_Frame.get() - frame = in_Frame.getCvFrame() - return True, frame - - startTime = time.monotonic() - detections = [] - frame_count = 0 - counter = [0, 0, 0, 0] # left, right, up, down - - trackableObjects = {} - - def to_planar(arr: np.ndarray, shape: tuple) -> np.ndarray: - return cv2.resize(arr, shape).transpose(2, 0, 1).flatten() - - while should_run(): - # Get image frames from camera or video file - read_correctly, frame = get_frame() - if not read_correctly: - break - - if args.video_path != '': - # Prepare image frame from video for sending to device - img = dai.ImgFrame() - img.setType(dai.ImgFrame.Type.BGR888p) - img.setData(to_planar(frame, (300, 
300))) - img.setTimestamp(time.monotonic()) - img.setWidth(300) - img.setHeight(300) - # Use input queue to send video frame to device - qIn_Frame.send(img) - else: - in_Frame = qOut_Frame.tryGet() - - if in_Frame is not None: - frame = in_Frame.getCvFrame() - cv2.putText(frame, "NN fps: {:.2f}".format(frame_count / (time.monotonic() - startTime)), - (2, frame.shape[0] - 4), cv2.FONT_HERSHEY_TRIPLEX, 0.4, color=(255, 255, 255)) - - inDet = qDet.tryGet() - if inDet is not None: - detections = inDet.detections - frame_count += 1 - - track = tracklets.tryGet() - - if frame is not None: - height = frame.shape[0] - width = frame.shape[1] - - if track: - trackletsData = track.tracklets - for t in trackletsData: - to = trackableObjects.get(t.id, None) - - # calculate centroid - roi = t.roi.denormalize(width, height) - x1 = int(roi.topLeft().x) - y1 = int(roi.topLeft().y) - x2 = int(roi.bottomRight().x) - y2 = int(roi.bottomRight().y) - centroid = (int((x2-x1)/2+x1), int((y2-y1)/2+y1)) - - # If new tracklet, save its centroid - if t.status == dai.Tracklet.TrackingStatus.NEW: - to = TrackableObject(t.id, centroid) - elif to is not None: - if args.axis and not to.counted: - x = [c[0] for c in to.centroids] - direction = centroid[0] - np.mean(x) - - if centroid[0] > args.roi_position*width and direction > 0 and np.mean(x) < args.roi_position*width: - counter[1] += 1 - to.counted = True - elif centroid[0] < args.roi_position*width and direction < 0 and np.mean(x) > args.roi_position*width: - counter[0] += 1 - to.counted = True - - elif not args.axis and not to.counted: - y = [c[1] for c in to.centroids] - direction = centroid[1] - np.mean(y) - - if centroid[1] > args.roi_position*height and direction > 0 and np.mean(y) < args.roi_position*height: - counter[3] += 1 - to.counted = True - elif centroid[1] < args.roi_position*height and direction < 0 and np.mean(y) > args.roi_position*height: - counter[2] += 1 - to.counted = True - - to.centroids.append(centroid) - - trackableObjects[t.id] = to - - if t.status != dai.Tracklet.TrackingStatus.LOST and t.status != dai.Tracklet.TrackingStatus.REMOVED: - text = "ID {}".format(t.id) - cv2.putText(frame, text, (centroid[0] - 10, centroid[1] - 10), - cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2) - cv2.circle( - frame, (centroid[0], centroid[1]), 4, (255, 255, 255), -1) - - # Draw ROI line - if args.axis: - cv2.line(frame, (int(args.roi_position*width), 0), - (int(args.roi_position*width), height), (0xFF, 0, 0), 5) - else: - cv2.line(frame, (0, int(args.roi_position*height)), - (width, int(args.roi_position*height)), (0xFF, 0, 0), 5) - - # display count and status - font = cv2.FONT_HERSHEY_SIMPLEX - if args.axis: - cv2.putText(frame, f'Left: {counter[0]}; Right: {counter[1]}', ( - 10, 35), font, 0.8, (0, 0xFF, 0xFF), 2, cv2.FONT_HERSHEY_SIMPLEX) - else: - cv2.putText(frame, f'Up: {counter[2]}; Down: {counter[3]}', ( - 10, 35), font, 0.8, (0, 0xFF, 0xFF), 2, cv2.FONT_HERSHEY_SIMPLEX) - - if args.show: - cv2.imshow('cumulative_object_counting', frame) - if cv2.waitKey(25) & 0xFF == ord('q'): - break - - if args.save_path: - out.write(frame) - - cv2.destroyAllWindows() - - if args.video_path != '': - cap.release() - if args.save_path: - out.release() +def get_centroid(roi): + x1 = roi.topLeft().x + y1 = roi.topLeft().y + x2 = roi.bottomRight().x + y2 = roi.bottomRight().y + return ((x2 - x1) / 2 + x1, (y2 - y1) / 2 + y1) + + +def callback(packet): + height, width = packet.frame.shape[:2] + visualizer = packet.visualizer + + for t in 
packet.daiTracklets.tracklets: + to = tracked_objects.get(t.id, None) + + # calculate centroid + roi = t.roi.denormalize(width, height) + x1 = int(roi.topLeft().x) + y1 = int(roi.topLeft().y) + x2 = int(roi.bottomRight().x) + y2 = int(roi.bottomRight().y) + centroid = (int((x2 - x1) / 2 + x1), int((y2 - y1) / 2 + y1)) + + # If new tracklet, save its centroid + if t.status == Tracklet.TrackingStatus.NEW: + to = TrackableObject(t.id, centroid) + elif to is not None and not to.counted: + if AXIS == 0: + x = [c[0] for c in to.centroids] + direction = centroid[0] - np.mean(x) + + if centroid[0] > ROI_POS * width and direction > 0 and np.mean(x) < ROI_POS * width: + counter['right'] += 1 + to.counted = True + elif centroid[0] < ROI_POS * width and direction < 0 and np.mean(x) > ROI_POS * width: + counter['left'] += 1 + to.counted = True + + elif AXIS == 1: + y = [c[1] for c in to.centroids] + direction = centroid[1] - np.mean(y) + + if centroid[1] > ROI_POS * height and direction > 0 and np.mean(y) < ROI_POS * height: + counter['down'] += 1 + to.counted = True + elif centroid[1] < ROI_POS * height and direction < 0 and np.mean(y) > ROI_POS * height: + counter['up'] += 1 + to.counted = True + + to.centroids.append(centroid) + tracked_objects[t.id] = to + + counter_str = f'Up: {counter["up"]}\nDown: {counter["down"]}\n' \ + if AXIS == 1 \ + else f'Left: {counter["left"]}\nRight: {counter["right"]}' + + pt1 = (0, int(ROI_POS * height)) if AXIS==1 else (int(ROI_POS * width), 0) + pt2 = (width, int(ROI_POS * height)) if AXIS==1 else (int(ROI_POS * width), height) + visualizer.add_line(pt1=pt1, pt2=pt2, color=(255, 255, 255), thickness=1) + + visualizer.add_text(counter_str, outline=True, position=TextPosition.MID_LEFT, size=1.2) + frame = visualizer.draw(packet.frame) + cv2.imshow('People tracking', frame) + + +with OakCamera() as oak: + color = oak.create_camera('color') + nn = oak.create_nn('yolov7tiny_coco_640x352', color, tracker=True) + + nn.config_tracker( + tracker_type=TrackerType.ZERO_TERM_COLOR_HISTOGRAM, + track_labels=[0], + assignment_policy=TrackerIdAssignmentPolicy.UNIQUE_ID + ) + + visualizer = oak.visualize(nn.out.tracker, callback=callback, fps=True) + visualizer.detections( + hide_label=True, + bbox_style=BboxStyle.ROUNDED_RECTANGLE + ) + + oak.start(blocking=True) diff --git a/gen2-cumulative-object-counting/requirements.txt b/gen2-cumulative-object-counting/requirements.txt index 15b7886e9..9ef60c0cc 100644 --- a/gen2-cumulative-object-counting/requirements.txt +++ b/gen2-cumulative-object-counting/requirements.txt @@ -1,5 +1,6 @@ numpy>=1.21 -opencv_python==4.5.1.48 -depthai==2.16.0.0 -scipy==1.4.1 -blobconverter==1.2.9 +opencv_python +depthai +depthai-sdk +scipy +blobconverter diff --git a/gen2-custom-models/concat.py b/gen2-custom-models/concat.py index 45f457695..b0de31f4b 100644 --- a/gen2-custom-models/concat.py +++ b/gen2-custom-models/concat.py @@ -1,63 +1,63 @@ -import numpy as np -import cv2 -import depthai as dai - -SHAPE = 300 - -p = dai.Pipeline() -p.setOpenVINOVersion(dai.OpenVINO.VERSION_2021_4) - -camRgb = p.create(dai.node.ColorCamera) -camRgb.setPreviewSize(SHAPE, SHAPE) -camRgb.setInterleaved(False) -camRgb.setColorOrder(dai.ColorCameraProperties.ColorOrder.BGR) - -left = p.create(dai.node.MonoCamera) -left.setBoardSocket(dai.CameraBoardSocket.LEFT) -left.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P) - -# ImageManip for cropping (face detection NN requires input image of 300x300) and to change frame type -manipLeft = 
p.create(dai.node.ImageManip) -manipLeft.initialConfig.setResize(300, 300) -# The NN model expects BGR input. By default ImageManip output type would be same as input (gray in this case) -manipLeft.initialConfig.setFrameType(dai.RawImgFrame.Type.BGR888p) -left.out.link(manipLeft.inputImage) - -right = p.create(dai.node.MonoCamera) -right.setBoardSocket(dai.CameraBoardSocket.RIGHT) -right.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P) - -# ImageManip for cropping (face detection NN requires input image of 300x300) and to change frame type -manipRight = p.create(dai.node.ImageManip) -manipRight.initialConfig.setResize(300, 300) -# The NN model expects BGR input. By default ImageManip output type would be same as input (gray in this case) -manipRight.initialConfig.setFrameType(dai.RawImgFrame.Type.BGR888p) -right.out.link(manipRight.inputImage) - -# NN that detects faces in the image -nn = p.create(dai.node.NeuralNetwork) -nn.setBlobPath("models/concat_openvino_2021.4_6shave.blob") -nn.setNumInferenceThreads(2) - -manipLeft.out.link(nn.inputs['img1']) -camRgb.preview.link(nn.inputs['img2']) -manipRight.out.link(nn.inputs['img3']) - -# Send bouding box from the NN to the host via XLink -nn_xout = p.create(dai.node.XLinkOut) -nn_xout.setStreamName("nn") -nn.out.link(nn_xout.input) - -# Pipeline is defined, now we can connect to the device -with dai.Device(p) as device: - qNn = device.getOutputQueue(name="nn", maxSize=4, blocking=False) - shape = (3, SHAPE, SHAPE * 3) - - while True: - inNn = np.array(qNn.get().getData()) - frame = inNn.view(np.float16).reshape(shape).transpose(1, 2, 0).astype(np.uint8).copy() - - cv2.imshow("Concat", frame) - - if cv2.waitKey(1) == ord('q'): - break +import numpy as np +import cv2 +import depthai as dai + +SHAPE = 300 + +p = dai.Pipeline() +p.setOpenVINOVersion(dai.OpenVINO.VERSION_2021_4) + +camRgb = p.create(dai.node.ColorCamera) +camRgb.setPreviewSize(SHAPE, SHAPE) +camRgb.setInterleaved(False) +camRgb.setColorOrder(dai.ColorCameraProperties.ColorOrder.BGR) + +left = p.create(dai.node.MonoCamera) +left.setBoardSocket(dai.CameraBoardSocket.LEFT) +left.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P) + +# ImageManip for cropping (face detection NN requires input image of 300x300) and to change frame type +manipLeft = p.create(dai.node.ImageManip) +manipLeft.initialConfig.setResize(300, 300) +# The NN model expects BGR input. By default ImageManip output type would be same as input (gray in this case) +manipLeft.initialConfig.setFrameType(dai.RawImgFrame.Type.BGR888p) +left.out.link(manipLeft.inputImage) + +right = p.create(dai.node.MonoCamera) +right.setBoardSocket(dai.CameraBoardSocket.RIGHT) +right.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P) + +# ImageManip for cropping (face detection NN requires input image of 300x300) and to change frame type +manipRight = p.create(dai.node.ImageManip) +manipRight.initialConfig.setResize(300, 300) +# The NN model expects BGR input. 
By default ImageManip output type would be same as input (gray in this case) +manipRight.initialConfig.setFrameType(dai.RawImgFrame.Type.BGR888p) +right.out.link(manipRight.inputImage) + +# NN that detects faces in the image +nn = p.create(dai.node.NeuralNetwork) +nn.setBlobPath("models/concat_openvino_2021.4_6shave.blob") +nn.setNumInferenceThreads(2) + +manipLeft.out.link(nn.inputs['img1']) +camRgb.preview.link(nn.inputs['img2']) +manipRight.out.link(nn.inputs['img3']) + +# Send bounding box from the NN to the host via XLink +nn_xout = p.create(dai.node.XLinkOut) +nn_xout.setStreamName("nn") +nn.out.link(nn_xout.input) + +# Pipeline is defined, now we can connect to the device +with dai.Device(p) as device: + qNn = device.getOutputQueue(name="nn", maxSize=4, blocking=False) + shape = (3, SHAPE, SHAPE * 3) + + while True: + inNn = np.array(qNn.get().getData()) + frame = inNn.view(np.float16).reshape(shape).transpose(1, 2, 0).astype(np.uint8).copy() + + cv2.imshow("Concat", frame) + + if cv2.waitKey(1) == ord('q'): + break diff --git a/gen2-custom-models/sdk/blur.py b/gen2-custom-models/sdk/blur.py new file mode 100644 index 000000000..1de0038f6 --- /dev/null +++ b/gen2-custom-models/sdk/blur.py @@ -0,0 +1,23 @@ +import cv2 +import numpy as np + +from depthai_sdk import OakCamera + + +def get_frame(img_frame, shape): + return np.array(img_frame.getData()).view(np.float16).reshape(shape).transpose(1, 2, 0).astype(np.uint8) + + +def callback(packet, visualizer): + edge_frame = get_frame(packet.img_detections, (3, 300, 300)) + + cv2.imshow("Blurred image", edge_frame) + cv2.imshow("Color", packet.frame) + + +with OakCamera() as oak: + color = oak.create_camera('color', resolution='1080p') + nn = oak.create_nn('../models/blur_simplified_openvino_2021.4_6shave.blob', color) + + oak.callback(nn, callback=callback) + oak.start(blocking=True) diff --git a/gen2-custom-models/sdk/concat.py b/gen2-custom-models/sdk/concat.py new file mode 100644 index 000000000..5d0c5c0ec --- /dev/null +++ b/gen2-custom-models/sdk/concat.py @@ -0,0 +1,29 @@ +import cv2 +import depthai as dai +import numpy as np + +from depthai_sdk import OakCamera, DetectionPacket + + +def get_frame(data: dai.NNData, shape): + diff = np.array(data.getFirstLayerFp16()).reshape(shape) + colorize = cv2.normalize(diff, None, 255, 0, cv2.NORM_INF, cv2.CV_8UC1) + return cv2.applyColorMap(colorize, cv2.COLORMAP_JET) + + +def callback(packet: DetectionPacket, visualizer): + edge_frame = get_frame(packet.img_detections, (3, 300, 300)) + + cv2.imshow("Concat", edge_frame) + cv2.imshow("Color", packet.frame) + + +with OakCamera() as oak: + color = oak.create_camera('color', resolution='1080p') + + raise NotImplementedError('TODO: Multiple NN inputs') + + nn = oak.create_nn('../models/concat_openvino_2021.4_6shave.blob', color) + + oak.callback(nn, callback=callback) + oak.start(blocking=True) diff --git a/gen2-custom-models/sdk/diff.py b/gen2-custom-models/sdk/diff.py new file mode 100644 index 000000000..5b78e956b --- /dev/null +++ b/gen2-custom-models/sdk/diff.py @@ -0,0 +1,29 @@ +import cv2 +import depthai as dai +import numpy as np + +from depthai_sdk import OakCamera, DetectionPacket + + +def get_frame(data: dai.NNData, shape): + diff = np.array(data.getFirstLayerFp16()).reshape(shape) + colorize = cv2.normalize(diff, None, 255, 0, cv2.NORM_INF, cv2.CV_8UC1) + return cv2.applyColorMap(colorize, cv2.COLORMAP_JET) + + +def callback(packet: DetectionPacket, visualizer): + edge_frame = get_frame(packet.img_detections, (3, 300, 
300)) + + cv2.imshow("Diff", edge_frame) + cv2.imshow("Color", packet.frame) + + +with OakCamera() as oak: + color = oak.create_camera('color', resolution='1080p') + + raise NotImplementedError('TODO') + + nn = oak.create_nn('../models/diff_openvino_2021.4_6shave.blob', color) + + oak.callback(nn, callback=callback) + oak.start(blocking=True) diff --git a/gen2-custom-models/sdk/edge.py b/gen2-custom-models/sdk/edge.py new file mode 100644 index 000000000..5266514bb --- /dev/null +++ b/gen2-custom-models/sdk/edge.py @@ -0,0 +1,23 @@ +import cv2 +import numpy as np + +from depthai_sdk import OakCamera, DetectionPacket + + +def get_frame(img, shape): + return np.array(img.getData()).view(np.float16).reshape(shape).transpose(1, 2, 0).astype(np.uint8) + + +def callback(packet: DetectionPacket): + edge_frame = get_frame(packet.img_detections, (3, 300, 300)) + + cv2.imshow("Laplacian edge detection", edge_frame) + cv2.imshow("Color", packet.frame) + + +with OakCamera() as oak: + color = oak.create_camera('color', resolution='1080p') + nn = oak.create_nn('../models/edge_simplified_openvino_2021.4_6shave.blob', color) + + oak.callback(nn, callback=callback) + oak.start(blocking=True) diff --git a/gen2-custom-models/sdk/requirements.txt b/gen2-custom-models/sdk/requirements.txt new file mode 100644 index 000000000..6ed8c140d --- /dev/null +++ b/gen2-custom-models/sdk/requirements.txt @@ -0,0 +1,4 @@ +opencv-python +numpy +depthai +depthai-sdk \ No newline at end of file diff --git a/gen2-deeplabv3_depth/sdk/main.py b/gen2-deeplabv3_depth/sdk/main.py new file mode 100755 index 000000000..453237ab3 --- /dev/null +++ b/gen2-deeplabv3_depth/sdk/main.py @@ -0,0 +1,94 @@ +from functools import partial + +import blobconverter +import cv2 +import numpy as np + +from depthai_sdk import OakCamera + +INPUT_SHAPE = (256, 256) +TARGET_SHAPE = (400, 400) + +jet_custom = cv2.applyColorMap(np.arange(256, dtype=np.uint8), cv2.COLORMAP_JET) +jet_custom[0] = [0, 0, 0] + + +def decode_deeplabv3p(output_tensor): + class_colors = [[0, 0, 0], [0, 255, 0]] + class_colors = np.asarray(class_colors, dtype=np.uint8) + + output = output_tensor.reshape(*INPUT_SHAPE) + output_colors = np.take(class_colors, output, axis=0) + return output_colors + + +def get_multiplier(output_tensor): + class_binary = [[0], [1]] + class_binary = np.asarray(class_binary, dtype=np.uint8) + output = output_tensor.reshape(*INPUT_SHAPE) + output_colors = np.take(class_binary, output, axis=0) + return output_colors + + +def crop_to_square(frame): + height = frame.shape[0] + width = frame.shape[1] + delta = int((width - height) / 2) + # print(height, width, delta) + return frame[0:height, delta:width - delta] + + +def callback(disp_multiplier, sync_dict): + frames = {} + + rgb_packet = sync_dict['0_preview'] + disparity_packet = sync_dict['1_disparity;1_out'] + nn_packet = sync_dict['2_out;0_preview'] + + nn_data = nn_packet.img_detections + + layer1 = nn_data.getFirstLayerInt32() + # reshape to numpy array + lay1 = np.asarray(layer1, dtype=np.int32).reshape(*INPUT_SHAPE) + output_colors = decode_deeplabv3p(lay1) + + # To match depth frames + output_colors = cv2.resize(output_colors, TARGET_SHAPE) + + frame = rgb_packet.frame + frame = crop_to_square(frame) + frame = cv2.resize(frame, TARGET_SHAPE) + frames['frame'] = frame + frame = cv2.addWeighted(frame, 1, output_colors, 0.5, 0) + frames['colored_frame'] = frame + + disparity_frame = disparity_packet.frame + disparity_frame = (disparity_frame * disp_multiplier).astype(np.uint8) + disparity_frame = 
crop_to_square(disparity_frame) + disparity_frame = cv2.resize(disparity_frame, TARGET_SHAPE) + + # Colorize the disparity + frames['depth'] = cv2.applyColorMap(disparity_frame, jet_custom) + + multiplier = get_multiplier(lay1) + multiplier = cv2.resize(multiplier, TARGET_SHAPE) + depth_overlay = disparity_frame * multiplier[..., None] + frames['cutout'] = cv2.applyColorMap(depth_overlay, jet_custom) + + show = np.concatenate((frames['colored_frame'], frames['cutout'], frames['depth']), axis=1) + cv2.imshow("Combined frame", show) + + +with OakCamera() as oak: + color = oak.create_camera('color', resolution='1080p') + color.config_color_camera(interleaved=False) + + stereo = oak.create_stereo('400p') + + disp_multiplier = 255 / stereo.node.initialConfig.getMaxDisparity() + + nn_path = blobconverter.from_zoo(name="deeplab_v3_mnv2_256x256", zoo_type="depthai", shaves=6) + nn = oak.create_nn(nn_path, color) + + oak.sync([color.out.main, stereo.out.disparity, nn.out.main], callback=partial(callback, disp_multiplier)) + oak.start(blocking=True) diff --git a/gen2-deeplabv3_depth/sdk/requirements.txt b/gen2-deeplabv3_depth/sdk/requirements.txt new file mode 100644 index 000000000..b8052493c --- /dev/null +++ b/gen2-deeplabv3_depth/sdk/requirements.txt @@ -0,0 +1,4 @@ +opencv-python +depthai +depthai-sdk +blobconverter \ No newline at end of file diff --git a/gen2-deeplabv3_multiclass/sdk/main.py b/gen2-deeplabv3_multiclass/sdk/main.py new file mode 100644 index 000000000..af7e23dfd --- /dev/null +++ b/gen2-deeplabv3_multiclass/sdk/main.py @@ -0,0 +1,52 @@ +import cv2 +import numpy as np + +from depthai_sdk import OakCamera + +NN_WIDTH, NN_HEIGHT = 256, 256 +N_CLASSES = 21 + + +def decode_deeplabv3p(output_tensor): + output = output_tensor.reshape(NN_WIDTH, NN_HEIGHT) + + # scale to [0 ... 
255] and apply colormap + output = np.array(output) * (255 / N_CLASSES) + output = output.astype(np.uint8) + output_colors = cv2.applyColorMap(output, cv2.COLORMAP_JET) + + # reset the color of 0 class + output_colors[output == 0] = [0, 0, 0] + + return output_colors + + +def show_deeplabv3p(output_colors, frame): + return cv2.addWeighted(frame, 1, output_colors, 0.4, 0) + + +def callback(packet, visualizer): + frame = packet.frame + nn_data = packet.img_detections + + # reshape to numpy array + layer1 = np.array(nn_data.getFirstLayerInt32()).reshape(NN_WIDTH, NN_HEIGHT) + + found_classes = np.unique(layer1) + output_colors = decode_deeplabv3p(layer1) + output_colors = cv2.resize(output_colors, (frame.shape[1], frame.shape[0])) + + frame = show_deeplabv3p(output_colors, frame) + cv2.putText(frame, "Found classes {}".format(found_classes), (10, 20), cv2.FONT_HERSHEY_TRIPLEX, 1.0, (255, 255, 255)) + + cv2.imshow("DeepLabV3 segmentation", frame) + + +with OakCamera() as oak: + color = oak.create_camera('color', resolution='1080p') + + nn = oak.create_nn('../models/deeplab_v3_plus_mnv2_decoder_256_openvino_2021.4.blob', color) + nn.config_nn(aspect_ratio_resize_mode='stretch') + + oak.callback(nn, callback=callback) + oak.start(blocking=True) diff --git a/gen2-deeplabv3_multiclass/sdk/requirements.txt b/gen2-deeplabv3_multiclass/sdk/requirements.txt new file mode 100644 index 000000000..0277f416f --- /dev/null +++ b/gen2-deeplabv3_multiclass/sdk/requirements.txt @@ -0,0 +1,3 @@ +opencv-python +depthai +depthai-sdk \ No newline at end of file diff --git a/gen2-deeplabv3_person/sdk/main.py b/gen2-deeplabv3_person/sdk/main.py new file mode 100755 index 000000000..e91f084fc --- /dev/null +++ b/gen2-deeplabv3_person/sdk/main.py @@ -0,0 +1,44 @@ +import blobconverter +import cv2 +import numpy as np +from depthai import NNData + +from depthai_sdk import OakCamera, AspectRatioResizeMode, SemanticSegmentation + +NN_WIDTH, NN_HEIGHT = 513, 513 +N_CLASSES = 21 + + +def decode(nn_data: NNData) -> SemanticSegmentation: + mask = np.array(nn_data.getFirstLayerInt32()).reshape(NN_WIDTH, NN_HEIGHT) + return SemanticSegmentation(nn_data, mask) + + +def process_mask(output_tensor): + class_colors = [[0, 0, 0], [0, 255, 0]] + class_colors = np.asarray(class_colors, dtype=np.uint8) + output = output_tensor.reshape(NN_WIDTH, NN_HEIGHT) + output_colors = np.take(class_colors, output, axis=0) + return output_colors + + +def callback(packet, visualizer): + frame = packet.frame + mask = packet.img_detections.mask + + output_colors = process_mask(mask) + output_colors = cv2.resize(output_colors, (frame.shape[1], frame.shape[0])) + + frame = cv2.addWeighted(frame, 1, output_colors, 0.2, 0) + cv2.imshow('DeepLabV3 person segmentation', frame) + + +with OakCamera() as oak: + color = oak.create_camera('color', resolution='1080p') + nn_path = blobconverter.from_zoo(name='deeplab_v3_mnv2_513x513', zoo_type='depthai') + nn = oak.create_nn(nn_path, color, decode_fn=decode) + + nn.config_nn(aspect_ratio_resize_mode=AspectRatioResizeMode.STRETCH) + + oak.callback(nn, callback=callback) + oak.start(blocking=True) diff --git a/gen2-deeplabv3_person/sdk/requirements.txt b/gen2-deeplabv3_person/sdk/requirements.txt new file mode 100644 index 000000000..101c5f3ad --- /dev/null +++ b/gen2-deeplabv3_person/sdk/requirements.txt @@ -0,0 +1,4 @@ +opencv-python +depthai +depthai-sdk +blobconverter \ No newline at end of file diff --git a/gen2-depth-mbnv2/sdk/main.py b/gen2-depth-mbnv2/sdk/main.py new file mode 100644 index 
000000000..3b46764b0 --- /dev/null +++ b/gen2-depth-mbnv2/sdk/main.py @@ -0,0 +1,41 @@ +import blobconverter +import cv2 +import numpy as np + +from depthai_sdk import OakCamera, DetectionPacket, AspectRatioResizeMode + +NN_WIDTH, NN_HEIGHT = 320, 240 + + +def callback(packet, visualizer): + nn_data = packet.img_detections + + # Get output layer + pred = np.array(nn_data.getFirstLayerFp16()).reshape((NN_HEIGHT // 2, NN_WIDTH // 2)) + + # Scale depth to get relative depth + d_min = np.min(pred) + d_max = np.max(pred) + depth_relative = (pred - d_min) / (d_max - d_min) + + # Color it + depth_relative = np.array(depth_relative) * 255 + depth_relative = depth_relative.astype(np.uint8) + depth_relative = cv2.applyColorMap(depth_relative, cv2.COLORMAP_INFERNO) + + # Resize to match color frame + depth_relative = cv2.resize(depth_relative, (packet.frame.shape[1], packet.frame.shape[0])) + + # Concatenate NN input and produced depth + cv2.imshow('Depth MobileNetV2', cv2.hconcat([packet.frame, depth_relative])) + + +with OakCamera() as oak: + color = oak.create_camera('color', resolution='1080p') + + nn_path = blobconverter.from_zoo(name="depth_estimation_mbnv2_240x320", zoo_type="depthai", shaves=6) + nn = oak.create_nn(nn_path, color) + nn.config_nn(aspect_ratio_resize_mode=AspectRatioResizeMode.STRETCH) + + oak.callback(nn, callback=callback) + oak.start(blocking=True) diff --git a/gen2-depth-mbnv2/sdk/requirements.txt b/gen2-depth-mbnv2/sdk/requirements.txt new file mode 100644 index 000000000..0a30656d7 --- /dev/null +++ b/gen2-depth-mbnv2/sdk/requirements.txt @@ -0,0 +1,5 @@ +opencv-python~=4.5.1.48 +depthai +depthai-sdk +numpy~=1.22.0 +blobconverter \ No newline at end of file diff --git a/gen2-efficientDet/main.py b/gen2-efficientDet/main.py index e040a7f05..85c4fe039 100644 --- a/gen2-efficientDet/main.py +++ b/gen2-efficientDet/main.py @@ -1,86 +1,86 @@ -from pathlib import Path -import numpy as np -import cv2 -import depthai as dai -import time - -CONF_THRESHOLD = 0.4 -SHAPE = 320 -coco_90 = ["person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light", "fire hydrant", "12", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "26", "backpack", "umbrella", "29", "30", "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle", "45", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch", "potted plant", "bed", "66", "dining table", "68", "69", "toilet", "71", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator", "83", "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush"] - -p = dai.Pipeline() -p.setOpenVINOVersion(dai.OpenVINO.VERSION_2021_3) - -class FPSHandler: - def __init__(self, cap=None): - self.timestamp = time.time() - self.start = time.time() - self.frame_cnt = 0 - def next_iter(self): - self.timestamp = time.time() - self.frame_cnt += 1 - def fps(self): - return self.frame_cnt / (self.timestamp - self.start) - -camRgb = p.create(dai.node.ColorCamera) -camRgb.setPreviewSize(SHAPE, SHAPE) -camRgb.setResolution(dai.ColorCameraProperties.SensorResolution.THE_1080_P) -camRgb.setInterleaved(False) 
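Note: the depthai_sdk ports added in this diff (the deeplabv3 and depth-mbnv2 examples above, with more below) all share the same host-side shape that the hand-built pipelines being removed here spelled out manually. A minimal sketch of that pattern, distilled from those files; the model path and resize mode are illustrative placeholders rather than values from any one example:

    from depthai_sdk import OakCamera

    def callback(packet, visualizer):
        # packet.frame is the camera frame the examples pass to cv2.imshow;
        # packet.img_detections carries the raw NNData for custom decoding
        ...

    with OakCamera() as oak:
        color = oak.create_camera('color', resolution='1080p')
        nn = oak.create_nn('path/to/model.blob', color)   # local .blob, zoo name, or blobconverter path
        nn.config_nn(aspect_ratio_resize_mode='stretch')  # how frames are fitted to the NN input
        oak.callback(nn, callback=callback)               # invoked once per synced result
        oak.start(blocking=True)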
-camRgb.setColorOrder(dai.ColorCameraProperties.ColorOrder.BGR) -camRgb.setFp16(True) # Model requires FP16 input - -# NN that detects faces in the image -nn = p.create(dai.node.NeuralNetwork) -nn.setBlobPath(str(Path("models/efficientdet_lite0_2021.3_6shaves.blob").resolve().absolute())) -nn.setNumInferenceThreads(2) -camRgb.preview.link(nn.input) - -# Send bouding box from the NN to the host via XLink -nn_xout = p.create(dai.node.XLinkOut) -nn_xout.setStreamName("nn") -nn.out.link(nn_xout.input) -# Send rgb frames to the host -rgb_xout = p.create(dai.node.XLinkOut) -rgb_xout.setStreamName("rgb") -nn.passthrough.link(rgb_xout.input) - -# Pipeline is defined, now we can connect to the device -with dai.Device(p) as device: - qRgb = device.getOutputQueue(name="rgb", maxSize=4, blocking=False) - qNn = device.getOutputQueue(name="nn", maxSize=4, blocking=False) - fps = FPSHandler() - shape = (3, SHAPE, SHAPE) - - while True: - inRgb = qRgb.get() - # Model needs FP16 so we have to convert color frame back to U8 on the host - frame = np.array(inRgb.getData()).view(np.float16).reshape(shape).transpose(1, 2, 0).astype(np.uint8).copy() - - in_nn = qNn.tryGet() - if in_nn is not None: - fps.next_iter() - cv2.putText(frame, "Fps: {:.2f}".format(fps.fps()), (2, SHAPE - 4), cv2.FONT_HERSHEY_TRIPLEX, 0.4, color=(255, 255, 255)) - - # You can use the line below to print all the (output) layers and their info - # [print(f"Layer name: {l.name}, Type: {l.dataType}, Dimensions: {l.dims}") for l in in_nn.getAllLayers()] - - bb = np.array(in_nn.getLayerFp16('Identity')).reshape(25, 4) - label = in_nn.getLayerInt32('Identity_1') - conf = in_nn.getLayerFp16('Identity_2') - - for i in range(len(conf)): - if CONF_THRESHOLD < conf[i]: - bb_det = bb[i] - # Limit the bounding box to 0..1 - bb_det[bb_det > 1] = 1 - bb_det[bb_det < 0] = 0 - xy_min = (int(bb_det[1]*SHAPE), int(bb_det[0]*SHAPE)) - xy_max = (int(bb_det[3]*SHAPE), int(bb_det[2]*SHAPE)) - # Display detection's BB, label and confidence on the frame - cv2.rectangle(frame, xy_min , xy_max, (255, 0, 0), 2) - cv2.putText(frame, coco_90[label[i]], (xy_min[0] + 10, xy_min[1] + 20), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255) - cv2.putText(frame, f"{int(conf[i] * 100)}%", (xy_min[0] + 10, xy_min[1] + 40), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255) - - cv2.imshow("rgb", frame) - if cv2.waitKey(1) == ord('q'): - break +from pathlib import Path +import numpy as np +import cv2 +import depthai as dai +import time + +CONF_THRESHOLD = 0.4 +SHAPE = 320 +coco_90 = ["person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light", "fire hydrant", "12", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "26", "backpack", "umbrella", "29", "30", "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle", "45", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch", "potted plant", "bed", "66", "dining table", "68", "69", "toilet", "71", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator", "83", "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush"] + +p = dai.Pipeline() +p.setOpenVINOVersion(dai.OpenVINO.VERSION_2021_3) + +class FPSHandler: + def __init__(self, 
cap=None): + self.timestamp = time.time() + self.start = time.time() + self.frame_cnt = 0 + def next_iter(self): + self.timestamp = time.time() + self.frame_cnt += 1 + def fps(self): + return self.frame_cnt / (self.timestamp - self.start) + +camRgb = p.create(dai.node.ColorCamera) +camRgb.setPreviewSize(SHAPE, SHAPE) +camRgb.setResolution(dai.ColorCameraProperties.SensorResolution.THE_1080_P) +camRgb.setInterleaved(False) +camRgb.setColorOrder(dai.ColorCameraProperties.ColorOrder.BGR) +camRgb.setFp16(True) # Model requires FP16 input + +# NN that detects faces in the image +nn = p.create(dai.node.NeuralNetwork) +nn.setBlobPath(str(Path("models/efficientdet_lite0_2021.3_6shaves.blob").resolve().absolute())) +nn.setNumInferenceThreads(2) +camRgb.preview.link(nn.input) + +# Send bouding box from the NN to the host via XLink +nn_xout = p.create(dai.node.XLinkOut) +nn_xout.setStreamName("nn") +nn.out.link(nn_xout.input) +# Send rgb frames to the host +rgb_xout = p.create(dai.node.XLinkOut) +rgb_xout.setStreamName("rgb") +nn.passthrough.link(rgb_xout.input) + +# Pipeline is defined, now we can connect to the device +with dai.Device(p) as device: + qRgb = device.getOutputQueue(name="rgb", maxSize=4, blocking=False) + qNn = device.getOutputQueue(name="nn", maxSize=4, blocking=False) + fps = FPSHandler() + shape = (3, SHAPE, SHAPE) + + while True: + inRgb = qRgb.get() + # Model needs FP16 so we have to convert color frame back to U8 on the host + frame = np.array(inRgb.getData()).view(np.float16).reshape(shape).transpose(1, 2, 0).astype(np.uint8).copy() + + in_nn = qNn.tryGet() + if in_nn is not None: + fps.next_iter() + cv2.putText(frame, "Fps: {:.2f}".format(fps.fps()), (2, SHAPE - 4), cv2.FONT_HERSHEY_TRIPLEX, 0.4, color=(255, 255, 255)) + + # You can use the line below to print all the (output) layers and their info + # [print(f"Layer name: {l.name}, Type: {l.dataType}, Dimensions: {l.dims}") for l in in_nn.getAllLayers()] + + bb = np.array(in_nn.getLayerFp16('Identity')).reshape(25, 4) + label = in_nn.getLayerInt32('Identity_1') + conf = in_nn.getLayerFp16('Identity_2') + + for i in range(len(conf)): + if CONF_THRESHOLD < conf[i]: + bb_det = bb[i] + # Limit the bounding box to 0..1 + bb_det[bb_det > 1] = 1 + bb_det[bb_det < 0] = 0 + xy_min = (int(bb_det[1]*SHAPE), int(bb_det[0]*SHAPE)) + xy_max = (int(bb_det[3]*SHAPE), int(bb_det[2]*SHAPE)) + # Display detection's BB, label and confidence on the frame + cv2.rectangle(frame, xy_min , xy_max, (255, 0, 0), 2) + cv2.putText(frame, coco_90[label[i]], (xy_min[0] + 10, xy_min[1] + 20), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255) + cv2.putText(frame, f"{int(conf[i] * 100)}%", (xy_min[0] + 10, xy_min[1] + 40), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255) + + cv2.imshow("rgb", frame) + if cv2.waitKey(1) == ord('q'): + break diff --git a/gen2-efficientnet-classification/sdk/classes.py b/gen2-efficientnet-classification/sdk/classes.py new file mode 100644 index 000000000..14ea0faae --- /dev/null +++ b/gen2-efficientnet-classification/sdk/classes.py @@ -0,0 +1,1003 @@ +def class_names(): + class_names = [ 'tench, Tinca tinca', + 'goldfish, Carassius auratus', + 'great white shark, white shark, man-eater, man-eating shark, Carcharodon carcharias', + 'tiger shark, Galeocerdo cuvieri', + 'hammerhead, hammerhead shark', + 'electric ray, crampfish, numbfish, torpedo', + 'stingray', + 'cock', + 'hen', + 'ostrich, Struthio camelus', + 'brambling, Fringilla montifringilla', + 'goldfinch, Carduelis carduelis', + 'house finch, linnet, Carpodacus mexicanus', + 'junco, snowbird', + 
'indigo bunting, indigo finch, indigo bird, Passerina cyanea', + 'robin, American robin, Turdus migratorius', + 'bulbul', + 'jay', + 'magpie', + 'chickadee', + 'water ouzel, dipper', + 'kite', + 'bald eagle, American eagle, Haliaeetus leucocephalus', + 'vulture', + 'great grey owl, great gray owl, Strix nebulosa', + 'European fire salamander, Salamandra salamandra', + 'common newt, Triturus vulgaris', + 'eft', + 'spotted salamander, Ambystoma maculatum', + 'axolotl, mud puppy, Ambystoma mexicanum', + 'bullfrog, Rana catesbeiana', + 'tree frog, tree-frog', + 'tailed frog, bell toad, ribbed toad, tailed toad, Ascaphus trui', + 'loggerhead, loggerhead turtle, Caretta caretta', + 'leatherback turtle, leatherback, leathery turtle, Dermochelys coriacea', + 'mud turtle', + 'terrapin', + 'box turtle, box tortoise', + 'banded gecko', + 'common iguana, iguana, Iguana iguana', + 'American chameleon, anole, Anolis carolinensis', + 'whiptail, whiptail lizard', + 'agama', + 'frilled lizard, Chlamydosaurus kingi', + 'alligator lizard', + 'Gila monster, Heloderma suspectum', + 'green lizard, Lacerta viridis', + 'African chameleon, Chamaeleo chamaeleon', + 'Komodo dragon, Komodo lizard, dragon lizard, giant lizard, Varanus komodoensis', + 'African crocodile, Nile crocodile, Crocodylus niloticus', + 'American alligator, Alligator mississipiensis', + 'triceratops', + 'thunder snake, worm snake, Carphophis amoenus', + 'ringneck snake, ring-necked snake, ring snake', + 'hognose snake, puff adder, sand viper', + 'green snake, grass snake', + 'king snake, kingsnake', + 'garter snake, grass snake', + 'water snake', + 'vine snake', + 'night snake, Hypsiglena torquata', + 'boa constrictor, Constrictor constrictor', + 'rock python, rock snake, Python sebae', + 'Indian cobra, Naja naja', + 'green mamba', + 'sea snake', + 'horned viper, cerastes, sand viper, horned asp, Cerastes cornutus', + 'diamondback, diamondback rattlesnake, Crotalus adamanteus', + 'sidewinder, horned rattlesnake, Crotalus cerastes', + 'trilobite', + 'harvestman, daddy longlegs, Phalangium opilio', + 'scorpion', + 'black and gold garden spider, Argiope aurantia', + 'barn spider, Araneus cavaticus', + 'garden spider, Aranea diademata', + 'black widow, Latrodectus mactans', + 'tarantula', + 'wolf spider, hunting spider', + 'tick', + 'centipede', + 'black grouse', + 'ptarmigan', + 'ruffed grouse, partridge, Bonasa umbellus', + 'prairie chicken, prairie grouse, prairie fowl', + 'peacock', + 'quail', + 'partridge', + 'African grey, African gray, Psittacus erithacus', + 'macaw', + 'sulphur-crested cockatoo, Kakatoe galerita, Cacatua galerita', + 'lorikeet', + 'coucal', + 'bee eater', + 'hornbill', + 'hummingbird', + 'jacamar', + 'toucan', + 'drake', + 'red-breasted merganser, Mergus serrator', + 'goose', + 'black swan, Cygnus atratus', + 'tusker', + 'echidna, spiny anteater, anteater', + 'platypus, duckbill, duckbilled platypus, duck-billed platypus, Ornithorhynchus anatinus', + 'wallaby, brush kangaroo', + 'koala, koala bear, kangaroo bear, native bear, Phascolarctos cinereus', + 'wombat', + 'jellyfish', + 'sea anemone, anemone', + 'brain coral', + 'flatworm, platyhelminth', + 'nematode, nematode worm, roundworm', + 'conch', + 'snail', + 'slug', + 'sea slug, nudibranch', + 'chiton, coat-of-mail shell, sea cradle, polyplacophore', + 'chambered nautilus, pearly nautilus, nautilus', + 'Dungeness crab, Cancer magister', + 'rock crab, Cancer irroratus', + 'fiddler crab', + 'king crab, Alaska crab, Alaskan king crab, Alaska king crab, Paralithodes 
camtschatica', + 'American lobster, Northern lobster, Maine lobster, Homarus americanus', + 'spiny lobster, langouste, rock lobster, crawfish, crayfish, sea crawfish', + 'crayfish, crawfish, crawdad, crawdaddy', + 'hermit crab', + 'isopod', + 'white stork, Ciconia ciconia', + 'black stork, Ciconia nigra', + 'spoonbill', + 'flamingo', + 'little blue heron, Egretta caerulea', + 'American egret, great white heron, Egretta albus', + 'bittern', + 'crane', + 'limpkin, Aramus pictus', + 'European gallinule, Porphyrio porphyrio', + 'American coot, marsh hen, mud hen, water hen, Fulica americana', + 'bustard', + 'ruddy turnstone, Arenaria interpres', + 'red-backed sandpiper, dunlin, Erolia alpina', + 'redshank, Tringa totanus', + 'dowitcher', + 'oystercatcher, oyster catcher', + 'pelican', + 'king penguin, Aptenodytes patagonica', + 'albatross, mollymawk', + 'grey whale, gray whale, devilfish, Eschrichtius gibbosus, Eschrichtius robustus', + 'killer whale, killer, orca, grampus, sea wolf, Orcinus orca', + 'dugong, Dugong dugon', + 'sea lion', + 'Chihuahua', + 'Japanese spaniel', + 'Maltese dog, Maltese terrier, Maltese', + 'Pekinese, Pekingese, Peke', + 'Shih-Tzu', + 'Blenheim spaniel', + 'papillon', + 'toy terrier', + 'Rhodesian ridgeback', + 'Afghan hound, Afghan', + 'basset, basset hound', + 'beagle', + 'bloodhound, sleuthhound', + 'bluetick', + 'black-and-tan coonhound', + 'Walker hound, Walker foxhound', + 'English foxhound', + 'redbone', + 'borzoi, Russian wolfhound', + 'Irish wolfhound', + 'Italian greyhound', + 'whippet', + 'Ibizan hound, Ibizan Podenco', + 'Norwegian elkhound, elkhound', + 'otterhound, otter hound', + 'Saluki, gazelle hound', + 'Scottish deerhound, deerhound', + 'Weimaraner', + 'Staffordshire bullterrier, Staffordshire bull terrier', + 'American Staffordshire terrier, Staffordshire terrier, American pit bull terrier, pit bull terrier', + 'Bedlington terrier', + 'Border terrier', + 'Kerry blue terrier', + 'Irish terrier', + 'Norfolk terrier', + 'Norwich terrier', + 'Yorkshire terrier', + 'wire-haired fox terrier', + 'Lakeland terrier', + 'Sealyham terrier, Sealyham', + 'Airedale, Airedale terrier', + 'cairn, cairn terrier', + 'Australian terrier', + 'Dandie Dinmont, Dandie Dinmont terrier', + 'Boston bull, Boston terrier', + 'miniature schnauzer', + 'giant schnauzer', + 'standard schnauzer', + 'Scotch terrier, Scottish terrier, Scottie', + 'Tibetan terrier, chrysanthemum dog', + 'silky terrier, Sydney silky', + 'soft-coated wheaten terrier', + 'West Highland white terrier', + 'Lhasa, Lhasa apso', + 'flat-coated retriever', + 'curly-coated retriever', + 'golden retriever', + 'Labrador retriever', + 'Chesapeake Bay retriever', + 'German short-haired pointer', + 'vizsla, Hungarian pointer', + 'English setter', + 'Irish setter, red setter', + 'Gordon setter', + 'Brittany spaniel', + 'clumber, clumber spaniel', + 'English springer, English springer spaniel', + 'Welsh springer spaniel', + 'cocker spaniel, English cocker spaniel, cocker', + 'Sussex spaniel', + 'Irish water spaniel', + 'kuvasz', + 'schipperke', + 'groenendael', + 'malinois', + 'briard', + 'kelpie', + 'komondor', + 'Old English sheepdog, bobtail', + 'Shetland sheepdog, Shetland sheep dog, Shetland', + 'collie', + 'Border collie', + 'Bouvier des Flandres, Bouviers des Flandres', + 'Rottweiler', + 'German shepherd, German shepherd dog, German police dog, alsatian', + 'Doberman, Doberman pinscher', + 'miniature pinscher', + 'Greater Swiss Mountain dog', + 'Bernese mountain dog', + 'Appenzeller', + 'EntleBucher', + 
'boxer', + 'bull mastiff', + 'Tibetan mastiff', + 'French bulldog', + 'Great Dane', + 'Saint Bernard, St Bernard', + 'Eskimo dog, husky', + 'malamute, malemute, Alaskan malamute', + 'Siberian husky', + 'dalmatian, coach dog, carriage dog', + 'affenpinscher, monkey pinscher, monkey dog', + 'basenji', + 'pug, pug-dog', + 'Leonberg', + 'Newfoundland, Newfoundland dog', + 'Great Pyrenees', + 'Samoyed, Samoyede', + 'Pomeranian', + 'chow, chow chow', + 'keeshond', + 'Brabancon griffon', + 'Pembroke, Pembroke Welsh corgi', + 'Cardigan, Cardigan Welsh corgi', + 'toy poodle', + 'miniature poodle', + 'standard poodle', + 'Mexican hairless', + 'timber wolf, grey wolf, gray wolf, Canis lupus', + 'white wolf, Arctic wolf, Canis lupus tundrarum', + 'red wolf, maned wolf, Canis rufus, Canis niger', + 'coyote, prairie wolf, brush wolf, Canis latrans', + 'dingo, warrigal, warragal, Canis dingo', + 'dhole, Cuon alpinus', + 'African hunting dog, hyena dog, Cape hunting dog, Lycaon pictus', + 'hyena, hyaena', + 'red fox, Vulpes vulpes', + 'kit fox, Vulpes macrotis', + 'Arctic fox, white fox, Alopex lagopus', + 'grey fox, gray fox, Urocyon cinereoargenteus', + 'tabby, tabby cat', + 'tiger cat', + 'Persian cat', + 'Siamese cat, Siamese', + 'Egyptian cat', + 'cougar, puma, catamount, mountain lion, painter, panther, Felis concolor', + 'lynx, catamount', + 'leopard, Panthera pardus', + 'snow leopard, ounce, Panthera uncia', + 'jaguar, panther, Panthera onca, Felis onca', + 'lion, king of beasts, Panthera leo', + 'tiger, Panthera tigris', + 'cheetah, chetah, Acinonyx jubatus', + 'brown bear, bruin, Ursus arctos', + 'American black bear, black bear, Ursus americanus, Euarctos americanus', + 'ice bear, polar bear, Ursus Maritimus, Thalarctos maritimus', + 'sloth bear, Melursus ursinus, Ursus ursinus', + 'mongoose', + 'meerkat, mierkat', + 'tiger beetle', + 'ladybug, ladybeetle, lady beetle, ladybird, ladybird beetle', + 'ground beetle, carabid beetle', + 'long-horned beetle, longicorn, longicorn beetle', + 'leaf beetle, chrysomelid', + 'dung beetle', + 'rhinoceros beetle', + 'weevil', + 'fly', + 'bee', + 'ant, emmet, pismire', + 'grasshopper, hopper', + 'cricket', + 'walking stick, walkingstick, stick insect', + 'cockroach, roach', + 'mantis, mantid', + 'cicada, cicala', + 'leafhopper', + 'lacewing, lacewing fly', + "dragonfly, darning needle, devil's darning needle, sewing needle, snake feeder, snake doctor, mosquito hawk, skeeter hawk", + 'damselfly', + 'admiral', + 'ringlet, ringlet butterfly', + 'monarch, monarch butterfly, milkweed butterfly, Danaus plexippus', + 'cabbage butterfly', + 'sulphur butterfly, sulfur butterfly', + 'lycaenid, lycaenid butterfly', + 'starfish, sea star', + 'sea urchin', + 'sea cucumber, holothurian', + 'wood rabbit, cottontail, cottontail rabbit', + 'hare', + 'Angora, Angora rabbit', + 'hamster', + 'porcupine, hedgehog', + 'fox squirrel, eastern fox squirrel, Sciurus niger', + 'marmot', + 'beaver', + 'guinea pig, Cavia cobaya', + 'sorrel', + 'zebra', + 'hog, pig, grunter, squealer, Sus scrofa', + 'wild boar, boar, Sus scrofa', + 'warthog', + 'hippopotamus, hippo, river horse, Hippopotamus amphibius', + 'ox', + 'water buffalo, water ox, Asiatic buffalo, Bubalus bubalis', + 'bison', + 'ram, tup', + 'bighorn, bighorn sheep, cimarron, Rocky Mountain bighorn, Rocky Mountain sheep, Ovis canadensis', + 'ibex, Capra ibex', + 'hartebeest', + 'impala, Aepyceros melampus', + 'gazelle', + 'Arabian camel, dromedary, Camelus dromedarius', + 'llama', + 'weasel', + 'mink', + 'polecat, fitch, 
foulmart, foumart, Mustela putorius', + 'black-footed ferret, ferret, Mustela nigripes', + 'otter', + 'skunk, polecat, wood pussy', + 'badger', + 'armadillo', + 'three-toed sloth, ai, Bradypus tridactylus', + 'orangutan, orang, orangutang, Pongo pygmaeus', + 'gorilla, Gorilla gorilla', + 'chimpanzee, chimp, Pan troglodytes', + 'gibbon, Hylobates lar', + 'siamang, Hylobates syndactylus, Symphalangus syndactylus', + 'guenon, guenon monkey', + 'patas, hussar monkey, Erythrocebus patas', + 'baboon', + 'macaque', + 'langur', + 'colobus, colobus monkey', + 'proboscis monkey, Nasalis larvatus', + 'marmoset', + 'capuchin, ringtail, Cebus capucinus', + 'howler monkey, howler', + 'titi, titi monkey', + 'spider monkey, Ateles geoffroyi', + 'squirrel monkey, Saimiri sciureus', + 'Madagascar cat, ring-tailed lemur, Lemur catta', + 'indri, indris, Indri indri, Indri brevicaudatus', + 'Indian elephant, Elephas maximus', + 'African elephant, Loxodonta africana', + 'lesser panda, red panda, panda, bear cat, cat bear, Ailurus fulgens', + 'giant panda, panda, panda bear, coon bear, Ailuropoda melanoleuca', + 'barracouta, snoek', + 'eel', + 'coho, cohoe, coho salmon, blue jack, silver salmon, Oncorhynchus kisutch', + 'rock beauty, Holocanthus tricolor', + 'anemone fish', + 'sturgeon', + 'gar, garfish, garpike, billfish, Lepisosteus osseus', + 'lionfish', + 'puffer, pufferfish, blowfish, globefish', + 'abacus', + 'abaya', + "academic gown, academic robe, judge's robe", + 'accordion, piano accordion, squeeze box', + 'acoustic guitar', + 'aircraft carrier, carrier, flattop, attack aircraft carrier', + 'airliner', + 'airship, dirigible', + 'altar', + 'ambulance', + 'amphibian, amphibious vehicle', + 'analog clock', + 'apiary, bee house', + 'apron', + 'ashcan, trash can, garbage can, wastebin, ash bin, ash-bin, ashbin, dustbin, trash barrel, trash bin', + 'assault rifle, assault gun', + 'backpack, back pack, knapsack, packsack, rucksack, haversack', + 'bakery, bakeshop, bakehouse', + 'balance beam, beam', + 'balloon', + 'ballpoint, ballpoint pen, ballpen, Biro', + 'Band Aid', + 'banjo', + 'bannister, banister, balustrade, balusters, handrail', + 'barbell', + 'barber chair', + 'barbershop', + 'barn', + 'barometer', + 'barrel, cask', + 'barrow, garden cart, lawn cart, wheelbarrow', + 'baseball', + 'basketball', + 'bassinet', + 'bassoon', + 'bathing cap, swimming cap', + 'bath towel', + 'bathtub, bathing tub, bath, tub', + 'beach wagon, station wagon, wagon, estate car, beach waggon, station waggon, waggon', + 'beacon, lighthouse, beacon light, pharos', + 'beaker', + 'bearskin, busby, shako', + 'beer bottle', + 'beer glass', + 'bell cote, bell cot', + 'bib', + 'bicycle-built-for-two, tandem bicycle, tandem', + 'bikini, two-piece', + 'binder, ring-binder', + 'binoculars, field glasses, opera glasses', + 'birdhouse', + 'boathouse', + 'bobsled, bobsleigh, bob', + 'bolo tie, bolo, bola tie, bola', + 'bonnet, poke bonnet', + 'bookcase', + 'bookshop, bookstore, bookstall', + 'bottlecap', + 'bow', + 'bow tie, bow-tie, bowtie', + 'brass, memorial tablet, plaque', + 'brassiere, bra, bandeau', + 'breakwater, groin, groyne, mole, bulwark, seawall, jetty', + 'breastplate, aegis, egis', + 'broom', + 'bucket, pail', + 'buckle', + 'bulletproof vest', + 'bullet train, bullet', + 'butcher shop, meat market', + 'cab, hack, taxi, taxicab', + 'caldron, cauldron', + 'candle, taper, wax light', + 'cannon', + 'canoe', + 'can opener, tin opener', + 'cardigan', + 'car mirror', + 'carousel, carrousel, merry-go-round, roundabout, whirligig', + 
"carpenter's kit, tool kit", + 'carton', + 'car wheel', + 'cash machine, cash dispenser, automated teller machine, automatic teller machine, automated teller, automatic teller, ATM', + 'cassette', + 'cassette player', + 'castle', + 'catamaran', + 'CD player', + 'cello, violoncello', + 'cellular telephone, cellular phone, cellphone, cell, mobile phone', + 'chain', + 'chainlink fence', + 'chain mail, ring mail, mail, chain armor, chain armour, ring armor, ring armour', + 'chain saw, chainsaw', + 'chest', + 'chiffonier, commode', + 'chime, bell, gong', + 'china cabinet, china closet', + 'Christmas stocking', + 'church, church building', + 'cinema, movie theater, movie theatre, movie house, picture palace', + 'cleaver, meat cleaver, chopper', + 'cliff dwelling', + 'cloak', + 'clog, geta, patten, sabot', + 'cocktail shaker', + 'coffee mug', + 'coffeepot', + 'coil, spiral, volute, whorl, helix', + 'combination lock', + 'computer keyboard, keypad', + 'confectionery, confectionary, candy store', + 'container ship, containership, container vessel', + 'convertible', + 'corkscrew, bottle screw', + 'cornet, horn, trumpet, trump', + 'cowboy boot', + 'cowboy hat, ten-gallon hat', + 'cradle', + 'crane', + 'crash helmet', + 'crate', + 'crib, cot', + 'Crock Pot', + 'croquet ball', + 'crutch', + 'cuirass', + 'dam, dike, dyke', + 'desk', + 'desktop computer', + 'dial telephone, dial phone', + 'diaper, nappy, napkin', + 'digital clock', + 'digital watch', + 'dining table, board', + 'dishrag, dishcloth', + 'dishwasher, dish washer, dishwashing machine', + 'disk brake, disc brake', + 'dock, dockage, docking facility', + 'dogsled, dog sled, dog sleigh', + 'dome', + 'doormat, welcome mat', + 'drilling platform, offshore rig', + 'drum, membranophone, tympan', + 'drumstick', + 'dumbbell', + 'Dutch oven', + 'electric fan, blower', + 'electric guitar', + 'electric locomotive', + 'entertainment center', + 'envelope', + 'espresso maker', + 'face powder', + 'feather boa, boa', + 'file, file cabinet, filing cabinet', + 'fireboat', + 'fire engine, fire truck', + 'fire screen, fireguard', + 'flagpole, flagstaff', + 'flute, transverse flute', + 'folding chair', + 'football helmet', + 'forklift', + 'fountain', + 'fountain pen', + 'four-poster', + 'freight car', + 'French horn, horn', + 'frying pan, frypan, skillet', + 'fur coat', + 'garbage truck, dustcart', + 'gasmask, respirator, gas helmet', + 'gas pump, gasoline pump, petrol pump, island dispenser', + 'goblet', + 'go-kart', + 'golf ball', + 'golfcart, golf cart', + 'gondola', + 'gong, tam-tam', + 'gown', + 'grand piano, grand', + 'greenhouse, nursery, glasshouse', + 'grille, radiator grille', + 'grocery store, grocery, food market, market', + 'guillotine', + 'hair slide', + 'hair spray', + 'half track', + 'hammer', + 'hamper', + 'hand blower, blow dryer, blow drier, hair dryer, hair drier', + 'hand-held computer, hand-held microcomputer', + 'handkerchief, hankie, hanky, hankey', + 'hard disc, hard disk, fixed disk', + 'harmonica, mouth organ, harp, mouth harp', + 'harp', + 'harvester, reaper', + 'hatchet', + 'holster', + 'home theater, home theatre', + 'honeycomb', + 'hook, claw', + 'hoopskirt, crinoline', + 'horizontal bar, high bar', + 'horse cart, horse-cart', + 'hourglass', + 'iPod', + 'iron, smoothing iron', + "jack-o'-lantern", + 'jean, blue jean, denim', + 'jeep, landrover', + 'jersey, T-shirt, tee shirt', + 'jigsaw puzzle', + 'jinrikisha, ricksha, rickshaw', + 'joystick', + 'kimono', + 'knee pad', + 'knot', + 'lab coat, laboratory coat', + 'ladle', + 'lampshade, 
lamp shade', + 'laptop, laptop computer', + 'lawn mower, mower', + 'lens cap, lens cover', + 'letter opener, paper knife, paperknife', + 'library', + 'lifeboat', + 'lighter, light, igniter, ignitor', + 'limousine, limo', + 'liner, ocean liner', + 'lipstick, lip rouge', + 'Loafer', + 'lotion', + 'loudspeaker, speaker, speaker unit, loudspeaker system, speaker system', + "loupe, jeweler's loupe", + 'lumbermill, sawmill', + 'magnetic compass', + 'mailbag, postbag', + 'mailbox, letter box', + 'maillot', + 'maillot, tank suit', + 'manhole cover', + 'maraca', + 'marimba, xylophone', + 'mask', + 'matchstick', + 'maypole', + 'maze, labyrinth', + 'measuring cup', + 'medicine chest, medicine cabinet', + 'megalith, megalithic structure', + 'microphone, mike', + 'microwave, microwave oven', + 'military uniform', + 'milk can', + 'minibus', + 'miniskirt, mini', + 'minivan', + 'missile', + 'mitten', + 'mixing bowl', + 'mobile home, manufactured home', + 'Model T', + 'modem', + 'monastery', + 'monitor', + 'moped', + 'mortar', + 'mortarboard', + 'mosque', + 'mosquito net', + 'motor scooter, scooter', + 'mountain bike, all-terrain bike, off-roader', + 'mountain tent', + 'mouse, computer mouse', + 'mousetrap', + 'moving van', + 'muzzle', + 'nail', + 'neck brace', + 'necklace', + 'nipple', + 'notebook, notebook computer', + 'obelisk', + 'oboe, hautboy, hautbois', + 'ocarina, sweet potato', + 'odometer, hodometer, mileometer, milometer', + 'oil filter', + 'organ, pipe organ', + 'oscilloscope, scope, cathode-ray oscilloscope, CRO', + 'overskirt', + 'oxcart', + 'oxygen mask', + 'packet', + 'paddle, boat paddle', + 'paddlewheel, paddle wheel', + 'padlock', + 'paintbrush', + "pajama, pyjama, pj's, jammies", + 'palace', + 'panpipe, pandean pipe, syrinx', + 'paper towel', + 'parachute, chute', + 'parallel bars, bars', + 'park bench', + 'parking meter', + 'passenger car, coach, carriage', + 'patio, terrace', + 'pay-phone, pay-station', + 'pedestal, plinth, footstall', + 'pencil box, pencil case', + 'pencil sharpener', + 'perfume, essence', + 'Petri dish', + 'photocopier', + 'pick, plectrum, plectron', + 'pickelhaube', + 'picket fence, paling', + 'pickup, pickup truck', + 'pier', + 'piggy bank, penny bank', + 'pill bottle', + 'pillow', + 'ping-pong ball', + 'pinwheel', + 'pirate, pirate ship', + 'pitcher, ewer', + "plane, carpenter's plane, woodworking plane", + 'planetarium', + 'plastic bag', + 'plate rack', + 'plow, plough', + "plunger, plumber's helper", + 'Polaroid camera, Polaroid Land camera', + 'pole', + 'police van, police wagon, paddy wagon, patrol wagon, wagon, black Maria', + 'poncho', + 'pool table, billiard table, snooker table', + 'pop bottle, soda bottle', + 'pot, flowerpot', + "potter's wheel", + 'power drill', + 'prayer rug, prayer mat', + 'printer', + 'prison, prison house', + 'projectile, missile', + 'projector', + 'puck, hockey puck', + 'punching bag, punch bag, punching ball, punchball', + 'purse', + 'quill, quill pen', + 'quilt, comforter, comfort, puff', + 'racer, race car, racing car', + 'racket, racquet', + 'radiator', + 'radio, wireless', + 'radio telescope, radio reflector', + 'rain barrel', + 'recreational vehicle, RV, R.V.', + 'reel', + 'reflex camera', + 'refrigerator, icebox', + 'remote control, remote', + 'restaurant, eating house, eating place, eatery', + 'revolver, six-gun, six-shooter', + 'rifle', + 'rocking chair, rocker', + 'rotisserie', + 'rubber eraser, rubber, pencil eraser', + 'rugby ball', + 'rule, ruler', + 'running shoe', + 'safe', + 'safety pin', + 'saltshaker, salt 
shaker', + 'sandal', + 'sarong', + 'sax, saxophone', + 'scabbard', + 'scale, weighing machine', + 'school bus', + 'schooner', + 'scoreboard', + 'screen, CRT screen', + 'screw', + 'screwdriver', + 'seat belt, seatbelt', + 'sewing machine', + 'shield, buckler', + 'shoe shop, shoe-shop, shoe store', + 'shoji', + 'shopping basket', + 'shopping cart', + 'shovel', + 'shower cap', + 'shower curtain', + 'ski', + 'ski mask', + 'sleeping bag', + 'slide rule, slipstick', + 'sliding door', + 'slot, one-armed bandit', + 'snorkel', + 'snowmobile', + 'snowplow, snowplough', + 'soap dispenser', + 'soccer ball', + 'sock', + 'solar dish, solar collector, solar furnace', + 'sombrero', + 'soup bowl', + 'space bar', + 'space heater', + 'space shuttle', + 'spatula', + 'speedboat', + "spider web, spider's web", + 'spindle', + 'sports car, sport car', + 'spotlight, spot', + 'stage', + 'steam locomotive', + 'steel arch bridge', + 'steel drum', + 'stethoscope', + 'stole', + 'stone wall', + 'stopwatch, stop watch', + 'stove', + 'strainer', + 'streetcar, tram, tramcar, trolley, trolley car', + 'stretcher', + 'studio couch, day bed', + 'stupa, tope', + 'submarine, pigboat, sub, U-boat', + 'suit, suit of clothes', + 'sundial', + 'sunglass', + 'sunglasses, dark glasses, shades', + 'sunscreen, sunblock, sun blocker', + 'suspension bridge', + 'swab, swob, mop', + 'sweatshirt', + 'swimming trunks, bathing trunks', + 'swing', + 'switch, electric switch, electrical switch', + 'syringe', + 'table lamp', + 'tank, army tank, armored combat vehicle, armoured combat vehicle', + 'tape player', + 'teapot', + 'teddy, teddy bear', + 'television, television system', + 'tennis ball', + 'thatch, thatched roof', + 'theater curtain, theatre curtain', + 'thimble', + 'thresher, thrasher, threshing machine', + 'throne', + 'tile roof', + 'toaster', + 'tobacco shop, tobacconist shop, tobacconist', + 'toilet seat', + 'torch', + 'totem pole', + 'tow truck, tow car, wrecker', + 'toyshop', + 'tractor', + 'trailer truck, tractor trailer, trucking rig, rig, articulated lorry, semi', + 'tray', + 'trench coat', + 'tricycle, trike, velocipede', + 'trimaran', + 'tripod', + 'triumphal arch', + 'trolleybus, trolley coach, trackless trolley', + 'trombone', + 'tub, vat', + 'turnstile', + 'typewriter keyboard', + 'umbrella', + 'unicycle, monocycle', + 'upright, upright piano', + 'vacuum, vacuum cleaner', + 'vase', + 'vault', + 'velvet', + 'vending machine', + 'vestment', + 'viaduct', + 'violin, fiddle', + 'volleyball', + 'waffle iron', + 'wall clock', + 'wallet, billfold, notecase, pocketbook', + 'wardrobe, closet, press', + 'warplane, military plane', + 'washbasin, handbasin, washbowl, lavabo, wash-hand basin', + 'washer, automatic washer, washing machine', + 'water bottle', + 'water jug', + 'water tower', + 'whiskey jug', + 'whistle', + 'wig', + 'window screen', + 'window shade', + 'Windsor tie', + 'wine bottle', + 'wing', + 'wok', + 'wooden spoon', + 'wool, woolen, woollen', + 'worm fence, snake fence, snake-rail fence, Virginia fence', + 'wreck', + 'yawl', + 'yurt', + 'web site, website, internet site, site', + 'comic book', + 'crossword puzzle, crossword', + 'street sign', + 'traffic light, traffic signal, stoplight', + 'book jacket, dust cover, dust jacket, dust wrapper', + 'menu', + 'plate', + 'guacamole', + 'consomme', + 'hot pot, hotpot', + 'trifle', + 'ice cream, icecream', + 'ice lolly, lolly, lollipop, popsicle', + 'French loaf', + 'bagel, beigel', + 'pretzel', + 'cheeseburger', + 'hotdog, hot dog, red hot', + 'mashed potato', + 'head cabbage', + 
'broccoli', + 'cauliflower', + 'zucchini, courgette', + 'spaghetti squash', + 'acorn squash', + 'butternut squash', + 'cucumber, cuke', + 'artichoke, globe artichoke', + 'bell pepper', + 'cardoon', + 'mushroom', + 'Granny Smith', + 'strawberry', + 'orange', + 'lemon', + 'fig', + 'pineapple, ananas', + 'banana', + 'jackfruit, jak, jack', + 'custard apple', + 'pomegranate', + 'hay', + 'carbonara', + 'chocolate sauce, chocolate syrup', + 'dough', + 'meat loaf, meatloaf', + 'pizza, pizza pie', + 'potpie', + 'burrito', + 'red wine', + 'espresso', + 'cup', + 'eggnog', + 'alp', + 'bubble', + 'cliff, drop, drop-off', + 'coral reef', + 'geyser', + 'lakeside, lakeshore', + 'promontory, headland, head, foreland', + 'sandbar, sand bar', + 'seashore, coast, seacoast, sea-coast', + 'valley, vale', + 'volcano', + 'ballplayer, baseball player', + 'groom, bridegroom', + 'scuba diver', + 'rapeseed', + 'daisy', + "yellow lady's slipper, yellow lady-slipper, Cypripedium calceolus, Cypripedium parviflorum", + 'corn', + 'acorn', + 'hip, rose hip, rosehip', + 'buckeye, horse chestnut, conker', + 'coral fungus', + 'agaric', + 'gyromitra', + 'stinkhorn, carrion fungus', + 'earthstar', + 'hen-of-the-woods, hen of the woods, Polyporus frondosus, Grifola frondosa', + 'bolete', + 'ear, spike, capitulum', + 'toilet tissue, toilet paper, bathroom tissue'] + return class_names + diff --git a/gen2-efficientnet-classification/sdk/main.py b/gen2-efficientnet-classification/sdk/main.py new file mode 100644 index 000000000..d74971d01 --- /dev/null +++ b/gen2-efficientnet-classification/sdk/main.py @@ -0,0 +1,38 @@ +import cv2 +import numpy as np + +from classes import class_names +from depthai_sdk import OakCamera + +labels = class_names() + + +def softmax(x): + """Compute softmax values for each sets of scores in x.""" + e_x = np.exp(x - np.max(x)) + return e_x / e_x.sum(axis=0) + + +def callback(packet, visualizer): + nn_data = packet.img_detections + + data = softmax(nn_data.getFirstLayerFp16()) + result_conf = np.max(data) + + result = { + "name": labels[np.argmax(data)], + "conf": round(100 * result_conf, 2) + } + cv2.putText(packet.frame, f"{result['name']} {result['conf']:.2f}%", + (5, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1) + + cv2.imshow('EfficientNet-b0 classification', packet.frame) + + +with OakCamera(replay='../animals.mp4') as oak: + color = oak.create_camera('color') + + nn = oak.create_nn('efficientnet-b0', color) + + oak.callback(nn.out.main, callback) + oak.start(blocking=True) diff --git a/gen2-efficientnet-classification/sdk/requirements.txt b/gen2-efficientnet-classification/sdk/requirements.txt new file mode 100644 index 000000000..bb652a32f --- /dev/null +++ b/gen2-efficientnet-classification/sdk/requirements.txt @@ -0,0 +1,5 @@ +opencv-python +numpy +depthai +depthai-sdk +blobconverter \ No newline at end of file diff --git a/gen2-emotion-recognition/MultiMsgSync.py b/gen2-emotion-recognition/MultiMsgSync.py deleted file mode 100644 index c75fe307b..000000000 --- a/gen2-emotion-recognition/MultiMsgSync.py +++ /dev/null @@ -1,56 +0,0 @@ -# Color frames (ImgFrame), object detection (ImgDetections) and recognition (NNData) -# messages arrive to the host all with some additional delay. -# For each ImgFrame there's one ImgDetections msg, which has multiple detections, and for each -# detection there's a NNData msg which contains recognition results. - -# How it works: -# Every ImgFrame, ImgDetections and NNData message has it's own sequence number, by which we can sync messages. 
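Note: the MultiMsgSync.py helper deleted here synced color frames, detections and recognition results by sequence number on the host. In the SDK rewrite that bookkeeping is dropped: passing the detection network as the input of the second-stage network lets the SDK crop each detection and pair the results with their frame itself. A condensed sketch of that replacement, taken from the new gen2-emotion-recognition/main.py further down in this diff:

    from depthai_sdk import OakCamera, AspectRatioResizeMode

    with OakCamera() as oak:
        color = oak.create_camera('color')
        det = oak.create_nn('face-detection-retail-0004', color)
        det.config_nn(aspect_ratio_resize_mode=AspectRatioResizeMode.CROP)  # CROP is required for 2-stage pipelines
        # The detector is the input of the recognition NN, so the SDK crops each
        # detected face and delivers frame, detections and recognitions together
        emotions = oak.create_nn('emotions-recognition-retail-0003', input=det)
        oak.visualize(emotions, fps=True)
        oak.start(blocking=True)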
- -class TwoStageHostSeqSync: - def __init__(self): - self.msgs = {} - # name: color, detection, or recognition - def add_msg(self, msg, name): - seq = str(msg.getSequenceNum()) - if seq not in self.msgs: - self.msgs[seq] = {} # Create directory for msgs - if "recognition" not in self.msgs[seq]: - self.msgs[seq]["recognition"] = [] # Create recognition array - - if name == "recognition": - # Append recognition msgs to an array - self.msgs[seq]["recognition"].append(msg) - # print(f'Added recognition seq {seq}, total len {len(self.msgs[seq]["recognition"])}') - - elif name == "detection": - # Save detection msg in the directory - self.msgs[seq][name] = msg - self.msgs[seq]["len"] = len(msg.detections) - # print(f'Added detection seq {seq}') - - elif name == "color": # color - # Save color frame in the directory - self.msgs[seq][name] = msg - # print(f'Added frame seq {seq}') - - - def get_msgs(self): - seq_remove = [] # Arr of sequence numbers to get deleted - - for seq, msgs in self.msgs.items(): - seq_remove.append(seq) # Will get removed from dict if we find synced msgs pair - - # Check if we have both detections and color frame with this sequence number - if "color" in msgs and "len" in msgs: - - # Check if all detected objects (faces) have finished recognition inference - if msgs["len"] == len(msgs["recognition"]): - # print(f"Synced msgs with sequence number {seq}", msgs) - - # We have synced msgs, remove previous msgs (memory cleaning) - for rm in seq_remove: - del self.msgs[rm] - - return msgs # Returned synced msgs - - return None # No synced msgs \ No newline at end of file diff --git a/gen2-emotion-recognition/main.py b/gen2-emotion-recognition/main.py index 798856f99..f3b739a8a 100644 --- a/gen2-emotion-recognition/main.py +++ b/gen2-emotion-recognition/main.py @@ -1,218 +1,35 @@ -from MultiMsgSync import TwoStageHostSeqSync -import blobconverter -import cv2 -import depthai as dai +from depthai_sdk import OakCamera, TwoStagePacket, AspectRatioResizeMode, TextPosition import numpy as np - -def frame_norm(frame, bbox): - normVals = np.full(len(bbox), frame.shape[0]) - normVals[::2] = frame.shape[1] - return (np.clip(np.array(bbox), 0, 1) * normVals).astype(int) +import cv2 emotions = ['neutral', 'happy', 'sad', 'surprise', 'anger'] -def create_pipeline(stereo): - pipeline = dai.Pipeline() - - print("Creating Color Camera...") - cam = pipeline.create(dai.node.ColorCamera) - cam.setPreviewSize(1080, 1080) - cam.setResolution(dai.ColorCameraProperties.SensorResolution.THE_1080_P) - cam.setInterleaved(False) - cam.setBoardSocket(dai.CameraBoardSocket.RGB) - - cam_xout = pipeline.create(dai.node.XLinkOut) - cam_xout.setStreamName("color") - cam.preview.link(cam_xout.input) - - # Workaround: remove in 2.18, use `cam.setPreviewNumFramesPool(10)` - # This manip uses 15*3.5 MB => 52 MB of RAM. 
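Note on the memory figure in the comment above: the 1080x1080 non-interleaved preview frame is 1080 * 1080 * 3 bytes = 3,499,200 bytes (exactly the value passed to setMaxOutputFrameSize below), roughly 3.5 MB per frame, so the 15-frame pool accounts for about 15 * 3.5 MB, i.e. the quoted 52 MB of RAM.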
- copy_manip = pipeline.create(dai.node.ImageManip) - copy_manip.setNumFramesPool(15) - copy_manip.setMaxOutputFrameSize(3499200) - cam.preview.link(copy_manip.inputImage) - - # ImageManip that will crop the frame before sending it to the Face detection NN node - face_det_manip = pipeline.create(dai.node.ImageManip) - face_det_manip.initialConfig.setResize(300, 300) - face_det_manip.initialConfig.setFrameType(dai.RawImgFrame.Type.RGB888p) - copy_manip.out.link(face_det_manip.inputImage) - - if stereo: - monoLeft = pipeline.create(dai.node.MonoCamera) - monoLeft.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P) - monoLeft.setBoardSocket(dai.CameraBoardSocket.LEFT) - - monoRight = pipeline.create(dai.node.MonoCamera) - monoRight.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P) - monoRight.setBoardSocket(dai.CameraBoardSocket.RIGHT) - - stereo = pipeline.create(dai.node.StereoDepth) - stereo.setDefaultProfilePreset(dai.node.StereoDepth.PresetMode.HIGH_DENSITY) - stereo.setDepthAlign(dai.CameraBoardSocket.RGB) - monoLeft.out.link(stereo.left) - monoRight.out.link(stereo.right) - - # Spatial Detection network if OAK-D - print("OAK-D detected, app will display spatial coordiantes") - face_det_nn = pipeline.create(dai.node.MobileNetSpatialDetectionNetwork) - face_det_nn.setBoundingBoxScaleFactor(0.8) - face_det_nn.setDepthLowerThreshold(100) - face_det_nn.setDepthUpperThreshold(5000) - stereo.depth.link(face_det_nn.inputDepth) - else: # Detection network if OAK-1 - print("OAK-1 detected, app won't display spatial coordiantes") - face_det_nn = pipeline.create(dai.node.MobileNetDetectionNetwork) - - face_det_nn.setConfidenceThreshold(0.5) - face_det_nn.setBlobPath(blobconverter.from_zoo(name="face-detection-retail-0004", shaves=6)) - face_det_manip.out.link(face_det_nn.input) - - # Send face detections to the host (for bounding boxes) - face_det_xout = pipeline.create(dai.node.XLinkOut) - face_det_xout.setStreamName("detection") - face_det_nn.out.link(face_det_xout.input) - - # Script node will take the output from the face detection NN as an input and set ImageManipConfig - # to the 'age_gender_manip' to crop the initial frame - image_manip_script = pipeline.create(dai.node.Script) - face_det_nn.out.link(image_manip_script.inputs['face_det_in']) - - # Only send metadata, we are only interested in timestamp, so we can sync - # depth frames with NN output - face_det_nn.passthrough.link(image_manip_script.inputs['passthrough']) - copy_manip.out.link(image_manip_script.inputs['preview']) - - image_manip_script.setScript(""" - import time - msgs = dict() - - def add_msg(msg, name, seq = None): - global msgs - if seq is None: - seq = msg.getSequenceNum() - seq = str(seq) - # node.warn(f"New msg {name}, seq {seq}") - - # Each seq number has it's own dict of msgs - if seq not in msgs: - msgs[seq] = dict() - msgs[seq][name] = msg - - # To avoid freezing (not necessary for this ObjDet model) - if 15 < len(msgs): - node.warn(f"Removing first element! len {len(msgs)}") - msgs.popitem() # Remove first element - - def get_msgs(): - global msgs - seq_remove = [] # Arr of sequence numbers to get deleted - for seq, syncMsgs in msgs.items(): - seq_remove.append(seq) # Will get removed from dict if we find synced msgs pair - # node.warn(f"Checking sync {seq}") - - # Check if we have both detections and color frame with this sequence number - if len(syncMsgs) == 2: # 1 frame, 1 detection - for rm in seq_remove: - del msgs[rm] - # node.warn(f"synced {seq}. Removed older sync values. 
len {len(msgs)}") - return syncMsgs # Returned synced msgs - return None - - def correct_bb(bb): - if bb.xmin < 0: bb.xmin = 0.001 - if bb.ymin < 0: bb.ymin = 0.001 - if bb.xmax > 1: bb.xmax = 0.999 - if bb.ymax > 1: bb.ymax = 0.999 - return bb - - while True: - time.sleep(0.001) # Avoid lazy looping - - preview = node.io['preview'].tryGet() - if preview is not None: - add_msg(preview, 'preview') - - face_dets = node.io['face_det_in'].tryGet() - if face_dets is not None: - # TODO: in 2.18.0.0 use face_dets.getSequenceNum() - passthrough = node.io['passthrough'].get() - seq = passthrough.getSequenceNum() - add_msg(face_dets, 'dets', seq) - - sync_msgs = get_msgs() - if sync_msgs is not None: - img = sync_msgs['preview'] - dets = sync_msgs['dets'] - for i, det in enumerate(dets.detections): - cfg = ImageManipConfig() - correct_bb(det) - cfg.setCropRect(det.xmin, det.ymin, det.xmax, det.ymax) - # node.warn(f"Sending {i + 1}. age/gender det. Seq {seq}. Det {det.xmin}, {det.ymin}, {det.xmax}, {det.ymax}") - cfg.setResize(64, 64) - cfg.setKeepAspectRatio(False) - node.io['manip_cfg'].send(cfg) - node.io['manip_img'].send(img) - """) - - manip_manip = pipeline.create(dai.node.ImageManip) - manip_manip.initialConfig.setResize(64, 64) - manip_manip.setWaitForConfigInput(True) - image_manip_script.outputs['manip_cfg'].link(manip_manip.inputConfig) - image_manip_script.outputs['manip_img'].link(manip_manip.inputImage) - - # This ImageManip will crop the mono frame based on the NN detections. Resulting image will be the cropped - # face that was detected by the face-detection NN. - emotions_nn = pipeline.create(dai.node.NeuralNetwork) - emotions_nn.setBlobPath(blobconverter.from_zoo(name="emotions-recognition-retail-0003", shaves=6)) - manip_manip.out.link(emotions_nn.input) - - recognition_xout = pipeline.create(dai.node.XLinkOut) - recognition_xout.setStreamName("recognition") - emotions_nn.out.link(recognition_xout.input) - - return pipeline - -with dai.Device() as device: - stereo = 1 < len(device.getConnectedCameras()) - device.startPipeline(create_pipeline(stereo)) - - sync = TwoStageHostSeqSync() - queues = {} - # Create output queues - for name in ["color", "detection", "recognition"]: - queues[name] = device.getOutputQueue(name) - - while True: - for name, q in queues.items(): - # Add all msgs (color frames, object detections and age/gender recognitions) to the Sync class. 
- if q.has(): - sync.add_msg(q.get(), name) +with OakCamera() as oak: + color = oak.create_camera('color') + det = oak.create_nn('face-detection-retail-0004', color) + # Passthrough is enabled for debugging purposes + # AspectRatioResizeMode has to be CROP for 2-stage pipelines at the moment + det.config_nn(aspect_ratio_resize_mode=AspectRatioResizeMode.CROP) - msgs = sync.get_msgs() - if msgs is not None: - frame = msgs["color"].getCvFrame() - detections = msgs["detection"].detections - recognitions = msgs["recognition"] + emotion_nn = oak.create_nn('emotions-recognition-retail-0003', input=det) + # emotion_nn.config_multistage_nn(show_cropped_frames=True) # For debugging - for i, detection in enumerate(detections): - bbox = frame_norm(frame, (detection.xmin, detection.ymin, detection.xmax, detection.ymax)) + def cb(packet: TwoStagePacket, visualizer): + for det, rec in zip(packet.detections, packet.nnData): + emotion_results = np.array(rec.getFirstLayerFp16()) + emotion_name = emotions[np.argmax(emotion_results)] - rec = recognitions[i] + visualizer.add_text(emotion_name, + bbox=(*det.top_left, *det.bottom_right), + position=TextPosition.BOTTOM_RIGHT) - emotion_results = np.array(rec.getFirstLayerFp16()) - emotion_name = emotions[np.argmax(emotion_results)] + visualizer.draw(packet.frame) + cv2.imshow(packet.name, packet.frame) - cv2.rectangle(frame, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (10, 245, 10), 2) - y = (bbox[1] + bbox[3]) // 2 - cv2.putText(frame, emotion_name, (bbox[0], y), cv2.FONT_HERSHEY_TRIPLEX, 1.5, (0, 0, 0), 8) - cv2.putText(frame, emotion_name, (bbox[0], y), cv2.FONT_HERSHEY_TRIPLEX, 1.5, (255, 255, 255), 2) - if stereo: - # You could also get detection.spatialCoordinates.x and detection.spatialCoordinates.y coordinates - coords = "Z: {:.2f} m".format(detection.spatialCoordinates.z/1000) - cv2.putText(frame, coords, (bbox[0], y + 35), cv2.FONT_HERSHEY_TRIPLEX, 1, (0, 0, 0), 8) - cv2.putText(frame, coords, (bbox[0], y + 35), cv2.FONT_HERSHEY_TRIPLEX, 1, (255, 255, 255), 2) - cv2.imshow("Camera", frame) - if cv2.waitKey(1) == ord('q'): - break + # Visualize detections on the frame. Also display FPS on the frame. 
Don't show the frame but send the packet + # to the callback function (where it will be displayed) + oak.visualize(emotion_nn, callback=cb, fps=True) + oak.visualize(det.out.passthrough) + # oak.show_graph() # Show pipeline graph, no need for now + oak.start(blocking=True) # This call will block until the app is stopped (by pressing 'Q' button) diff --git a/gen2-emotion-recognition/requirements.txt b/gen2-emotion-recognition/requirements.txt index 14c0db9ac..ce7f5340f 100644 --- a/gen2-emotion-recognition/requirements.txt +++ b/gen2-emotion-recognition/requirements.txt @@ -1,4 +1 @@ -opencv-python==4.5.1.48 -depthai==2.17.0.0 -blobconverter==1.3.0 -numpy \ No newline at end of file +depthai-sdk==1.9.1 diff --git a/gen2-facemesh/sdk/main.py b/gen2-facemesh/sdk/main.py new file mode 100644 index 000000000..d8c558c6d --- /dev/null +++ b/gen2-facemesh/sdk/main.py @@ -0,0 +1,55 @@ +import cv2 +import depthai as dai +import numpy as np + +from depthai_sdk import OakCamera +from utils.effect import EffectRenderer2D + +CONF_THRESH = 0.5 +NN_WIDTH, NN_HEIGHT = 192, 192 +PREVIEW_WIDTH, PREVIEW_HEIGHT = 416, 416 +OVERLAY_IMAGE = '../mask/facepaint.png' + +effect_rendered = EffectRenderer2D(OVERLAY_IMAGE) + + +def callback(packet, visualizer): + frame = packet.frame + nn_data = packet.img_detections + + score = np.array(nn_data.getLayerFp16('conv2d_31')).reshape((1,)) + score = 1 / (1 + np.exp(-score[0])) # sigmoid on score + landmarks = np.array(nn_data.getLayerFp16('conv2d_21')).reshape((468, 3)) + + if score > CONF_THRESH: + # scale landmarks + ldms = landmarks # .copy() + ar_diff = (PREVIEW_WIDTH / PREVIEW_HEIGHT) / (frame.shape[1] / frame.shape[0]) + + ldms *= np.array([frame.shape[0] / NN_HEIGHT, frame.shape[0] / NN_HEIGHT, 1]) + ldms[:, 0] += (frame.shape[1] - frame.shape[1] * ar_diff) / 2 + + # render frame + target_frame = frame.copy() + applied_effect = effect_rendered.render_effect(target_frame, ldms) + + # show landmarks on frame + for ldm in ldms: + col = (0, 0, int(ldm[2]) * 5 + 100) + cv2.circle(frame, (int(ldm[0]), int(ldm[1])), 1, col, 1) + + else: + applied_effect = frame + + cv2.imshow("FaceMesh", np.hstack([frame, applied_effect])) + + +with OakCamera() as oak: + color = oak.create_camera('color', resolution='1080p') + color.config_color_camera(interleaved=False, color_order=dai.ColorCameraProperties.ColorOrder.RGB) + + nn = oak.create_nn('../models/face_landmark_openvino_2021.4_6shave.blob', color) + nn.config_nn(aspect_ratio_resize_mode='crop') + + oak.callback(nn, callback=callback) + oak.start(blocking=True) diff --git a/gen2-facemesh/sdk/requirements.txt b/gen2-facemesh/sdk/requirements.txt new file mode 100644 index 000000000..6ed8c140d --- /dev/null +++ b/gen2-facemesh/sdk/requirements.txt @@ -0,0 +1,4 @@ +opencv-python +numpy +depthai +depthai-sdk \ No newline at end of file diff --git a/gen2-facemesh/sdk/utils/effect.py b/gen2-facemesh/sdk/utils/effect.py new file mode 100644 index 000000000..7031ae342 --- /dev/null +++ b/gen2-facemesh/sdk/utils/effect.py @@ -0,0 +1,92 @@ +import cv2 +import numpy as np + + +class EffectRenderer2D: + def __init__(self, + effect_image_path, + src_points_path="../res/source_landmarks.npy", + filter_points_path="../res/filter_points.npy"): + self.effect_image = cv2.imread(effect_image_path, cv2.IMREAD_UNCHANGED) + self.src_points = np.load(src_points_path) + + self.filter_points = np.load(filter_points_path) + self.src_points = self.src_points[self.filter_points] + + self.subdiv = cv2.Subdiv2D((0, 0, self.effect_image.shape[1], 
self.effect_image.shape[0])) + self.subdiv.insert(self.src_points.tolist()) + + self.triangles = np.array([(self.src_points == value).all(axis=1).nonzero() + for element in self.subdiv.getTriangleList() + for value in element.reshape((3, 2))]) + self.triangles = self.triangles.reshape(len(self.triangles) // 3, 3) + + def render_effect(self, target_image, target_landmarks): + ldms = target_landmarks[self.filter_points] + effect = self.create_effect(target_image, ldms) + return self.overlay_image(target_image, effect) + + def create_effect(self, target_image, dst_points): + """ + Creates effect image that can be rendered on the target image + :param target_image: target image + :param dst_points: landmarks on the target image, should be of the same size and order as self.filter_points + :return: effect image + """ + + # create empty overlay + overlay = np.zeros((target_image.shape[0], target_image.shape[1], 4), np.uint8) + + for idx_tri in self.triangles: + src_tri = self.src_points[idx_tri] + dst_tri_full = dst_points[idx_tri] + dst_tri = dst_tri_full[:, :2].astype(np.int32) + + src_tri_crop, src_crop = self.crop_triangle_bb(self.effect_image, src_tri) + dst_tri_crop, overlay_crop = self.crop_triangle_bb(overlay, dst_tri) + + warp_mat = cv2.getAffineTransform(np.float32(src_tri_crop), np.float32(dst_tri_crop)) + warp = cv2.warpAffine(src_crop, warp_mat, (overlay_crop.shape[1], overlay_crop.shape[0]), + None, flags=cv2.INTER_LINEAR, borderMode=cv2.BORDER_REFLECT_101) + + mask = np.zeros((overlay_crop.shape[0], overlay_crop.shape[1], 4), dtype=np.uint8) + cv2.fillConvexPoly(mask, np.int32(dst_tri_crop), (1.0, 1.0, 1.0, 1.0), 16, 0) + mask[np.where(overlay_crop > 0)] = 0 + + cropped_triangle = warp * mask + overlay_crop += cropped_triangle + + return overlay + + def overlay_image(self, background_image, foreground_image, blur=0): + """ + Take the two images, and produce an image where foreground image overlays the background image + :param background_image: background BRG or BGRA image with 0-255 values, transparency will be ignored in the result + :param foreground_image: foreground BGRA image with 0-255 values + :return: BGR image with foreground image overlaying the background image + """ + + mask = foreground_image[:, :, 3] + + if blur > 0: + mask = cv2.medianBlur(mask, blur) + + mask_inv = 255 - mask + + overlayed = foreground_image[:, :, :3] * np.dstack([mask / 255.0] * 3) + \ + background_image[:, :, :3] * np.dstack([mask_inv / 255.0] * 3) + overlayed = overlayed.astype(np.uint8) + + return overlayed + + def crop_triangle_bb(self, image, triangle): + """ + Create a trinagle bounding box and return cropped image. 
+ :param image: Target image + :param triangle: Triangle coordinates (3x2 array) + :return: Tupple (Triangle crop coordinates relative to the cropped image, cropped image) + """ + x, y, w, h = cv2.boundingRect(triangle) + crop = image[y:y + h, x:x + w] + triangle_crop = triangle - np.array([x, y]) + return triangle_crop, crop diff --git a/gen2-fast-depth/sdk/main.py b/gen2-fast-depth/sdk/main.py new file mode 100644 index 000000000..8767ba300 --- /dev/null +++ b/gen2-fast-depth/sdk/main.py @@ -0,0 +1,41 @@ +import blobconverter +import cv2 +import numpy as np + +from depthai_sdk import OakCamera + +NN_WIDTH, NN_HEIGHT = 640, 480 + + +def callback(packet, visualizer): + nn_data = packet.img_detections + pred = np.array(nn_data.getFirstLayerFp16()).reshape((NN_HEIGHT, NN_WIDTH)) + + d_min = np.min(pred) + d_max = np.max(pred) + depth_relative = (pred - d_min) / (d_max - d_min) + + # Color it + depth_relative = np.array(depth_relative) * 255 + depth_relative = depth_relative.astype(np.uint8) + depth_relative = 255 - depth_relative + depth_relative = cv2.applyColorMap(depth_relative, cv2.COLORMAP_INFERNO) + + target_width = int(NN_WIDTH) + target_height = int(NN_HEIGHT * (NN_WIDTH / NN_HEIGHT) / (16 / 9)) + + frame = cv2.resize(packet.frame, (target_width, target_height)) + depth_relative = cv2.resize(depth_relative, (target_width, target_height)) + + cv2.imshow('Fast depth', cv2.hconcat([frame, depth_relative])) + + +with OakCamera() as oak: + color = oak.create_camera('color') + + fast_depth_path = blobconverter.from_zoo(name='fast_depth_480x640', zoo_type='depthai') + fast_depth_nn = oak.create_nn(fast_depth_path, color) + fast_depth_nn.config_nn(aspect_ratio_resize_mode='stretch') + + oak.callback(fast_depth_nn, callback=callback) + oak.start(blocking=True) diff --git a/gen2-fast-depth/sdk/requirements.txt b/gen2-fast-depth/sdk/requirements.txt new file mode 100644 index 000000000..5a80639ac --- /dev/null +++ b/gen2-fast-depth/sdk/requirements.txt @@ -0,0 +1,5 @@ +opencv-python~=4.5.1.48 +depthai +depthai-sdk +numpy~=1.19.5 +blobconverter \ No newline at end of file diff --git a/gen2-fatigue-detection/sdk/face_landmarks.py b/gen2-fatigue-detection/sdk/face_landmarks.py new file mode 100644 index 000000000..224f184a1 --- /dev/null +++ b/gen2-fatigue-detection/sdk/face_landmarks.py @@ -0,0 +1,184 @@ +import math + +import cv2 +import numpy as np +from scipy.spatial.distance import euclidean + + +class FaceLandmarks: + def __init__(self): + self.COUNTER = 0 + self.mCOUNTER = 0 + self.hCOUNTER = 0 + self.TOTAL = 0 + self.mTOTAL = 0 + self.hTOTAL = 0 + self.frame = None + + def run_land68(self, face_frame, nndata, face_coords_float): + # try: + out = np.array(nndata.getFirstLayerFp16()) + face_coords = self.normalize_face_coords(face_frame, face_coords_float) + + self.frame = face_frame + result = self.frame_norm(face_frame, *out) + eye_left = [] + eye_right = [] + mouth = [] + hand_points = [] + for i in range(72, 84, 2): + eye_left.append((out[i], out[i + 1])) + for i in range(84, 96, 2): + eye_right.append((out[i], out[i + 1])) + for i in range(96, len(result), 2): + if i == 100 or i == 116 or i == 104 or i == 112 or i == 96 or i == 108: + mouth.append(np.array([out[i], out[i + 1]])) + + for i in range(16, 110, 2): + if i == 16 or i == 60 or i == 72 or i == 90 or i == 96 or i == 108: + hand_points.append((result[i] + face_coords[0], result[i + 1] + face_coords[1])) + + # Whether dead is tilted downwards + ret, rotation_vector, translation_vector, camera_matrix, dist_coeffs = 
self.get_pose_estimation( + self.frame.shape, np.array(hand_points, dtype='double') + ) + ret, pitch, yaw, roll = self.get_euler_angle(rotation_vector) + if pitch < 0: + self.hCOUNTER += 1 + if self.hCOUNTER >= 20: + cv2.putText(self.frame, "SLEEP!!!", (face_coords[0], face_coords[1] - 10), cv2.FONT_HERSHEY_COMPLEX, 1, + (0, 0, 255), 3) + else: + if self.hCOUNTER >= 3: + self.hTOTAL += 1 + self.hCOUNTER = 0 + + # Whether eyes are closed (based on eye aspect ratio) + + left_ear = self.eye_aspect_ratio(eye_left) + right_ear = self.eye_aspect_ratio(eye_right) + ear = (left_ear + right_ear) / 2.0 + if ear < 0.15: # Eye aspect ratio:0.15 + self.COUNTER += 1 + if self.COUNTER >= 20: + cv2.putText(self.frame, "SLEEP!!!", (face_coords[0], face_coords[1] - 10), cv2.FONT_HERSHEY_SIMPLEX, 1, + (0, 0, 255), 3) + else: + # If it is less than the threshold 3 times in a row, it means that an eye blink has been performed + if self.COUNTER >= 3: # Threshold: 3 + self.TOTAL += 1 + # Reset the eye frame counter + self.COUNTER = 0 + + # Yawning detection - based on mouth aspect ratio + mouth_ratio = self.mouth_aspect_ratio(mouth) + if mouth_ratio > 0.5: + self.mCOUNTER += 1 + else: + if self.mCOUNTER >= 3: + self.mTOTAL += 1 + self.mCOUNTER = 0 + + cv2.putText(self.frame, "Eye:{:d}, mouth:{:d}, head:{:d}".format(self.TOTAL, self.mTOTAL, self.hTOTAL), + (10, 40), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (255, 255, 255), 2) + if self.TOTAL >= 50 or self.mTOTAL >= 15 or self.hTOTAL >= 10: + cv2.putText(self.frame, "Danger!!!", (100, 200), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 3) + + def frame_norm(self, frame, *xy_vals): + height, width = frame.shape[:2] + result = [] + for i, val in enumerate(xy_vals): + if i % 2 == 0: + result.append(max(0, min(width, int(val * width)))) + else: + result.append(max(0, min(height, int(val * height)))) + return result + + def normalize_face_coords(self, frame, bbox): + normVals = np.full(len(bbox), frame.shape[0]) + normVals[::2] = frame.shape[1] + return (np.clip(np.array(bbox), 0, 1) * normVals).astype(int) + + def mouth_aspect_ratio(self, mouth): + A = np.linalg.norm(mouth[1] - mouth[5]) # 51, 59 + B = np.linalg.norm(mouth[2] - mouth[4]) # 53, 57 + C = np.linalg.norm(mouth[0] - mouth[3]) # 49, 55 + mar = (A + B) / (2.0 * C) + return mar + + def eye_aspect_ratio(self, eye): + A = euclidean(eye[1], eye[5]) + B = euclidean(eye[2], eye[4]) + C = euclidean(eye[0], eye[3]) + return (A + B) / (2.0 * C) + + # Get rotation vector and translation vector + + def get_pose_estimation(self, img_size, image_points): + # 3D model points. 
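# These six canonical 3D facial points (chin, nose tip, eye corners, mouth corners,
# in an arbitrary millimetre-scale model frame) are matched against the corresponding
# detected 2D landmarks. cv2.solvePnP then recovers the head rotation and translation
# under a rough pinhole-camera assumption (focal length taken as the image width,
# principal point at the image centre, zero lens distortion), and get_euler_angle()
# below converts the resulting rotation vector into pitch/yaw/roll in degrees.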
+ model_points = np.array([ + (0.0, -330.0, -65.0), # Chin + (0.0, 0.0, 0.0), # Nose tip + (-225.0, 170.0, -135.0), # Left eye left corner + (225.0, 170.0, -135.0), # Right eye right corne + (-150.0, -150.0, -125.0), # Left Mouth corner + (150.0, -150.0, -125.0) # Right mouth corner + ]) + + # Camera internals + focal_length = img_size[1] + center = (img_size[1] / 2, img_size[0] / 2) + camera_matrix = np.array( + [[focal_length, 0, center[0]], + [0, focal_length, center[1]], + [0, 0, 1]], dtype="double" + ) + + # print("Camera Matrix :{}".format(camera_matrix)) + # print(image_points) + dist_coeffs = np.zeros((4, 1)) # Assuming no lens distortion + (success, rotation_vector, translation_vector) = cv2.solvePnP(model_points, image_points, camera_matrix, + dist_coeffs, flags=cv2.SOLVEPNP_ITERATIVE) + # print("Rotation Vector:\n {}".format(rotation_vector)) + # print("Translation Vector:\n {}".format(translation_vector)) + return success, rotation_vector, translation_vector, camera_matrix, dist_coeffs + + # Convert from rotation vector to Euler angle + def get_euler_angle(self, rotation_vector): + # calculate rotation angles + theta = cv2.norm(rotation_vector, cv2.NORM_L2) + + # transformed to quaterniond + w = math.cos(theta / 2) + x = math.sin(theta / 2) * rotation_vector[0][0] / theta + y = math.sin(theta / 2) * rotation_vector[1][0] / theta + z = math.sin(theta / 2) * rotation_vector[2][0] / theta + + ysqr = y * y + # pitch (x-axis rotation) + t0 = 2.0 * (w * x + y * z) + t1 = 1.0 - 2.0 * (x * x + ysqr) + # print('t0:{}, t1:{}'.format(t0, t1)) + pitch = math.atan2(t0, t1) + + # yaw (y-axis rotation) + t2 = 2.0 * (w * y - z * x) + if t2 > 1.0: + t2 = 1.0 + if t2 < -1.0: + t2 = -1.0 + yaw = math.asin(t2) + + # roll (z-axis rotation) + t3 = 2.0 * (w * z + x * y) + t4 = 1.0 - 2.0 * (ysqr + z * z) + roll = math.atan2(t3, t4) + + # print('pitch:{}, yaw:{}, roll:{}'.format(pitch, yaw, roll)) + + # Unit conversion: convert radians to degrees + Y = int((pitch / math.pi) * 180) + X = int((yaw / math.pi) * 180) + Z = int((roll / math.pi) * 180) + + return 0, Y, X, Z diff --git a/gen2-fatigue-detection/sdk/main.py b/gen2-fatigue-detection/sdk/main.py new file mode 100644 index 000000000..eff135b0c --- /dev/null +++ b/gen2-fatigue-detection/sdk/main.py @@ -0,0 +1,30 @@ +import blobconverter +import cv2 + +import face_landmarks +from depthai_sdk import OakCamera + +decode = face_landmarks.FaceLandmarks() + + +def callback(packet, visualizer): + for nn_data, detection in zip(packet.nnData, packet.img_detections.detections): + bbox = detection.xmin, detection.ymin, detection.xmax, detection.ymax + decode.run_land68(packet.frame, nn_data, bbox) + + cv2.imshow('Fatigue Detection', packet.frame) + + +with OakCamera() as oak: + color = oak.create_camera('color') + + face_detection = oak.create_nn('face-detection-retail-0004', color) + + landmark_nn_path = blobconverter.from_zoo(name='facial_landmarks_68_160x160', + shaves=6, + zoo_type='depthai', + version='2021.4') + landmark_detection = oak.create_nn(landmark_nn_path, face_detection) + + oak.callback(landmark_detection, callback=callback) + oak.start(blocking=True) diff --git a/gen2-fatigue-detection/sdk/requirements.txt b/gen2-fatigue-detection/sdk/requirements.txt new file mode 100644 index 000000000..fc0c401ad --- /dev/null +++ b/gen2-fatigue-detection/sdk/requirements.txt @@ -0,0 +1,6 @@ +scipy +numpy>=1.21 +opencv-python +depthai +depthai-sdk +blobconverter diff --git a/gen2-full-fov-nn/README.md b/gen2-full-fov-nn/README.md index 7d4d31922..1035201a2 
100644 --- a/gen2-full-fov-nn/README.md +++ b/gen2-full-fov-nn/README.md @@ -24,14 +24,6 @@ Here we have 3 options: ![stretched](https://user-images.githubusercontent.com/18037362/180607962-e616cdc7-fcad-4bc8-a15f-617b89a2c047.jpg) -## ISP vs Video vs Preview at 12MP - -![Isp vs Video vs Preview](https://user-images.githubusercontent.com/18037362/180610776-854c5215-8b59-4300-81d8-0014847a04bc.jpg) - -Image above is the `isp` output frame from the `ColorCamera` (12MP from IMX378). Blue rectangle represents the cropped 4K -`video` output, and yellow rectangle represents cropped `preview` output when preview size is set to 1:1 aspect ratio -(eg. when using 300x300 MobileNet-SSD NN model). [Source code here](https://gist.github.com/Erol444/56e23ec203a122d540ebc4d01d894d44). - ## Installation ``` diff --git a/gen2-full-fov-nn/api-cropping.py b/gen2-full-fov-nn/api-cropping.py new file mode 100644 index 000000000..f814f63f4 --- /dev/null +++ b/gen2-full-fov-nn/api-cropping.py @@ -0,0 +1,82 @@ +#!/usr/bin/env python3 + +import cv2 +import depthai as dai +import blobconverter +import numpy as np + +# MobilenetSSD label texts +labelMap = ["background", "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", + "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"] + +# Create pipeline +pipeline = dai.Pipeline() + +# Define source and output +camRgb = pipeline.create(dai.node.ColorCamera) +camRgb.setResolution(dai.ColorCameraProperties.SensorResolution.THE_12_MP) +camRgb.setInterleaved(False) +camRgb.setIspScale(1,5) # 4056x3040 -> 812x608 +camRgb.setPreviewSize(608, 608) +camRgb.setColorOrder(dai.ColorCameraProperties.ColorOrder.RGB) +# Slightly lower FPS to avoid lag, as ISP takes more resources at 12MP +camRgb.setFps(25) + +xoutIsp = pipeline.create(dai.node.XLinkOut) +xoutIsp.setStreamName("isp") +camRgb.isp.link(xoutIsp.input) + +# Use ImageManip to resize to 300x300 with letterboxing +manip = pipeline.createImageManip() +manip.setResize(300,300) +manip.setMaxOutputFrameSize(270000) # 300x300x3 +camRgb.preview.link(manip.inputImage) + +# NN to demonstrate how to run inference on full FOV frames +nn = pipeline.create(dai.node.MobileNetDetectionNetwork) +nn.setConfidenceThreshold(0.5) +nn.setBlobPath(str(blobconverter.from_zoo(name="mobilenet-ssd", shaves=6))) +manip.out.link(nn.input) + +xoutNn = pipeline.create(dai.node.XLinkOut) +xoutNn.setStreamName("nn") +nn.out.link(xoutNn.input) + +xoutRgb = pipeline.create(dai.node.XLinkOut) +xoutRgb.setStreamName("rgb") +nn.passthrough.link(xoutRgb.input) + + +with dai.Device(pipeline) as device: + qRgb = device.getOutputQueue(name='rgb') + qNn = device.getOutputQueue(name='nn') + qIsp = device.getOutputQueue(name='isp') + + def frameNorm(frame, bbox): + normVals = np.full(len(bbox), frame.shape[0]) + normVals[::2] = frame.shape[1] + return (np.clip(np.array(bbox), 0, 1) * normVals).astype(int) + + def displayFrame(name, frame, detections): + color = (255, 0, 0) + for detection in detections: + bbox = frameNorm(frame, (detection.xmin, detection.ymin, detection.xmax, detection.ymax)) + cv2.putText(frame, labelMap[detection.label], (bbox[0] + 10, bbox[1] + 20), cv2.FONT_HERSHEY_TRIPLEX, 0.5, color) + cv2.putText(frame, f"{int(detection.confidence * 100)}%", (bbox[0] + 10, bbox[1] + 40), cv2.FONT_HERSHEY_TRIPLEX, 0.5, color) + cv2.rectangle(frame, (bbox[0], bbox[1]), (bbox[2], bbox[3]), color, 2) + cv2.imshow(name, frame) + + while True: + if qNn.has(): + dets = 
qNn.get().detections + frame = qRgb.get() + f = frame.getCvFrame() + displayFrame("Cropping", f, dets) + if qIsp.has(): + frame = qIsp.get() + f = frame.getCvFrame() + cv2.putText(f, str(f.shape), (20, 20), cv2.FONT_HERSHEY_TRIPLEX, 0.5, (255,255,255)) + cv2.imshow("ISP", f) + + if cv2.waitKey(1) == ord('q'): + break diff --git a/gen2-full-fov-nn/api-letterboxing.py b/gen2-full-fov-nn/api-letterboxing.py new file mode 100644 index 000000000..319df8e11 --- /dev/null +++ b/gen2-full-fov-nn/api-letterboxing.py @@ -0,0 +1,82 @@ +#!/usr/bin/env python3 + +import cv2 +import depthai as dai +import blobconverter +import numpy as np + +# MobilenetSSD label texts +labelMap = ["background", "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", + "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"] + +# Create pipeline +pipeline = dai.Pipeline() + +# Define source and output +camRgb = pipeline.create(dai.node.ColorCamera) +camRgb.setResolution(dai.ColorCameraProperties.SensorResolution.THE_12_MP) +camRgb.setInterleaved(False) +camRgb.setIspScale(1,5) # 4056x3040 -> 812x608 +camRgb.setPreviewSize(812, 608) +camRgb.setColorOrder(dai.ColorCameraProperties.ColorOrder.RGB) +# Slightly lower FPS to avoid lag, as ISP takes more resources at 12MP +camRgb.setFps(25) + +xoutIsp = pipeline.create(dai.node.XLinkOut) +xoutIsp.setStreamName("isp") +camRgb.isp.link(xoutIsp.input) + +# Use ImageManip to resize to 300x300 with letterboxing +manip = pipeline.create(dai.node.ImageManip) +manip.setMaxOutputFrameSize(270000) # 300x300x3 +manip.initialConfig.setResizeThumbnail(300, 300) +camRgb.preview.link(manip.inputImage) + +# NN to demonstrate how to run inference on full FOV frames +nn = pipeline.create(dai.node.MobileNetDetectionNetwork) +nn.setConfidenceThreshold(0.5) +nn.setBlobPath(str(blobconverter.from_zoo(name="mobilenet-ssd", shaves=6))) +manip.out.link(nn.input) + +xoutNn = pipeline.create(dai.node.XLinkOut) +xoutNn.setStreamName("nn") +nn.out.link(xoutNn.input) + +xoutRgb = pipeline.create(dai.node.XLinkOut) +xoutRgb.setStreamName("rgb") +nn.passthrough.link(xoutRgb.input) + + +with dai.Device(pipeline) as device: + qRgb = device.getOutputQueue(name='rgb') + qNn = device.getOutputQueue(name='nn') + qIsp = device.getOutputQueue(name='isp') + + def frameNorm(frame, bbox): + normVals = np.full(len(bbox), frame.shape[0]) + normVals[::2] = frame.shape[1] + return (np.clip(np.array(bbox), 0, 1) * normVals).astype(int) + + def displayFrame(name, frame, detections): + color = (255, 0, 0) + for detection in detections: + bbox = frameNorm(frame, (detection.xmin, detection.ymin, detection.xmax, detection.ymax)) + cv2.putText(frame, labelMap[detection.label], (bbox[0] + 10, bbox[1] + 20), cv2.FONT_HERSHEY_TRIPLEX, 0.5, color) + cv2.putText(frame, f"{int(detection.confidence * 100)}%", (bbox[0] + 10, bbox[1] + 40), cv2.FONT_HERSHEY_TRIPLEX, 0.5, color) + cv2.rectangle(frame, (bbox[0], bbox[1]), (bbox[2], bbox[3]), color, 2) + cv2.imshow(name, frame) + + while True: + if qNn.has(): + dets = qNn.get().detections + frame = qRgb.get() + f = frame.getCvFrame() + displayFrame("Letterboxing", f, dets) + if qIsp.has(): + frame = qIsp.get() + f = frame.getCvFrame() + cv2.putText(f, str(f.shape), (20, 20), cv2.FONT_HERSHEY_TRIPLEX, 0.5, (255,255,255)) + cv2.imshow("ISP", f) + + if cv2.waitKey(1) == ord('q'): + break diff --git a/gen2-full-fov-nn/api-stretching.py b/gen2-full-fov-nn/api-stretching.py new file mode 100644 index 
000000000..2f9b6b7f7 --- /dev/null +++ b/gen2-full-fov-nn/api-stretching.py @@ -0,0 +1,84 @@ +#!/usr/bin/env python3 + +import cv2 +import depthai as dai +import blobconverter +import numpy as np + +# MobilenetSSD label texts +labelMap = ["background", "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", + "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"] + +# Create pipeline +pipeline = dai.Pipeline() + +# Define source and output +camRgb = pipeline.create(dai.node.ColorCamera) +camRgb.setResolution(dai.ColorCameraProperties.SensorResolution.THE_12_MP) +camRgb.setInterleaved(False) +camRgb.setIspScale(1,5) # 4056x3040 -> 812x608 +camRgb.setPreviewSize(812, 608) +camRgb.setColorOrder(dai.ColorCameraProperties.ColorOrder.RGB) +# Slightly lower FPS to avoid lag, as ISP takes more resources at 12MP +camRgb.setFps(25) + +xoutIsp = pipeline.create(dai.node.XLinkOut) +xoutIsp.setStreamName("isp") +camRgb.isp.link(xoutIsp.input) + +# Use ImageManip to resize to 300x300 with letterboxing +manip = pipeline.createImageManip() +manip.setResize(300,300) +manip.setMaxOutputFrameSize(270000) # 300x300x3 +manip.initialConfig.setKeepAspectRatio(False) # Stretching the image +camRgb.preview.link(manip.inputImage) + +# NN to demonstrate how to run inference on full FOV frames +nn = pipeline.create(dai.node.MobileNetDetectionNetwork) +nn.setConfidenceThreshold(0.5) +nn.setBlobPath(str(blobconverter.from_zoo(name="mobilenet-ssd", shaves=6))) +manip.out.link(nn.input) + +xoutNn = pipeline.create(dai.node.XLinkOut) +xoutNn.setStreamName("nn") +nn.out.link(xoutNn.input) + +xoutRgb = pipeline.create(dai.node.XLinkOut) +xoutRgb.setStreamName("rgb") +nn.passthrough.link(xoutRgb.input) + + +with dai.Device(pipeline) as device: + qRgb = device.getOutputQueue(name='rgb') + qNn = device.getOutputQueue(name='nn') + qIsp = device.getOutputQueue(name='isp') + + def frameNorm(frame, bbox): + normVals = np.full(len(bbox), frame.shape[0]) + normVals[::2] = frame.shape[1] + return (np.clip(np.array(bbox), 0, 1) * normVals).astype(int) + + def displayFrame(name, frame, detections): + color = (255, 0, 0) + for detection in detections: + bbox = frameNorm(frame, (detection.xmin, detection.ymin, detection.xmax, detection.ymax)) + cv2.putText(frame, labelMap[detection.label], (bbox[0] + 10, bbox[1] + 20), cv2.FONT_HERSHEY_TRIPLEX, 0.5, color) + cv2.putText(frame, f"{int(detection.confidence * 100)}%", (bbox[0] + 10, bbox[1] + 40), cv2.FONT_HERSHEY_TRIPLEX, 0.5, color) + cv2.rectangle(frame, (bbox[0], bbox[1]), (bbox[2], bbox[3]), color, 2) + cv2.imshow(name, frame) + dets = [] + while True: + if qNn.has(): + dets = qNn.get().detections + if qRgb.has(): + frame = qRgb.get() + f = frame.getCvFrame() + displayFrame("Stretched frame", f, dets) + if qIsp.has(): + frame = qIsp.get() + f = frame.getCvFrame() + cv2.putText(f, str(f.shape), (20, 20), cv2.FONT_HERSHEY_TRIPLEX, 0.5, (255,255,255)) + displayFrame("ISP", f, dets) + + if cv2.waitKey(1) == ord('q'): + break diff --git a/gen2-full-fov-nn/cropping.py b/gen2-full-fov-nn/cropping.py index f814f63f4..4a5980ca3 100644 --- a/gen2-full-fov-nn/cropping.py +++ b/gen2-full-fov-nn/cropping.py @@ -1,82 +1,8 @@ -#!/usr/bin/env python3 - -import cv2 -import depthai as dai -import blobconverter -import numpy as np - -# MobilenetSSD label texts -labelMap = ["background", "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", - "diningtable", "dog", "horse", "motorbike", 
"person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"] - -# Create pipeline -pipeline = dai.Pipeline() - -# Define source and output -camRgb = pipeline.create(dai.node.ColorCamera) -camRgb.setResolution(dai.ColorCameraProperties.SensorResolution.THE_12_MP) -camRgb.setInterleaved(False) -camRgb.setIspScale(1,5) # 4056x3040 -> 812x608 -camRgb.setPreviewSize(608, 608) -camRgb.setColorOrder(dai.ColorCameraProperties.ColorOrder.RGB) -# Slightly lower FPS to avoid lag, as ISP takes more resources at 12MP -camRgb.setFps(25) - -xoutIsp = pipeline.create(dai.node.XLinkOut) -xoutIsp.setStreamName("isp") -camRgb.isp.link(xoutIsp.input) - -# Use ImageManip to resize to 300x300 with letterboxing -manip = pipeline.createImageManip() -manip.setResize(300,300) -manip.setMaxOutputFrameSize(270000) # 300x300x3 -camRgb.preview.link(manip.inputImage) - -# NN to demonstrate how to run inference on full FOV frames -nn = pipeline.create(dai.node.MobileNetDetectionNetwork) -nn.setConfidenceThreshold(0.5) -nn.setBlobPath(str(blobconverter.from_zoo(name="mobilenet-ssd", shaves=6))) -manip.out.link(nn.input) - -xoutNn = pipeline.create(dai.node.XLinkOut) -xoutNn.setStreamName("nn") -nn.out.link(xoutNn.input) - -xoutRgb = pipeline.create(dai.node.XLinkOut) -xoutRgb.setStreamName("rgb") -nn.passthrough.link(xoutRgb.input) - - -with dai.Device(pipeline) as device: - qRgb = device.getOutputQueue(name='rgb') - qNn = device.getOutputQueue(name='nn') - qIsp = device.getOutputQueue(name='isp') - - def frameNorm(frame, bbox): - normVals = np.full(len(bbox), frame.shape[0]) - normVals[::2] = frame.shape[1] - return (np.clip(np.array(bbox), 0, 1) * normVals).astype(int) - - def displayFrame(name, frame, detections): - color = (255, 0, 0) - for detection in detections: - bbox = frameNorm(frame, (detection.xmin, detection.ymin, detection.xmax, detection.ymax)) - cv2.putText(frame, labelMap[detection.label], (bbox[0] + 10, bbox[1] + 20), cv2.FONT_HERSHEY_TRIPLEX, 0.5, color) - cv2.putText(frame, f"{int(detection.confidence * 100)}%", (bbox[0] + 10, bbox[1] + 40), cv2.FONT_HERSHEY_TRIPLEX, 0.5, color) - cv2.rectangle(frame, (bbox[0], bbox[1]), (bbox[2], bbox[3]), color, 2) - cv2.imshow(name, frame) - - while True: - if qNn.has(): - dets = qNn.get().detections - frame = qRgb.get() - f = frame.getCvFrame() - displayFrame("Cropping", f, dets) - if qIsp.has(): - frame = qIsp.get() - f = frame.getCvFrame() - cv2.putText(f, str(f.shape), (20, 20), cv2.FONT_HERSHEY_TRIPLEX, 0.5, (255,255,255)) - cv2.imshow("ISP", f) - - if cv2.waitKey(1) == ord('q'): - break +from depthai_sdk import OakCamera, AspectRatioResizeMode + +with OakCamera() as oak: + color = oak.create_camera('color') + nn = oak.create_nn('mobilenet-ssd', color) + nn.config_nn(aspect_ratio_resize_mode=AspectRatioResizeMode.CROP) + oak.visualize([nn, nn.out.passthrough], fps=True) + oak.start(blocking=True) diff --git a/gen2-full-fov-nn/letterboxing.py b/gen2-full-fov-nn/letterboxing.py index 319df8e11..ae9b51cee 100644 --- a/gen2-full-fov-nn/letterboxing.py +++ b/gen2-full-fov-nn/letterboxing.py @@ -1,82 +1,8 @@ -#!/usr/bin/env python3 - -import cv2 -import depthai as dai -import blobconverter -import numpy as np - -# MobilenetSSD label texts -labelMap = ["background", "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", - "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"] - -# Create pipeline -pipeline = dai.Pipeline() - -# Define source and output -camRgb = 
pipeline.create(dai.node.ColorCamera) -camRgb.setResolution(dai.ColorCameraProperties.SensorResolution.THE_12_MP) -camRgb.setInterleaved(False) -camRgb.setIspScale(1,5) # 4056x3040 -> 812x608 -camRgb.setPreviewSize(812, 608) -camRgb.setColorOrder(dai.ColorCameraProperties.ColorOrder.RGB) -# Slightly lower FPS to avoid lag, as ISP takes more resources at 12MP -camRgb.setFps(25) - -xoutIsp = pipeline.create(dai.node.XLinkOut) -xoutIsp.setStreamName("isp") -camRgb.isp.link(xoutIsp.input) - -# Use ImageManip to resize to 300x300 with letterboxing -manip = pipeline.create(dai.node.ImageManip) -manip.setMaxOutputFrameSize(270000) # 300x300x3 -manip.initialConfig.setResizeThumbnail(300, 300) -camRgb.preview.link(manip.inputImage) - -# NN to demonstrate how to run inference on full FOV frames -nn = pipeline.create(dai.node.MobileNetDetectionNetwork) -nn.setConfidenceThreshold(0.5) -nn.setBlobPath(str(blobconverter.from_zoo(name="mobilenet-ssd", shaves=6))) -manip.out.link(nn.input) - -xoutNn = pipeline.create(dai.node.XLinkOut) -xoutNn.setStreamName("nn") -nn.out.link(xoutNn.input) - -xoutRgb = pipeline.create(dai.node.XLinkOut) -xoutRgb.setStreamName("rgb") -nn.passthrough.link(xoutRgb.input) - - -with dai.Device(pipeline) as device: - qRgb = device.getOutputQueue(name='rgb') - qNn = device.getOutputQueue(name='nn') - qIsp = device.getOutputQueue(name='isp') - - def frameNorm(frame, bbox): - normVals = np.full(len(bbox), frame.shape[0]) - normVals[::2] = frame.shape[1] - return (np.clip(np.array(bbox), 0, 1) * normVals).astype(int) - - def displayFrame(name, frame, detections): - color = (255, 0, 0) - for detection in detections: - bbox = frameNorm(frame, (detection.xmin, detection.ymin, detection.xmax, detection.ymax)) - cv2.putText(frame, labelMap[detection.label], (bbox[0] + 10, bbox[1] + 20), cv2.FONT_HERSHEY_TRIPLEX, 0.5, color) - cv2.putText(frame, f"{int(detection.confidence * 100)}%", (bbox[0] + 10, bbox[1] + 40), cv2.FONT_HERSHEY_TRIPLEX, 0.5, color) - cv2.rectangle(frame, (bbox[0], bbox[1]), (bbox[2], bbox[3]), color, 2) - cv2.imshow(name, frame) - - while True: - if qNn.has(): - dets = qNn.get().detections - frame = qRgb.get() - f = frame.getCvFrame() - displayFrame("Letterboxing", f, dets) - if qIsp.has(): - frame = qIsp.get() - f = frame.getCvFrame() - cv2.putText(f, str(f.shape), (20, 20), cv2.FONT_HERSHEY_TRIPLEX, 0.5, (255,255,255)) - cv2.imshow("ISP", f) - - if cv2.waitKey(1) == ord('q'): - break +from depthai_sdk import OakCamera, AspectRatioResizeMode + +with OakCamera() as oak: + color = oak.create_camera('color') + nn = oak.create_nn('mobilenet-ssd', color) + nn.config_nn(aspect_ratio_resize_mode=AspectRatioResizeMode.LETTERBOX) + oak.visualize([nn, nn.out.passthrough], fps=True) + oak.start(blocking=True) \ No newline at end of file diff --git a/gen2-full-fov-nn/requirements.txt b/gen2-full-fov-nn/requirements.txt index 578fbd8cf..ce7f5340f 100644 --- a/gen2-full-fov-nn/requirements.txt +++ b/gen2-full-fov-nn/requirements.txt @@ -1,4 +1 @@ -opencv-python==4.5.1.48 -blobconverter==1.2.9 -numpy -depthai==2.16.0.0 +depthai-sdk==1.9.1 diff --git a/gen2-full-fov-nn/stretching.py b/gen2-full-fov-nn/stretching.py index 2f9b6b7f7..2950aa63f 100644 --- a/gen2-full-fov-nn/stretching.py +++ b/gen2-full-fov-nn/stretching.py @@ -1,84 +1,8 @@ -#!/usr/bin/env python3 - -import cv2 -import depthai as dai -import blobconverter -import numpy as np - -# MobilenetSSD label texts -labelMap = ["background", "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", - 
"diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"] - -# Create pipeline -pipeline = dai.Pipeline() - -# Define source and output -camRgb = pipeline.create(dai.node.ColorCamera) -camRgb.setResolution(dai.ColorCameraProperties.SensorResolution.THE_12_MP) -camRgb.setInterleaved(False) -camRgb.setIspScale(1,5) # 4056x3040 -> 812x608 -camRgb.setPreviewSize(812, 608) -camRgb.setColorOrder(dai.ColorCameraProperties.ColorOrder.RGB) -# Slightly lower FPS to avoid lag, as ISP takes more resources at 12MP -camRgb.setFps(25) - -xoutIsp = pipeline.create(dai.node.XLinkOut) -xoutIsp.setStreamName("isp") -camRgb.isp.link(xoutIsp.input) - -# Use ImageManip to resize to 300x300 with letterboxing -manip = pipeline.createImageManip() -manip.setResize(300,300) -manip.setMaxOutputFrameSize(270000) # 300x300x3 -manip.initialConfig.setKeepAspectRatio(False) # Stretching the image -camRgb.preview.link(manip.inputImage) - -# NN to demonstrate how to run inference on full FOV frames -nn = pipeline.create(dai.node.MobileNetDetectionNetwork) -nn.setConfidenceThreshold(0.5) -nn.setBlobPath(str(blobconverter.from_zoo(name="mobilenet-ssd", shaves=6))) -manip.out.link(nn.input) - -xoutNn = pipeline.create(dai.node.XLinkOut) -xoutNn.setStreamName("nn") -nn.out.link(xoutNn.input) - -xoutRgb = pipeline.create(dai.node.XLinkOut) -xoutRgb.setStreamName("rgb") -nn.passthrough.link(xoutRgb.input) - - -with dai.Device(pipeline) as device: - qRgb = device.getOutputQueue(name='rgb') - qNn = device.getOutputQueue(name='nn') - qIsp = device.getOutputQueue(name='isp') - - def frameNorm(frame, bbox): - normVals = np.full(len(bbox), frame.shape[0]) - normVals[::2] = frame.shape[1] - return (np.clip(np.array(bbox), 0, 1) * normVals).astype(int) - - def displayFrame(name, frame, detections): - color = (255, 0, 0) - for detection in detections: - bbox = frameNorm(frame, (detection.xmin, detection.ymin, detection.xmax, detection.ymax)) - cv2.putText(frame, labelMap[detection.label], (bbox[0] + 10, bbox[1] + 20), cv2.FONT_HERSHEY_TRIPLEX, 0.5, color) - cv2.putText(frame, f"{int(detection.confidence * 100)}%", (bbox[0] + 10, bbox[1] + 40), cv2.FONT_HERSHEY_TRIPLEX, 0.5, color) - cv2.rectangle(frame, (bbox[0], bbox[1]), (bbox[2], bbox[3]), color, 2) - cv2.imshow(name, frame) - dets = [] - while True: - if qNn.has(): - dets = qNn.get().detections - if qRgb.has(): - frame = qRgb.get() - f = frame.getCvFrame() - displayFrame("Stretched frame", f, dets) - if qIsp.has(): - frame = qIsp.get() - f = frame.getCvFrame() - cv2.putText(f, str(f.shape), (20, 20), cv2.FONT_HERSHEY_TRIPLEX, 0.5, (255,255,255)) - displayFrame("ISP", f, dets) - - if cv2.waitKey(1) == ord('q'): - break +from depthai_sdk import OakCamera, AspectRatioResizeMode + +with OakCamera() as oak: + color = oak.create_camera('color') + nn = oak.create_nn('mobilenet-ssd', color) + nn.config_nn(aspect_ratio_resize_mode=AspectRatioResizeMode.STRETCH) + oak.visualize([nn, nn.out.passthrough], fps=True) + oak.start(blocking=True) \ No newline at end of file diff --git a/gen2-head-posture-detection/MultiMsgSync.py b/gen2-head-posture-detection/MultiMsgSync.py deleted file mode 100644 index c75fe307b..000000000 --- a/gen2-head-posture-detection/MultiMsgSync.py +++ /dev/null @@ -1,56 +0,0 @@ -# Color frames (ImgFrame), object detection (ImgDetections) and recognition (NNData) -# messages arrive to the host all with some additional delay. 
-# For each ImgFrame there's one ImgDetections msg, which has multiple detections, and for each -# detection there's a NNData msg which contains recognition results. - -# How it works: -# Every ImgFrame, ImgDetections and NNData message has it's own sequence number, by which we can sync messages. - -class TwoStageHostSeqSync: - def __init__(self): - self.msgs = {} - # name: color, detection, or recognition - def add_msg(self, msg, name): - seq = str(msg.getSequenceNum()) - if seq not in self.msgs: - self.msgs[seq] = {} # Create directory for msgs - if "recognition" not in self.msgs[seq]: - self.msgs[seq]["recognition"] = [] # Create recognition array - - if name == "recognition": - # Append recognition msgs to an array - self.msgs[seq]["recognition"].append(msg) - # print(f'Added recognition seq {seq}, total len {len(self.msgs[seq]["recognition"])}') - - elif name == "detection": - # Save detection msg in the directory - self.msgs[seq][name] = msg - self.msgs[seq]["len"] = len(msg.detections) - # print(f'Added detection seq {seq}') - - elif name == "color": # color - # Save color frame in the directory - self.msgs[seq][name] = msg - # print(f'Added frame seq {seq}') - - - def get_msgs(self): - seq_remove = [] # Arr of sequence numbers to get deleted - - for seq, msgs in self.msgs.items(): - seq_remove.append(seq) # Will get removed from dict if we find synced msgs pair - - # Check if we have both detections and color frame with this sequence number - if "color" in msgs and "len" in msgs: - - # Check if all detected objects (faces) have finished recognition inference - if msgs["len"] == len(msgs["recognition"]): - # print(f"Synced msgs with sequence number {seq}", msgs) - - # We have synced msgs, remove previous msgs (memory cleaning) - for rm in seq_remove: - del self.msgs[rm] - - return msgs # Returned synced msgs - - return None # No synced msgs \ No newline at end of file diff --git a/gen2-head-posture-detection/main.py b/gen2-head-posture-detection/main.py index a1432aef0..b4b95b504 100644 --- a/gen2-head-posture-detection/main.py +++ b/gen2-head-posture-detection/main.py @@ -1,210 +1,57 @@ -from MultiMsgSync import TwoStageHostSeqSync -import blobconverter -import cv2 -import depthai as dai -from tools import * - -def create_pipeline(stereo): - pipeline = dai.Pipeline() - - print("Creating Color Camera...") - cam = pipeline.create(dai.node.ColorCamera) - cam.setPreviewSize(1080, 1080) - cam.setResolution(dai.ColorCameraProperties.SensorResolution.THE_1080_P) - cam.setInterleaved(False) - cam.setBoardSocket(dai.CameraBoardSocket.RGB) - - cam_xout = pipeline.create(dai.node.XLinkOut) - cam_xout.setStreamName("color") - cam.preview.link(cam_xout.input) - - # Workaround: remove in 2.18, use `cam.setPreviewNumFramesPool(10)` - # This manip uses 15*3.5 MB => 52 MB of RAM. 
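# Where the "15 * 3.5 MB => 52 MB" figure comes from: the 1080x1080 preview uses
# 3 bytes per pixel, so one frame is 1080 * 1080 * 3 = 3,499,200 bytes (the value
# passed to setMaxOutputFrameSize below), and a pool of 15 such frames takes roughly
# 15 * 3.5 MB, i.e. about 52 MB of device RAM.
frame_bytes = 1080 * 1080 * 3   # 3,499,200 bytes, ~3.5 MB
pool_bytes = 15 * frame_bytes   # 52,488,000 bytes, ~52 MB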
- copy_manip = pipeline.create(dai.node.ImageManip) - copy_manip.setNumFramesPool(15) - copy_manip.setMaxOutputFrameSize(3499200) - cam.preview.link(copy_manip.inputImage) - - # ImageManip will resize the frame before sending it to the Face detection NN node - face_det_manip = pipeline.create(dai.node.ImageManip) - face_det_manip.initialConfig.setResize(300, 300) - face_det_manip.initialConfig.setFrameType(dai.RawImgFrame.Type.RGB888p) - copy_manip.out.link(face_det_manip.inputImage) - - if stereo: - monoLeft = pipeline.create(dai.node.MonoCamera) - monoLeft.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P) - monoLeft.setBoardSocket(dai.CameraBoardSocket.LEFT) - - monoRight = pipeline.create(dai.node.MonoCamera) - monoRight.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P) - monoRight.setBoardSocket(dai.CameraBoardSocket.RIGHT) - - stereo = pipeline.create(dai.node.StereoDepth) - stereo.setDefaultProfilePreset(dai.node.StereoDepth.PresetMode.HIGH_DENSITY) - stereo.setDepthAlign(dai.CameraBoardSocket.RGB) - monoLeft.out.link(stereo.left) - monoRight.out.link(stereo.right) - - # Spatial Detection network if OAK-D - print("OAK-D detected, app will display spatial coordiantes") - face_det_nn = pipeline.create(dai.node.MobileNetSpatialDetectionNetwork) - face_det_nn.setBoundingBoxScaleFactor(0.8) - face_det_nn.setDepthLowerThreshold(100) - face_det_nn.setDepthUpperThreshold(5000) - stereo.depth.link(face_det_nn.inputDepth) - else: # Detection network if OAK-1 - print("OAK-1 detected, app won't display spatial coordiantes") - face_det_nn = pipeline.create(dai.node.MobileNetDetectionNetwork) - - face_det_nn.setConfidenceThreshold(0.5) - face_det_nn.setBlobPath(blobconverter.from_zoo(name="face-detection-retail-0004", shaves=6)) - face_det_manip.out.link(face_det_nn.input) - - # Send face detections to the host (for bounding boxes) - face_det_xout = pipeline.create(dai.node.XLinkOut) - face_det_xout.setStreamName("detection") - face_det_nn.out.link(face_det_xout.input) - - # Script node will take the output from the face detection NN as an input and set ImageManipConfig - # to the 'recognition_manip' to crop the initial frame - image_manip_script = pipeline.create(dai.node.Script) - face_det_nn.out.link(image_manip_script.inputs['face_det_in']) - - # Only send metadata, we are only interested in timestamp, so we can sync - # depth frames with NN output - face_det_nn.passthrough.link(image_manip_script.inputs['passthrough']) - copy_manip.out.link(image_manip_script.inputs['preview']) - - image_manip_script.setScript(""" - import time - msgs = dict() - - def add_msg(msg, name, seq = None): - global msgs - if seq is None: - seq = msg.getSequenceNum() - seq = str(seq) - # node.warn(f"New msg {name}, seq {seq}") - - # Each seq number has it's own dict of msgs - if seq not in msgs: - msgs[seq] = dict() - msgs[seq][name] = msg - - # To avoid freezing (not necessary for this ObjDet model) - if 15 < len(msgs): - node.warn(f"Removing first element! len {len(msgs)}") - msgs.popitem() # Remove first element - def get_msgs(): - global msgs - seq_remove = [] # Arr of sequence numbers to get deleted - for seq, syncMsgs in msgs.items(): - seq_remove.append(seq) # Will get removed from dict if we find synced msgs pair - # node.warn(f"Checking sync {seq}") - - # Check if we have both detections and color frame with this sequence number - if len(syncMsgs) == 2: # 1 frame, 1 detection - for rm in seq_remove: - del msgs[rm] - # node.warn(f"synced {seq}. Removed older sync values. 
len {len(msgs)}") - return syncMsgs # Returned synced msgs - return None - - def correct_bb(xmin,ymin,xmax,ymax): - if xmin < 0: xmin = 0.001 - if ymin < 0: ymin = 0.001 - if xmax > 1: xmax = 0.999 - if ymax > 1: ymax = 0.999 - return [xmin,ymin,xmax,ymax] - - while True: - time.sleep(0.001) # Avoid lazy looping - - preview = node.io['preview'].tryGet() - if preview is not None: - add_msg(preview, 'preview') - - face_dets = node.io['face_det_in'].tryGet() - if face_dets is not None: - # TODO: in 2.18.0.0 use face_dets.getSequenceNum() - passthrough = node.io['passthrough'].get() - seq = passthrough.getSequenceNum() - add_msg(face_dets, 'dets', seq) - - sync_msgs = get_msgs() - if sync_msgs is not None: - img = sync_msgs['preview'] - dets = sync_msgs['dets'] - for i, det in enumerate(dets.detections): - cfg = ImageManipConfig() - bb = correct_bb(det.xmin-0.03, det.ymin-0.03, det.xmax+0.03, det.ymax+0.03) - cfg.setCropRect(*bb) - # node.warn(f"Sending {i + 1}. det. Seq {seq}. Det {det.xmin}, {det.ymin}, {det.xmax}, {det.ymax}") - cfg.setResize(60, 60) - cfg.setKeepAspectRatio(False) - node.io['manip_cfg'].send(cfg) - node.io['manip_img'].send(img) - """) - recognition_manip = pipeline.create(dai.node.ImageManip) - recognition_manip.initialConfig.setResize(60, 60) - recognition_manip.setWaitForConfigInput(True) - image_manip_script.outputs['manip_cfg'].link(recognition_manip.inputConfig) - image_manip_script.outputs['manip_img'].link(recognition_manip.inputImage) - - # Second stange recognition NN - print("Creating recognition Neural Network...") - recognition_nn = pipeline.create(dai.node.NeuralNetwork) - recognition_nn.setBlobPath(blobconverter.from_zoo(name="head-pose-estimation-adas-0001", shaves=6)) - recognition_manip.out.link(recognition_nn.input) - - recognition_xout = pipeline.create(dai.node.XLinkOut) - recognition_xout.setStreamName("recognition") - recognition_nn.out.link(recognition_xout.input) - - return pipeline - -with dai.Device() as device: - stereo = 1 < len(device.getConnectedCameras()) - device.startPipeline(create_pipeline(stereo)) - - sync = TwoStageHostSeqSync() - queues = {} - # Create output queues - for name in ["color", "detection", "recognition"]: - queues[name] = device.getOutputQueue(name) - - while True: - for name, q in queues.items(): - # Add all msgs (color frames, object detections and recognitions) to the Sync class. 
- if q.has(): - sync.add_msg(q.get(), name) - - msgs = sync.get_msgs() - if msgs is not None: - frame = msgs["color"].getCvFrame() - detections = msgs["detection"].detections - recognitions = msgs["recognition"] - - for i, detection in enumerate(detections): - bbox = frame_norm(frame, (detection.xmin, detection.ymin, detection.xmax, detection.ymax)) - - # Decoding of recognition results - rec = recognitions[i] - yaw = rec.getLayerFp16('angle_y_fc')[0] - pitch = rec.getLayerFp16('angle_p_fc')[0] - roll = rec.getLayerFp16('angle_r_fc')[0] - decode_pose(yaw, pitch, roll, bbox, frame) - - cv2.rectangle(frame, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (10, 245, 10), 2) - y = (bbox[1] + bbox[3]) // 2 - if stereo: - # You could also get detection.spatialCoordinates.x and detection.spatialCoordinates.y coordinates - coords = "Z: {:.2f} m".format(detection.spatialCoordinates.z/1000) - cv2.putText(frame, coords, (bbox[0], y + 60), cv2.FONT_HERSHEY_TRIPLEX, 1, (0, 0, 0), 8) - cv2.putText(frame, coords, (bbox[0], y + 60), cv2.FONT_HERSHEY_TRIPLEX, 1, (255, 255, 255), 2) +from depthai_sdk import OakCamera, TwoStagePacket, AspectRatioResizeMode, TextPosition +import numpy as np +import cv2 +MIN_THRESHOLD = 15. # Degrees in yaw/pitch/roll to be considered as head movement + +with OakCamera() as oak: + color = oak.create_camera('color') + det = oak.create_nn('face-detection-retail-0004', color) + # Passthrough is enabled for debugging purposes + # AspectRatioResizeMode has to be CROP for 2-stage pipelines at the moment + det.config_nn(aspect_ratio_resize_mode=AspectRatioResizeMode.CROP) + + emotion_nn = oak.create_nn('head-pose-estimation-adas-0001', input=det) + # emotion_nn.config_multistage_nn(show_cropped_frames=True) # For debugging + + def cb(packet: TwoStagePacket, visualizer): + for det, rec in zip(packet.detections, packet.nnData): + yaw = rec.getLayerFp16('angle_y_fc')[0] + pitch = rec.getLayerFp16('angle_p_fc')[0] + roll = rec.getLayerFp16('angle_r_fc')[0] + print("pitch:{:.0f}, yaw:{:.0f}, roll:{:.0f}".format(pitch,yaw,roll)) + + vals = np.array([abs(pitch),abs(yaw),abs(roll)]) + max_index = np.argmax(vals) + + if vals[max_index] < MIN_THRESHOLD: continue + """ + pitch > 0 Head down, < 0 look up + yaw > 0 Turn right < 0 Turn left + roll > 0 Tilt right, < 0 Tilt left + """ + if max_index == 0: + if pitch > 0: txt = "Look down" + else: txt = "Look up" + elif max_index == 1: + if yaw > 0: txt = "Turn right" + else: txt = "Turn left" + elif max_index == 2: + if roll > 0: txt = "Tilt right" + else: txt = "Tilt left" + + visualizer.add_text(txt, + bbox=(*det.top_left, *det.bottom_right), + position=TextPosition.BOTTOM_MID) + + visualizer.draw(packet.frame) + cv2.imshow(packet.name, packet.frame) + + + # Visualize detections on the frame. Also display FPS on the frame. 
Don't show the frame but send the packet + # to the callback function (where it will be displayed) + oak.visualize(emotion_nn, callback=cb, fps=True) + oak.visualize(det.out.passthrough) + # oak.show_graph() # Show pipeline graph, no need for now + oak.start(blocking=True) # This call will block until the app is stopped (by pressing 'Q' button) - cv2.imshow("Camera", frame) - if cv2.waitKey(1) == ord('q'): - break diff --git a/gen2-head-posture-detection/requirements.txt b/gen2-head-posture-detection/requirements.txt index aeb3b888f..8a92e20d7 100644 --- a/gen2-head-posture-detection/requirements.txt +++ b/gen2-head-posture-detection/requirements.txt @@ -1,4 +1 @@ -opencv-python==4.5.1.48 -blobconverter==1.3.0 -depthai==2.17.0.0 -numpy \ No newline at end of file +depthai-sdk==1.9.1 \ No newline at end of file diff --git a/gen2-head-posture-detection/sdk/main.py b/gen2-head-posture-detection/sdk/main.py new file mode 100644 index 000000000..89bd33e6f --- /dev/null +++ b/gen2-head-posture-detection/sdk/main.py @@ -0,0 +1,70 @@ +import cv2 +import numpy as np +from depthai import NNData + +from depthai_sdk import OakCamera, AspectRatioResizeMode, TextPosition + +MIN_THRESHOLD = 0.15 + + +def decode_recognition(nn_data: NNData): + yaw = nn_data.getLayerFp16('angle_y_fc')[0] + pitch = nn_data.getLayerFp16('angle_p_fc')[0] + roll = nn_data.getLayerFp16('angle_r_fc')[0] + + vals = np.array([abs(pitch), abs(yaw), abs(roll)]) + max_index = np.argmax(vals) + + if vals[max_index] < MIN_THRESHOLD: + return None + + txt = '' + if max_index == 0: + if pitch > 0: + txt = 'Look down' + else: + txt = 'Look up' + elif max_index == 1: + if yaw > 0: + txt = 'Turn right' + else: + txt = 'Turn left' + elif max_index == 2: + if roll > 0: + txt = 'Tilt right' + else: + txt = 'Tilt left' + + return txt, (yaw, pitch, roll) + + +def callback(packet, visualizer): + for det, rec in zip(packet.detections, packet.nnData): + bbox = (*det.top_left, *det.bottom_right) + + if rec is None: + continue + + txt, (yaw, pitch, roll) = rec + + visualizer.add_text('Pitch: {:.0f}\nYaw: {:.0f}\nRoll: {:.0f}'.format(pitch, yaw, roll), + bbox=bbox, position=TextPosition.BOTTOM_LEFT) + + visualizer.add_text(f'{txt}', bbox=bbox, position=TextPosition.TOP_LEFT) + + frame = visualizer.draw(packet.frame) + cv2.imshow('Head posture detection', frame) + + +with OakCamera() as oak: + camera = oak.create_camera('rgb', resolution='1080p', fps=30) + + face_nn = oak.create_nn('face-detection-retail-0004', camera) + face_nn.config_nn(aspect_ratio_resize_mode=AspectRatioResizeMode.CROP) + + recognition_nn = oak.create_nn('head-pose-estimation-adas-0001', face_nn, decode_fn=decode_recognition) + + visualizer = oak.visualize([recognition_nn.out.main], callback=callback, fps=True) + visualizer.detections(hide_label=True) + + oak.start(blocking=True) diff --git a/gen2-head-posture-detection/sdk/requirements.txt b/gen2-head-posture-detection/sdk/requirements.txt new file mode 100644 index 000000000..4fb36def5 --- /dev/null +++ b/gen2-head-posture-detection/sdk/requirements.txt @@ -0,0 +1,5 @@ +opencv-python +blobconverter +depthai +depthai-sdk +numpy \ No newline at end of file diff --git a/gen2-head-posture-detection/tools.py b/gen2-head-posture-detection/tools.py deleted file mode 100644 index 170950ebd..000000000 --- a/gen2-head-posture-detection/tools.py +++ /dev/null @@ -1,43 +0,0 @@ -import numpy as np -import cv2 - -MIN_THRESHOLD = 15. 
# Degrees in yaw/pitch/roll to be considered as head movement - -bg_color = (0, 0, 0) -color = (255, 255, 255) -text_type = cv2.FONT_HERSHEY_SIMPLEX -line_type = cv2.LINE_AA -def putText(frame, text, coords, size=0.6, bold=False): - mult = 2 if bold else 1 - cv2.putText(frame, text, coords, text_type, size, bg_color, 3*mult, line_type) - cv2.putText(frame, text, coords, text_type, size, color, 1*mult, line_type) - - -def frame_norm(frame, bbox): - normVals = np.full(len(bbox), frame.shape[0]) - normVals[::2] = frame.shape[1] - return (np.clip(np.array(bbox), 0, 1) * normVals).astype(int) - -def decode_pose(yaw, pitch, roll, face_bbox, frame): - """ - pitch > 0 Head down, < 0 look up - yaw > 0 Turn right < 0 Turn left - roll > 0 Tilt right, < 0 Tilt left - """ - putText(frame,"pitch:{:.0f}, yaw:{:.0f}, roll:{:.0f}".format(pitch,yaw,roll),(face_bbox[0]+10-15,face_bbox[1]-15)) - - vals = np.array([abs(pitch),abs(yaw),abs(roll)]) - max_index = np.argmax(vals) - - if vals[max_index] < MIN_THRESHOLD: return - - if max_index == 0: - if pitch > 0: txt = "Look down" - else: txt = "Look up" - elif max_index == 1: - if yaw > 0: txt = "Turn right" - else: txt = "Turn left" - elif max_index == 2: - if roll > 0: txt = "Tilt right" - else: txt = "Tilt left" - putText(frame,txt, (face_bbox[0]+10,face_bbox[1]+30), size=1, bold=True) \ No newline at end of file diff --git a/gen2-human-pose/sdk/main.py b/gen2-human-pose/sdk/main.py new file mode 100644 index 000000000..9abccadb4 --- /dev/null +++ b/gen2-human-pose/sdk/main.py @@ -0,0 +1,9 @@ +from depthai_sdk import OakCamera + +with OakCamera(replay='../input.mp4') as oak: + color = oak.create_camera('color') + + human_pose_nn = oak.create_nn('human-pose-estimation-0001', color) + + oak.visualize(human_pose_nn, fps=True) + oak.start(blocking=True) diff --git a/gen2-human-pose/sdk/requirements.txt b/gen2-human-pose/sdk/requirements.txt new file mode 100644 index 000000000..8d38dbba3 --- /dev/null +++ b/gen2-human-pose/sdk/requirements.txt @@ -0,0 +1,4 @@ +opencv-python +depthai==2.16.0.0 +blobconverter==1.3.0 +depthai-sdk==1.0.1 \ No newline at end of file diff --git a/gen2-image-quality-assessment/sdk/main.py b/gen2-image-quality-assessment/sdk/main.py new file mode 100644 index 000000000..b185990d3 --- /dev/null +++ b/gen2-image-quality-assessment/sdk/main.py @@ -0,0 +1,83 @@ +import blobconverter +import cv2 +import numpy as np + +from depthai_sdk import OakCamera + +NN_WIDTH = 256 +NN_HEIGHT = 256 + +N_CLASS_FRAMES_THRESHOLD = 10 + + +def softmax(x): + e_x = np.exp(x - np.max(x)) + return e_x / e_x.sum(axis=0) + + +class ClassSmoothFilter: + def __init__(self, classes, n_frames_threshold=10): + self.classes = classes + self.n_frames_threshold = n_frames_threshold + self.classes_frame_counter = { + class_: 0 for class_ in classes.values() + } + self.current_class = None + + def init_classes_frame_counter(self): + self.classes_frame_counter = { + class_: 0 for class_ in classes.values() + } + + def update(self, classes_prob): + top_class_idx = np.argmax(classes_prob) + top_class = self.classes[top_class_idx] + + self.classes_frame_counter[top_class] += 1 + + if self.classes_frame_counter[top_class] >= self.n_frames_threshold: + self.current_class = top_class + self.init_classes_frame_counter() + + +classes = { + 0: "Clean", + 1: "Blur", + 2: "Occlusion", + 3: "Bright" +} +class_filter = ClassSmoothFilter( + classes=classes, + n_frames_threshold=N_CLASS_FRAMES_THRESHOLD +) + + +def callback(packet, visualizer): + frame = packet.frame + nn_data = 
packet.img_detections + + detected_classes = np.array(nn_data.getData()).view(np.float16) + classes_prob = softmax(detected_classes) + detected_class = np.argmax(classes_prob) + + class_filter.update(classes_prob=classes_prob) + label = class_filter.current_class + + cv2.putText(frame, label, (10, 90), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 3) + cv2.putText(frame, " : ".join([class_name for class_name in classes.values()]), + (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (255, 255, 255), 2) + cv2.putText(frame, " : ".join([f"{class_prob:.2f}" for class_prob in classes_prob]), + (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (255, 255, 255), 2) + + cv2.imshow('Image Quality Assessment', frame) + + +with OakCamera() as oak: + color = oak.create_camera('color') + + nn_path = blobconverter.from_zoo(name='image_quality_assessment_256x256', shaves=6, zoo_type='depthai') + nn = oak.create_nn(nn_path, color) + nn.config_nn(aspect_ratio_resize_mode='crop') + + oak.callback(nn, callback=callback) + oak.start(blocking=True) diff --git a/gen2-image-quality-assessment/sdk/requirements.txt b/gen2-image-quality-assessment/sdk/requirements.txt new file mode 100644 index 000000000..8e25ba7bc --- /dev/null +++ b/gen2-image-quality-assessment/sdk/requirements.txt @@ -0,0 +1,4 @@ +opencv-python +blobconverter +depthai +depthai-sdk \ No newline at end of file diff --git a/gen2-lanenet/sdk/main.py b/gen2-lanenet/sdk/main.py new file mode 100644 index 000000000..ff2155d16 --- /dev/null +++ b/gen2-lanenet/sdk/main.py @@ -0,0 +1,95 @@ +import cv2 +import numpy as np +from sklearn.cluster import DBSCAN + +from depthai_sdk import OakCamera + +# resize input to smaller size for faster inference +NN_WIDTH, NN_HEIGHT = 512, 256 + +# set max clusters for color output (number of lines + a few more due to errors in fast postprocessing) +MAX_CLUSTERS = 6 + + +# perform postprocessing - clustering of line embeddings using DBSCAN +# note that this is not whole postprocessing, just a quick implementation to show what is possible +# if you want to use whole postprocessing please refer to the LaneNet paper: https://arxiv.org/abs/1802.05591 +def cluster_outputs(binary_seg_ret, instance_seg_ret): + # create mask from binary output + mask = binary_seg_ret.copy() + mask = mask.astype(bool) + + # mask out embeddings + embeddings = instance_seg_ret.copy() + embeddings = np.transpose(embeddings, (1, 0)) + embeddings_masked = embeddings[mask] + + # sort so same classes are sorted first each time and generate inverse sort + # works only if new lanes are added on the right side + idx = np.lexsort(np.transpose(embeddings_masked)[::-1]) + idx_inverse = np.empty_like(idx) + idx_inverse[idx] = np.arange(idx.size) + embeddings_masked = embeddings_masked[idx] + + # cluster embeddings with DBSCAN + clustering = DBSCAN(eps=0.4, min_samples=500, algorithm="kd_tree").fit(embeddings_masked) + + # unsort so pixels match their positions again + clustering_labels = clustering.labels_[idx_inverse] + + # create an array of masked clusters + clusters = np.zeros((NN_WIDTH * NN_HEIGHT,)) + clusters[mask] = clustering_labels + 1 + clusters = clusters.reshape((NN_HEIGHT, NN_WIDTH)) + + return clusters + + +# create overlay from cluster_outputs +def create_overlay(cluster_outputs): + output = np.array(cluster_outputs) * (255 / MAX_CLUSTERS) # multiply to get classes between 0 and 255 + output = output.astype(np.uint8) + output_colors = cv2.applyColorMap(output, cv2.COLORMAP_JET) + output_colors[output == 0] = [0, 0, 0] + return output_colors + + +# merge 2 frames 
together +def show_output(overlay, frame): + return cv2.addWeighted(frame, 1, overlay, 0.8, 0) + + +def to_planar(arr: np.ndarray, shape: tuple) -> np.ndarray: + return cv2.resize(arr, shape).transpose(2, 0, 1).flatten() + + +def callback(packet, visualizer): + nn_data = packet.img_detections + + # first layer is a binary segmentation mask of lanes + out1 = np.array(nn_data.getLayerInt32("LaneNet/bisenetv2_backend/binary_seg/ArgMax/Squeeze")) + out1 = out1.astype(np.uint8) + + # second layer is an array of embeddings of dimension 4 for each pixel in the input + out2 = np.array(nn_data.getLayerFp16("LaneNet/bisenetv2_backend/instance_seg/pix_embedding_conv/Conv2D")) + out2 = out2.reshape((4, NN_WIDTH * NN_HEIGHT)) + + # cluster outputs + clusters = cluster_outputs(out1, out2) + + overlay = create_overlay(clusters) + overlay = cv2.resize(overlay, (packet.frame.shape[1], packet.frame.shape[0])) + + # add overlay + combined_frame = show_output(overlay, packet.frame) + cv2.imshow("Lane prediction", combined_frame) + + +with OakCamera(replay='../vids/vid1.mp4') as oak: + color = oak.create_camera('color', resolution='1080p') + + nn = oak.create_nn('../models/lanenet_openvino_2021.4_6shave.blob', color) + nn.config_nn(aspect_ratio_resize_mode='stretch') + + oak.callback(nn, callback=callback) + oak.start(blocking=True) diff --git a/gen2-lanenet/sdk/requirements.txt b/gen2-lanenet/sdk/requirements.txt new file mode 100644 index 000000000..5473fee45 --- /dev/null +++ b/gen2-lanenet/sdk/requirements.txt @@ -0,0 +1,6 @@ +opencv-python +depthai +depthai-sdk +gdown +numpy +scikit-learn \ No newline at end of file diff --git a/gen2-license-plate-recognition/sdk/main.py b/gen2-license-plate-recognition/sdk/main.py new file mode 100755 index 000000000..9430f232c --- /dev/null +++ b/gen2-license-plate-recognition/sdk/main.py @@ -0,0 +1,62 @@ +from collections import deque + +import blobconverter +import cv2 +import numpy as np +from depthai import NNData + +from depthai_sdk import OakCamera, AspectRatioResizeMode +from depthai_sdk.oak_outputs.normalize_bb import NormalizeBoundingBox + +items = ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "", "", "", "", "", + "", "", "", "", "", "", "", "", + "", "", "", "", "", "", "", "", + "", "", "", "", "", "", "", "", + "", "", "", "", "", "A", "B", "C", "D", "E", "F", "G", "H", "I", + "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z"] + + +def decode_recognition(nn_data: NNData): + rec_data = nn_data.getFirstLayerInt32() + last_idx = rec_data.index(-1) + decoded_text = ''.join([items[int(i)] for i in rec_data[:last_idx]]) + return decoded_text + + +plates = deque(maxlen=10) + + +def callback(packet, visualizer): + frame = packet.frame + + for det in packet.img_detections.detections: + bbox = det.xmin, det.ymin, det.xmax, det.ymax + bbox = NormalizeBoundingBox((300, 300), AspectRatioResizeMode.STRETCH).normalize(packet.frame, bbox) + plates.append(cv2.resize(frame[int(bbox[1]):int(bbox[3]), int(bbox[0]):int(bbox[2])], (94, 24))) + cv2.rectangle(packet.frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (0, 255, 0), 2) + + try: + for nn_data in packet.nnData: + print(nn_data) + except: + pass + + if len(plates) > 0: + cv2.imshow('Plates', np.vstack(plates)) + + cv2.imshow('Detections', packet.frame) + + +with OakCamera(replay='../chinese_traffic.mp4') as oak: + color = oak.create_camera('color', resolution='1080p', fps=30) + + det_nn_path = 
blobconverter.from_zoo(name='vehicle-license-plate-detection-barrier-0106', shaves=7, version='2021.4') + det = oak.create_nn(det_nn_path, color, nn_type='mobilenet') + det.config_nn(aspect_ratio_resize_mode=AspectRatioResizeMode.STRETCH) + + lp_rec_path = blobconverter.from_zoo(name='license-plate-recognition-barrier-0007', shaves=7, version='2021.4') + lp_rec = oak.create_nn(lp_rec_path, det, decode_fn=decode_recognition) + lp_rec.config_multistage_nn(labels=[2]) # Enable only license plates + + oak.visualize([lp_rec.out.main], callback=callback, fps=True) + oak.start(blocking=True) diff --git a/gen2-license-plate-recognition/sdk/requirements.txt b/gen2-license-plate-recognition/sdk/requirements.txt new file mode 100644 index 000000000..66f40f96b --- /dev/null +++ b/gen2-license-plate-recognition/sdk/requirements.txt @@ -0,0 +1,4 @@ +opencv-python +depthai +blobconverter +depthai-sdk diff --git a/gen2-lossless-zooming/sdk/main.py b/gen2-lossless-zooming/sdk/main.py new file mode 100644 index 000000000..7d7923f5b --- /dev/null +++ b/gen2-lossless-zooming/sdk/main.py @@ -0,0 +1,67 @@ +import cv2 +import numpy as np + +from depthai_sdk import OakCamera + +# Constants +ORIGINAL_SIZE = (3840, 2160) # 4K +SCENE_SIZE = (1920, 1080) # 1080P +AVG_MAX_NUM = 7 + +x_arr = [] +y_arr = [] + +limits = [SCENE_SIZE[0] // 2, SCENE_SIZE[1] // 2] # xmin and ymin limits +limits.append(ORIGINAL_SIZE[0] - limits[0]) # xmax limit +limits.append(ORIGINAL_SIZE[1] - limits[1]) # ymax limit + + +def average_filter(x, y): + x_arr.append(x) + y_arr.append(y) + if AVG_MAX_NUM < len(x_arr): x_arr.pop(0) + if AVG_MAX_NUM < len(y_arr): y_arr.pop(0) + x_avg = 0 + y_avg = 0 + for i in range(len(x_arr)): + x_avg += x_arr[i] + y_avg += y_arr[i] + x_avg = x_avg / len(x_arr) + y_avg = y_avg / len(y_arr) + if x_avg < limits[0]: x_avg = limits[0] + if y_avg < limits[1]: y_avg = limits[1] + if limits[2] < x_avg: x_avg = limits[2] + if limits[3] < y_avg: y_avg = limits[3] + return x_avg, y_avg + + +def callback(packet, visualizer): + if len(packet.img_detections.detections) == 0: + return + + coords = packet.img_detections.detections[0] + + # Get detection center + x = (coords.xmin + coords.xmax) / 2 * ORIGINAL_SIZE[0] + y = (coords.ymin + coords.ymax) / 2 * ORIGINAL_SIZE[1] + 100 + + x_avg, y_avg = average_filter(x, y) + + bbox = [x_avg - SCENE_SIZE[0] // 2, y_avg - SCENE_SIZE[1] // 2, + x_avg + SCENE_SIZE[0] // 2, y_avg + SCENE_SIZE[1] // 2] + bbox = np.int0(bbox) + + roi_frame = packet.frame[bbox[1]:bbox[3], bbox[0]:bbox[2]] + cv2.imshow('Lossless zooming', roi_frame) + + +with OakCamera() as oak: + color = oak.create_camera('color', resolution='4K') + color.config_color_camera(interleaved=False) + + nn = oak.create_nn('face-detection-retail-0004', color) + nn.config_nn(conf_threshold=0.7) + nn.config_nn(aspect_ratio_resize_mode='stretch') + + oak.callback(nn.out.main, callback=callback) + oak.start(blocking=True) diff --git a/gen2-lossless-zooming/sdk/requirements.txt b/gen2-lossless-zooming/sdk/requirements.txt new file mode 100644 index 000000000..8e25ba7bc --- /dev/null +++ b/gen2-lossless-zooming/sdk/requirements.txt @@ -0,0 +1,4 @@ +opencv-python +blobconverter +depthai +depthai-sdk \ No newline at end of file diff --git a/gen2-mask-detection/main.py b/gen2-mask-detection/main.py index 89a978c4e..598571936 100644 --- a/gen2-mask-detection/main.py +++ b/gen2-mask-detection/main.py @@ -1,225 +1,41 @@ -from MultiMsgSync import TwoStageHostSeqSync -import blobconverter -import cv2 -import depthai as dai +from depthai_sdk 
import OakCamera, TwoStagePacket, AspectRatioResizeMode, Visualizer, TextPosition, BboxStyle import numpy as np +import cv2 def log_softmax(x): e_x = np.exp(x - np.max(x)) return np.log(e_x / e_x.sum()) -def frame_norm(frame, bbox): - normVals = np.full(len(bbox), frame.shape[0]) - normVals[::2] = frame.shape[1] - return (np.clip(np.array(bbox), 0, 1) * normVals).astype(int) - -def create_pipeline(stereo): - pipeline = dai.Pipeline() - - print("Creating Color Camera...") - cam = pipeline.create(dai.node.ColorCamera) - cam.setPreviewSize(1080, 1080) - cam.setResolution(dai.ColorCameraProperties.SensorResolution.THE_1080_P) - cam.setInterleaved(False) - cam.setBoardSocket(dai.CameraBoardSocket.RGB) - - cam_xout = pipeline.create(dai.node.XLinkOut) - cam_xout.setStreamName("color") - cam.preview.link(cam_xout.input) - - # Workaround: remove in 2.18, use `cam.setPreviewNumFramesPool(10)` - # This manip uses 15*3.5 MB => 52 MB of RAM. - copy_manip = pipeline.create(dai.node.ImageManip) - copy_manip.setNumFramesPool(15) - copy_manip.setMaxOutputFrameSize(3499200) - cam.preview.link(copy_manip.inputImage) - - # ImageManip will resize the frame before sending it to the Face detection NN node - face_det_manip = pipeline.create(dai.node.ImageManip) - face_det_manip.initialConfig.setResize(300, 300) - face_det_manip.initialConfig.setFrameType(dai.RawImgFrame.Type.RGB888p) - copy_manip.out.link(face_det_manip.inputImage) - - if stereo: - monoLeft = pipeline.create(dai.node.MonoCamera) - monoLeft.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P) - monoLeft.setBoardSocket(dai.CameraBoardSocket.LEFT) - - monoRight = pipeline.create(dai.node.MonoCamera) - monoRight.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P) - monoRight.setBoardSocket(dai.CameraBoardSocket.RIGHT) - - stereo = pipeline.create(dai.node.StereoDepth) - stereo.setDefaultProfilePreset(dai.node.StereoDepth.PresetMode.HIGH_DENSITY) - stereo.setDepthAlign(dai.CameraBoardSocket.RGB) - monoLeft.out.link(stereo.left) - monoRight.out.link(stereo.right) - - # Spatial Detection network if OAK-D - print("OAK-D detected, app will display spatial coordiantes") - face_det_nn = pipeline.create(dai.node.MobileNetSpatialDetectionNetwork) - face_det_nn.setBoundingBoxScaleFactor(0.8) - face_det_nn.setDepthLowerThreshold(100) - face_det_nn.setDepthUpperThreshold(5000) - stereo.depth.link(face_det_nn.inputDepth) - else: # Detection network if OAK-1 - print("OAK-1 detected, app won't display spatial coordiantes") - face_det_nn = pipeline.create(dai.node.MobileNetDetectionNetwork) - - face_det_nn.setConfidenceThreshold(0.5) - face_det_nn.setBlobPath(blobconverter.from_zoo(name="face-detection-retail-0004", shaves=6)) - face_det_manip.out.link(face_det_nn.input) - - # Send face detections to the host (for bounding boxes) - face_det_xout = pipeline.create(dai.node.XLinkOut) - face_det_xout.setStreamName("detection") - face_det_nn.out.link(face_det_xout.input) - - # Script node will take the output from the face detection NN as an input and set ImageManipConfig - # to the 'recognition_manip' to crop the initial frame - image_manip_script = pipeline.create(dai.node.Script) - face_det_nn.out.link(image_manip_script.inputs['face_det_in']) - - # Only send metadata, we are only interested in timestamp, so we can sync - # depth frames with NN output - face_det_nn.passthrough.link(image_manip_script.inputs['passthrough']) - - copy_manip.out.link(image_manip_script.inputs['preview']) - - image_manip_script.setScript(""" - import time - 
msgs = dict() - - def add_msg(msg, name, seq = None): - global msgs - if seq is None: - seq = msg.getSequenceNum() - seq = str(seq) - # node.warn(f"New msg {name}, seq {seq}") - - # Each seq number has it's own dict of msgs - if seq not in msgs: - msgs[seq] = dict() - msgs[seq][name] = msg - - # To avoid freezing (not necessary for this ObjDet model) - if 15 < len(msgs): - node.warn(f"Removing first element! len {len(msgs)}") - msgs.popitem() # Remove first element - - def get_msgs(): - global msgs - seq_remove = [] # Arr of sequence numbers to get deleted - for seq, syncMsgs in msgs.items(): - seq_remove.append(seq) # Will get removed from dict if we find synced msgs pair - # node.warn(f"Checking sync {seq}") - - # Check if we have both detections and color frame with this sequence number - if len(syncMsgs) == 2: # 1 frame, 1 detection - for rm in seq_remove: - del msgs[rm] - # node.warn(f"synced {seq}. Removed older sync values. len {len(msgs)}") - return syncMsgs # Returned synced msgs - return None - - def correct_bb(bb): - if bb.xmin < 0: bb.xmin = 0.001 - if bb.ymin < 0: bb.ymin = 0.001 - if bb.xmax > 1: bb.xmax = 0.999 - if bb.ymax > 1: bb.ymax = 0.999 - return bb - - while True: - time.sleep(0.001) # Avoid lazy looping - - preview = node.io['preview'].tryGet() - if preview is not None: - add_msg(preview, 'preview') - - face_dets = node.io['face_det_in'].tryGet() - if face_dets is not None: - # TODO: in 2.18.0.0 use face_dets.getSequenceNum() - passthrough = node.io['passthrough'].get() - seq = passthrough.getSequenceNum() - add_msg(face_dets, 'dets', seq) - - sync_msgs = get_msgs() - if sync_msgs is not None: - img = sync_msgs['preview'] - dets = sync_msgs['dets'] - for i, det in enumerate(dets.detections): - cfg = ImageManipConfig() - correct_bb(det) - cfg.setCropRect(det.xmin, det.ymin, det.xmax, det.ymax) - # node.warn(f"Sending {i + 1}. det. Seq {seq}. 
Det {det.xmin}, {det.ymin}, {det.xmax}, {det.ymax}") - cfg.setResize(224, 224) - cfg.setKeepAspectRatio(False) - node.io['manip_cfg'].send(cfg) - node.io['manip_img'].send(img) - """) - - recognition_manip = pipeline.create(dai.node.ImageManip) - recognition_manip.initialConfig.setResize(224, 224) - recognition_manip.setWaitForConfigInput(True) - image_manip_script.outputs['manip_cfg'].link(recognition_manip.inputConfig) - image_manip_script.outputs['manip_img'].link(recognition_manip.inputImage) - - # Second stange recognition NN - print("Creating recognition Neural Network...") - recognition_nn = pipeline.create(dai.node.NeuralNetwork) - recognition_nn.setBlobPath(blobconverter.from_zoo(name="sbd_mask_classification_224x224", zoo_type="depthai", shaves=6)) - recognition_manip.out.link(recognition_nn.input) - - recognition_xout = pipeline.create(dai.node.XLinkOut) - recognition_xout.setStreamName("recognition") - recognition_nn.out.link(recognition_xout.input) - - return pipeline - -with dai.Device() as device: - stereo = 1 < len(device.getConnectedCameras()) - device.startPipeline(create_pipeline(stereo)) +with OakCamera() as oak: + color = oak.create_camera('color') - sync = TwoStageHostSeqSync() - queues = {} - # Create output queues - for name in ["color", "detection", "recognition"]: - queues[name] = device.getOutputQueue(name) + det = oak.create_nn('face-detection-retail-0004', color) + # AspectRatioResizeMode has to be CROP for 2-stage pipelines at the moment + det.config_nn(aspect_ratio_resize_mode=AspectRatioResizeMode.CROP) - while True: - for name, q in queues.items(): - # Add all msgs (color frames, object detections and recognitions) to the Sync class. - if q.has(): - sync.add_msg(q.get(), name) + mask = oak.create_nn('sbd_mask_classification_224x224', input=det) + # mask.config_multistage_nn(show_cropped_frames=True) # For debugging - msgs = sync.get_msgs() - if msgs is not None: - frame = msgs["color"].getCvFrame() - detections = msgs["detection"].detections - recognitions = msgs["recognition"] + def cb(packet: TwoStagePacket, visualizer: Visualizer): + for det, rec in zip(packet.detections, packet.nnData): + index = np.argmax(log_softmax(rec.getFirstLayerFp16())) + text = "No Mask" + color = (0,0,255) # Red + if index == 1: + text = "Mask" + color = (0,255,0) - for i, detection in enumerate(detections): - bbox = frame_norm(frame, (detection.xmin, detection.ymin, detection.xmax, detection.ymax)) + visualizer.add_text(text, + bbox=(*det.top_left, *det.bottom_right), + position=TextPosition.BOTTOM_MID) - # Decoding of recognition results - rec = recognitions[i].getFirstLayerFp16() - index = np.argmax(log_softmax(rec)) - text = "No Mask" - color = (0,0,255) # Red - if index == 1: - text = "Mask" - color = (0,255,0) + frame = visualizer.draw(packet.frame) + cv2.imshow('Mask detection', frame) - cv2.rectangle(frame, (bbox[0], bbox[1]), (bbox[2], bbox[3]), color, 3) - y = (bbox[1] + bbox[3]) // 2 - cv2.putText(frame, text, (bbox[0], y), cv2.FONT_HERSHEY_TRIPLEX, 1.5, (0, 0, 0), 8) - cv2.putText(frame, text, (bbox[0], y), cv2.FONT_HERSHEY_TRIPLEX, 1.5, (255, 255, 255), 2) - if stereo: - # You could also get detection.spatialCoordinates.x and detection.spatialCoordinates.y coordinates - coords = "Z: {:.2f} m".format(detection.spatialCoordinates.z/1000) - cv2.putText(frame, coords, (bbox[0], y + 60), cv2.FONT_HERSHEY_TRIPLEX, 1, (0, 0, 0), 8) - cv2.putText(frame, coords, (bbox[0], y + 60), cv2.FONT_HERSHEY_TRIPLEX, 1, (255, 255, 255), 2) + # Visualize detections on the frame. 
Don't show the frame but send the packet + # to the callback function (where it will be displayed) + oak.visualize(mask, callback=cb).detections(fill_transparency=0.1) + oak.visualize(det.out.passthrough) - cv2.imshow("Camera", frame) - if cv2.waitKey(1) == ord('q'): - break + # oak.show_graph() # Show pipeline graph, no need for now + oak.start(blocking=True) # This call will block until the app is stopped (by pressing 'Q' button) diff --git a/gen2-mask-detection/requirements.txt b/gen2-mask-detection/requirements.txt index 708ec656f..9d0665d44 100644 --- a/gen2-mask-detection/requirements.txt +++ b/gen2-mask-detection/requirements.txt @@ -1,4 +1,4 @@ opencv-python -blobconverter==1.3.0 -depthai==2.17.0.0 +blobconverter +depthai-sdk numpy \ No newline at end of file diff --git a/gen2-mask-detection/sdk/main.py b/gen2-mask-detection/sdk/main.py new file mode 100644 index 000000000..963322097 --- /dev/null +++ b/gen2-mask-detection/sdk/main.py @@ -0,0 +1,29 @@ +import cv2 +import numpy as np + +from depthai_sdk import OakCamera, AspectRatioResizeMode, TwoStagePacket, Visualizer, TextPosition + + +def callback(packet: TwoStagePacket, visualizer: Visualizer): + for det, rec in zip(packet.detections, packet.nnData): + has_mask = np.argmax(rec.getFirstLayerFp16()) + mask_str = "Mask" if has_mask else "No mask" + + visualizer.add_text(f'{mask_str}', + bbox=(*det.top_left, *det.bottom_right), + position=TextPosition.BOTTOM_RIGHT) + + frame = visualizer.draw(packet.frame) + cv2.imshow('Mask detection', frame) + + +with OakCamera() as oak: + camera = oak.create_camera('rgb', resolution='1080p', fps=30) + + face_nn = oak.create_nn('face-detection-retail-0004', camera) + face_nn.config_nn(aspect_ratio_resize_mode=AspectRatioResizeMode.CROP) + + recognition_nn = oak.create_nn('../models/sbd_mask_classification_224x224.blob', face_nn) + + oak.visualize([recognition_nn.out.main], callback=callback, fps=True) + oak.start(blocking=True) diff --git a/gen2-mask-detection/sdk/requirements.txt b/gen2-mask-detection/sdk/requirements.txt new file mode 100644 index 000000000..708ec656f --- /dev/null +++ b/gen2-mask-detection/sdk/requirements.txt @@ -0,0 +1,4 @@ +opencv-python +blobconverter==1.3.0 +depthai==2.17.0.0 +numpy \ No newline at end of file diff --git a/gen2-maskrcnn-resnet50/requirements.txt b/gen2-maskrcnn-resnet50/requirements.txt index bc488b0ad..bb652a32f 100644 --- a/gen2-maskrcnn-resnet50/requirements.txt +++ b/gen2-maskrcnn-resnet50/requirements.txt @@ -1,4 +1,5 @@ opencv-python numpy -depthai==2.16.0.0 -blobconverter>=1.2.9 \ No newline at end of file +depthai +depthai-sdk +blobconverter \ No newline at end of file diff --git a/gen2-maskrcnn-resnet50/sdk/main.py b/gen2-maskrcnn-resnet50/sdk/main.py new file mode 100644 index 000000000..194c40eed --- /dev/null +++ b/gen2-maskrcnn-resnet50/sdk/main.py @@ -0,0 +1,102 @@ +import blobconverter +import cv2 +import depthai as dai +import numpy as np + +from depthai_sdk import OakCamera, AspectRatioResizeMode +from depthai_sdk.oak_outputs.normalize_bb import NormalizeBoundingBox + +NN_SHAPE = 300, 300 +COLORS = np.random.random(size=(256, 3)) * 256 +THRESHOLD = 0.5 +REGION_THRESHOLD = 0.5 +LABEL_MAP = [ + "person", "bicycle", "car", "motorbike", "aeroplane", "bus", "train", + "truck", "boat", "traffic light", "fire hydrant", "street sign", "stop sign", "parking meter", + "bench", "bird", "cat", "dog", "horse", "sheep", "cow", + "elephant", "bear", "zebra", "giraffe", "hat", "backpack", "umbrella", + "shoe", "eye glasses", "handbag", "tie", "suitcase", 
"frisbee", "skis", + "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard", + "tennis racket", "bottle", "plate", "wine glass", "cup", "fork", "knife", + "spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli", + "carrot", "hot dog", "pizza", "donut", "cake", "chair", "sofa", + "pottedplant", "bed", "mirror", "diningtable", "window", "desk", "toilet", + "door", "tvmonitor", "laptop", "mouse", "remote", "keyboard", "cell phone", + "microwave", "oven", "toaster", "sink", "refrigerator", "blender", "book", + "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush" +] + + +def to_tensor(packet): + """ + Converts NN packet to dict, with each key being output tensor name and each value being correctly reshaped and converted results array + + Useful as a first step of processing NN results for custom neural networks + + Args: + packet (depthai.NNData): Packet returned from NN node + + Returns: + dict: Dict containing prepared output tensors + """ + data = {} + for tensor in packet.getRaw().tensors: + if tensor.dataType == dai.TensorInfo.DataType.INT: + data[tensor.name] = np.array(packet.getLayerInt32(tensor.name)).reshape(tensor.dims) + elif tensor.dataType == dai.TensorInfo.DataType.FP16: + data[tensor.name] = np.array(packet.getLayerFp16(tensor.name)).reshape(tensor.dims) + elif tensor.dataType == dai.TensorInfo.DataType.I8: + data[tensor.name] = np.array(packet.getLayerUInt8(tensor.name)).reshape(tensor.dims) + else: + print("Unsupported tensor layer type: {}".format(tensor.dataType)) + return data + + +def callback(packet, visualizer): + frame = packet.frame + nn_data = packet.img_detections + + tensors = to_tensor(nn_data) + + boxes = np.squeeze(tensors["DetectionOutput_647"]) + masks = tensors["Sigmoid_733"] + for i, box in enumerate(boxes): + if box[0] == -1: + break + + cls = int(box[1]) + prob = box[2] + + if prob < THRESHOLD: + continue + + bbox = NormalizeBoundingBox(NN_SHAPE, ar_resize_mode=AspectRatioResizeMode.LETTERBOX).normalize(frame, box[-4:]) + cv2.rectangle(frame, (bbox[0], bbox[1] - 15), (bbox[2], bbox[1]), COLORS[cls], -1) + cv2.rectangle(frame, (bbox[0], bbox[1]), (bbox[2], bbox[3]), COLORS[cls], 1) + cv2.putText(frame, f"{LABEL_MAP[cls - 1]}: {prob:.2f}", (bbox[0] + 5, bbox[1] - 5), + cv2.FONT_HERSHEY_DUPLEX, 0.3, (0, 0, 0), 2) + cv2.putText(frame, f"{LABEL_MAP[cls - 1]}: {prob:.2f}", (bbox[0] + 5, bbox[1] - 5), + cv2.FONT_HERSHEY_DUPLEX, 0.3, (255, 255, 255), 1) + + bbox_w = bbox[2] - bbox[0] + bbox_h = bbox[3] - bbox[1] + + mask = cv2.resize(masks[i, cls], (bbox_w, bbox_h)) + mask = mask > REGION_THRESHOLD + + roi = frame[bbox[1]:bbox[3], bbox[0]:bbox[2]] + roi[mask] = roi[mask] * 0.6 + COLORS[cls] * 0.4 + frame[bbox[1]:bbox[3], bbox[0]:bbox[2]] = roi + + cv2.imshow('Mask-RCNN ResNet-50', frame) + + +with OakCamera() as oak: + color = oak.create_camera('color', resolution='1080p') + color.config_color_camera(color_order=dai.ColorCameraProperties.ColorOrder.RGB) + + nn_path = blobconverter.from_zoo("mask_rcnn_resnet50_coco_300x300", shaves=6, zoo_type="depthai") + nn = oak.create_nn(nn_path, color) + + oak.callback(nn, callback=callback) + oak.start(blocking=True) diff --git a/gen2-maskrcnn-resnet50/sdk/requirements.txt b/gen2-maskrcnn-resnet50/sdk/requirements.txt new file mode 100644 index 000000000..bb652a32f --- /dev/null +++ b/gen2-maskrcnn-resnet50/sdk/requirements.txt @@ -0,0 +1,5 @@ +opencv-python +numpy +depthai +depthai-sdk +blobconverter \ No newline at end of file diff --git 
a/gen2-mega-depth/sdk/main.py b/gen2-mega-depth/sdk/main.py new file mode 100644 index 000000000..425243a89 --- /dev/null +++ b/gen2-mega-depth/sdk/main.py @@ -0,0 +1,42 @@ +import cv2 +import numpy as np + +from depthai_sdk import OakCamera, AspectRatioResizeMode + +NN_WIDTH, NN_HEIGHT = 256, 192 + + +def callback(packet, visualizer): + nn_data = packet.img_detections + + pred = np.array(nn_data.getFirstLayerFp16()).reshape((NN_HEIGHT, NN_WIDTH)) + + # Scale depth to get relative depth + d_min = np.min(pred) + d_max = np.max(pred) + depth_relative = (pred - d_min) / (d_max - d_min) + + # Color it + depth_relative = np.array(depth_relative) * 255 + depth_relative = depth_relative.astype(np.uint8) + depth_relative = 255 - depth_relative + depth_relative = cv2.applyColorMap(depth_relative, cv2.COLORMAP_INFERNO) + + target_width = int(1920) + target_height = int(NN_HEIGHT * (1920 / NN_HEIGHT) / (16 / 9)) + + frame = cv2.resize(packet.frame, (target_width, target_height)) + depth_relative = cv2.resize(depth_relative, (target_width, target_height)) + + cv2.imshow('Mega-depth', cv2.hconcat([frame, depth_relative])) + + +with OakCamera() as oak: + color = oak.create_camera('color', resolution='1080p') + + nn_path = '../models/megadepth_192x256_openvino_2021.4_6shave.blob' + nn = oak.create_nn(nn_path, color) + nn.config_nn(aspect_ratio_resize_mode=AspectRatioResizeMode.STRETCH) + + oak.callback(nn, callback=callback) + oak.start(blocking=True) diff --git a/gen2-mega-depth/sdk/requirements.txt b/gen2-mega-depth/sdk/requirements.txt new file mode 100644 index 000000000..2ddc725bd --- /dev/null +++ b/gen2-mega-depth/sdk/requirements.txt @@ -0,0 +1,4 @@ +opencv-python~=4.5.1.48 +depthai +depthai-sdk +numpy~=1.19.5 \ No newline at end of file diff --git a/gen2-mjpeg-streaming/sdk/main.py b/gen2-mjpeg-streaming/sdk/main.py new file mode 100644 index 000000000..e7020dd4f --- /dev/null +++ b/gen2-mjpeg-streaming/sdk/main.py @@ -0,0 +1,89 @@ +import socketserver +import threading +from http.server import BaseHTTPRequestHandler, HTTPServer +from io import BytesIO +from socketserver import ThreadingMixIn +from time import sleep + +import cv2 +from PIL import Image + +from depthai_sdk import OakCamera + +HTTP_SERVER_PORT = 8090 + + +class TCPServerRequest(socketserver.BaseRequestHandler): + def handle(self): + # Handle is called each time a client is connected + # When OpenDataCam connects, do not return - instead keep the connection open and keep streaming data + # First send HTTP header + header = 'HTTP/1.0 200 OK\r\nServer: Mozarella/2.2\r\nAccept-Range: bytes\r\nConnection: close\r\nMax-Age: 0\r\nExpires: 0\r\nCache-Control: no-cache, private\r\nPragma: no-cache\r\nContent-Type: application/json\r\n\r\n' + self.request.send(header.encode()) + while True: + sleep(0.1) + if hasattr(self.server, 'datatosend'): + self.request.send(self.server.datatosend.encode() + "\r\n".encode()) + + +# HTTPServer MJPEG +class VideoStreamHandler(BaseHTTPRequestHandler): + def do_GET(self): + self.send_response(200) + self.send_header('Content-type', 'multipart/x-mixed-replace; boundary=--jpgboundary') + self.end_headers() + while True: + sleep(0.1) + if hasattr(self.server, 'frametosend'): + image = Image.fromarray(cv2.cvtColor(self.server.frametosend, cv2.COLOR_BGR2RGB)) + stream_file = BytesIO() + image.save(stream_file, 'JPEG') + self.wfile.write("--jpgboundary".encode()) + + self.send_header('Content-type', 'image/jpeg') + self.send_header('Content-length', str(stream_file.getbuffer().nbytes)) + self.end_headers() + 
image.save(self.wfile, 'JPEG') + + +class ThreadedHTTPServer(ThreadingMixIn, HTTPServer): + """Handle requests in a separate thread.""" + pass + + +# start TCP data server +server_TCP = socketserver.TCPServer(('localhost', 8070), TCPServerRequest) +th = threading.Thread(target=server_TCP.serve_forever) +th.daemon = True +th.start() + +# start MJPEG HTTP Server +server_HTTP = ThreadedHTTPServer(('localhost', HTTP_SERVER_PORT), VideoStreamHandler) +th2 = threading.Thread(target=server_HTTP.serve_forever) +th2.daemon = True +th2.start() + +# MobilenetSSD label texts +labelMap = ["background", "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", + "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"] + + +def callback(packet, visualizer): + frame = packet.frame + + for detection in packet.img_detections.detections: + server_TCP.datatosend = str(detection.label) + + visualizer.draw(frame) + server_HTTP.frametosend = frame + + +with OakCamera() as oak: + color = oak.create_camera('color', resolution='1080p', encode='mjpeg') + nn = oak.create_nn('mobilenet-ssd', color) + + print(f"DepthAI SDK is up & running." + f"Navigate to http://localhost:{HTTP_SERVER_PORT} with Chrome to see the mjpeg stream") + + oak.visualize(nn, callback=callback) + oak.start(blocking=True) diff --git a/gen2-mjpeg-streaming/sdk/requirements.txt b/gen2-mjpeg-streaming/sdk/requirements.txt new file mode 100644 index 000000000..7cbf5c88f --- /dev/null +++ b/gen2-mjpeg-streaming/sdk/requirements.txt @@ -0,0 +1,5 @@ +Pillow +opencv-python +depthai +depthai-sdk +blobconverter \ No newline at end of file diff --git a/gen2-mobile-object-localizer/sdk/main.py b/gen2-mobile-object-localizer/sdk/main.py new file mode 100644 index 000000000..bb11fedb8 --- /dev/null +++ b/gen2-mobile-object-localizer/sdk/main.py @@ -0,0 +1,59 @@ +import blobconverter +import cv2 +import numpy as np + +from depthai_sdk import OakCamera, DetectionPacket, Visualizer + +THRESHOLD = 0.2 +NN_WIDTH = 192 +NN_HEIGHT = 192 +PREVIEW_WIDTH = 640 +PREVIEW_HEIGHT = 360 + +colors_full = np.random.randint(255, size=(100, 3), dtype=int) + + +def plot_boxes(frame, boxes, colors, scores): + color_black = (0, 0, 0) + for i in range(boxes.shape[0]): + box = boxes[i] + y1 = (frame.shape[0] * box[0]).astype(int) + y2 = (frame.shape[0] * box[2]).astype(int) + x1 = (frame.shape[1] * box[1]).astype(int) + x2 = (frame.shape[1] * box[3]).astype(int) + color = (int(colors[i, 0]), int(colors[i, 1]), int(colors[i, 2])) + + cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2) + cv2.rectangle(frame, (x1, y1), (x1 + 50, y1 + 15), color, -1) + cv2.putText(frame, f"{scores[i]:.2f}", (x1 + 10, y1 + 10), cv2.FONT_HERSHEY_TRIPLEX, 0.4, color_black) + + +def callback(packet: DetectionPacket, visualizer: Visualizer): + frame = packet.frame + nn_data = packet.img_detections + + # get outputs + detection_boxes = np.array(nn_data.getLayerFp16("ExpandDims")).reshape((100, 4)) + detection_scores = np.array(nn_data.getLayerFp16("ExpandDims_2")).reshape((100,)) + + # keep boxes bigger than threshold + mask = detection_scores >= THRESHOLD + boxes = detection_boxes[mask] + colors = colors_full[mask] + scores = detection_scores[mask] + + # draw boxes + plot_boxes(frame, boxes, colors, scores) + + cv2.imshow("Localizer", frame) + + +with OakCamera() as oak: + color = oak.create_camera('color', resolution='1080p') + + nn_path = blobconverter.from_zoo(name="mobile_object_localizer_192x192", zoo_type="depthai", 
shaves=6) + nn = oak.create_nn(nn_path, color) + nn.config_nn(aspect_ratio_resize_mode='stretch') + + oak.callback(nn, callback=callback) + oak.start(blocking=True) diff --git a/gen2-mobile-object-localizer/sdk/requirements.txt b/gen2-mobile-object-localizer/sdk/requirements.txt new file mode 100644 index 000000000..63880bef2 --- /dev/null +++ b/gen2-mobile-object-localizer/sdk/requirements.txt @@ -0,0 +1,4 @@ +opencv-python +depthai==2.16.0.0 +numpy +blobconverter==1.2.9 \ No newline at end of file diff --git a/gen2-multiple-devices/multi-cam-calibration/.gitignore b/gen2-multiple-devices/multi-cam-calibration/.gitignore deleted file mode 100644 index 0846a02e5..000000000 --- a/gen2-multiple-devices/multi-cam-calibration/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ -calibration_data/* -test.ipynb -__pycache__ \ No newline at end of file diff --git a/gen2-multiple-devices/multi-cam-calibration/README.md b/gen2-multiple-devices/multi-cam-calibration/README.md deleted file mode 100644 index a410c18fb..000000000 --- a/gen2-multiple-devices/multi-cam-calibration/README.md +++ /dev/null @@ -1,67 +0,0 @@ -# Multi camera calibration -This example demonstrates how to compute extrinsic parameters (pose of the camera) for multiple cameras. - - -## Controls -| key | action -| :--- | :--- | -| `1` ... `9` | select camera | -| `q` | quit | -| `p` | compute the pose of the selected camera and save the results | - -## Configuration -Properties of the checkerboard used for calibration and the calibration data directory can be changed in the [`config.py`](config.py). - -## Usage -Run the [`main.py`](main.py) with Python 3. - -__Measure the pose of the camera__ \ -Select the camera. Press the `p` key to estimate the pose of the camera. An overlay showing the coordinate system will appear. To dismiss the overlay press any key. The pose of the camera will be saved to a file (`calibration_data/extrinsics_` by default). \ -_Repeat for every camera_. - -![pose estimation](img/pose.png) - -## How it works -Open CV's [`findChessboardCorners`](https://docs.opencv.org/4.x/d9/d0c/group__calib3d.html#ga93efa9b0aa890de240ca32b11253dd4a) is used to find the checkerboard and [`solvePnP`](https://docs.opencv.org/4.x/d9/d0c/group__calib3d.html#ga549c2075fac14829ff4a58bc931c033d) is used to compute the translation and rotation from the camera to the checkerboard. - - - -## Interpret the results - -The results can be read with Numpy's load function: -```python -import numpy as np -extrinsics = np.load("calibration_data/extrinsics_19443010F1CCF41200.npz") -print("World to cam: \n", extrinsics["world_to_cam"]) -print("Cam to world: \n", extrinsics["cam_to_world"]) -print("Rotation vector: \n", extrinsics["rot_vec"]) -print("Translation vector: \n", extrinsics["trans_vec"]) -``` - -To transform a point from the camera coordinate system to the world coordinate system or vise versa simply multiply a point with the appropriate transformation matrix. -```python -p_cam = np.array([x,y,z,1]) -p_world = data["cam_to_world"] @ p_cam -``` - -To get the position of the camera in the world coordinate system transform all zero vector to wrold space: -```python -camera_pos = data["cam_to_world"] @ np.array([0,0,0,1]) -``` - -> Notice how the points have an extra coordinate. This is called a homogeneous component and enables translation as well as rotation with a single transformation matrix. 
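
For reference, the 4×4 matrices used above are assembled from the rotation and translation vectors that `solvePnP` returns. A minimal sketch of that construction, with placeholder pose values, following the same steps as `estimate_pose` in the deleted `camera.py` further down:

```python
import cv2
import numpy as np

# Placeholder pose; cv2.solvePnP returns a rot_vec and trans_vec like these.
rot_vec = np.zeros((3, 1), dtype=np.float64)
trans_vec = np.array([[0.0], [0.0], [1.0]])

rot_mat = cv2.Rodrigues(rot_vec)[0]                          # 3x3 rotation matrix
world_to_cam = np.vstack((np.hstack((rot_mat, trans_vec)),   # [R | t]
                          np.array([0, 0, 0, 1])))           # homogeneous row
cam_to_world = np.linalg.inv(world_to_cam)

# The trailing 1 (homogeneous component) is what lets a single matrix apply
# both rotation and translation to a point.
camera_pos = cam_to_world @ np.array([0.0, 0.0, 0.0, 1.0])   # camera position in world space
```
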
- -### Reproject points to the image -To reproject points written in the world coordinate system to the camera image we need the intrinsic matrix in adition to previously described extrinsic parameters. To get it use: -```python -calibration = dai.Device().readCalibration() -intrinsic_mat = calibration.getCameraIntrinsics(dai.CameraBoardSocket.RGB, width, height) -``` -where `width` and `height` represent the image width and height in pixels. -```python -points, _ = cv2.projectPoints( - np.float64([[x1, y1, z1], [x2, y2, z2], [x3, y3, z3], [x4, y4, z4], ...]), - extrinsics["rot_vec"], extrinsics["trans_vec"], intrinsic_mat, None -) -``` -resulting `points` contains postions in pixels. \ No newline at end of file diff --git a/gen2-multiple-devices/multi-cam-calibration/camera.py b/gen2-multiple-devices/multi-cam-calibration/camera.py deleted file mode 100644 index beea30cb8..000000000 --- a/gen2-multiple-devices/multi-cam-calibration/camera.py +++ /dev/null @@ -1,179 +0,0 @@ -import depthai as dai -import cv2 -import numpy as np -import time -import config -import os - -class Camera: - def __init__(self, device_info: dai.DeviceInfo, friendly_id: int): - self.device_info = device_info - self.friendly_id = friendly_id - self.mxid = device_info.getMxId() - self._create_pipeline() - self.device = dai.Device(self.pipeline, self.device_info) - - self.rgb_queue = self.device.getOutputQueue(name="rgb", maxSize=1, blocking=False) - self.still_queue = self.device.getOutputQueue(name="still", maxSize=1, blocking=False) - self.control_queue = self.device.getInputQueue(name="control") - - self.window_name = f"[{self.friendly_id}] Camera - mxid: {self.mxid}" - cv2.namedWindow(self.window_name, cv2.WINDOW_NORMAL) - cv2.resizeWindow(self.window_name, 640, 360) - - # Camera intrinsic parameters - self.intrinsic_mat = np.array(self.device.readCalibration().getCameraIntrinsics(dai.CameraBoardSocket.RGB, 3840, 2160)) - self.distortion_coef = np.zeros((1,5)) - - # Camera extrinsic parameters - self.rot_vec = None - self.trans_vec = None - self.world_to_cam = None - self.cam_to_world = None - - self.checkerboard_size = config.checkerboard_size - self.checkerboard_inner_size = (self.checkerboard_size[0] - 1, self.checkerboard_size[1] - 1) - self.square_size = config.square_size - self.corners_world = np.zeros((1, self.checkerboard_inner_size[0] * self.checkerboard_inner_size[1], 3), np.float32) - self.corners_world[0,:,:2] = np.mgrid[0:self.checkerboard_inner_size[0], 0:self.checkerboard_inner_size[1]].T.reshape(-1, 2) - self.corners_world *= self.square_size - - print("=== Connected to " + self.device_info.getMxId()) - - def __del__(self): - self.device.close() - print("=== Closed " + self.device_info.getMxId()) - - def _create_pipeline(self): - pipeline = dai.Pipeline() - - # RGB cam -> 'rgb' - cam_rgb = pipeline.create(dai.node.ColorCamera) - cam_rgb.setResolution(dai.ColorCameraProperties.SensorResolution.THE_4_K) - cam_rgb.setPreviewSize(640, 360) - cam_rgb.setInterleaved(False) - cam_rgb.setColorOrder(dai.ColorCameraProperties.ColorOrder.BGR) - cam_rgb.setPreviewKeepAspectRatio(False) - xout_rgb = pipeline.createXLinkOut() - xout_rgb.setStreamName("rgb") - cam_rgb.preview.link(xout_rgb.input) - - # Still encoder -> 'still' - still_encoder = pipeline.create(dai.node.VideoEncoder) - still_encoder.setDefaultProfilePreset(1, dai.VideoEncoderProperties.Profile.MJPEG) - cam_rgb.still.link(still_encoder.input) - xout_still = pipeline.createXLinkOut() - xout_still.setStreamName("still") - 
still_encoder.bitstream.link(xout_still.input) - - # Camera control -> 'control' - control = pipeline.create(dai.node.XLinkIn) - control.setStreamName('control') - control.out.link(cam_rgb.inputControl) - - self.pipeline = pipeline - - def update(self): - in_rgb = self.rgb_queue.tryGet() - - if in_rgb is None: - return - - self.frame_rgb = in_rgb.getCvFrame() - - cv2.imshow(self.window_name, self.frame_rgb) - - def capture_still(self, timeout_ms: int = 1000): - print("capturing still") - # Empty the queue - self.still_queue.tryGetAll() - - # Send a capture command - ctrl = dai.CameraControl() - ctrl.setCaptureStill(True) - self.control_queue.send(ctrl) - - # Wait for the still to be captured - in_still = None - start_time = time.time()*1000 - while in_still is None: - time.sleep(0.1) - in_still = self.still_queue.tryGet() - if time.time()*1000 - start_time > timeout_ms: - print("did not recieve still image - retrying") - return self.capture_still(timeout_ms) - - still_rgb = cv2.imdecode(in_still.getData(), cv2.IMREAD_UNCHANGED) - - return still_rgb - - def draw_origin(self, frame_rgb: cv2.Mat): - points, _ = cv2.projectPoints( - np.float64([[0, 0, 0], [0.1, 0, 0], [0, 0.1, 0], [0, 0, -0.1]]), - self.rot_vec, self.trans_vec, self.intrinsic_mat, self.distortion_coef - ) - [p_0, p_x, p_y, p_z] = points.astype(np.int64) - - reprojection = frame_rgb.copy() - reprojection = cv2.line(reprojection, p_0[0], p_x[0], (0, 0, 255), 5) - reprojection = cv2.line(reprojection, p_0[0], p_y[0], (0, 255, 0), 5) - reprojection = cv2.line(reprojection, p_0[0], p_z[0], (255, 0, 0), 5) - - return reprojection - - def estimate_pose(self): - frame_rgb = self.capture_still() - if frame_rgb is None: - print("did not recieve still image") - return - - frame_gray = cv2.cvtColor(frame_rgb, cv2.COLOR_BGR2GRAY) - - print("Finding checkerboard corners...") - - # find the checkerboard corners - found, corners = cv2.findChessboardCorners( - frame_gray, self.checkerboard_inner_size, - cv2.CALIB_CB_ADAPTIVE_THRESH + cv2.CALIB_CB_FAST_CHECK + cv2.CALIB_CB_NORMALIZE_IMAGE - ) - - if not found: - print("Checkerboard not found") - return None - - # refine the corner locations - corners = cv2.cornerSubPix( - frame_gray, corners, (11, 11), (-1, -1), - (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 30, 0.001) - ) - - # compute the rotation and translation from the camera to the checkerboard - ret, self.rot_vec, self.trans_vec = cv2.solvePnP( - self.corners_world, corners, self.intrinsic_mat, self.distortion_coef - ) - - # compute transformation from world to camera space and wise versa - rotM = cv2.Rodrigues(self.rot_vec)[0] - self.world_to_cam = np.vstack((np.hstack((rotM, self.trans_vec)), np.array([0,0,0,1]))) - self.cam_to_world = np.linalg.inv(self.world_to_cam) - - # show origin overlay - reprojection = self.draw_origin(frame_rgb) - cv2.imshow(self.window_name, reprojection) - cv2.waitKey() - - print("Camera to world transformation: \n", self.cam_to_world) - print("World to camera transformation: \n", self.world_to_cam) - print("Rotation vector: \n", self.rot_vec) - print("Translation vector: \n", self.trans_vec) - - # save the results - try: - path = os.path.join(os.path.dirname(__file__), f"{config.calibration_data_dir}") - os.makedirs(path, exist_ok=True) - np.savez( - f"{path}/extrinsics_{self.device_info.getMxId()}.npz", - world_to_cam=self.world_to_cam, cam_to_world=self.cam_to_world, trans_vec=self.trans_vec, rot_vec=self.rot_vec - ) - except: - print("Could not save calibration data") \ No newline at end of file 
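
A hedged usage sketch for the class above: once `estimate_pose` has saved the extrinsics, they can be sanity-checked by reprojecting the world origin and axes back into the image, combining the loading snippet from the README with the `draw_origin` logic. The mxid in the filename is just the example id used in the README; the intrinsics are read the same way as in `Camera.__init__`.

```python
import cv2
import depthai as dai
import numpy as np

extrinsics = np.load("calibration_data/extrinsics_19443010F1CCF41200.npz")

# Intrinsics for the 4K color stream, as in Camera.__init__ above
calibration = dai.Device().readCalibration()
intrinsic_mat = np.array(calibration.getCameraIntrinsics(dai.CameraBoardSocket.RGB, 3840, 2160))

# World origin plus short x/y/z axis segments (meters)
axes_world = np.float64([[0, 0, 0], [0.1, 0, 0], [0, 0.1, 0], [0, 0, -0.1]])
points, _ = cv2.projectPoints(
    axes_world, extrinsics["rot_vec"], extrinsics["trans_vec"], intrinsic_mat, None
)
print("Origin and axis tips (pixels):\n", points.reshape(-1, 2))
```
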
diff --git a/gen2-multiple-devices/multi-cam-calibration/config.py b/gen2-multiple-devices/multi-cam-calibration/config.py deleted file mode 100644 index f9a501663..000000000 --- a/gen2-multiple-devices/multi-cam-calibration/config.py +++ /dev/null @@ -1,3 +0,0 @@ -checkerboard_size = (10, 7) # number of squares on the checkerboard -square_size = 0.0251 # size of a square in meters -calibration_data_dir = "calibration_data" # directory relative to main.py diff --git a/gen2-multiple-devices/multi-cam-calibration/img/pose.png b/gen2-multiple-devices/multi-cam-calibration/img/pose.png deleted file mode 100644 index e2fe59759..000000000 Binary files a/gen2-multiple-devices/multi-cam-calibration/img/pose.png and /dev/null differ diff --git a/gen2-multiple-devices/multi-cam-calibration/main.py b/gen2-multiple-devices/multi-cam-calibration/main.py deleted file mode 100644 index d2e8b6af7..000000000 --- a/gen2-multiple-devices/multi-cam-calibration/main.py +++ /dev/null @@ -1,56 +0,0 @@ -import cv2 -import depthai as dai -from camera import Camera -from typing import List - -device_infos = dai.Device.getAllAvailableDevices() -if len(device_infos) == 0: - raise RuntimeError("No devices found!") -else: - print("Found", len(device_infos), "devices") - -device_infos.sort(key=lambda x: x.getMxId(), reverse=True) # sort the cameras by their mxId - -cameras: List[Camera] = [] - -friendly_id = 0 -for device_info in device_infos: - friendly_id += 1 - cameras.append(Camera(device_info, friendly_id)) - -selected_camera = cameras[0] - -def select_camera(friendly_id: int): - global selected_camera - - i = friendly_id - 1 - if i >= len(cameras) or i < 0: - return None - - selected_camera = cameras[i] - print(f"Selected camera {friendly_id}") - - return selected_camera - -select_camera(1) - -while True: - key = cv2.waitKey(1) - - # QUIT - press `q` to quit - if key == ord('q'): - break - - # CAMERA SELECTION - use the number keys to select a camera - if key >= ord('1') and key <= ord('9'): - select_camera(key - ord('1') + 1) - - # POSE ESTIMATION - press `p` to estimate the pose of the selected camera and save it to file - if key == ord('p'): - selected_camera.estimate_pose() - - for camera in cameras: - camera.update() - - - \ No newline at end of file diff --git a/gen2-multiple-devices/multi-cam-calibration/pattern.pdf b/gen2-multiple-devices/multi-cam-calibration/pattern.pdf deleted file mode 100644 index 73f072038..000000000 Binary files a/gen2-multiple-devices/multi-cam-calibration/pattern.pdf and /dev/null differ diff --git a/gen2-multiple-devices/rgbd-pointcloud-fusion/README.md b/gen2-multiple-devices/rgbd-pointcloud-fusion/README.md deleted file mode 100644 index fe4e06bef..000000000 --- a/gen2-multiple-devices/rgbd-pointcloud-fusion/README.md +++ /dev/null @@ -1,21 +0,0 @@ -## Point cloud fusion -This example demonstrates how point clouds from different cameras can be merged together. - -_For a single camera version check out [rgbd-pointcloud](https://github.com/luxonis/depthai-experiments/tree/master/gen2-pointcloud/rgbd-pointcloud)._ - -![demo](img/demo.gif) -## Install project requirements -``` -python3 -m pip install -r requirements.txt -``` -> Note: Running the command above also tries to install open3D which is required for this example. Open3D is not supported by all platforms, but is required for pointcloud visualization. - -## Usage -> Before you can run this demo you need to calibrate the cameras. 
Go to [multi-cam-calibration](../multi-cam-calibration) and generate a calibration file for each camera. Make sure that the `calibration_data_dir` in the [`config.py`](config.py) is set correctly. - -Run the [`main.py`](main.py) with Python 3. -``` -python3 main.py -``` - - diff --git a/gen2-multiple-devices/rgbd-pointcloud-fusion/camera.py b/gen2-multiple-devices/rgbd-pointcloud-fusion/camera.py deleted file mode 100644 index 05f54499d..000000000 --- a/gen2-multiple-devices/rgbd-pointcloud-fusion/camera.py +++ /dev/null @@ -1,183 +0,0 @@ -import depthai as dai -import cv2 -import numpy as np -import open3d as o3d -from typing import List -import config -from host_sync import HostSync - -class Camera: - def __init__(self, device_info: dai.DeviceInfo, friendly_id: int, show_video: bool = True, show_point_cloud: bool = True): - self.show_video = show_video - self.show_point_cloud = show_point_cloud - self.show_detph = False - self.device_info = device_info - self.friendly_id = friendly_id - self.mxid = device_info.getMxId() - self._create_pipeline() - self.device = dai.Device(self.pipeline, self.device_info) - - self.image_queue = self.device.getOutputQueue(name="image", maxSize=1, blocking=False) - self.depth_queue = self.device.getOutputQueue(name="depth", maxSize=1, blocking=False) - self.host_sync = HostSync() - - self.image_frame = None - self.depth_frame = None - self.depth_visualization_frame = None - self.point_cloud = o3d.geometry.PointCloud() - - # camera window - self.window_name = f"[{self.friendly_id}] Camera - mxid: {self.mxid}" - if show_video: - cv2.namedWindow(self.window_name, cv2.WINDOW_NORMAL) - cv2.resizeWindow(self.window_name, 640, 360) - - # point cloud window - if show_point_cloud: - self.point_cloud_window = o3d.visualization.Visualizer() - self.point_cloud_window.create_window(window_name=f"[{self.friendly_id}] Point Cloud - mxid: {self.mxid}") - self.point_cloud_window.add_geometry(self.point_cloud) - origin = o3d.geometry.TriangleMesh.create_coordinate_frame(size=0.3, origin=[0, 0, 0]) - self.point_cloud_window.add_geometry(origin) - self.point_cloud_window.get_view_control().set_constant_z_far(config.max_range*2) - - self._load_calibration() - - print("=== Connected to " + self.device_info.getMxId()) - - def __del__(self): - self.device.close() - print("=== Closed " + self.device_info.getMxId()) - - def _load_calibration(self): - path = f"{config.calibration_data_dir}/extrinsics_{self.mxid}.npz" - try: - extrinsics = np.load(path) - self.cam_to_world = extrinsics["cam_to_world"] - self.world_to_cam = extrinsics["world_to_cam"] - except: - raise RuntimeError(f"Could not load calibration data for camera {self.mxid} from {path}!") - - calibration = self.device.readCalibration() - self.intrinsics = calibration.getCameraIntrinsics( - dai.CameraBoardSocket.RGB if config.COLOR else dai.CameraBoardSocket.RIGHT, - dai.Size2f(*self.image_size) - ) - - self.pinhole_camera_intrinsic = o3d.camera.PinholeCameraIntrinsic( - *self.image_size, self.intrinsics[0][0], self.intrinsics[1][1], self.intrinsics[0][2], self.intrinsics[1][2] - ) - - print(self.pinhole_camera_intrinsic) - - - def _create_pipeline(self): - pipeline = dai.Pipeline() - - # Depth cam -> 'depth' - mono_left = pipeline.createMonoCamera() - mono_right = pipeline.createMonoCamera() - mono_left.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P) - mono_left.setBoardSocket(dai.CameraBoardSocket.LEFT) - mono_right.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P) - 
mono_right.setBoardSocket(dai.CameraBoardSocket.RIGHT) - cam_stereo = pipeline.createStereoDepth() - cam_stereo.setDefaultProfilePreset(dai.node.StereoDepth.PresetMode.HIGH_DENSITY) - cam_stereo.initialConfig.setMedianFilter(config.median) - cam_stereo.initialConfig.setConfidenceThreshold(config.confidence_threshold) - cam_stereo.setLeftRightCheck(config.lrcheck) - cam_stereo.setExtendedDisparity(config.extended) - cam_stereo.setSubpixel(config.subpixel) - mono_left.out.link(cam_stereo.left) - mono_right.out.link(cam_stereo.right) - - init_config = cam_stereo.initialConfig.get() - init_config.postProcessing.speckleFilter.enable = False - init_config.postProcessing.speckleFilter.speckleRange = 50 - init_config.postProcessing.temporalFilter.enable = True - init_config.postProcessing.spatialFilter.enable = True - init_config.postProcessing.spatialFilter.holeFillingRadius = 2 - init_config.postProcessing.spatialFilter.numIterations = 1 - init_config.postProcessing.thresholdFilter.minRange = config.min_range - init_config.postProcessing.thresholdFilter.maxRange = config.max_range - init_config.postProcessing.decimationFilter.decimationFactor = 1 - cam_stereo.initialConfig.set(init_config) - - xout_depth = pipeline.createXLinkOut() - xout_depth.setStreamName("depth") - cam_stereo.depth.link(xout_depth.input) - - - # RGB cam or mono right -> 'image' - xout_image = pipeline.createXLinkOut() - xout_image.setStreamName("image") - if config.COLOR: - cam_rgb = pipeline.createColorCamera() - cam_rgb.setResolution(dai.ColorCameraProperties.SensorResolution.THE_1080_P) - cam_rgb.setColorOrder(dai.ColorCameraProperties.ColorOrder.RGB) - cam_rgb.setIspScale(1, 3) - cam_rgb.initialControl.setManualFocus(130) - cam_stereo.setDepthAlign(dai.CameraBoardSocket.RGB) - cam_rgb.isp.link(xout_image.input) - self.image_size = cam_rgb.getIspSize() - else: - cam_stereo.rectifiedRight.link(xout_image.input) - self.image_size = mono_right.getResolutionSize() - - self.pipeline = pipeline - - def update(self): - for queue in [self.depth_queue, self.image_queue]: - new_msg = queue.tryGet() - if new_msg is not None: - msgs = self.host_sync.add_msg(queue.getName(), new_msg) - if msgs == False: - break - - self.depth_frame = msgs["depth"].getFrame() - self.image_frame = msgs["image"].getCvFrame() - self.depth_visualization_frame = cv2.normalize(self.depth_frame, None, 255, 0, cv2.NORM_INF, cv2.CV_8UC1) - self.depth_visualization_frame = cv2.equalizeHist(self.depth_visualization_frame) - self.depth_visualization_frame = cv2.applyColorMap(self.depth_visualization_frame, cv2.COLORMAP_HOT) - - if self.show_video: - if self.show_detph: - cv2.imshow(self.window_name, self.depth_visualization_frame) - else: - cv2.imshow(self.window_name, self.image_frame) - - rgb = cv2.cvtColor(self.image_frame, cv2.COLOR_BGR2RGB) - self.rgbd_to_point_cloud(self.depth_frame, rgb) - - if self.show_point_cloud: - self.point_cloud_window.update_geometry(self.point_cloud) - self.point_cloud_window.poll_events() - self.point_cloud_window.update_renderer() - - - def rgbd_to_point_cloud(self, depth_frame, image_frame, downsample=False, remove_noise=False): - rgb_o3d = o3d.geometry.Image(image_frame) - depth_o3d = o3d.geometry.Image(depth_frame) - rgbd_image = o3d.geometry.RGBDImage.create_from_color_and_depth( - rgb_o3d, depth_o3d, convert_rgb_to_intensity=(len(image_frame.shape) != 3) - ) - - point_cloud = o3d.geometry.PointCloud.create_from_rgbd_image( - rgbd_image, self.pinhole_camera_intrinsic, self.world_to_cam - ) - - if downsample: - point_cloud = 
point_cloud.voxel_down_sample(voxel_size=0.01) - - if remove_noise: - point_cloud = point_cloud.remove_statistical_outlier(nb_neighbors=30, std_ratio=0.1)[0] - - self.point_cloud.points = point_cloud.points - self.point_cloud.colors = point_cloud.colors - - # correct upside down z axis - T = np.eye(4) - T[2,2] = -1 - self.point_cloud.transform(T) - - return self.point_cloud \ No newline at end of file diff --git a/gen2-multiple-devices/rgbd-pointcloud-fusion/config.py b/gen2-multiple-devices/rgbd-pointcloud-fusion/config.py deleted file mode 100644 index 6d806b6a6..000000000 --- a/gen2-multiple-devices/rgbd-pointcloud-fusion/config.py +++ /dev/null @@ -1,19 +0,0 @@ -import depthai as dai - -COLOR = True # Use color camera of mono camera - -# DEPTH CONFIG -lrcheck = True # Better handling for occlusions -extended = False # Closer-in minimum depth, disparity range is doubled -subpixel = True # Better accuracy for longer distance, fractional disparity 32-levels -confidence_threshold = 250 # 0-255, 255 = low confidence, 0 = high confidence -min_range = 100 # mm -max_range = 2000 # mm - -# Median filter -# Options: MEDIAN_OFF, KERNEL_3x3, KERNEL_5x5, KERNEL_7x7 -median = dai.StereoDepthProperties.MedianFilter.KERNEL_7x7 - -# CALIBRATION -calibration_data_dir = '../multi-cam-calibration/calibration_data' # Path to camera extrinsics relative to main.py - diff --git a/gen2-multiple-devices/rgbd-pointcloud-fusion/host_sync.py b/gen2-multiple-devices/rgbd-pointcloud-fusion/host_sync.py deleted file mode 100644 index f6afe4792..000000000 --- a/gen2-multiple-devices/rgbd-pointcloud-fusion/host_sync.py +++ /dev/null @@ -1,26 +0,0 @@ -class HostSync: - def __init__(self): - self.arrays = {} - def add_msg(self, name, msg): - if not name in self.arrays: - self.arrays[name] = [] - # Add msg to array - self.arrays[name].append({'msg': msg, 'seq': msg.getSequenceNum()}) - - synced = {} - for name, arr in self.arrays.items(): - for i, obj in enumerate(arr): - if msg.getSequenceNum() == obj['seq']: - synced[name] = obj['msg'] - break - # If there are 2 (all) synced msgs, remove all old msgs - # and return synced msgs - if len(synced) == 2: # color, depth - # Remove old msgs - for name, arr in self.arrays.items(): - for i, obj in enumerate(arr): - if obj['seq'] < msg.getSequenceNum(): - arr.remove(obj) - else: break - return synced - return False \ No newline at end of file diff --git a/gen2-multiple-devices/rgbd-pointcloud-fusion/img/demo.gif b/gen2-multiple-devices/rgbd-pointcloud-fusion/img/demo.gif deleted file mode 100644 index 8c44cdf6c..000000000 Binary files a/gen2-multiple-devices/rgbd-pointcloud-fusion/img/demo.gif and /dev/null differ diff --git a/gen2-multiple-devices/rgbd-pointcloud-fusion/main.py b/gen2-multiple-devices/rgbd-pointcloud-fusion/main.py deleted file mode 100644 index f8e4304a4..000000000 --- a/gen2-multiple-devices/rgbd-pointcloud-fusion/main.py +++ /dev/null @@ -1,38 +0,0 @@ -import cv2 -import depthai as dai -from camera import Camera -from typing import List -from point_cloud_visualizer import PointCloudVisualizer - -device_infos = dai.Device.getAllAvailableDevices() -if len(device_infos) == 0: - raise RuntimeError("No devices found!") -else: - print("Found", len(device_infos), "devices") - -device_infos.sort(key=lambda x: x.getMxId(), reverse=True) # sort the cameras by their mxId - -cameras: List[Camera] = [] - -for device_info in device_infos: - cameras.append(Camera(device_info, len(cameras)+1, show_video=True, show_point_cloud=False)) - - -point_cloud_visualizer = 
PointCloudVisualizer(cameras) - -while True: - key = cv2.waitKey(1) - - # QUIT - press `q` to quit - if key == ord('q'): - break - - # TOGGLE DEPTH VIEW - press `d` to toggle depth view - if key == ord('d'): - for camera in cameras: - camera.show_detph = not camera.show_detph - - for camera in cameras: - camera.update() - - point_cloud_visualizer.update() \ No newline at end of file diff --git a/gen2-multiple-devices/rgbd-pointcloud-fusion/point_cloud_visualizer.py b/gen2-multiple-devices/rgbd-pointcloud-fusion/point_cloud_visualizer.py deleted file mode 100644 index 68fc8b8a8..000000000 --- a/gen2-multiple-devices/rgbd-pointcloud-fusion/point_cloud_visualizer.py +++ /dev/null @@ -1,28 +0,0 @@ -import open3d as o3d -from camera import Camera -from typing import List -import numpy as np -import config - -class PointCloudVisualizer: - def __init__(self, cameras: List[Camera]): - self.cameras = cameras - self.pointcloud = o3d.geometry.PointCloud() - - self.pointcloud_window = o3d.visualization.Visualizer() - self.pointcloud_window.create_window(window_name="Pointcloud") - self.pointcloud_window.add_geometry(self.pointcloud) - origin = o3d.geometry.TriangleMesh.create_coordinate_frame(size=0.3, origin=[0, 0, 0]) - self.pointcloud_window.add_geometry(origin) - view = self.pointcloud_window.get_view_control() - view.set_constant_z_far(config.max_range*2) - - def update(self): - self.pointcloud.clear() - - for camera in self.cameras: - self.pointcloud += camera.point_cloud - - self.pointcloud_window.update_geometry(self.pointcloud) - self.pointcloud_window.poll_events() - self.pointcloud_window.update_renderer() \ No newline at end of file diff --git a/gen2-multiple-devices/rgbd-pointcloud-fusion/requirements.txt b/gen2-multiple-devices/rgbd-pointcloud-fusion/requirements.txt deleted file mode 100644 index 8ca454c73..000000000 --- a/gen2-multiple-devices/rgbd-pointcloud-fusion/requirements.txt +++ /dev/null @@ -1,4 +0,0 @@ -opencv-python -depthai==2.17.4 -numpy -open3d==0.10.0.0; platform_machine != "armv7l" and python_version < "3.9" \ No newline at end of file diff --git a/gen2-multiple-devices/spatial-detection-fusion/.gitignore b/gen2-multiple-devices/spatial-detection-fusion/.gitignore deleted file mode 100644 index 1b37e1a92..000000000 --- a/gen2-multiple-devices/spatial-detection-fusion/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ -config/* -test.ipynb -__pycache__ \ No newline at end of file diff --git a/gen2-multiple-devices/spatial-detection-fusion/README.md b/gen2-multiple-devices/spatial-detection-fusion/README.md deleted file mode 100644 index c860011fb..000000000 --- a/gen2-multiple-devices/spatial-detection-fusion/README.md +++ /dev/null @@ -1,19 +0,0 @@ -# Spatial detection fusion -This example demonstrates multiple Luxonis OAK cameras tracking objects and showing their position in bird's-eye view. - -![](img/demo.gif) -## Controls -| key | action -| :--- | :--- | -| `q` | quit | -| `d` | toggle depth view | - - -## Usage -> Before you can run this demo you need to calibrate the cameras. Go to [multi-cam-calibration](../multi-cam-calibration) and generate a calibration file for each camera. Make sure that the `calibration_data_dir` in the [`config.py`](config.py) is set correctly. - -Run the [`main.py`](main.py) with Python 3. - -Camera's position will appear in the bird's-eye view along with its detected objects. 
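
Conceptually, each detection's spatial coordinates (millimeters, camera frame) are mapped into the shared world frame with that camera's `cam_to_world` extrinsics before being drawn. A rough sketch with placeholder values and an example device id; the deleted `camera.py` further down performs the same transform per detection:

```python
import numpy as np

# Extrinsics produced by multi-cam-calibration (example device id)
extrinsics = np.load("../multi-cam-calibration/calibration_data/extrinsics_19443010F1CCF41200.npz")
cam_to_world = extrinsics["cam_to_world"]

# Placeholder spatial coordinates from the spatial detection network (mm)
x_mm, y_mm, z_mm = 120.0, -40.0, 1500.0

# mm -> m, flip Y, append the homogeneous 1, then move into world space
pos_camera = np.array([[x_mm / 1000, -y_mm / 1000, z_mm / 1000, 1]]).T
pos_world = cam_to_world @ pos_camera  # 4x1 column used by the bird's-eye view
```
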
- -![bird's-eye view](img/birdseye.png) \ No newline at end of file diff --git a/gen2-multiple-devices/spatial-detection-fusion/birdseyeview.py b/gen2-multiple-devices/spatial-detection-fusion/birdseyeview.py deleted file mode 100644 index cc5bda87c..000000000 --- a/gen2-multiple-devices/spatial-detection-fusion/birdseyeview.py +++ /dev/null @@ -1,129 +0,0 @@ -from camera import Camera -from typing import List -import numpy as np -import cv2 -import collections - -class BirdsEyeView: - colors = [(0, 255, 255), (255, 0, 255), (255, 255, 0), (0,0,255), (0,255,0), (255,0,0)] - - def __init__(self, cameras: List[Camera], width, height, scale, trail_length=300): - self.cameras = cameras - self.width = width - self.height = height - self.scale = scale - self.history = collections.deque(maxlen=trail_length) - - self.img = np.zeros((height, width, 3), np.uint8) - self.world_to_birds_eye = np.array([ - [scale, 0, 0, width//2], - [0, scale, 0, height//2], - ]) - - def draw_coordinate_system(self): - p_0 = (self.world_to_birds_eye @ np.array([0, 0, 0, 1])).astype(np.int64) - p_x = (self.world_to_birds_eye @ np.array([0.3, 0, 0, 1])).astype(np.int64) - p_y = (self.world_to_birds_eye @ np.array([0, 0.3, 0, 1])).astype(np.int64) - cv2.line(self.img, p_0, p_x, (0, 0, 255), 2) - cv2.line(self.img, p_0, p_y, (0, 255, 0), 2) - - def draw_cameras(self): - for camera in self.cameras: - try: - color = self.colors[camera.friendly_id - 1] - except: - color = (255,255,255) - - # draw the camera position - if camera.cam_to_world is not None: - p = (self.world_to_birds_eye @ (camera.cam_to_world @ np.array([0,0,0,1]))).astype(np.int64) - p_l = (self.world_to_birds_eye @ (camera.cam_to_world @ np.array([0.2,0,0.1,1]))).astype(np.int64) - p_r = (self.world_to_birds_eye @ (camera.cam_to_world @ np.array([-0.2,0,0.1,1]))).astype(np.int64) - cv2.circle(self.img, p, 5, color, -1) - cv2.line(self.img, p, p_l, color, 1) - cv2.line(self.img, p, p_r, color, 1) - - def make_groups(self): - n = 2 # use only first n components - distance_threshold = 1.5 # m - for camera in self.cameras: - for det in camera.detected_objects: - det.corresponding_detections = [] - for other_camera in self.cameras: - # find closest detection - d = np.inf - closest_det = None - for other_det in other_camera.detected_objects: - if other_det.label != det.label: - continue - d_ = np.linalg.norm(det.pos[:,:n] - other_det.pos[:,:n]) - if d_ < d: - d = d_ - closest_det = other_det - if closest_det is not None and d < distance_threshold: - det.corresponding_detections.append(closest_det) - - # keep only double correspondences - for camera in self.cameras: - for det in camera.detected_objects: - det.corresponding_detections = [other_det for other_det in det.corresponding_detections if det in other_det.corresponding_detections] - - # find groups of correspondences - groups = [] - for camera in self.cameras: - for det in camera.detected_objects: - # find group - group = None - for g in groups: - if det in g: - group = g - break - if group is None: - group = set() - groups.append(group) - # add to group - group.add(det) - for other_det in det.corresponding_detections: - if other_det not in group: - group.add(other_det) - - return groups - - def draw_groups(self, groups): - for group in groups: - avg = np.zeros(2) - label = "" - for det in group: - label = det.label - try: c = self.colors[det.camera_friendly_id - 1] - except: c = (255,255,255) - p = (self.world_to_birds_eye @ det.pos).flatten().astype(np.int64) - avg += p - cv2.circle(self.img, p, 2, c, -1) - - 
avg = (avg/len(group)).astype(np.int64) - cv2.circle(self.img, avg, int(0.25*self.scale), (255, 255, 255), 0) - cv2.putText(self.img, str(label), avg+np.array([0, int(0.25*self.scale) + 10]), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1) - - def draw_history(self): - for i, groups in enumerate(self.history): - for group in groups: - avg = np.zeros(2) - for det in group: - p = (self.world_to_birds_eye @ det.pos).flatten().astype(np.int64) - avg += p - avg = (avg/len(group)).astype(np.int64) - c = int(i/self.history.maxlen*50) - cv2.circle(self.img, avg, int(i/self.history.maxlen*10), (c, c, c), -1) - - def render(self): - self.img = np.zeros((self.height, self.width, 3), np.uint8) - self.draw_coordinate_system() - self.draw_cameras() - groups = self.make_groups() - self.draw_history() - self.history.append(groups) - self.draw_groups(groups) - - cv2.imshow("Bird's Eye View", self.img) - \ No newline at end of file diff --git a/gen2-multiple-devices/spatial-detection-fusion/camera.py b/gen2-multiple-devices/spatial-detection-fusion/camera.py deleted file mode 100644 index 889d06f48..000000000 --- a/gen2-multiple-devices/spatial-detection-fusion/camera.py +++ /dev/null @@ -1,196 +0,0 @@ -import depthai as dai -import blobconverter -import cv2 -import time -import numpy as np -from typing import List -from detection import Detection -import config -import os - -class Camera: - label_map = ["background", "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", - "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"] - - def __init__(self, device_info: dai.DeviceInfo, friendly_id: int, show_video: bool = True): - self.show_video = show_video - self.show_detph = False - self.device_info = device_info - self.friendly_id = friendly_id - self.mxid = device_info.getMxId() - self._create_pipeline() - self.device = dai.Device(self.pipeline, self.device_info) - - self.rgb_queue = self.device.getOutputQueue(name="rgb", maxSize=1, blocking=False) - self.still_queue = self.device.getOutputQueue(name="still", maxSize=1, blocking=False) - self.control_queue = self.device.getInputQueue(name="control") - self.mapping_queue = self.device.getOutputQueue(name="mapping", maxSize=1, blocking=False) - self.nn_queue = self.device.getOutputQueue(name="nn", maxSize=1, blocking=False) - self.depth_queue = self.device.getOutputQueue(name="depth", maxSize=1, blocking=False) - - self.window_name = f"[{self.friendly_id}] Camera - mxid: {self.mxid}" - if show_video: - cv2.namedWindow(self.window_name, cv2.WINDOW_NORMAL) - cv2.resizeWindow(self.window_name, 640, 360) - - self.frame_rgb = None - self.detected_objects: List[Detection] = [] - - self._load_calibration() - - print("=== Connected to " + self.device_info.getMxId()) - - def __del__(self): - self.device.close() - print("=== Closed " + self.device_info.getMxId()) - - def _load_calibration(self): - path = os.path.join(os.path.dirname(__file__), f"{config.calibration_data_dir}") - try: - extrinsics = np.load(f"{path}/extrinsics_{self.mxid}.npz") - self.cam_to_world = extrinsics["cam_to_world"] - except: - raise RuntimeError(f"Could not load calibration data for camera {self.mxid} from {path}!") - - - def _create_pipeline(self): - pipeline = dai.Pipeline() - - # RGB cam -> 'rgb' - cam_rgb = pipeline.create(dai.node.ColorCamera) - cam_rgb.setResolution(dai.ColorCameraProperties.SensorResolution.THE_4_K) - cam_rgb.setPreviewSize(300, 300) - cam_rgb.setInterleaved(False) - 
cam_rgb.setColorOrder(dai.ColorCameraProperties.ColorOrder.BGR) - cam_rgb.setPreviewKeepAspectRatio(False) - xout_rgb = pipeline.createXLinkOut() - xout_rgb.setStreamName("rgb") - - # Depth cam -> 'depth' - mono_left = pipeline.create(dai.node.MonoCamera) - mono_right = pipeline.create(dai.node.MonoCamera) - mono_left.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P) - mono_left.setBoardSocket(dai.CameraBoardSocket.LEFT) - mono_right.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P) - mono_right.setBoardSocket(dai.CameraBoardSocket.RIGHT) - cam_stereo = pipeline.create(dai.node.StereoDepth) - cam_stereo.setDefaultProfilePreset(dai.node.StereoDepth.PresetMode.HIGH_DENSITY) - cam_stereo.setDepthAlign(dai.CameraBoardSocket.RGB) # Align depth map to the perspective of RGB camera, on which inference is done - cam_stereo.setOutputSize(mono_left.getResolutionWidth(), mono_left.getResolutionHeight()) - mono_left.out.link(cam_stereo.left) - mono_right.out.link(cam_stereo.right) - - xout_depth = pipeline.create(dai.node.XLinkOut) - xout_depth.setStreamName("depth") - - # Bounding box maping from depth to RGB -> 'mapping' - xout_bounding_box_bepth_mapping = pipeline.create(dai.node.XLinkOut) - xout_bounding_box_bepth_mapping.setStreamName("mapping") - - # Spatial detection network -> 'nn' - spatial_nn = pipeline.create(dai.node.MobileNetSpatialDetectionNetwork) - spatial_nn.setBlobPath(blobconverter.from_zoo(name='mobilenet-ssd', shaves=6)) - spatial_nn.setConfidenceThreshold(0.6) - spatial_nn.input.setBlocking(False) - spatial_nn.setBoundingBoxScaleFactor(0.2) - spatial_nn.setDepthLowerThreshold(100) - spatial_nn.setDepthUpperThreshold(5000) - xout_nn = pipeline.create(dai.node.XLinkOut) - xout_nn.setStreamName("nn") - - cam_rgb.preview.link(spatial_nn.input) - # cam_rgb.preview.link(xout_rgb.input) - cam_stereo.depth.link(spatial_nn.inputDepth) - spatial_nn.passthrough.link(xout_rgb.input) - spatial_nn.passthroughDepth.link(xout_depth.input) - spatial_nn.boundingBoxMapping.link(xout_bounding_box_bepth_mapping.input) - spatial_nn.out.link(xout_nn.input) - - - # Still encoder -> 'still' - still_encoder = pipeline.create(dai.node.VideoEncoder) - still_encoder.setDefaultProfilePreset(1, dai.VideoEncoderProperties.Profile.MJPEG) - cam_rgb.still.link(still_encoder.input) - xout_still = pipeline.createXLinkOut() - xout_still.setStreamName("still") - still_encoder.bitstream.link(xout_still.input) - - # Camera control -> 'control' - control = pipeline.create(dai.node.XLinkIn) - control.setStreamName('control') - control.out.link(cam_rgb.inputControl) - - self.pipeline = pipeline - - def update(self): - in_rgb = self.rgb_queue.tryGet() - in_nn = self.nn_queue.tryGet() - in_depth = self.depth_queue.tryGet() - - if in_rgb is None or in_depth is None: - return - - depth_frame = in_depth.getFrame() # depthFrame values are in millimeters - depth_frame_color = cv2.normalize(depth_frame, None, 255, 0, cv2.NORM_INF, cv2.CV_8UC1) - depth_frame_color = cv2.equalizeHist(depth_frame_color) - depth_frame_color = cv2.applyColorMap(depth_frame_color, cv2.COLORMAP_HOT) - - self.frame_rgb = in_rgb.getCvFrame() - - if self.show_detph: - visualization = depth_frame_color.copy() - else: - visualization = self.frame_rgb.copy() - visualization = cv2.resize(visualization, (640, 360), interpolation = cv2.INTER_NEAREST) - - height = visualization.shape[0] - width = visualization.shape[1] - - detections = [] - if in_nn is not None: - detections = in_nn.detections - - mapping = 
self.mapping_queue.tryGet() - self.detected_objects = [] - if len(detections) > 0 and mapping is not None: - roi_datas = mapping.getConfigData() - - for roi_data, detection in zip(roi_datas, detections): - roi = roi_data.roi - roi = roi.denormalize(width, height) - top_left = roi.topLeft() - bottom_right = roi.bottomRight() - xmin = int(top_left.x) - ymin = int(top_left.y) - xmax = int(bottom_right.x) - ymax = int(bottom_right.y) - - x1 = int(detection.xmin * width) - x2 = int(detection.xmax * width) - y1 = int(detection.ymin * height) - y2 = int(detection.ymax * height) - - try: - label = self.label_map[detection.label] - except: - label = detection.label - - if self.cam_to_world is not None: - pos_camera_frame = np.array([[detection.spatialCoordinates.x / 1000, -detection.spatialCoordinates.y / 1000, detection.spatialCoordinates.z / 1000, 1]]).T - # pos_camera_frame = np.array([[0, 0, detection.spatialCoordinates.z/1000, 1]]).T - pos_world_frame = self.cam_to_world @ pos_camera_frame - - self.detected_objects.append(Detection(label, detection.confidence, pos_world_frame, self.friendly_id)) - - - cv2.rectangle(visualization, (xmin, ymin), (xmax, ymax), (100, 0, 0), 2) - cv2.rectangle(visualization, (x1, y1), (x2, y2), (255, 0, 0), 2) - cv2.putText(visualization, str(label), (x1 + 10, y1 + 20), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255) - cv2.putText(visualization, "{:.2f}".format(detection.confidence*100), (x1 + 10, y1 + 35), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255) - cv2.putText(visualization, f"X: {int(detection.spatialCoordinates.x)} mm", (x1 + 10, y1 + 50), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255) - cv2.putText(visualization, f"Y: {int(detection.spatialCoordinates.y)} mm", (x1 + 10, y1 + 65), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255) - cv2.putText(visualization, f"Z: {int(detection.spatialCoordinates.z)} mm", (x1 + 10, y1 + 80), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255) - - - if self.show_video: - cv2.imshow(self.window_name, visualization) \ No newline at end of file diff --git a/gen2-multiple-devices/spatial-detection-fusion/config.py b/gen2-multiple-devices/spatial-detection-fusion/config.py deleted file mode 100644 index c4b31c10e..000000000 --- a/gen2-multiple-devices/spatial-detection-fusion/config.py +++ /dev/null @@ -1,6 +0,0 @@ -# calibration -calibration_data_dir = '../multi-cam-calibration/calibration_data' # directory relative to main.py - -# bird's-eye view -size = (512, 512) -scale = 100 # px/m \ No newline at end of file diff --git a/gen2-multiple-devices/spatial-detection-fusion/detection.py b/gen2-multiple-devices/spatial-detection-fusion/detection.py deleted file mode 100644 index e71cc77ff..000000000 --- a/gen2-multiple-devices/spatial-detection-fusion/detection.py +++ /dev/null @@ -1,10 +0,0 @@ -import numpy as np -from typing import List - -class Detection: - def __init__(self, label: str, confidence: float, pos: np.ndarray, camera_friendly_id: int): - self.label = label - self.confidence = confidence - self.pos = pos - self.corresponding_detections: List[Detection] = [] - self.camera_friendly_id = camera_friendly_id \ No newline at end of file diff --git a/gen2-multiple-devices/spatial-detection-fusion/img/birdseye.png b/gen2-multiple-devices/spatial-detection-fusion/img/birdseye.png deleted file mode 100644 index 14fa3877d..000000000 Binary files a/gen2-multiple-devices/spatial-detection-fusion/img/birdseye.png and /dev/null differ diff --git a/gen2-multiple-devices/spatial-detection-fusion/img/demo.gif b/gen2-multiple-devices/spatial-detection-fusion/img/demo.gif deleted file mode 100644 
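For reference, the spatial-detection-fusion code being removed above chains two transforms: Camera.update() lifts each spatial detection from the camera frame into a shared world frame using the calibrated cam_to_world extrinsic, and BirdsEyeView then projects that world position onto the 2-D canvas with world_to_birds_eye. A minimal sketch of that chain, using a placeholder identity extrinsic and an assumed form for the bird's-eye matrix (in the demo it is derived from config.scale and the canvas size):

import numpy as np

cam_to_world = np.eye(4)                      # placeholder; the demo loads this from extrinsics_<mxid>.npz
scale, width, height = 100, 512, 512          # px/m and canvas size, as in config.py

# Spatial detection in the camera frame: millimetres -> metres, y axis flipped (as in Camera.update)
x_mm, y_mm, z_mm = 350.0, -120.0, 2400.0
pos_camera = np.array([[x_mm / 1000, -y_mm / 1000, z_mm / 1000, 1]]).T
pos_world = cam_to_world @ pos_camera         # homogeneous world-frame position

# One plausible world -> bird's-eye pixel mapping: world x to image x, world z to image y, origin at centre
world_to_birds_eye = np.array([
    [scale, 0, 0,     width / 2],
    [0,     0, scale, height / 2],
])
px = (world_to_birds_eye @ pos_world).flatten().astype(np.int64)
print(px)                                     # [291 496] with the identity extrinsic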
index ef6abd91d..000000000 Binary files a/gen2-multiple-devices/spatial-detection-fusion/img/demo.gif and /dev/null differ diff --git a/gen2-multiple-devices/spatial-detection-fusion/main.py b/gen2-multiple-devices/spatial-detection-fusion/main.py deleted file mode 100644 index 81b9e2a45..000000000 --- a/gen2-multiple-devices/spatial-detection-fusion/main.py +++ /dev/null @@ -1,45 +0,0 @@ -import cv2 -import depthai as dai -from birdseyeview import BirdsEyeView -from camera import Camera -from typing import List -import config - -device_infos = dai.Device.getAllAvailableDevices() -if len(device_infos) == 0: - raise RuntimeError("No devices found!") -else: - print("Found", len(device_infos), "devices") - -device_infos.sort(key=lambda x: x.getMxId(), reverse=True) # sort the cameras by their mxId - -cameras: List[Camera] = [] - -friendly_id = 0 -for device_info in device_infos: - friendly_id += 1 - cameras.append(Camera(device_info, friendly_id, show_video=True)) - - -birds_eye_view = BirdsEyeView(cameras, config.size[0], config.size[1], config.scale) - -while True: - key = cv2.waitKey(1) - - # QUIT - press `q` to quit - if key == ord('q'): - break - - # TOGGLE DEPTH VIEW - press `d` to toggle depth view - if key == ord('d'): - for camera in cameras: - camera.show_detph = not camera.show_detph - - for camera in cameras: - camera.update() - - birds_eye_view.render() - - - - \ No newline at end of file diff --git a/gen2-multiple-devices/spatial-detection-fusion/requirements.txt b/gen2-multiple-devices/spatial-detection-fusion/requirements.txt deleted file mode 100644 index 368c0d7a7..000000000 --- a/gen2-multiple-devices/spatial-detection-fusion/requirements.txt +++ /dev/null @@ -1,4 +0,0 @@ -opencv-python -numpy -blobconverter==1.2.8 -depthai==2.16.0.0 \ No newline at end of file diff --git a/gen2-objectron/sdk/functions.py b/gen2-objectron/sdk/functions.py new file mode 100644 index 000000000..c538c7e91 --- /dev/null +++ b/gen2-objectron/sdk/functions.py @@ -0,0 +1,86 @@ +import cv2 +import numpy as np + + +# +# 3 + + + + + + + + 7 +# +\ +\ UP +# + \ + \ +# + \ + \ | +# + 4 + + + + + + + + 8 | y +# + + + + | +# + + + + | +# + + (0) + + .------- x +# + + + + \ +# 1 + + + + + + + + 5 + \ +# \ + \ + \ z +# \ + \ + \ +# \+ \+ +# 2 + + + + + + + + 6 + + +def draw_box(image, pts, dist=None, vol=None): + overlay = image.copy() + + # draw bottom + bottom_sel = [1, 2, 6, 5] + bottom_pts = pts[bottom_sel] + bottom_pts = bottom_pts.reshape((-1, 1, 2)) + bottom_pts = bottom_pts.astype(np.int32) + cv2.fillPoly(overlay, pts=[bottom_pts], color=(192, 192, 192)) + + # draw green size + lines_side = [(1, 4), (2, 3)] + for line in lines_side: + pt0 = pts[line[0]] + pt1 = pts[line[1]] + pt0 = (int(pt0[0]), int(pt0[1])) + pt1 = (int(pt1[0]), int(pt1[1])) + cv2.line(overlay, pt0, pt1, (0, 255, 0)) + + # draw blue lines + lines = [(1, 2), (2, 4), (1, 3), (4, 3), (2, 6), (1, 5), (3, 7), (4, 8), (6, 8), (7, 8), (7, 5), (5, 6)] + for line in lines: + pt0 = pts[line[0]] + pt1 = pts[line[1]] + pt0 = (int(pt0[0]), int(pt0[1])) + pt1 = (int(pt1[0]), int(pt1[1])) + cv2.line(overlay, pt0, pt1, (255, 0, 0)) + + # draw top lines + lines_top = [(3, 8), (4, 7)] + for line in lines_top: + pt0 = pts[line[0]] + pt1 = pts[line[1]] + pt0 = (int(pt0[0]), int(pt0[1])) + pt1 = (int(pt1[0]), int(pt1[1])) + cv2.line(overlay, pt0, pt1, (0, 0, 255)) + + if dist is not None: + # show fps + color_font, color_bg = (255, 255, 255), (255, 0, 0) + label_fps = "Distance: {:.2f} m".format(dist / 1000) + (w1, h1), _ = cv2.getTextSize(label_fps, 
cv2.FONT_HERSHEY_DUPLEX, 0.4, 1) + + x, y = np.min(pts, axis=0).astype(np.int32) + y -= (h1 + 15) + if y < 0: + y = 0 + + cv2.rectangle(overlay, (x - 2, y - h1 - 2), (x + w1 + 6, y + 4), color_bg, -1) + cv2.putText(overlay, label_fps, (x, y), cv2.FONT_HERSHEY_TRIPLEX, + 0.4, color_font) + cv2.line(overlay, (x, y + 2), (x, y), color_bg) + + if vol is not None: + color_font, color_bg = (255, 255, 255), (0, 0, 255) + vol_label = "Volume: {:.2f} m^3".format(vol) + (w1, h1), _ = cv2.getTextSize(vol_label, cv2.FONT_HERSHEY_DUPLEX, 0.4, 1) + x = int(pts[0, 0]) - 50 + y = int(pts[0, 1]) + cv2.rectangle(overlay, (x - 2, y - h1 - 2), (x + w1 + 6, y + 4), color_bg, -1) + cv2.putText(overlay, vol_label, (x, y), cv2.FONT_HERSHEY_TRIPLEX, + 0.4, color_font) + + cv2.addWeighted(overlay, 0.5, image, 0.5, + 0, image) diff --git a/gen2-objectron/sdk/main.py b/gen2-objectron/sdk/main.py new file mode 100644 index 000000000..a793a25ca --- /dev/null +++ b/gen2-objectron/sdk/main.py @@ -0,0 +1,53 @@ +import cv2 +import numpy as np + +from depthai_sdk import OakCamera, TwoStagePacket, AspectRatioResizeMode, Visualizer +from depthai_sdk.oak_outputs.normalize_bb import NormalizeBoundingBox +from functions import draw_box + +OBJECTRON_CHAIR_PATH = '../models/objectron_chair_openvino_2021.4_6shave.blob' + +INPUT_W, INPUT_H = 640, 360 + + +def callback(packet: TwoStagePacket, visualizer: Visualizer): + frame = packet.frame + + detections = packet.img_detections.detections + # find the detection with the highest confidence + # label of 9 indicates chair in VOC + confs = [det.confidence for det in detections if det.label == 9] + + if len(confs) > 0: + idx = confs.index(max(confs)) + detection = detections[idx] + + out = np.array(packet.nnData[idx].getLayerFp16("StatefulPartitionedCall:1")).reshape(9, 2) + + bbox = detection.xmin, detection.ymin, detection.xmax, detection.ymax + x_min, y_min, x_max, y_max = NormalizeBoundingBox((224, 224), AspectRatioResizeMode.CROP).normalize(frame, bbox) + + OG_W, OG_H = x_max - x_min, y_max - y_min + + scale_x = OG_W / 224 + scale_y = OG_H / 224 + + out[:, 0] = out[:, 0] * scale_x + x_min + out[:, 1] = out[:, 1] * scale_y + y_min + + draw_box(frame, out) + + frame = cv2.resize(frame, (INPUT_W, INPUT_H)) + cv2.imshow('Objectron', frame) + + +with OakCamera() as oak: + color = oak.create_camera('color') + + mobilenet = oak.create_nn('mobilenet-ssd', color) + mobilenet.config_nn(aspect_ratio_resize_mode='crop') + + objectron = oak.create_nn(OBJECTRON_CHAIR_PATH, mobilenet) + + oak.callback(objectron.out.main, callback=callback) + oak.start(blocking=True) diff --git a/gen2-objectron/sdk/requirements.txt b/gen2-objectron/sdk/requirements.txt new file mode 100644 index 000000000..9ea235e9c --- /dev/null +++ b/gen2-objectron/sdk/requirements.txt @@ -0,0 +1,3 @@ +blobconverter +depthai +depthai-sdk diff --git a/gen2-ocr/sdk/codec.py b/gen2-ocr/sdk/codec.py new file mode 100644 index 000000000..00cf67ca9 --- /dev/null +++ b/gen2-ocr/sdk/codec.py @@ -0,0 +1,41 @@ +import numpy as np + + +class CTCCodec: + """ Convert between text-label and text-index """ + CHARACTERS = dict(zip(range(37), '0123456789abcdefghijklmnopqrstuvwxyz#')) + + @staticmethod + def decode(preds): + """Convert text-index into text-label.""" + texts = [] + index = 0 + # Select max probabilty (greedy decoding) then decode index to character + preds = preds.astype(np.float16) + preds_index = np.argmax(preds, 2) + preds_index = preds_index.transpose(1, 0) + preds_index_reshape = preds_index.reshape(-1) + preds_sizes = 
np.array([preds_index.shape[1]] * preds_index.shape[0]) + + for l in preds_sizes: + t = preds_index_reshape[index:index + l] + + # NOTE: t might be zero size + if t.shape[0] == 0: + continue + + char_list = [] + for i in range(l): + # removing repeated characters and blank. + if not (i > 0 and t[i - 1] == t[i]): + if CTCCodec.CHARACTERS[t[i]] != '#': + char_list.append(CTCCodec.CHARACTERS[t[i]]) + text = ''.join(char_list) + texts.append(text) + + index += l + + return texts + + +codec = CTCCodec() diff --git a/gen2-ocr/sdk/east.py b/gen2-ocr/sdk/east.py new file mode 100644 index 000000000..753e622dd --- /dev/null +++ b/gen2-ocr/sdk/east.py @@ -0,0 +1,231 @@ +import math + +import cv2 +import depthai +import numpy as np + +_conf_threshold = 0.5 + +def get_cv_rotated_rect(bbox, angle): + x0, y0, x1, y1 = bbox + width = abs(x0 - x1) + height = abs(y0 - y1) + x = x0 + width * 0.5 + y = y0 + height * 0.5 + return ([x.tolist(), y.tolist()], [width.tolist(), height.tolist()], np.rad2deg(angle)) + +def rotated_Rectangle(bbox, angle): + X0, Y0, X1, Y1 = bbox + width = abs(X0 - X1) + height = abs(Y0 - Y1) + x = int(X0 + width * 0.5) + y = int(Y0 + height * 0.5) + + pt1_1 = (int(x + width / 2), int(y + height / 2)) + pt2_1 = (int(x + width / 2), int(y - height / 2)) + pt3_1 = (int(x - width / 2), int(y - height / 2)) + pt4_1 = (int(x - width / 2), int(y + height / 2)) + + t = np.array([[np.cos(angle), -np.sin(angle), x - x * np.cos(angle) + y * np.sin(angle)], + [np.sin(angle), np.cos(angle), y - x * np.sin(angle) - y * np.cos(angle)], + [0, 0, 1]]) + + tmp_pt1_1 = np.array([[pt1_1[0]], [pt1_1[1]], [1]]) + tmp_pt1_2 = np.dot(t, tmp_pt1_1) + pt1_2 = (int(tmp_pt1_2[0][0]), int(tmp_pt1_2[1][0])) + + tmp_pt2_1 = np.array([[pt2_1[0]], [pt2_1[1]], [1]]) + tmp_pt2_2 = np.dot(t, tmp_pt2_1) + pt2_2 = (int(tmp_pt2_2[0][0]), int(tmp_pt2_2[1][0])) + + tmp_pt3_1 = np.array([[pt3_1[0]], [pt3_1[1]], [1]]) + tmp_pt3_2 = np.dot(t, tmp_pt3_1) + pt3_2 = (int(tmp_pt3_2[0][0]), int(tmp_pt3_2[1][0])) + + tmp_pt4_1 = np.array([[pt4_1[0]], [pt4_1[1]], [1]]) + tmp_pt4_2 = np.dot(t, tmp_pt4_1) + pt4_2 = (int(tmp_pt4_2[0][0]), int(tmp_pt4_2[1][0])) + + points = np.array([pt1_2, pt2_2, pt3_2, pt4_2]) + + return points + + +def non_max_suppression(boxes, probs=None, angles=None, overlapThresh=0.3): + # if there are no boxes, return an empty list + if len(boxes) == 0: + return [], [] + + # if the bounding boxes are integers, convert them to floats -- this + # is important since we'll be doing a bunch of divisions + if boxes.dtype.kind == "i": + boxes = boxes.astype("float") + + # initialize the list of picked indexes + pick = [] + + # grab the coordinates of the bounding boxes + x1 = boxes[:, 0] + y1 = boxes[:, 1] + x2 = boxes[:, 2] + y2 = boxes[:, 3] + + # compute the area of the bounding boxes and grab the indexes to sort + # (in the case that no probabilities are provided, simply sort on the bottom-left y-coordinate) + area = (x2 - x1 + 1) * (y2 - y1 + 1) + idxs = y2 + + # if probabilities are provided, sort on them instead + if probs is not None: + idxs = probs + + # sort the indexes + idxs = np.argsort(idxs) + + # keep looping while some indexes still remain in the indexes list + while len(idxs) > 0: + # grab the last index in the indexes list and add the index value to the list of picked indexes + last = len(idxs) - 1 + i = idxs[last] + pick.append(i) + + # find the largest (x, y) coordinates for the start of the bounding box and the smallest (x, y) coordinates for the end of the bounding box + xx1 = np.maximum(x1[i], 
x1[idxs[:last]]) + yy1 = np.maximum(y1[i], y1[idxs[:last]]) + xx2 = np.minimum(x2[i], x2[idxs[:last]]) + yy2 = np.minimum(y2[i], y2[idxs[:last]]) + + # compute the width and height of the bounding box + w = np.maximum(0, xx2 - xx1 + 1) + h = np.maximum(0, yy2 - yy1 + 1) + + # compute the ratio of overlap + overlap = (w * h) / area[idxs[:last]] + + # delete all indexes from the index list that have overlap greater than the provided overlap threshold + idxs = np.delete(idxs, np.concatenate(([last], np.where(overlap > overlapThresh)[0]))) + + # return only the bounding boxes that were picked + return boxes[pick].astype("int"), angles[pick] + + +def decode_predictions(scores, geometry1, geometry2): + # grab the number of rows and columns from the scores volume, then + # initialize our set of bounding box rectangles and corresponding + # confidence scores + (numRows, numCols) = scores.shape[2:4] + rects = [] + confidences = [] + angles = [] + + # loop over the number of rows + for y in range(0, numRows): + # extract the scores (probabilities), followed by the + # geometrical data used to derive potential bounding box + # coordinates that surround text + scoresData = scores[0, 0, y] + xData0 = geometry1[0, 0, y] + xData1 = geometry1[0, 1, y] + xData2 = geometry1[0, 2, y] + xData3 = geometry1[0, 3, y] + anglesData = geometry2[0, 0, y] + + # loop over the number of columns + for x in range(0, numCols): + # if our score does not have sufficient probability, + # ignore it + if scoresData[x] < _conf_threshold: + continue + + # compute the offset factor as our resulting feature + # maps will be 4x smaller than the input image + (offsetX, offsetY) = (x * 4.0, y * 4.0) + + # extract the rotation angle for the prediction and + # then compute the sin and cosine + angle = anglesData[x] + cos = math.cos(angle) + sin = math.sin(angle) + + # use the geometry volume to derive the width and height + # of the bounding box + h = xData0[x] + xData2[x] + w = xData1[x] + xData3[x] + + # compute both the starting and ending (x, y)-coordinates + # for the text prediction bounding box + endX = int(offsetX + (cos * xData1[x]) + (sin * xData2[x])) + endY = int(offsetY - (sin * xData1[x]) + (cos * xData2[x])) + startX = int(endX - w) + startY = int(endY - h) + + # add the bounding box coordinates and probability score + # to our respective lists + rects.append((startX, startY, endX, endY)) + confidences.append(scoresData[x]) + angles.append(angle) + + # return a tuple of the bounding boxes and associated confidences + return (rects, confidences, angles) + + +def decode_east(nnet_packet, **kwargs): + scores = nnet_packet.get_tensor(0) + geometry1 = nnet_packet.get_tensor(1) + geometry2 = nnet_packet.get_tensor(2) + bboxes, confs, angles = decode_predictions(scores, geometry1, geometry2 + ) + boxes, angles = non_max_suppression(np.array(bboxes), probs=confs, angles=np.array(angles)) + boxesangles = (boxes, angles) + return boxesangles + + +def show_east(boxesangles, frame, **kwargs): + bboxes = boxesangles[0] + angles = boxesangles[1] + for ((X0, Y0, X1, Y1), angle) in zip(bboxes, angles): + width = abs(X0 - X1) + height = abs(Y0 - Y1) + cX = int(X0 + width * 0.5) + cY = int(Y0 + height * 0.5) + + rotRect = ((cX, cY), ((X1 - X0), (Y1 - Y0)), angle * (-1)) + points = rotated_Rectangle(frame, rotRect, color=(255, 0, 0), thickness=1) + cv2.polylines(frame, [points], isClosed=True, color=(255, 0, 0), thickness=1, lineType=cv2.LINE_8) + + return frame + + +def order_points(pts): + rect = np.zeros((4, 2), dtype="float32") + s = 
pts.sum(axis=1) + rect[0] = pts[np.argmin(s)] + rect[2] = pts[np.argmax(s)] + diff = np.diff(pts, axis=1) + rect[1] = pts[np.argmin(diff)] + rect[3] = pts[np.argmax(diff)] + return rect + + +def four_point_transform(image, pts): + rect = order_points(pts) + (tl, tr, br, bl) = rect + + widthA = np.sqrt(((br[0] - bl[0]) ** 2) + ((br[1] - bl[1]) ** 2)) + widthB = np.sqrt(((tr[0] - tl[0]) ** 2) + ((tr[1] - tl[1]) ** 2)) + maxWidth = max(int(widthA), int(widthB)) + + heightA = np.sqrt(((tr[0] - br[0]) ** 2) + ((tr[1] - br[1]) ** 2)) + heightB = np.sqrt(((tl[0] - bl[0]) ** 2) + ((tl[1] - bl[1]) ** 2)) + maxHeight = max(int(heightA), int(heightB)) + + dst = np.array([ + [0, 0], + [maxWidth - 1, 0], + [maxWidth - 1, maxHeight - 1], + [0, maxHeight - 1]], dtype="float32") + + M = cv2.getPerspectiveTransform(rect, dst) + warped = cv2.warpPerspective(image, M, (maxWidth, maxHeight)) + + return warped diff --git a/gen2-ocr/sdk/main.py b/gen2-ocr/sdk/main.py new file mode 100755 index 000000000..85d781f2e --- /dev/null +++ b/gen2-ocr/sdk/main.py @@ -0,0 +1,105 @@ +import blobconverter +import cv2 +import numpy as np + +import east +from codec import codec +from depthai_sdk import OakCamera, Detections, AspectRatioResizeMode +from depthai_sdk.oak_outputs.normalize_bb import NormalizeBoundingBox + +NN_WIDTH = 256 +NN_HEIGHT = 256 + + +def decode(nn_data): + scores, geom1, geom2 = [nn_data.getLayerFp16(name) for name in nn_data.getAllLayerNames()] + + scores = np.reshape(scores, (1, 1, 64, 64)) + geom1 = np.reshape(geom1, (1, 4, 64, 64)) + geom2 = np.reshape(geom2, (1, 1, 64, 64)) + + bboxes, confs, angles = east.decode_predictions(scores, geom1, geom2) + boxes, angles = east.non_max_suppression(np.array(bboxes), probs=confs, angles=np.array(angles)) + + rotated_rectangles = [ + east.get_cv_rotated_rect(bbox, angle * -1) + for (bbox, angle) in zip(boxes, angles) + ] + + dets = Detections(nn_data=nn_data, is_rotated=True) + bboxes = [] + for rect in rotated_rectangles: + rect_w, rect_h = rect[1] + x_min = (rect[0][0] - rect_w / 2) / NN_WIDTH + y_min = (rect[0][1] - rect_h / 2) / NN_HEIGHT + x_max = (rect[0][0] + rect_w / 2) / NN_WIDTH + y_max = (rect[0][1] + rect_h / 2) / NN_HEIGHT + bbox = (x_min, y_min, x_max, y_max) + angle = rect[2] + bboxes.append(bbox) + dets.add(0, 1.0, bbox, angle=angle) + + return dets + + +def decode_recognition(nn_data): + rec_data = np.array(nn_data.getFirstLayerFp16()).reshape(30, 1, 37) + decoded_text = codec.decode(rec_data) + return decoded_text + + +def callback(packet, visualizer): + frame = packet.frame + + detections = packet.img_detections.detections + + texts = [] + for i, det in enumerate(detections): + bbox = det.xmin, det.ymin, det.xmax, det.ymax + bbox = NormalizeBoundingBox((NN_WIDTH, NN_HEIGHT), AspectRatioResizeMode.STRETCH).normalize(packet.frame, bbox) + bbox_w, bbox_h = bbox[2] - bbox[0], bbox[3] - bbox[1] + + rect = ( + [bbox[0], bbox[1]], + [bbox_w, bbox_h], + packet.img_detections.angles[i] + ) + rect = cv2.boxPoints(rect) + rect = np.int0(rect) + texts.append(cv2.resize(packet.frame[bbox[1]:bbox[3], bbox[0]:bbox[2]], (240, 64))) + cv2.polylines(frame, [rect], True, (0, 255, 0), 2) + + try: + print(packet.nnData) + except AttributeError: + pass + + if len(texts) > 0: + cv2.imshow('Texts', np.vstack(texts)) + + cv2.imshow('Detections', frame) + + +with OakCamera() as oak: + color = oak.create_camera('color', fps=10) + version = '2021.4' + + # Download the models + det_blob_path = blobconverter.from_zoo(name='east_text_detection_256x256', + 
zoo_type='depthai', + shaves=7, + version=version) + rec_blob_path = blobconverter.from_zoo(name='text-recognition-0012', + shaves=7, + version=version) + + # Create NN components + det_nn = oak.create_nn(det_blob_path, color, decode_fn=decode) + det_nn.config_nn(aspect_ratio_resize_mode='stretch') + + rec_nn = oak.create_nn(rec_blob_path, input=det_nn, decode_fn=decode_recognition) + + oak.callback(rec_nn, callback=callback) + + # oak.callback(rec_nn, callback=callback) + oak.start(blocking=True) diff --git a/gen2-ocr/sdk/requirements.txt b/gen2-ocr/sdk/requirements.txt new file mode 100644 index 000000000..cf98dca98 --- /dev/null +++ b/gen2-ocr/sdk/requirements.txt @@ -0,0 +1,4 @@ +opencv-python +depthai +depthai-sdk +blobconverter diff --git a/gen2-palm-detection/sdk/main.py b/gen2-palm-detection/sdk/main.py new file mode 100644 index 000000000..7f1b0392c --- /dev/null +++ b/gen2-palm-detection/sdk/main.py @@ -0,0 +1,34 @@ +import blobconverter +import cv2 + +from depthai_sdk import OakCamera +from palm_detection import PalmDetection + +palm_detection = PalmDetection() + + +def callback(packet, visualizer): + frame = packet.frame + nn_data = packet.img_detections + + palm_coords = palm_detection.decode(frame, nn_data) + for bbox in palm_coords: + cv2.rectangle( + img=frame, + pt1=(bbox[0], bbox[1]), + pt2=(bbox[2], bbox[3]), + color=(0, 127, 255), + thickness=4 + ) + + cv2.imshow('Palm detection', frame) + + +with OakCamera() as oak: + color = oak.create_camera('color', resolution='1080p') + + nn_path = blobconverter.from_zoo(name='palm_detection_128x128', zoo_type='depthai', shaves=6) + model_nn = oak.create_nn(nn_path, color) + + oak.callback(model_nn, callback=callback) + oak.start(blocking=True) diff --git a/gen2-palm-detection/sdk/palm_detection.py b/gen2-palm-detection/sdk/palm_detection.py new file mode 100644 index 000000000..26c2d36dc --- /dev/null +++ b/gen2-palm-detection/sdk/palm_detection.py @@ -0,0 +1,165 @@ +import numpy as np + + +class PalmDetection: + def decode(self, frame, nn_data): + """ + Each palm detection is a tensor consisting of 19 numbers: + - ymin, xmin, ymax, xmax + - x,y-coordinates for the 7 key_points + - confidence score + :return: + """ + if nn_data is None: return [] + shape = (128, 128) + num_keypoints = 7 + min_score_thresh = 0.7 + anchors = np.load("../anchors_palm.npy") + + # Run the neural network + results = self.to_tensor_result(nn_data) + + raw_box_tensor = results.get("regressors").reshape(-1, 896, 18) # regress + raw_score_tensor = results.get("classificators").reshape(-1, 896, 1) # classification + + detections = self.raw_to_detections(raw_box_tensor, raw_score_tensor, anchors, shape, num_keypoints) + + palm_coords = [ + self.frame_norm(frame, *obj[:4]) + for det in detections + for obj in det + if obj[-1] > min_score_thresh + ] + + palm_confs = [ + obj[-1] for det in detections for obj in det if obj[-1] > min_score_thresh + ] + + if len(palm_coords) == 0: return [] + + nms = self.non_max_suppression( + boxes=np.concatenate(palm_coords).reshape(-1, 4), + probs=palm_confs, + overlapThresh=0.1, + ) + + if nms is None: return [] + return nms + + def sigmoid(self, x): + return (1.0 + np.tanh(0.5 * x)) * 0.5 + + def decode_boxes(self, raw_boxes, anchors, shape, num_keypoints): + """ + Converts the predictions into actual coordinates using the anchor boxes. + Processes the entire batch at once. 
+ """ + boxes = np.zeros_like(raw_boxes) + x_scale, y_scale = shape + + x_center = raw_boxes[..., 0] / x_scale * anchors[:, 2] + anchors[:, 0] + y_center = raw_boxes[..., 1] / y_scale * anchors[:, 3] + anchors[:, 1] + + w = raw_boxes[..., 2] / x_scale * anchors[:, 2] + h = raw_boxes[..., 3] / y_scale * anchors[:, 3] + + boxes[..., 1] = y_center - h / 2.0 # xmin + boxes[..., 0] = x_center - w / 2.0 # ymin + boxes[..., 3] = y_center + h / 2.0 # xmax + boxes[..., 2] = x_center + w / 2.0 # ymax + + for k in range(num_keypoints): + offset = 4 + k * 2 + keypoint_x = raw_boxes[..., offset] / x_scale * anchors[:, 2] + anchors[:, 0] + keypoint_y = ( + raw_boxes[..., offset + 1] / y_scale * anchors[:, 3] + anchors[:, 1] + ) + boxes[..., offset] = keypoint_x + boxes[..., offset + 1] = keypoint_y + + return boxes + + def raw_to_detections(self, raw_box_tensor, raw_score_tensor, anchors_, shape, num_keypoints): + """ + + This function converts these two "raw" tensors into proper detections. + Returns a list of (num_detections, 17) tensors, one for each image in + the batch. + + This is based on the source code from: + mediapipe/calculators/tflite/tflite_tensors_to_detections_calculator.cc + mediapipe/calculators/tflite/tflite_tensors_to_detections_calculator.proto + """ + detection_boxes = self.decode_boxes(raw_box_tensor, anchors_, shape, num_keypoints) + detection_scores = self.sigmoid(raw_score_tensor).squeeze(-1) + output_detections = [] + for i in range(raw_box_tensor.shape[0]): + boxes = detection_boxes[i] + scores = np.expand_dims(detection_scores[i], -1) + output_detections.append(np.concatenate((boxes, scores), -1)) + return output_detections + + def non_max_suppression(self, boxes, probs=None, angles=None, overlapThresh=0.3): + if len(boxes) == 0: + return [], [] + + if boxes.dtype.kind == "i": + boxes = boxes.astype("float") + + pick = [] + + x1 = boxes[:, 0] + y1 = boxes[:, 1] + x2 = boxes[:, 2] + y2 = boxes[:, 3] + + area = (x2 - x1 + 1) * (y2 - y1 + 1) + idxs = y2 + + if probs is not None: + idxs = probs + + idxs = np.argsort(idxs) + + while len(idxs) > 0: + last = len(idxs) - 1 + i = idxs[last] + pick.append(i) + + xx1 = np.maximum(x1[i], x1[idxs[:last]]) + yy1 = np.maximum(y1[i], y1[idxs[:last]]) + xx2 = np.minimum(x2[i], x2[idxs[:last]]) + yy2 = np.minimum(y2[i], y2[idxs[:last]]) + + w = np.maximum(0, xx2 - xx1 + 1) + h = np.maximum(0, yy2 - yy1 + 1) + + overlap = (w * h) / area[idxs[:last]] + + idxs = np.delete( + idxs, np.concatenate(([last], np.where(overlap > overlapThresh)[0])) + ) + + if angles is not None: + return boxes[pick].astype("int"), angles[pick] + return boxes[pick].astype("int") + + def to_tensor_result(self, packet): + return { + name: np.array(packet.getLayerFp16(name)) + for name in [tensor.name for tensor in packet.getRaw().tensors] + } + + def frame_norm(self, frame, *xy_vals): + """ + nn data, being the bounding box locations, are in <0..1> range - + they need to be normalized with frame width/height + + :param frame: + :param xy_vals: the bounding box locations + :return: + """ + return ( + np.clip(np.array(xy_vals), 0, 1) + * np.array(frame.shape[:2] * (len(xy_vals) // 2))[::-1] + ).astype(int) diff --git a/gen2-palm-detection/sdk/requirements.txt b/gen2-palm-detection/sdk/requirements.txt new file mode 100644 index 000000000..bb652a32f --- /dev/null +++ b/gen2-palm-detection/sdk/requirements.txt @@ -0,0 +1,5 @@ +opencv-python +numpy +depthai +depthai-sdk +blobconverter \ No newline at end of file diff --git a/gen2-pedestrian-reidentification/MultiMsgSync.py 
b/gen2-pedestrian-reidentification/MultiMsgSync.py deleted file mode 100644 index 237f38c62..000000000 --- a/gen2-pedestrian-reidentification/MultiMsgSync.py +++ /dev/null @@ -1,56 +0,0 @@ -# Color frames (ImgFrame), object detection (ImgDetections) and recognition (NNData) -# messages arrive to the host all with some additional delay. -# For each ImgFrame there's one ImgDetections msg, which has multiple detections, and for each -# detection there's a NNData msg which contains recognition results. - -# How it works: -# Every ImgFrame, ImgDetections and NNData message has it's own sequence number, by which we can sync messages. - -class TwoStageHostSeqSync: - def __init__(self): - self.msgs = {} - # name: color, detection, or recognition - def add_msg(self, msg, name): - seq = str(msg.getSequenceNum()) - if seq not in self.msgs: - self.msgs[seq] = {} # Create directory for msgs - if "recognition" not in self.msgs[seq]: - self.msgs[seq]["recognition"] = [] # Create recognition array - - if name == "recognition": - # Append recognition msgs to an array - self.msgs[seq]["recognition"].append(msg) - # print(f'Added recognition seq {seq}, total len {len(self.msgs[seq]["recognition"])}') - - elif name == "detection": - # Save detection msg in the directory - self.msgs[seq][name] = msg - self.msgs[seq]["len"] = len(msg.detections) - # print(f'Added detection seq {seq}') - - elif name == "color": # color - # Save color frame in the directory - self.msgs[seq][name] = msg - # print(f'Added frame seq {seq}') - - - def get_msgs(self): - seq_remove = [] # Arr of sequence numbers to get deleted - - for seq, msgs in self.msgs.items(): - seq_remove.append(seq) # Will get removed from dict if we find synced msgs pair - - # Check if we have both detections and color frame with this sequence number - if "color" in msgs and "len" in msgs: - - # Check if all detected objects (faces) have finished recognition inference - if msgs["len"] == len(msgs["recognition"]): - # print(f"Synce`d msgs with sequence number {seq}", msgs) - - # We have synced msgs, remove previous msgs (memory cleaning) - for rm in seq_remove: - del self.msgs[rm] - - return msgs # Returned synced msgs - - return None # No synced msgs \ No newline at end of file diff --git a/gen2-pedestrian-reidentification/main.py b/gen2-pedestrian-reidentification/main.py index a2d459cfe..924200ff0 100644 --- a/gen2-pedestrian-reidentification/main.py +++ b/gen2-pedestrian-reidentification/main.py @@ -1,206 +1,35 @@ -from MultiMsgSync import TwoStageHostSeqSync -import blobconverter -import cv2 +#!/usr/bin/env python3 + +from depthai_sdk import OakCamera, AspectRatioResizeMode, TwoStagePacket, TextPosition import depthai as dai import numpy as np -from depthai_sdk import FPSHandler - -def cos_dist(a, b): - return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)) - -def frame_norm(frame, bbox): - normVals = np.full(len(bbox), frame.shape[0]) - normVals[::2] = frame.shape[1] - return (np.clip(np.array(bbox), 0, 1) * normVals).astype(int) - -def create_pipeline(stereo): - pipeline = dai.Pipeline() - pipeline.setOpenVINOVersion(dai.OpenVINO.VERSION_2021_4) - - print("Creating Color Camera...") - cam = pipeline.create(dai.node.ColorCamera) - cam.setPreviewSize(1632, 960) - cam.setResolution(dai.ColorCameraProperties.SensorResolution.THE_1080_P) - cam.setInterleaved(False) - cam.setBoardSocket(dai.CameraBoardSocket.RGB) - - cam_xout = pipeline.create(dai.node.XLinkOut) - cam_xout.setStreamName("color") - cam.preview.link(cam_xout.input) - - # Workaround: 
remove in 2.18, use `cam.setPreviewNumFramesPool(10)` - # This manip uses 15*4.7 MB => 70 MB of RAM. - copy_manip = pipeline.create(dai.node.ImageManip) - copy_manip.setNumFramesPool(15) - copy_manip.setMaxOutputFrameSize(1632 * 960 * 3) - cam.preview.link(copy_manip.inputImage) - - # ImageManip will resize the frame before sending it to the Face detection NN node - person_det_manip = pipeline.create(dai.node.ImageManip) - person_det_manip.initialConfig.setResize(544, 320) - person_det_manip.initialConfig.setFrameType(dai.RawImgFrame.Type.RGB888p) - copy_manip.out.link(person_det_manip.inputImage) - - person_nn = pipeline.create(dai.node.MobileNetDetectionNetwork) - person_nn.setConfidenceThreshold(0.5) - person_nn.setBlobPath(blobconverter.from_zoo(name="person-detection-retail-0013", shaves=6)) - person_det_manip.out.link(person_nn.input) - - # Send face detections to the host (for bounding boxes) - face_det_xout = pipeline.create(dai.node.XLinkOut) - face_det_xout.setStreamName("detection") - person_nn.out.link(face_det_xout.input) - - # Script node will take the output from the face detection NN as an input and set ImageManipConfig - # to the 'recognition_manip' to crop the initial frame - image_manip_script = pipeline.create(dai.node.Script) - copy_manip.out.link(image_manip_script.inputs['preview']) - person_nn.out.link(image_manip_script.inputs['dets_in']) - # Only send metadata, we are only interested in timestamp, so we can sync - # depth frames with NN output - person_nn.passthrough.link(image_manip_script.inputs['passthrough']) - - image_manip_script.setScript(""" - import time - msgs = dict() - - def add_msg(msg, name, seq = None): - global msgs - if seq is None: - seq = msg.getSequenceNum() - seq = str(seq) - # node.warn(f"New msg {name}, seq {seq}") - - # Each seq number has it's own dict of msgs - if seq not in msgs: - msgs[seq] = dict() - msgs[seq][name] = msg - - # To avoid freezing (not necessary for this ObjDet model) - if 15 < len(msgs): - node.warn(f"Removing first element! len {len(msgs)}") - msgs.popitem() # Remove first element - - def get_msgs(): - global msgs - seq_remove = [] # Arr of sequence numbers to get deleted - for seq, syncMsgs in msgs.items(): - seq_remove.append(seq) # Will get removed from dict if we find synced msgs pair - # node.warn(f"Checking sync {seq}") - - # Check if we have both detections and color frame with this sequence number - if len(syncMsgs) == 2: # 1 frame, 1 detection - for rm in seq_remove: - del msgs[rm] - # node.warn(f"synced {seq}. Removed older sync values. len {len(msgs)}") - return syncMsgs # Returned synced msgs - return None - - def correct_bb(bb): - if bb.xmin < 0: bb.xmin = 0.001 - if bb.ymin < 0: bb.ymin = 0.001 - if bb.xmax > 1: bb.xmax = 0.999 - if bb.ymax > 1: bb.ymax = 0.999 - return bb - - while True: - time.sleep(0.001) # Avoid lazy looping - - preview = node.io['preview'].tryGet() - if preview is not None: - add_msg(preview, 'preview') - - dets = node.io['dets_in'].tryGet() - if dets is not None: - # TODO: in 2.18.0.0 use dets.getSequenceNum() - passthrough = node.io['passthrough'].get() - seq = passthrough.getSequenceNum() - add_msg(dets, 'dets', seq) - - sync_msgs = get_msgs() - if sync_msgs is not None: - img = sync_msgs['preview'] - dets = sync_msgs['dets'] - for i, det in enumerate(dets.detections): - cfg = ImageManipConfig() - correct_bb(det) - cfg.setCropRect(det.xmin, det.ymin, det.xmax, det.ymax) - # node.warn(f"Sending {i + 1}. age/gender det. Seq {seq}. 
Det {det.xmin}, {det.ymin}, {det.xmax}, {det.ymax}") - cfg.setResize(128, 256) - cfg.setKeepAspectRatio(False) - node.io['manip_cfg'].send(cfg) - node.io['manip_img'].send(img) - """) - recognition_manip = pipeline.create(dai.node.ImageManip) - recognition_manip.initialConfig.setResize(128, 256) - recognition_manip.setWaitForConfigInput(True) - image_manip_script.outputs['manip_cfg'].link(recognition_manip.inputConfig) - image_manip_script.outputs['manip_img'].link(recognition_manip.inputImage) - - # Age/Gender second stange NN - print("Creating Recognition Neural Network...") - recognition_nn = pipeline.create(dai.node.NeuralNetwork) - - recognition_nn.setBlobPath(blobconverter.from_zoo(name="person-reidentification-retail-0288", shaves=6)) - recognition_manip.out.link(recognition_nn.input) - - recognition_nn_xout = pipeline.create(dai.node.XLinkOut) - recognition_nn_xout.setStreamName("recognition") - recognition_nn.out.link(recognition_nn_xout.input) - - return pipeline - -with dai.Device() as device: - stereo = 1 < len(device.getConnectedCameras()) - stereo=False - device.startPipeline(create_pipeline(stereo)) - - sync = TwoStageHostSeqSync() - fps = FPSHandler() - queues = {} - results = [] - # Create output queues - for name in ["color", "detection", "recognition"]: - queues[name] = device.getOutputQueue(name) - - while True: - for name, q in queues.items(): - # Add all msgs (color frames, object detections and age/gender recognitions) to the Sync class. - if q.has(): - sync.add_msg(q.get(), name) - - msgs = sync.get_msgs() - if msgs is not None: - fps.nextIter() - frame = msgs["color"].getCvFrame() - detections = msgs["detection"].detections - recognitions = msgs["recognition"] - - txt = "FPS: {:.1f}".format(fps.fps()) - cv2.putText(frame, txt, (10, 30), cv2.FONT_HERSHEY_TRIPLEX, 1, (0, 0, 0), 8) - cv2.putText(frame, txt, (10, 30), cv2.FONT_HERSHEY_TRIPLEX, 1, (255, 255, 255), 2) - - for i, detection in enumerate(detections): - bbox = frame_norm(frame, (detection.xmin, detection.ymin, detection.xmax, detection.ymax)) - - reid_result = recognitions[i].getFirstLayerFp16() - # print('result', reid_result) - - for i, vector in enumerate(results): - # print(f"Checking vector {i}") - dist = cos_dist(reid_result, vector) - if dist > 0.7: - results[i] = np.array(reid_result) - break - else: - # print("adding new vector") - results.append(np.array(reid_result)) - - cv2.rectangle(frame, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (10, 245, 10), 2) - y = (bbox[1] + bbox[3]) // 2 - cv2.putText(frame, f"Person {i}", (bbox[0], y), cv2.FONT_HERSHEY_TRIPLEX, 1.5, (0, 0, 0), 8) - cv2.putText(frame, f"Person {i}", (bbox[0], y), cv2.FONT_HERSHEY_TRIPLEX, 1.5, (255, 255, 255), 2) +import cv2 - cv2.imshow("Camera", frame) - if cv2.waitKey(1) == ord('q'): - break \ No newline at end of file +class PedestrianReId: + def __init__(self) -> None: + self.results = [] + + def cosine_dist(self, a, b): + return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)) + + def new_result(self, vector_result) -> int: + vector_result = np.array(vector_result) + for i, vector in enumerate(self.results): + dist = self.cosine_dist(vector, vector_result) + if dist > 0.7: + self.results[i] = vector_result + return i + else: + self.results.append(vector_result) + return len(self.results) - 1 + + +with OakCamera(replay='people-construction-vest-01') as oak: + color = oak.create_camera('color') + person_det = oak.create_nn('person-detection-retail-0013', color) + # Passthrough is enabled for debugging purposes + # 
AspectRatioResizeMode has to be CROP for 2-stage pipelines at the moment + person_det.config_nn(aspect_ratio_resize_mode=AspectRatioResizeMode.CROP) + + oak.visualize([person_det, person_det.out.passthrough]) + oak.start(blocking=True) diff --git a/gen2-pedestrian-reidentification/requirements.txt b/gen2-pedestrian-reidentification/requirements.txt index b7d7d7cf2..ce7f5340f 100644 --- a/gen2-pedestrian-reidentification/requirements.txt +++ b/gen2-pedestrian-reidentification/requirements.txt @@ -1,5 +1 @@ -opencv-python -depthai==2.17.0.0 -depthai-sdk -numpy -blobconverter==1.3.0 \ No newline at end of file +depthai-sdk==1.9.1 diff --git a/gen2-people-counter/images/img1.jpeg b/gen2-people-counter/images/img1.jpeg deleted file mode 100644 index 2d410c9c4..000000000 Binary files a/gen2-people-counter/images/img1.jpeg and /dev/null differ diff --git a/gen2-people-counter/images/img2.jpeg b/gen2-people-counter/images/img2.jpeg deleted file mode 100644 index b9cdf0767..000000000 Binary files a/gen2-people-counter/images/img2.jpeg and /dev/null differ diff --git a/gen2-people-counter/images/img3.jpeg b/gen2-people-counter/images/img3.jpeg deleted file mode 100644 index 2e4b2a5cf..000000000 Binary files a/gen2-people-counter/images/img3.jpeg and /dev/null differ diff --git a/gen2-people-counter/images/img4.jpeg b/gen2-people-counter/images/img4.jpeg deleted file mode 100644 index 7ddc8a0e0..000000000 Binary files a/gen2-people-counter/images/img4.jpeg and /dev/null differ diff --git a/gen2-people-counter/main.py b/gen2-people-counter/main.py index dc04d6d51..72201324b 100644 --- a/gen2-people-counter/main.py +++ b/gen2-people-counter/main.py @@ -1,73 +1,21 @@ #!/usr/bin/env python3 -from pathlib import Path -import blobconverter +from depthai_sdk import OakCamera, DetectionPacket, Visualizer, TextPosition import cv2 -import depthai as dai -import numpy as np -import argparse -from time import monotonic -import itertools -from depthai_sdk import PipelineManager, NNetManager, PreviewManager -from depthai_sdk import cropToAspectRatio +with OakCamera(replay='people-images-01') as oak: + color = oak.create_camera('color') + nn = oak.create_nn('person-detection-retail-0013', color) + oak.replay.setFps(3) -parentDir = Path(__file__).parent + def cb(packet: DetectionPacket, visualizer: Visualizer): + num = len(packet.img_detections.detections) + print('New msgs! 
Number of people detected:', num) -#===================================================================================== -# To use a different NN, change `size` and `nnPath` here: -size = (544, 320) -nnPath = blobconverter.from_zoo("person-detection-retail-0013", shaves=8) -#===================================================================================== + visualizer.add_text(f"Number of people: {num}", position=TextPosition.TOP_MID) + visualizer.draw(packet.frame) + cv2.imshow(f'frame {packet.name}', packet.frame) -# Labels -labelMap = ["background", "person"] - -# Get argument first -parser = argparse.ArgumentParser() -parser.add_argument('-nn', '--nn', type=str, help=".blob path") -parser.add_argument('-i', '--image', type=str, - help="Path to an image file to be used for inference (conflicts with -cam)") -parser.add_argument('-cam', '--camera', action="store_true", - help="Use DepthAI RGB camera for inference (conflicts with -vid)") -args = parser.parse_args() - -# Whether we want to use images from host or rgb camera -IMAGE = not args.camera -nnSource = "host" if IMAGE else "color" - -# Start defining a pipeline -pm = PipelineManager() -if not IMAGE: - pm.createColorCam(previewSize=size, xout=True) - pv = PreviewManager(display=["color"], nnSource=nnSource) - -nm = NNetManager(inputSize=size, nnFamily="mobilenet", labels=labelMap, confidence=0.5) -nn = nm.createNN(pm.pipeline, pm.nodes, blobPath=nnPath, source=nnSource) -pm.setNnManager(nm) -pm.addNn(nn) - -# Pipeline defined, now the device is connected to -with dai.Device(pm.pipeline) as device: - nm.createQueues(device) - if IMAGE: - imgPaths = [args.image] if args.image else list(parentDir.glob('images/*.jpeg')) - og_frames = itertools.cycle([cropToAspectRatio(cv2.imread(str(imgPath)), size) for imgPath in imgPaths]) - else: - pv.createQueues(device) - - while True: - if IMAGE: - frame = next(og_frames).copy() - nm.sendInputFrame(frame) - else: - pv.prepareFrames(blocking=True) - frame = pv.get("color") - - nn_data = nm.decode(nm.outputQueue.get()) - nm.draw(frame, nn_data) - cv2.putText(frame, f"People count: {len(nn_data)}", (5, 30), cv2.FONT_HERSHEY_TRIPLEX, 1, (0,0,255)) - cv2.imshow("color", frame) - - if cv2.waitKey(3000 if IMAGE else 1) == ord('q'): - break + oak.visualize(nn, callback=cb) + # oak.show_graph() + oak.start(blocking=True) diff --git a/gen2-people-counter/requirements.txt b/gen2-people-counter/requirements.txt index f3a88c814..73fd581c6 100644 --- a/gen2-people-counter/requirements.txt +++ b/gen2-people-counter/requirements.txt @@ -1,4 +1,2 @@ -opencv-python==4.5.1.48 -depthai==2.16.0.0 -blobconverter==1.2.9 -depthai_sdk==1.1.6 +depthai-sdk==1.9.1 + diff --git a/gen2-people-tracker/README.md b/gen2-people-tracker/README.md index 62a32cdc2..18225e068 100644 --- a/gen2-people-tracker/README.md +++ b/gen2-people-tracker/README.md @@ -1,18 +1,13 @@ # Gen2 People tracker This application counts how many people went up / down / left / right in the video stream, allowing you to -receive an information about how many people went into a room or went through a corridor. - -Demo uses [Script](https://docs.luxonis.com/projects/api/en/latest/components/nodes/script/) node to "decode" movement from trackelts information. In script node, when a new tracklet is added, it's coordiantes are saved. When it's removed/lost, it compares starting coordinates -with end coordinates and if movement was greater than `THRESH_DIST_DELTA`, it means a person movement was valid and added to counter. 
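To make the counting rule concrete: a tracklet's centroid is stored (in normalized image coordinates) when tracking starts, compared against the centroid when the tracklet is lost or removed, and only the dominant axis is counted, provided the displacement exceeds the threshold (0.25 in the code further below). A small worked example of that decision, with hypothetical centroids:

start = (0.20, 0.50)                                # centroid when the tracklet was first seen
end = (0.80, 0.55)                                  # centroid when the tracklet was lost/removed
dx, dy = end[0] - start[0], end[1] - start[1]       # 0.60, 0.05
if abs(dx) > abs(dy) and abs(dx) > 0.25:
    print('left' if dx < 0 else 'right')            # prints "right"
elif abs(dy) > abs(dx) and abs(dy) > 0.25:
    print('up' if dy < 0 else 'down')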
- -This demo can also send counter results through the SPI. +receive an information about eg. how many people went into a room or went through a corridor. Demo uses [person_detection_retail_0013](https://docs.openvinotoolkit.org/latest/omz_models_intel_person_detection_retail_0013_description_person_detection_retail_0013.html) from the Open Model Zoo for person detection. ## Demo -![demo](https://user-images.githubusercontent.com/18037362/145656510-94e12444-7524-47f9-a036-7ed8ee78fd7a.gif) +![demo](https://user-images.githubusercontent.com/18037362/199674225-ebb1b811-6e45-4535-abe3-f18d1644634d.gif) ## Install project requirements diff --git a/gen2-people-tracker/demo/example_01.mp4 b/gen2-people-tracker/demo/example_01.mp4 deleted file mode 100644 index 74ae877d2..000000000 Binary files a/gen2-people-tracker/demo/example_01.mp4 and /dev/null differ diff --git a/gen2-people-tracker/demo/example_02.mp4 b/gen2-people-tracker/demo/example_02.mp4 deleted file mode 100644 index 250d1ee11..000000000 Binary files a/gen2-people-tracker/demo/example_02.mp4 and /dev/null differ diff --git a/gen2-people-tracker/main.py b/gen2-people-tracker/main.py index 4850b08a5..e3e8da20d 100644 --- a/gen2-people-tracker/main.py +++ b/gen2-people-tracker/main.py @@ -1,166 +1,25 @@ #!/usr/bin/env python3 -from pathlib import Path -import json - -import blobconverter -import cv2 +from depthai_sdk import OakCamera, TrackerPacket, Visualizer, TextPosition import depthai as dai -import numpy as np -import argparse -from time import monotonic - -# Labels -labelMap = ["background", "person" ] - -# Get argument first -parser = argparse.ArgumentParser() -parser.add_argument('-nn', '--nn', type=str, help=".blob path") -parser.add_argument('-vid', '--video', type=str, help="Path to video file to be used for inference (conflicts with -cam)") -parser.add_argument('-spi', '--spi', action='store_true', default=False, help="Send tracklets to the MCU via SPI") -parser.add_argument('-cam', '--camera', action="store_true", help="Use DepthAI RGB camera for inference (conflicts with -vid)") -parser.add_argument('-t', '--threshold', default=0.25, type=float, - help="Minimum distance the person has to move (across the x/y axis) to be considered a real movement") -args = parser.parse_args() - -parentDir = Path(__file__).parent - -videoPath = args.video or parentDir / Path('demo/example_01.mp4') -nnPath = args.nn or blobconverter.from_zoo(name="person-detection-retail-0013", shaves=7) - -# Whether we want to use video from host or rgb camera -VIDEO = not args.camera - -class TextHelper: - def __init__(self) -> None: - self.bg_color = (0, 0, 0) - self.color = (255, 255, 255) - self.text_type = cv2.FONT_HERSHEY_SIMPLEX - self.line_type = cv2.LINE_AA - def putText(self, frame, text, coords): - cv2.putText(frame, text, coords, self.text_type, 1, self.bg_color, 6, self.line_type) - cv2.putText(frame, text, coords, self.text_type, 1, self.color, 2, self.line_type) - -# Start defining a pipeline -pipeline = dai.Pipeline() - -# Create and configure the detection network -detectionNetwork = pipeline.create(dai.node.MobileNetDetectionNetwork) -detectionNetwork.setBlobPath(str(Path(nnPath).resolve().absolute())) -detectionNetwork.setConfidenceThreshold(0.5) - -if VIDEO: - # Configure XLinkIn - we will send video through it - videoIn = pipeline.create(dai.node.XLinkIn) - videoIn.setStreamName("video_in") - - # NOTE: video must be of size 544x320. 
We will resize this on the - # host, but you could also use ImageManip node to do it on device - - # Link video in with the detection network - videoIn.out.link(detectionNetwork.input) - -else: - # Create and configure the color camera - colorCam = pipeline.create(dai.node.ColorCamera) - colorCam.setPreviewSize(544, 320) - colorCam.setResolution(dai.ColorCameraProperties.SensorResolution.THE_1080_P) - colorCam.setInterleaved(False) - colorCam.setColorOrder(dai.ColorCameraProperties.ColorOrder.BGR) - # Connect RGB preview to the detection network - colorCam.preview.link(detectionNetwork.input) - -# Create and configure the object tracker -objectTracker = pipeline.create(dai.node.ObjectTracker) -objectTracker.setDetectionLabelsToTrack([1]) # Track people -# possible tracking types: ZERO_TERM_COLOR_HISTOGRAM, ZERO_TERM_IMAGELESS, SHORT_TERM_IMAGELESS, SHORT_TERM_KCF -objectTracker.setTrackerType(dai.TrackerType.ZERO_TERM_COLOR_HISTOGRAM) -# Take the smallest ID when new object is tracked, possible options: SMALLEST_ID, UNIQUE_ID -objectTracker.setTrackerIdAssignmentPolicy(dai.TrackerIdAssignmentPolicy.SMALLEST_ID) - -# Link detection networks outputs to the object tracker -detectionNetwork.passthrough.link(objectTracker.inputTrackerFrame) -detectionNetwork.passthrough.link(objectTracker.inputDetectionFrame) -detectionNetwork.out.link(objectTracker.inputDetections) - - -script = pipeline.create(dai.node.Script) -objectTracker.out.link(script.inputs['tracklets']) - -with open("script.py", "r") as f: - s = f.read() - s = s.replace("THRESH_DIST_DELTA", str(args.threshold)) - script.setScript(s) - -# Send tracklets to the host -trackerOut = pipeline.create(dai.node.XLinkOut) -trackerOut.setStreamName("out") -script.outputs['out'].link(trackerOut.input) - -# Send send RGB preview frames to the host -xlinkOut = pipeline.create(dai.node.XLinkOut) -xlinkOut.setStreamName("preview") -objectTracker.passthroughTrackerFrame.link(xlinkOut.input) - -if args.spi: - # Send tracklets via SPI to the MCU - spiOut = pipeline.create(dai.node.SPIOut) - spiOut.setStreamName("tracklets") - spiOut.setBusId(0) - objectTracker.out.link(spiOut.input) - - -# Pipeline defined, now the device is connected to -with dai.Device(pipeline) as device: - previewQ = device.getOutputQueue("preview") - outQ = device.getOutputQueue("out") - - counters = None - frame = None - text = TextHelper() - - def update(): - global counters, frame, text - if previewQ.has(): - frame = previewQ.get().getCvFrame() - - if outQ.has(): - jsonText = str(outQ.get().getData(), 'utf-8') - counters = json.loads(jsonText) - print(counters) +from people_tracker import PeopleTracker +import cv2 - if counters is not None: - text.putText(frame, f"Up: {counters['up']}, Down: {counters['down']}", (3, 30)) - if frame is not None: - cv2.imshow("frame", frame) +pt = PeopleTracker() - def to_planar(arr: np.ndarray, shape: tuple) -> np.ndarray: - return cv2.resize(arr, shape).transpose(2, 0, 1).flatten() +with OakCamera(replay='people-tracking-above-03') as oak: + color_cam = oak.create_camera('color') + tracker = oak.create_nn('person-detection-retail-0013', color_cam, tracker=True) + tracker.config_nn(conf_threshold=0.6) + tracker.config_tracker(track_labels=[1], tracker_type=dai.TrackerType.ZERO_TERM_COLOR_HISTOGRAM) - if VIDEO: - videoQ = device.getInputQueue("video_in") + def cb(packet: TrackerPacket, vis: Visualizer): + left, right, up, down = pt.calculate_tracklet_movement(packet.daiTracklets) - cap = cv2.VideoCapture(str(Path(videoPath).resolve().absolute())) 
- while cap.isOpened(): - read_correctly, video_frame = cap.read() - if not read_correctly: - break + vis.add_text(f"Up: {up}, Down: {down}", position=TextPosition.TOP_LEFT) + vis.draw(packet.frame) - img = dai.ImgFrame() - # Also reshapes the video frame to 544x320 - img.setData(to_planar(video_frame, (544, 320))) - img.setType(dai.RawImgFrame.Type.BGR888p) - img.setTimestamp(monotonic()) - img.setWidth(544) - img.setHeight(320) - videoQ.send(img) + cv2.imshow('People Tracker', packet.frame) - update() - if cv2.waitKey(1) == ord('q'): - break - print("End of the video") - else: - while True: - update() - if cv2.waitKey(1) == ord('q'): - break + oak.visualize(tracker.out.tracker, callback=cb, record_path='demo.mp4') + oak.start(blocking=True) diff --git a/gen2-people-tracker/requirements.txt b/gen2-people-tracker/requirements.txt index 840114e62..ce7f5340f 100644 --- a/gen2-people-tracker/requirements.txt +++ b/gen2-people-tracker/requirements.txt @@ -1,3 +1 @@ -opencv-python==4.5.1.48 -depthai==2.16.0.0 -blobconverter==1.2.9 +depthai-sdk==1.9.1 diff --git a/gen2-people-tracker/script.py b/gen2-people-tracker/script.py deleted file mode 100644 index 70770a5e3..000000000 --- a/gen2-people-tracker/script.py +++ /dev/null @@ -1,57 +0,0 @@ -import json -data = {} -counter = { 'up': 0, 'down': 0, 'left': 0, 'right': 0 } # Y axis (up/down), X axis (left/right) - -def send(): - b = Buffer(50) - b.setData(json.dumps(counter).encode('utf-8')) - node.io['out'].send(b) - -def tracklet_removed(tracklet, coords2): - coords1 = tracklet['coords'] - deltaX = coords2[0] - coords1[0] - deltaY = coords2[1] - coords1[1] - - if abs(deltaX) > abs(deltaY) and abs(deltaX) > THRESH_DIST_DELTA: - direction = "left" if 0 > deltaX else "right" - counter[direction] += 1 - send() - node.warn(f"Person moved {direction}") - # node.warn("DeltaX: " + str(abs(deltaX))) - elif abs(deltaY) > abs(deltaX) and abs(deltaY) > THRESH_DIST_DELTA: - direction = "up" if 0 > deltaY else "down" - counter[direction] += 1 - send() - node.warn(f"Person moved {direction}") - #node.warn("DeltaY: " + str(abs(deltaY))) - # else: node.warn("Invalid movement") - -def get_centroid(roi): - x1 = roi.topLeft().x - y1 = roi.topLeft().y - x2 = roi.bottomRight().x - y2 = roi.bottomRight().y - return ((x2-x1)/2+x1, (y2-y1)/2+y1) - -# Send dictionary initially (all counters 0) -send() - -while True: - tracklets = node.io['tracklets'].get() - for t in tracklets.tracklets: - # If new tracklet, save its centroid - if t.status == Tracklet.TrackingStatus.NEW: - data[str(t.id)] = {} # Reset - data[str(t.id)]['coords'] = get_centroid(t.roi) - elif t.status == Tracklet.TrackingStatus.TRACKED: - data[str(t.id)]['lostCnt'] = 0 - elif t.status == Tracklet.TrackingStatus.LOST: - data[str(t.id)]['lostCnt'] += 1 - # If tracklet has been "LOST" for more than 10 frames, remove it - if 10 < data[str(t.id)]['lostCnt'] and "lost" not in data[str(t.id)]: - #node.warn(f"Tracklet {t.id} lost: {data[str(t.id)]['lostCnt']}") - tracklet_removed(data[str(t.id)], get_centroid(t.roi)) - data[str(t.id)]["lost"] = True - elif (t.status == Tracklet.TrackingStatus.REMOVED) and "lost" not in data[str(t.id)]: - tracklet_removed(data[str(t.id)], get_centroid(t.roi)) - #node.warn(f"Tracklet {t.id} removed") \ No newline at end of file diff --git a/gen2-people-tracker/sdk/main.py b/gen2-people-tracker/sdk/main.py new file mode 100644 index 000000000..7643ba601 --- /dev/null +++ b/gen2-people-tracker/sdk/main.py @@ -0,0 +1,70 @@ +import cv2 +from depthai import TrackerType, 
TrackerIdAssignmentPolicy, Tracklet + +from depthai_sdk import OakCamera, BboxStyle, TextPosition + +THRESHOLD = 0.25 + +tracked_objects = {} +counter = {'up': 0, 'down': 0, 'left': 0, 'right': 0} + + +def get_centroid(roi): + x1 = roi.topLeft().x + y1 = roi.topLeft().y + x2 = roi.bottomRight().x + y2 = roi.bottomRight().y + return ((x2 - x1) / 2 + x1, (y2 - y1) / 2 + y1) + + +def tracklet_removed(tracklet, coords2): + coords1 = tracklet['coords'] + deltaX = coords2[0] - coords1[0] + deltaY = coords2[1] - coords1[1] + + if abs(deltaX) > abs(deltaY) and abs(deltaX) > THRESHOLD: + direction = 'left' if 0 > deltaX else 'right' + counter[direction] += 1 + print(f'Person moved {direction}') + elif abs(deltaY) > abs(deltaX) and abs(deltaY) > THRESHOLD: + direction = 'up' if 0 > deltaY else 'down' + counter[direction] += 1 + print(f'Person moved {direction}') + + +def callback(packet, visualizer): + for t in packet.daiTracklets.tracklets: + # If new tracklet, save its centroid + if t.status == Tracklet.TrackingStatus.NEW: + tracked_objects[str(t.id)] = {} # Reset + tracked_objects[str(t.id)]['coords'] = get_centroid(t.roi) + + elif (t.status == Tracklet.TrackingStatus.REMOVED) and 'lost' not in tracked_objects[str(t.id)]: + tracklet_removed(tracked_objects[str(t.id)], get_centroid(t.roi)) + + counter_str = f'Up: {counter["up"]}\nDown: {counter["down"]}\n' \ + f'Left: {counter["left"]}\nRight: {counter["right"]}' + visualizer.add_text(counter_str, position=TextPosition.BOTTOM_LEFT) + + frame = visualizer.draw(packet.frame) + cv2.imshow('People tracking', frame) + + +with OakCamera() as oak: + color = oak.create_camera('color') + nn = oak.create_nn('person-detection-retail-0013', color, nn_type='mobilenet', tracker=True) + + nn.config_nn(aspect_ratio_resize_mode='stretch') + nn.config_tracker(tracker_type=TrackerType.ZERO_TERM_COLOR_HISTOGRAM, + track_labels=[1], + assignment_policy=TrackerIdAssignmentPolicy.SMALLEST_ID) + + visualizer = oak.visualize(nn.out.tracker, callback=callback, fps=True) + visualizer.detections( + hide_label=True, + bbox_style=BboxStyle.ROUNDED_RECTANGLE + ).tracking( + deletion_lost_threshold=5 + ) + + oak.start(blocking=True) diff --git a/gen2-people-tracker/sdk/requirements.txt b/gen2-people-tracker/sdk/requirements.txt new file mode 100644 index 000000000..cf98dca98 --- /dev/null +++ b/gen2-people-tracker/sdk/requirements.txt @@ -0,0 +1,4 @@ +opencv-python +depthai +depthai-sdk +blobconverter diff --git a/gen2-play-encoded-stream/sdk/main.py b/gen2-play-encoded-stream/sdk/main.py new file mode 100644 index 000000000..603aaf8c4 --- /dev/null +++ b/gen2-play-encoded-stream/sdk/main.py @@ -0,0 +1,32 @@ +import subprocess as sp +from os import name as os_name + +from depthai_sdk import OakCamera + +width, height = 720, 500 +command = [ + "ffplay", + "-i", "-", + "-x", str(width), + "-y", str(height), + "-framerate", "60", + "-fflags", "nobuffer", + "-flags", "low_delay", + "-framedrop", + "-strict", "experimental" +] + +if os_name == "nt": # Running on Windows + command = ["cmd", "/c"] + command + +try: + proc = sp.Popen(command, stdin=sp.PIPE) # Start the ffplay process +except: + exit("Error: cannot run ffplay!\nTry running: sudo apt install ffmpeg") + +with OakCamera() as oak: + color = oak.create_camera('color', resolution='1080p', encode='h264', fps=60) + oak.callback(color.out.encoded, lambda packet: proc.stdin.write(packet.frame)) + oak.start(blocking=True) + +proc.stdin.close() diff --git a/gen2-play-encoded-stream/sdk/mjpeg.py 
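Besides piping the bitstream into ffplay as main.py above does, the encoded stream could also be decoded on the host with PyAV (already listed in requirements.txt). The sketch below is an assumption-based alternative, not part of this patch; it presumes packet.frame carries the raw H.264 chunk, the same attribute main.py writes to ffplay's stdin:

import av
import cv2
from depthai_sdk import OakCamera

codec = av.CodecContext.create('h264', 'r')

def decode_and_show(packet):
    # Parse the raw bitstream chunk into AV packets, decode them and display the frames
    for p in codec.parse(bytes(packet.frame)):
        for frame in codec.decode(p):
            cv2.imshow('decoded h264', frame.to_ndarray(format='bgr24'))
            cv2.waitKey(1)

with OakCamera() as oak:
    color = oak.create_camera('color', resolution='1080p', encode='h264', fps=60)
    oak.callback(color.out.encoded, decode_and_show)
    oak.start(blocking=True)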
b/gen2-play-encoded-stream/sdk/mjpeg.py new file mode 100644 index 000000000..00d39c462 --- /dev/null +++ b/gen2-play-encoded-stream/sdk/mjpeg.py @@ -0,0 +1,6 @@ +from depthai_sdk import OakCamera + +with OakCamera() as oak: + color = oak.create_camera('color', resolution='1080p', encode='mjpeg', fps=60) + oak.visualize(color.out.encoded) + oak.start(blocking=True) diff --git a/gen2-play-encoded-stream/sdk/pyav.py b/gen2-play-encoded-stream/sdk/pyav.py new file mode 100644 index 000000000..7e4fc6390 --- /dev/null +++ b/gen2-play-encoded-stream/sdk/pyav.py @@ -0,0 +1,7 @@ +from depthai_sdk import OakCamera + + +with OakCamera() as oak: + color = oak.create_camera('color', resolution='1080p', encode='h264', fps=60) + oak.visualize(color.out.encoded) + oak.start(blocking=True) diff --git a/gen2-play-encoded-stream/sdk/requirements.txt b/gen2-play-encoded-stream/sdk/requirements.txt new file mode 100644 index 000000000..830fdb13d --- /dev/null +++ b/gen2-play-encoded-stream/sdk/requirements.txt @@ -0,0 +1,3 @@ +depthai +depthai-sdk +pyav \ No newline at end of file diff --git a/gen2-pointcloud/rgbd-pointcloud/main.py b/gen2-pointcloud/rgbd-pointcloud/main.py index 79ceb4819..e09faabea 100644 --- a/gen2-pointcloud/rgbd-pointcloud/main.py +++ b/gen2-pointcloud/rgbd-pointcloud/main.py @@ -1,6 +1,5 @@ #!/usr/bin/env python3 -from sys import maxsize import cv2 import depthai as dai @@ -45,7 +44,7 @@ config.postProcessing.spatialFilter.holeFillingRadius = 2 config.postProcessing.spatialFilter.numIterations = 1 config.postProcessing.thresholdFilter.minRange = 400 -config.postProcessing.thresholdFilter.maxRange = 2000 +config.postProcessing.thresholdFilter.maxRange = 15000 config.postProcessing.decimationFilter.decimationFactor = 1 stereo.initialConfig.set(config) @@ -102,8 +101,8 @@ def add_msg(self, name, msg): device.setIrLaserDotProjectorBrightness(1200) qs = [] - qs.append(device.getOutputQueue("depth", maxSize=1, blocking=False)) - qs.append(device.getOutputQueue("colorize", maxSize=1, blocking=False)) + qs.append(device.getOutputQueue("depth", 1)) + qs.append(device.getOutputQueue("colorize", 1)) try: from projector_3d import PointCloudVisualizer diff --git a/gen2-pointcloud/rgbd-pointcloud/projector_3d.py b/gen2-pointcloud/rgbd-pointcloud/projector_3d.py index 7e6ba7c45..7415c6d89 100644 --- a/gen2-pointcloud/rgbd-pointcloud/projector_3d.py +++ b/gen2-pointcloud/rgbd-pointcloud/projector_3d.py @@ -10,7 +10,7 @@ def __init__(self, intrinsic_matrix, width, height): self.R_camera_to_world = np.array([[1, 0, 0], [0, -1, 0], [0, 0, -1]]).astype(np.float64) self.depth_map = None self.rgb = None - self.pcl = o3d.geometry.PointCloud() + self.pcl = None self.pinhole_camera_intrinsic = o3d.camera.PinholeCameraIntrinsic(width, height, @@ -19,39 +19,41 @@ def __init__(self, intrinsic_matrix, width, height): intrinsic_matrix[0][2], intrinsic_matrix[1][2]) self.vis = o3d.visualization.Visualizer() - self.vis.create_window(window_name="Point Cloud") - self.vis.add_geometry(self.pcl) - origin = o3d.geometry.TriangleMesh.create_coordinate_frame(size=0.3, origin=[0, 0, 0]) - self.vis.add_geometry(origin) - view_control = self.vis.get_view_control() - view_control.set_constant_z_far(1000) + self.vis.create_window() self.isstarted = False - def rgbd_to_projection(self, depth_map, rgb, downsample = True, remove_noise = False): + def rgbd_to_projection(self, depth_map, rgb, downsample = True): rgb_o3d = o3d.geometry.Image(rgb) depth_o3d = o3d.geometry.Image(depth_map) - rgbd_image = 
o3d.geometry.RGBDImage.create_from_color_and_depth( - rgb_o3d, depth_o3d, convert_rgb_to_intensity=(len(rgb.shape) != 3) - ) - - pcd = o3d.geometry.PointCloud.create_from_rgbd_image(rgbd_image, self.pinhole_camera_intrinsic) - - if downsample: - pcd = pcd.voxel_down_sample(voxel_size=0.01) - - if remove_noise: + if len(rgb.shape) == 3: + rgbd_image = o3d.geometry.RGBDImage.create_from_color_and_depth(rgb_o3d, depth_o3d, convert_rgb_to_intensity=False) + else: + rgbd_image = o3d.geometry.RGBDImage.create_from_color_and_depth(rgb_o3d, depth_o3d) + + if self.pcl is None: + self.pcl = o3d.geometry.PointCloud.create_from_rgbd_image(rgbd_image, self.pinhole_camera_intrinsic) + else: + pcd = o3d.geometry.PointCloud.create_from_rgbd_image(rgbd_image, self.pinhole_camera_intrinsic) + if downsample: + pcd = pcd.voxel_down_sample(voxel_size=0.01) + # Remove noise pcd = pcd.remove_statistical_outlier(30, 0.1)[0] - - self.pcl.points = pcd.points - self.pcl.colors = pcd.colors + self.pcl.points = pcd.points + self.pcl.colors = pcd.colors self.pcl.rotate(self.R_camera_to_world, center=np.array([0,0,0],dtype=np.float64)) return self.pcl def visualize_pcd(self): - self.vis.update_geometry(self.pcl) - self.vis.poll_events() - self.vis.update_renderer() + if not self.isstarted: + self.vis.add_geometry(self.pcl) + origin = o3d.geometry.TriangleMesh.create_coordinate_frame(size=0.3, origin=[0, 0, 0]) + self.vis.add_geometry(origin) + self.isstarted = True + else: + self.vis.update_geometry(self.pcl) + self.vis.poll_events() + self.vis.update_renderer() def close_window(self): self.vis.destroy_window() diff --git a/gen2-qr-code-scanner/sdk/main.py b/gen2-qr-code-scanner/sdk/main.py new file mode 100644 index 000000000..9186cfb40 --- /dev/null +++ b/gen2-qr-code-scanner/sdk/main.py @@ -0,0 +1,45 @@ +import blobconverter +import cv2 +import numpy as np + +from depthai_sdk import OakCamera, AspectRatioResizeMode, TextPosition +from depthai_sdk.oak_outputs.normalize_bb import NormalizeBoundingBox + +# TODO - change to better detector +detector = cv2.QRCodeDetector() + + +def expand_detection(det, percent=2): + percent /= 100 + det.xmin = np.clip(det.xmin - percent, 0, 1) + det.ymin = np.clip(det.ymin - percent, 0, 1) + det.xmax = np.clip(det.xmax + percent, 0, 1) + det.ymax = np.clip(det.ymax + percent, 0, 1) + + +def callback(packet, visualizer): + for i, detection in enumerate(packet.img_detections.detections): + expand_detection(detection) + bbox = detection.xmin, detection.ymin, detection.xmax, detection.ymax + bbox = NormalizeBoundingBox((384, 384), AspectRatioResizeMode.LETTERBOX).normalize(np.zeros(visualizer.frame_shape), bbox) + cropped_qr = packet.frame[bbox[1]:bbox[3], bbox[0]:bbox[2]] + + data, _, _ = detector.detectAndDecode(cropped_qr) + + print(data) + + frame = visualizer.draw(packet.frame) + cv2.imshow('QR scanner', frame) + + +with OakCamera() as oak: + color = oak.create_camera('color', '1080p', encode='h264') + + nn_path = blobconverter.from_zoo(name="qr_code_detection_384x384", zoo_type="depthai", shaves=6) + nn = oak.create_nn(nn_path, color, nn_type='mobilenet') + + # visualizer = oak.visualize(nn, callback=callback, fps=True) + # visualizer.text(auto_scale=True).detections(hide_label=True) + oak.callback(nn, callback=callback) + + oak.start(blocking=True) diff --git a/gen2-qr-code-scanner/sdk/requirements.txt b/gen2-qr-code-scanner/sdk/requirements.txt new file mode 100644 index 000000000..2cf4cbe9a --- /dev/null +++ b/gen2-qr-code-scanner/sdk/requirements.txt @@ -0,0 +1,5 @@ +opencv-python 
+depthai +depthai-sdk +blobconverter +pyzbar \ No newline at end of file diff --git a/gen2-social-distancing/sdk/main.py b/gen2-social-distancing/sdk/main.py new file mode 100644 index 000000000..5d86f6d48 --- /dev/null +++ b/gen2-social-distancing/sdk/main.py @@ -0,0 +1,69 @@ +import cv2 +import numpy as np + +from depthai_sdk import OakCamera, DetectionPacket, Visualizer + +MAX_DISTANCE = 1000 + + +def calculate_mutual_distance(points): + """ + Calculate the distance between all pairs of points. + The result is a symmetric matrix of shape (n, n) where n is the number of points. + """ + coords = np.array([[p.x, p.y, p.z] for p in points]) + return np.sqrt(np.power(coords[:, :, None] - coords[:, :, None].T, 2).sum(axis=1)) + + +def callback(packet: DetectionPacket, visualizer: Visualizer): + bboxes = [] + spatial_coords = [] + + for detection, spatial_detection in zip(packet.detections, packet.img_detections.detections): + bbox = (*detection.top_left, *detection.bottom_right) + bboxes.append(bbox) + spatial_coords.append(spatial_detection.spatialCoordinates) + + # If there are at least two people detected, calculate the distance between them + if len(spatial_coords) > 1: + mutual_distance = calculate_mutual_distance(spatial_coords) + lower_tril = np.tril(mutual_distance, -1) + below_threshold = (lower_tril > 0) & (lower_tril < MAX_DISTANCE) + + indices = np.nonzero(below_threshold) + dangerous_pairs = list(zip(indices[0], indices[1])) + unique_pairs = set(tuple(sorted(p)) for p in dangerous_pairs) + overlay = packet.frame.copy() + + # Draw a line between people who are too close + for pair in unique_pairs: + bbox1, bbox2 = bboxes[pair[0]], bboxes[pair[1]] + x1, y1 = (bbox1[0] + bbox1[2]) // 2, int(bbox1[3]) + x2, y2 = (bbox2[0] + bbox2[2]) // 2, int(bbox2[3]) + + danger_color = (0, 0, 255) + + visualizer.add_line((x1, y1), (x2, y2), danger_color, 2) + + w1 = bbox1[2] - bbox1[0] + w2 = bbox2[2] - bbox2[0] + cv2.ellipse(overlay, (x1, y1), (w1 // 2, 10), 0, 0, 360, danger_color, thickness=cv2.FILLED) + cv2.ellipse(overlay, (x2, y2), (w2 // 2, 10), 0, 0, 360, danger_color, thickness=cv2.FILLED) + + cv2.addWeighted(overlay, 0.5, packet.frame, 0.5, 0, packet.frame) + + visualizer.draw(packet.frame) + cv2.imshow('Social distancing', packet.frame) + + +with OakCamera() as oak: + color = oak.create_camera('color', '1080p', fps=30) + + stereo = oak.create_stereo('800p', fps=30) + stereo.config_stereo(confidence=255) + + det_nn = oak.create_nn('person-detection-retail-0013', color, spatial=stereo) + + oak.visualize([det_nn], callback=callback, fps=True) + + oak.start(blocking=True) diff --git a/gen2-social-distancing/sdk/requirements.txt b/gen2-social-distancing/sdk/requirements.txt new file mode 100644 index 000000000..f2e572d3b --- /dev/null +++ b/gen2-social-distancing/sdk/requirements.txt @@ -0,0 +1,6 @@ +imutils +concurrent-log-handler +opencv-python +depthai +depthai-sdk +blobconverter \ No newline at end of file diff --git a/gen2-text-blur/sdk/main.py b/gen2-text-blur/sdk/main.py new file mode 100644 index 000000000..65b2b15ad --- /dev/null +++ b/gen2-text-blur/sdk/main.py @@ -0,0 +1,45 @@ +import cv2 +import numpy as np + +from depthai_sdk import OakCamera, AspectRatioResizeMode +from depthai_sdk.oak_outputs.normalize_bb import NormalizeBoundingBox +from utils.utils import get_boxes + +NN_PATH = '../models/text_detection_db_480x640_openvino_2021.4_6shave.blob' +MAX_CANDIDATES = 75 +MIN_SIZE = 1 +THRESH = 0.01 +BOX_THRESH = 0.2 + +NN_WIDTH, NN_HEIGHT = 640, 480 + + +def callback(packet, 
visualizer): + nn_data = packet.img_detections + pred = np.array(nn_data.getLayerFp16("out")).reshape((NN_HEIGHT, NN_WIDTH)) + boxes, scores = get_boxes(pred, THRESH, BOX_THRESH, MIN_SIZE, MAX_CANDIDATES) + + blur = cv2.GaussianBlur(packet.frame, (49, 49), 30) + + for bbox in boxes: + bbox = (bbox[0, 0], bbox[0, 1], bbox[2, 0], bbox[2, 1]) + bbox = NormalizeBoundingBox((NN_WIDTH, NN_HEIGHT), AspectRatioResizeMode.LETTERBOX).normalize(packet.frame, + bbox) + + x1, y1, x2, y2 = bbox[0], bbox[1], bbox[2], bbox[3] + + x1, x2 = np.clip([x1, x2], 0, packet.frame.shape[1]) + y1, y2 = np.clip([y1, y2], 0, packet.frame.shape[0]) + + packet.frame[y1:y2, x1:x2] = blur[y1:y2, x1:x2] + + cv2.imshow("Text blurring", packet.frame) + + +with OakCamera() as oak: + color = oak.create_camera('color', fps=30) + det_nn = oak.create_nn(NN_PATH, color) + + oak.callback(det_nn, callback=callback) + + oak.start(blocking=True) diff --git a/gen2-text-blur/sdk/requirements.txt b/gen2-text-blur/sdk/requirements.txt new file mode 100644 index 000000000..22dec35e3 --- /dev/null +++ b/gen2-text-blur/sdk/requirements.txt @@ -0,0 +1,6 @@ +opencv-python +depthai +depthai-sdk +numpy~=1.19.5 +pyclipper~=1.3.0 +shapely~=1.7.1 \ No newline at end of file diff --git a/gen2-text-blur/sdk/utils/utils.py b/gen2-text-blur/sdk/utils/utils.py new file mode 100644 index 000000000..571a7da79 --- /dev/null +++ b/gen2-text-blur/sdk/utils/utils.py @@ -0,0 +1,92 @@ +# Code taken and edited from https://github.com/MhLiao/DB + +import numpy as np +import cv2 +from shapely.geometry import Polygon +import pyclipper + +def get_mini_boxes(contour): + bounding_box = cv2.minAreaRect(contour) + points = sorted(list(cv2.boxPoints(bounding_box)), key=lambda x: x[0]) + + index_1, index_2, index_3, index_4 = 0, 1, 2, 3 + if points[1][1] > points[0][1]: + index_1 = 0 + index_4 = 1 + else: + index_1 = 1 + index_4 = 0 + if points[3][1] > points[2][1]: + index_2 = 2 + index_3 = 3 + else: + index_2 = 3 + index_3 = 2 + + box = [points[index_1], points[index_2], + points[index_3], points[index_4]] + return box, min(bounding_box[1]) + +def box_score_fast(bitmap, _box): + h, w = bitmap.shape[:2] + box = _box.copy() + xmin = np.clip(np.floor(box[:, 0].min()).astype(np.int), 0, w - 1) + xmax = np.clip(np.ceil(box[:, 0].max()).astype(np.int), 0, w - 1) + ymin = np.clip(np.floor(box[:, 1].min()).astype(np.int), 0, h - 1) + ymax = np.clip(np.ceil(box[:, 1].max()).astype(np.int), 0, h - 1) + + mask = np.zeros((ymax - ymin + 1, xmax - xmin + 1), dtype=np.uint8) + box[:, 0] = box[:, 0] - xmin + box[:, 1] = box[:, 1] - ymin + cv2.fillPoly(mask, box.reshape(1, -1, 2).astype(np.int32), 1) + return cv2.mean(bitmap[ymin:ymax+1, xmin:xmax+1], mask)[0] + + +def unclip(box, unclip_ratio=1.5): + poly = Polygon(box) + distance = poly.area * unclip_ratio / poly.length + offset = pyclipper.PyclipperOffset() + offset.AddPath(box, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON) + expanded = np.array(offset.Execute(distance)) + return expanded + +def get_boxes(pred, THRESH, BOX_THRESH, MIN_SIZE, MAX_CANDIDATES): + bitmap = pred > THRESH + + height, width = pred.shape + + contours, _ = cv2.findContours((bitmap * 255).astype(np.uint8), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE) + + num_contours = min(len(contours), MAX_CANDIDATES) + boxes = np.zeros((num_contours, 4, 2), dtype=np.float32) + scores = np.zeros((num_contours,), dtype=np.float32) + + for index in range(num_contours): + contour = contours[index] + points, sside = get_mini_boxes(contour) + if sside < MIN_SIZE: + continue + 
points = np.array(points) + score = box_score_fast(pred, points.reshape(-1, 2)) + if BOX_THRESH > score: + continue + + box = unclip(points).reshape(-1, 1, 2) + box, sside = get_mini_boxes(box) + if sside < MIN_SIZE + 2: + continue + box = np.array(box) + + box[:, 0] = np.clip(np.round(box[:, 0]), 0, width) + box[:, 1] = np.clip(np.round(box[:, 1]), 0, height) + box[:, 0] /= width + box[:, 1] /= height + + boxes[index, :, :] = box.astype(np.float32) + scores[index] = score + + mask = scores > 0 + scores = scores[mask] + boxes = boxes[mask] + + return boxes, scores diff --git a/gen2-tf-image-classification/sdk/main.py b/gen2-tf-image-classification/sdk/main.py new file mode 100755 index 000000000..6c4286e62 --- /dev/null +++ b/gen2-tf-image-classification/sdk/main.py @@ -0,0 +1,32 @@ +import cv2 +import numpy as np + +from depthai_sdk import OakCamera + +CLASSES = ['daisy', 'dandelion', 'roses', 'sunflowers', 'tulips'] + + +def softmax(x): + """Compute softmax values for each sets of scores in x.""" + e_x = np.exp(x - np.max(x)) + return e_x / e_x.sum(axis=0) + + +def callback(packet, visualizer): + nn_data = packet.img_detections + logits = softmax(nn_data.getFirstLayerFp16()) + pred = np.argmax(logits) + conf = logits[pred] + + if conf > 0.5: + cv2.putText(packet.frame, f'{CLASSES[pred]}: {conf:.2f}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2) + + cv2.imshow('Flower classification', packet.frame) + + +with OakCamera(replay='../input.mp4') as oak: + color = oak.create_camera('color') + flower_nn = oak.create_nn('../flower.blob', color) + + oak.callback(flower_nn, callback=callback) + oak.start(blocking=True) diff --git a/gen2-tf-image-classification/sdk/requirements.txt b/gen2-tf-image-classification/sdk/requirements.txt new file mode 100644 index 000000000..9f6784651 --- /dev/null +++ b/gen2-tf-image-classification/sdk/requirements.txt @@ -0,0 +1,3 @@ +opencv-python +depthai +depthai-sdk diff --git a/gen2-triangulation/main.py b/gen2-triangulation/main.py index e62b6b531..f80728bf7 100644 --- a/gen2-triangulation/main.py +++ b/gen2-triangulation/main.py @@ -1,300 +1,300 @@ -import blobconverter -import numpy as np -import math -import cv2 -import depthai as dai -import re - - -VISUALIZE = False - -if VISUALIZE: - import open3d as o3d - -class TextHelper: - def __init__(self) -> None: - self.bg_color = (0, 0, 0) - self.color = (255, 255, 255) - self.text_type = cv2.FONT_HERSHEY_SIMPLEX - self.line_type = cv2.LINE_AA - def putText(self, frame, text, coords): - cv2.putText(frame, text, coords, self.text_type, 0.5, self.bg_color, 4, self.line_type) - cv2.putText(frame, text, coords, self.text_type, 0.5, self.color, 1, self.line_type) - -openvinoVersion = "2021.4" -p = dai.Pipeline() -p.setOpenVINOVersion(version=dai.OpenVINO.Version.VERSION_2021_4) - -# Set resolution of mono cameras -resolution = dai.MonoCameraProperties.SensorResolution.THE_720_P - -# THE_720_P => 720 -resolution_num = int(re.findall("\d+", str(resolution))[0]) - -def populate_pipeline(p, name, resolution): - cam = p.create(dai.node.MonoCamera) - socket = dai.CameraBoardSocket.LEFT if name == "left" else dai.CameraBoardSocket.RIGHT - cam.setBoardSocket(socket) - cam.setResolution(resolution) - - # ImageManip for cropping (face detection NN requires input image of 300x300) and to change frame type - face_manip = p.create(dai.node.ImageManip) - face_manip.initialConfig.setResize(300, 300) - # The NN model expects BGR input. 
By default ImageManip output type would be same as input (gray in this case) - face_manip.initialConfig.setFrameType(dai.RawImgFrame.Type.BGR888p) - cam.out.link(face_manip.inputImage) - - # NN that detects faces in the image - face_nn = p.create(dai.node.MobileNetDetectionNetwork) - face_nn.setConfidenceThreshold(0.2) - face_nn.setBlobPath(blobconverter.from_zoo("face-detection-retail-0004", shaves=6, version=openvinoVersion)) - face_manip.out.link(face_nn.input) - - # Send mono frames to the host via XLink - cam_xout = p.create(dai.node.XLinkOut) - cam_xout.setStreamName("mono_" + name) - face_nn.passthrough.link(cam_xout.input) - - # Script node will take the output from the NN as an input, get the first bounding box - # and send ImageManipConfig to the manip_crop - image_manip_script = p.create(dai.node.Script) - image_manip_script.inputs['nn_in'].setBlocking(False) - image_manip_script.inputs['nn_in'].setQueueSize(1) - face_nn.out.link(image_manip_script.inputs['nn_in']) - image_manip_script.setScript(""" -import time -def limit_roi(det): - if det.xmin <= 0: det.xmin = 0.001 - if det.ymin <= 0: det.ymin = 0.001 - if det.xmax >= 1: det.xmax = 0.999 - if det.ymax >= 1: det.ymax = 0.999 - -while True: - face_dets = node.io['nn_in'].get().detections - # node.warn(f"Faces detected: {len(face_dets)}") - for det in face_dets: - limit_roi(det) - # node.warn(f"Detection rect: {det.xmin}, {det.ymin}, {det.xmax}, {det.ymax}") - cfg = ImageManipConfig() - cfg.setCropRect(det.xmin, det.ymin, det.xmax, det.ymax) - cfg.setResize(48, 48) - cfg.setKeepAspectRatio(False) - node.io['to_manip'].send(cfg) - # node.warn(f"1 from nn_in: {det.xmin}, {det.ymin}, {det.xmax}, {det.ymax}") - """) - - # This ImageManip will crop the mono frame based on the NN detections. Resulting image will be the cropped - # face that was detected by the face-detection NN. 
- manip_crop = p.create(dai.node.ImageManip) - face_nn.passthrough.link(manip_crop.inputImage) - image_manip_script.outputs['to_manip'].link(manip_crop.inputConfig) - manip_crop.initialConfig.setResize(48, 48) - manip_crop.inputConfig.setWaitForMessage(False) - - # Send ImageManipConfig to host so it can visualize the landmarks - config_xout = p.create(dai.node.XLinkOut) - config_xout.setStreamName("config_" + name) - image_manip_script.outputs['to_manip'].link(config_xout.input) - - crop_xout = p.create(dai.node.XLinkOut) - crop_xout.setStreamName("crop_" + name) - manip_crop.out.link(crop_xout.input) - - # Second NN that detcts landmarks from the cropped 48x48 face - landmarks_nn = p.create(dai.node.NeuralNetwork) - landmarks_nn.setBlobPath(blobconverter.from_zoo("landmarks-regression-retail-0009", shaves=6, version=openvinoVersion)) - manip_crop.out.link(landmarks_nn.input) - - landmarks_nn_xout = p.create(dai.node.XLinkOut) - landmarks_nn_xout.setStreamName("landmarks_" + name) - landmarks_nn.out.link(landmarks_nn_xout.input) - -populate_pipeline(p, "right", resolution) -populate_pipeline(p, "left", resolution) - -class StereoInference: - def __init__(self, device: dai.Device, resolution_num, width, heigth) -> None: - calibData = device.readCalibration() - baseline = calibData.getBaselineDistance(useSpecTranslation=True) * 10 # mm - - # Original mono frames shape - self.original_heigth = resolution_num - self.original_width = 640 if resolution_num == 400 else 1280 - self.hfov = calibData.getFov(dai.CameraBoardSocket.RIGHT) - - focalLength = self.get_focal_length_pixels(self.original_width, self.hfov) - self.dispScaleFactor = baseline * focalLength - - # Cropped frame shape - self.mono_width = width - self.mono_heigth = heigth - # Our coords are normalized for 300x300 image. 300x300 was downscaled from - # 720x720 (by ImageManip), so we need to multiple coords by 2.4 to get the correct disparity. - self.resize_factor = self.original_heigth / self.mono_heigth - - def get_focal_length_pixels(self, pixel_width, hfov): - return pixel_width * 0.5 / math.tan(hfov * 0.5 * math.pi/180) - - def calculate_depth(self, disparity_pixels: float): - try: - return self.dispScaleFactor / disparity_pixels - except ZeroDivisionError: - return 0 # Or inf? - - def calculate_distance(self, c1, c2): - # Our coords are normalized for 300x300 image. 300x300 was downscaled from 720x720 (by ImageManip), - # so we need to multiple coords by 2.4 (if using 720P resolution) to get the correct disparity. - c1 = np.array(c1) * self.resize_factor - c2 = np.array(c2) * self.resize_factor - - x_delta = c1[0] - c2[0] - y_delta = c1[1] - c2[1] - return math.sqrt(x_delta ** 2 + y_delta ** 2) - - def calc_angle(self, offset): - return math.atan(math.tan(self.hfov / 2.0) * offset / (self.original_width / 2.0)) - - def calc_spatials(self, coords, depth): - x, y = coords - bb_x_pos = x - self.mono_width / 2 - bb_y_pos = y - self.mono_heigth / 2 - - angle_x = self.calc_angle(bb_x_pos) - angle_y = self.calc_angle(bb_y_pos) - - z = depth - x = z * math.tan(angle_x) - y = -z * math.tan(angle_y) - # print(f"x {x}, y {y}, z {z}") - return [x,y,z] - -# Pipeline is defined, now we can connect to the device -with dai.Device(p.getOpenVINOVersion()) as device: - cams = device.getConnectedCameras() - depth_enabled = dai.CameraBoardSocket.LEFT in cams and dai.CameraBoardSocket.RIGHT in cams - if not depth_enabled: - raise RuntimeError("Unable to run this experiment on device without depth capabilities! 
(Available cameras: {})".format(cams)) - device.startPipeline(p) - # Set device log level - to see logs from the Script node - device.setLogLevel(dai.LogLevel.INFO) - device.setLogOutputLevel(dai.LogLevel.INFO) - - stereoInference = StereoInference(device, resolution_num, width=300, heigth=300) - - if VISUALIZE: - vis = o3d.visualization.Visualizer() - vis.create_window() - - # Start pipeline - queues = [] - for name in ["left", "right"]: - queues.append(device.getOutputQueue(name="mono_"+name, maxSize=4, blocking=False)) - queues.append(device.getOutputQueue(name="crop_"+name, maxSize=4, blocking=False)) - queues.append(device.getOutputQueue(name="landmarks_"+name, maxSize=4, blocking=False)) - queues.append(device.getOutputQueue(name="config_"+name, maxSize=4, blocking=False)) - - disparity_frame = None - left = None - right = None - leftColor = (255,0,0) - rightColor = (0,255,0) - textHelper = TextHelper() - - while True: - lr_landmarks = {} - for i in range(2): - name = "left" if i == 1 else "right" - color = leftColor if name == "left" else rightColor - - # 300x300 Mono image frame - frame = queues[i*4].get().getCvFrame() - - if name == "left": left = frame - else: right = frame - - # Cropped+streched (48x48) mono image frame - cropped_frame = queues[i*4 + 1].get().getCvFrame() - - inConfig = queues[i*4 + 3].tryGet() - if inConfig is not None: - xmin = int(300 * inConfig.getCropXMin()) - ymin = int(300 * inConfig.getCropYMin()) - xmax = int(300 * inConfig.getCropXMax()) - ymax = int(300 * inConfig.getCropYMax()) - cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), color, 2) - - width = inConfig.getCropXMax()-inConfig.getCropXMin() - height = inConfig.getCropYMax()-inConfig.getCropYMin() - - # Facial landmarks from the second NN - landmarks_layer = queues[i*4 + 2].get().getFirstLayerFp16() - landmarks = np.array(landmarks_layer).reshape(5, 2) - - landmarks_xy = [] - for landmark in landmarks: - cv2.circle(cropped_frame, (int(48*landmark[0]), int(48*landmark[1])), 3, color) - x = int((landmark[0] * width + inConfig.getCropXMin()) * 300) - y = int((landmark[1] * height + inConfig.getCropYMin()) * 300) - landmarks_xy.append((x,y)) - - lr_landmarks[name] = landmarks_xy - # Display both mono/cropped frames - # cv2.imshow("mono_"+name, frame) - # cv2.imshow("crop_"+name, cropped_frame) - - # Combine the two mono frames - combined = cv2.addWeighted(left, 0.5, right, 0.5, 0) - - # 3D visualization - if len(lr_landmarks) == 2: - spatials = [] - for i in range(5): - coords1 = lr_landmarks['right'][i] - coords2 = lr_landmarks['left'][i] - - cv2.circle(left, coords2, 3, leftColor) - cv2.circle(right, coords1, 3, rightColor) - cv2.circle(combined, coords2, 3, leftColor) - cv2.circle(combined, coords1, 3, rightColor) - - # Visualize disparity line frame - cv2.line(combined, coords1, coords2, (0,0,255), 1) - - disparity = stereoInference.calculate_distance(coords1, coords2) - depth = stereoInference.calculate_depth(disparity) - # print(f"Disp {disparity}, depth {depth}") - spatial = stereoInference.calc_spatials(coords1, depth) - spatials.append(spatial) - - if i == 0: - y = 0 - y_delta = 18 - strings = [ - "Disparity: {:.0f} pixels".format(disparity), - "X: {:.2f} m".format(spatial[0]/1000), - "Y: {:.2f} m".format(spatial[1]/1000), - "Z: {:.2f} m".format(spatial[2]/1000), - ] - for s in strings: - y += y_delta - textHelper.putText(combined, s, (10, y)) - - if VISUALIZE: - # For 3d point projection. 
- pcl = o3d.geometry.PointCloud() - pcl.points = o3d.utility.Vector3dVector(spatials) - vis.clear_geometries() - vis.add_geometry(pcl) - # else: - # for i, s in enumerate(spatials): - # print(f"[Landmark {i}] X: {s[0]}, Y: {s[1]}, Z: {s[2]}") - - - cv2.imshow("Combined frame", np.concatenate((left, combined ,right), axis=1)) - - if VISUALIZE: - vis.poll_events() - vis.update_renderer() - - if cv2.waitKey(1) == ord('q'): - break +import blobconverter +import numpy as np +import math +import cv2 +import depthai as dai +import re + + +VISUALIZE = False + +if VISUALIZE: + import open3d as o3d + +class TextHelper: + def __init__(self) -> None: + self.bg_color = (0, 0, 0) + self.color = (255, 255, 255) + self.text_type = cv2.FONT_HERSHEY_SIMPLEX + self.line_type = cv2.LINE_AA + def putText(self, frame, text, coords): + cv2.putText(frame, text, coords, self.text_type, 0.5, self.bg_color, 4, self.line_type) + cv2.putText(frame, text, coords, self.text_type, 0.5, self.color, 1, self.line_type) + +openvinoVersion = "2021.4" +p = dai.Pipeline() +p.setOpenVINOVersion(version=dai.OpenVINO.Version.VERSION_2021_4) + +# Set resolution of mono cameras +resolution = dai.MonoCameraProperties.SensorResolution.THE_720_P + +# THE_720_P => 720 +resolution_num = int(re.findall("\d+", str(resolution))[0]) + +def populate_pipeline(p, name, resolution): + cam = p.create(dai.node.MonoCamera) + socket = dai.CameraBoardSocket.LEFT if name == "left" else dai.CameraBoardSocket.RIGHT + cam.setBoardSocket(socket) + cam.setResolution(resolution) + + # ImageManip for cropping (face detection NN requires input image of 300x300) and to change frame type + face_manip = p.create(dai.node.ImageManip) + face_manip.initialConfig.setResize(300, 300) + # The NN model expects BGR input. By default ImageManip output type would be same as input (gray in this case) + face_manip.initialConfig.setFrameType(dai.RawImgFrame.Type.BGR888p) + cam.out.link(face_manip.inputImage) + + # NN that detects faces in the image + face_nn = p.create(dai.node.MobileNetDetectionNetwork) + face_nn.setConfidenceThreshold(0.2) + face_nn.setBlobPath(blobconverter.from_zoo("face-detection-retail-0004", shaves=6, version=openvinoVersion)) + face_manip.out.link(face_nn.input) + + # Send mono frames to the host via XLink + cam_xout = p.create(dai.node.XLinkOut) + cam_xout.setStreamName("mono_" + name) + face_nn.passthrough.link(cam_xout.input) + + # Script node will take the output from the NN as an input, get the first bounding box + # and send ImageManipConfig to the manip_crop + image_manip_script = p.create(dai.node.Script) + image_manip_script.inputs['nn_in'].setBlocking(False) + image_manip_script.inputs['nn_in'].setQueueSize(1) + face_nn.out.link(image_manip_script.inputs['nn_in']) + image_manip_script.setScript(""" +import time +def limit_roi(det): + if det.xmin <= 0: det.xmin = 0.001 + if det.ymin <= 0: det.ymin = 0.001 + if det.xmax >= 1: det.xmax = 0.999 + if det.ymax >= 1: det.ymax = 0.999 + +while True: + face_dets = node.io['nn_in'].get().detections + # node.warn(f"Faces detected: {len(face_dets)}") + for det in face_dets: + limit_roi(det) + # node.warn(f"Detection rect: {det.xmin}, {det.ymin}, {det.xmax}, {det.ymax}") + cfg = ImageManipConfig() + cfg.setCropRect(det.xmin, det.ymin, det.xmax, det.ymax) + cfg.setResize(48, 48) + cfg.setKeepAspectRatio(False) + node.io['to_manip'].send(cfg) + # node.warn(f"1 from nn_in: {det.xmin}, {det.ymin}, {det.xmax}, {det.ymax}") + """) + + # This ImageManip will crop the mono frame based on the NN detections. 
Resulting image will be the cropped + # face that was detected by the face-detection NN. + manip_crop = p.create(dai.node.ImageManip) + face_nn.passthrough.link(manip_crop.inputImage) + image_manip_script.outputs['to_manip'].link(manip_crop.inputConfig) + manip_crop.initialConfig.setResize(48, 48) + manip_crop.inputConfig.setWaitForMessage(False) + + # Send ImageManipConfig to host so it can visualize the landmarks + config_xout = p.create(dai.node.XLinkOut) + config_xout.setStreamName("config_" + name) + image_manip_script.outputs['to_manip'].link(config_xout.input) + + crop_xout = p.create(dai.node.XLinkOut) + crop_xout.setStreamName("crop_" + name) + manip_crop.out.link(crop_xout.input) + + # Second NN that detcts landmarks from the cropped 48x48 face + landmarks_nn = p.create(dai.node.NeuralNetwork) + landmarks_nn.setBlobPath(blobconverter.from_zoo("landmarks-regression-retail-0009", shaves=6, version=openvinoVersion)) + manip_crop.out.link(landmarks_nn.input) + + landmarks_nn_xout = p.create(dai.node.XLinkOut) + landmarks_nn_xout.setStreamName("landmarks_" + name) + landmarks_nn.out.link(landmarks_nn_xout.input) + +populate_pipeline(p, "right", resolution) +populate_pipeline(p, "left", resolution) + +class StereoInference: + def __init__(self, device: dai.Device, resolution_num, width, heigth) -> None: + calibData = device.readCalibration() + baseline = calibData.getBaselineDistance(useSpecTranslation=True) * 10 # mm + + # Original mono frames shape + self.original_heigth = resolution_num + self.original_width = 640 if resolution_num == 400 else 1280 + self.hfov = calibData.getFov(dai.CameraBoardSocket.RIGHT) + + focalLength = self.get_focal_length_pixels(self.original_width, self.hfov) + self.dispScaleFactor = baseline * focalLength + + # Cropped frame shape + self.mono_width = width + self.mono_heigth = heigth + # Our coords are normalized for 300x300 image. 300x300 was downscaled from + # 720x720 (by ImageManip), so we need to multiple coords by 2.4 to get the correct disparity. + self.resize_factor = self.original_heigth / self.mono_heigth + + def get_focal_length_pixels(self, pixel_width, hfov): + return pixel_width * 0.5 / math.tan(hfov * 0.5 * math.pi/180) + + def calculate_depth(self, disparity_pixels: float): + try: + return self.dispScaleFactor / disparity_pixels + except ZeroDivisionError: + return 0 # Or inf? + + def calculate_distance(self, c1, c2): + # Our coords are normalized for 300x300 image. 300x300 was downscaled from 720x720 (by ImageManip), + # so we need to multiple coords by 2.4 (if using 720P resolution) to get the correct disparity. 
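# Worked example for the scaling above (illustrative comment only; the ~75 mm baseline and ~72 deg HFOV
# are assumed typical OAK-D values, not taken from this diff): with 720P mono input,
# resize_factor = 720 / 300 = 2.4, so a 10 px disparity measured between the 300x300 frames
# corresponds to 10 * 2.4 = 24 px at full sensor width. calculate_depth() then applies
# depth = dispScaleFactor / disparity = (baseline * focalLength) / disparity; with
# focalLength = 1280 * 0.5 / tan(36 deg) ~= 881 px, that 24 px disparity gives
# 75 * 881 / 24 ~= 2750 mm, i.e. roughly 2.75 m.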
+ c1 = np.array(c1) * self.resize_factor + c2 = np.array(c2) * self.resize_factor + + x_delta = c1[0] - c2[0] + y_delta = c1[1] - c2[1] + return math.sqrt(x_delta ** 2 + y_delta ** 2) + + def calc_angle(self, offset): + return math.atan(math.tan(self.hfov / 2.0) * offset / (self.original_width / 2.0)) + + def calc_spatials(self, coords, depth): + x, y = coords + bb_x_pos = x - self.mono_width / 2 + bb_y_pos = y - self.mono_heigth / 2 + + angle_x = self.calc_angle(bb_x_pos) + angle_y = self.calc_angle(bb_y_pos) + + z = depth + x = z * math.tan(angle_x) + y = -z * math.tan(angle_y) + # print(f"x {x}, y {y}, z {z}") + return [x,y,z] + +# Pipeline is defined, now we can connect to the device +with dai.Device(p.getOpenVINOVersion()) as device: + cams = device.getConnectedCameras() + depth_enabled = dai.CameraBoardSocket.LEFT in cams and dai.CameraBoardSocket.RIGHT in cams + if not depth_enabled: + raise RuntimeError("Unable to run this experiment on device without depth capabilities! (Available cameras: {})".format(cams)) + device.startPipeline(p) + # Set device log level - to see logs from the Script node + device.setLogLevel(dai.LogLevel.INFO) + device.setLogOutputLevel(dai.LogLevel.INFO) + + stereoInference = StereoInference(device, resolution_num, width=300, heigth=300) + + if VISUALIZE: + vis = o3d.visualization.Visualizer() + vis.create_window() + + # Start pipeline + queues = [] + for name in ["left", "right"]: + queues.append(device.getOutputQueue(name="mono_"+name, maxSize=4, blocking=False)) + queues.append(device.getOutputQueue(name="crop_"+name, maxSize=4, blocking=False)) + queues.append(device.getOutputQueue(name="landmarks_"+name, maxSize=4, blocking=False)) + queues.append(device.getOutputQueue(name="config_"+name, maxSize=4, blocking=False)) + + disparity_frame = None + left = None + right = None + leftColor = (255,0,0) + rightColor = (0,255,0) + textHelper = TextHelper() + + while True: + lr_landmarks = {} + for i in range(2): + name = "left" if i == 1 else "right" + color = leftColor if name == "left" else rightColor + + # 300x300 Mono image frame + frame = queues[i*4].get().getCvFrame() + + if name == "left": left = frame + else: right = frame + + # Cropped+streched (48x48) mono image frame + cropped_frame = queues[i*4 + 1].get().getCvFrame() + + inConfig = queues[i*4 + 3].tryGet() + if inConfig is not None: + xmin = int(300 * inConfig.getCropXMin()) + ymin = int(300 * inConfig.getCropYMin()) + xmax = int(300 * inConfig.getCropXMax()) + ymax = int(300 * inConfig.getCropYMax()) + cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), color, 2) + + width = inConfig.getCropXMax()-inConfig.getCropXMin() + height = inConfig.getCropYMax()-inConfig.getCropYMin() + + # Facial landmarks from the second NN + landmarks_layer = queues[i*4 + 2].get().getFirstLayerFp16() + landmarks = np.array(landmarks_layer).reshape(5, 2) + + landmarks_xy = [] + for landmark in landmarks: + cv2.circle(cropped_frame, (int(48*landmark[0]), int(48*landmark[1])), 3, color) + x = int((landmark[0] * width + inConfig.getCropXMin()) * 300) + y = int((landmark[1] * height + inConfig.getCropYMin()) * 300) + landmarks_xy.append((x,y)) + + lr_landmarks[name] = landmarks_xy + # Display both mono/cropped frames + # cv2.imshow("mono_"+name, frame) + # cv2.imshow("crop_"+name, cropped_frame) + + # Combine the two mono frames + combined = cv2.addWeighted(left, 0.5, right, 0.5, 0) + + # 3D visualization + if len(lr_landmarks) == 2: + spatials = [] + for i in range(5): + coords1 = lr_landmarks['right'][i] + coords2 = 
lr_landmarks['left'][i] + + cv2.circle(left, coords2, 3, leftColor) + cv2.circle(right, coords1, 3, rightColor) + cv2.circle(combined, coords2, 3, leftColor) + cv2.circle(combined, coords1, 3, rightColor) + + # Visualize disparity line frame + cv2.line(combined, coords1, coords2, (0,0,255), 1) + + disparity = stereoInference.calculate_distance(coords1, coords2) + depth = stereoInference.calculate_depth(disparity) + # print(f"Disp {disparity}, depth {depth}") + spatial = stereoInference.calc_spatials(coords1, depth) + spatials.append(spatial) + + if i == 0: + y = 0 + y_delta = 18 + strings = [ + "Disparity: {:.0f} pixels".format(disparity), + "X: {:.2f} m".format(spatial[0]/1000), + "Y: {:.2f} m".format(spatial[1]/1000), + "Z: {:.2f} m".format(spatial[2]/1000), + ] + for s in strings: + y += y_delta + textHelper.putText(combined, s, (10, y)) + + if VISUALIZE: + # For 3d point projection. + pcl = o3d.geometry.PointCloud() + pcl.points = o3d.utility.Vector3dVector(spatials) + vis.clear_geometries() + vis.add_geometry(pcl) + # else: + # for i, s in enumerate(spatials): + # print(f"[Landmark {i}] X: {s[0]}, Y: {s[1]}, Z: {s[2]}") + + + cv2.imshow("Combined frame", np.concatenate((left, combined ,right), axis=1)) + + if VISUALIZE: + vis.poll_events() + vis.update_renderer() + + if cv2.waitKey(1) == ord('q'): + break diff --git a/gen2-webrtc-streaming/client/build/client.js b/gen2-webrtc-streaming/client/build/client.js deleted file mode 100644 index a6bf77aa0..000000000 --- a/gen2-webrtc-streaming/client/build/client.js +++ /dev/null @@ -1,68 +0,0 @@ -/* - * ATTENTION: The "eval" devtool has been used (maybe by default in mode: "development"). - * This devtool is neither made for production nor for readable output files. - * It uses "eval()" calls to create a separate source file in the browser devtools. - * If you are trying to read the output file, select a different devtool (https://webpack.js.org/configuration/devtool/) - * or disable the default devtool with "devtool: false". - * If you are looking for production-ready output files, see mode: "production" (https://webpack.js.org/configuration/mode/). - */ -var WebRTC; -/******/ (() => { // webpackBootstrap -/******/ "use strict"; -/******/ var __webpack_modules__ = ({ - -/***/ "./src/client.mjs": -/*!************************!*\ - !*** ./src/client.mjs ***! 
- \************************/ -/***/ ((__unused_webpack___webpack_module__, __webpack_exports__, __webpack_require__) => { - -eval("__webpack_require__.r(__webpack_exports__);\n/* harmony export */ __webpack_require__.d(__webpack_exports__, {\n/* harmony export */ \"dataChannel\": () => (/* binding */ dataChannel),\n/* harmony export */ \"webrtcInstance\": () => (/* binding */ webrtcInstance),\n/* harmony export */ \"start\": () => (/* binding */ start),\n/* harmony export */ \"stop\": () => (/* binding */ stop)\n/* harmony export */ });\nfunction _classCallCheck(instance, Constructor) { if (!(instance instanceof Constructor)) { throw new TypeError(\"Cannot call a class as a function\"); } }\n\nfunction _defineProperties(target, props) { for (var i = 0; i < props.length; i++) { var descriptor = props[i]; descriptor.enumerable = descriptor.enumerable || false; descriptor.configurable = true; if (\"value\" in descriptor) descriptor.writable = true; Object.defineProperty(target, descriptor.key, descriptor); } }\n\nfunction _createClass(Constructor, protoProps, staticProps) { if (protoProps) _defineProperties(Constructor.prototype, protoProps); if (staticProps) _defineProperties(Constructor, staticProps); Object.defineProperty(Constructor, \"prototype\", { writable: false }); return Constructor; }\n\nfunction _defineProperty(obj, key, value) { if (key in obj) { Object.defineProperty(obj, key, { value: value, enumerable: true, configurable: true, writable: true }); } else { obj[key] = value; } return obj; }\n\nvar WebRTC = /*#__PURE__*/function () {\n function WebRTC() {\n var _this = this;\n\n _classCallCheck(this, WebRTC);\n\n _defineProperty(this, \"config\", {\n sdpSemantics: 'unified-plan'\n });\n\n this.pc = new RTCPeerConnection(this.config); // register some listeners to help debugging\n\n this.pc.addEventListener('icegatheringstatechange', function () {\n return console.log(\"[PC] ICE Gathering state: \", _this.pc.iceConnectionState);\n }, false);\n console.log(\"[PC] ICE Gathering state: \", this.pc.iceGatheringState);\n this.pc.addEventListener('iceconnectionstatechange', function () {\n return console.log(\"[PC] ICE Connection state: \", _this.pc.iceConnectionState);\n }, false);\n console.log(\"[PC] ICE Connection state: \", this.pc.iceConnectionState);\n this.pc.addEventListener('signalingstatechange', function () {\n return console.log(\"[PC] Signaling state: \", _this.pc.signalingState);\n }, false);\n console.log(\"[PC] Signaling state: \", this.pc.signalingState);\n }\n\n _createClass(WebRTC, [{\n key: \"negotiate\",\n value: function negotiate() {\n var _this2 = this;\n\n return this.pc.createOffer().then(function (offer) {\n return _this2.pc.setLocalDescription(offer);\n }).then(function () {\n return new Promise(function (resolve) {\n if (_this2.pc.iceGatheringState === 'complete') {\n resolve();\n } else {\n var checkState = function checkState() {\n if (pc.iceGatheringState === 'complete') {\n pc.removeEventListener('icegatheringstatechange', checkState);\n resolve();\n }\n };\n\n var pc = _this2.pc;\n\n _this2.pc.addEventListener('icegatheringstatechange', checkState);\n }\n });\n }).then(function () {\n return fetch('/offer', {\n body: JSON.stringify({\n sdp: _this2.pc.localDescription.sdp,\n type: _this2.pc.localDescription.type,\n options: Object.fromEntries(new FormData(document.getElementById('options-form')))\n }),\n headers: {\n 'Content-Type': 'application/json'\n },\n method: 'POST'\n });\n }).then(function (response) {\n return response.json();\n 
}).then(function (answer) {\n return _this2.pc.setRemoteDescription(answer);\n })[\"catch\"](function (e) {\n return alert(e);\n });\n }\n }, {\n key: \"start\",\n value: function start() {\n return this.negotiate();\n }\n }, {\n key: \"createDataChannel\",\n value: function createDataChannel(name, onClose, onOpen, onMessage) {\n var dc = this.pc.createDataChannel(name, {\n ordered: true\n });\n dc.onclose = onClose;\n dc.onopen = onOpen;\n dc.onmessage = onMessage;\n return dc;\n }\n }, {\n key: \"stop\",\n value: function stop() {\n if (this.pc.getTransceivers) {\n this.pc.getTransceivers().forEach(function (transceiver) {\n return transceiver.stop && transceiver.stop();\n });\n }\n\n this.pc.getSenders().forEach(function (sender) {\n return sender.track && sender.track.stop();\n });\n this.pc.close();\n }\n }, {\n key: \"addMediaHandles\",\n value: function addMediaHandles(onAudio, onVideo) {\n if (onVideo) {\n this.pc.addTransceiver(\"video\");\n }\n\n if (onAudio) {\n this.pc.addTransceiver(\"audio\");\n }\n\n this.pc.addEventListener('track', function (evt) {\n if (evt.track.kind === 'video' && onVideo) return onVideo(evt);\n if (evt.track.kind === 'audio' && onAudio) return onAudio(evt);\n });\n }\n }]);\n\n return WebRTC;\n}();\n\nvar dataChannel;\nvar webrtcInstance;\n\nfunction onMessage(evt) {\n var action = JSON.parse(evt.data);\n console.log(action);\n}\n\nfunction start() {\n webrtcInstance = new WebRTC();\n dataChannel = webrtcInstance.createDataChannel('pingChannel', function () {\n return console.log(\"[DC] closed\");\n }, function () {\n return console.log(\"[DC] opened\");\n }, onMessage);\n webrtcInstance.addMediaHandles(null, function (evt) {\n return document.getElementById('video').srcObject = evt.streams[0];\n });\n webrtcInstance.start();\n}\nfunction stop() {\n if (dataChannel) {\n dataChannel.send(JSON.stringify({\n 'type': 'STREAM_CLOSED'\n }));\n }\n\n setTimeout(function () {\n return webrtcInstance.stop();\n }, 100);\n}\n\n//# sourceURL=webpack://WebRTC/./src/client.mjs?"); - -/***/ }) - -/******/ }); -/************************************************************************/ -/******/ // The require scope -/******/ var __webpack_require__ = {}; -/******/ -/************************************************************************/ -/******/ /* webpack/runtime/define property getters */ -/******/ (() => { -/******/ // define getter functions for harmony exports -/******/ __webpack_require__.d = (exports, definition) => { -/******/ for(var key in definition) { -/******/ if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) { -/******/ Object.defineProperty(exports, key, { enumerable: true, get: definition[key] }); -/******/ } -/******/ } -/******/ }; -/******/ })(); -/******/ -/******/ /* webpack/runtime/hasOwnProperty shorthand */ -/******/ (() => { -/******/ __webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop)) -/******/ })(); -/******/ -/******/ /* webpack/runtime/make namespace object */ -/******/ (() => { -/******/ // define __esModule on exports -/******/ __webpack_require__.r = (exports) => { -/******/ if(typeof Symbol !== 'undefined' && Symbol.toStringTag) { -/******/ Object.defineProperty(exports, Symbol.toStringTag, { value: 'Module' }); -/******/ } -/******/ Object.defineProperty(exports, '__esModule', { value: true }); -/******/ }; -/******/ })(); -/******/ -/************************************************************************/ -/******/ -/******/ // startup -/******/ // Load entry 
module and return exports -/******/ // This entry module can't be inlined because the eval devtool is used. -/******/ var __webpack_exports__ = {}; -/******/ __webpack_modules__["./src/client.mjs"](0, __webpack_exports__, __webpack_require__); -/******/ WebRTC = __webpack_exports__; -/******/ -/******/ })() -; \ No newline at end of file diff --git a/gen2-wls-filter/sdk/main.py b/gen2-wls-filter/sdk/main.py new file mode 100755 index 000000000..e2d1e179f --- /dev/null +++ b/gen2-wls-filter/sdk/main.py @@ -0,0 +1,37 @@ +from functools import partial + +import cv2 + +from depthai_sdk import OakCamera + + +class Trackbar: + def __init__(self, trackbar_name, window_name, minValue, maxValue, defaultValue, handler): + cv2.createTrackbar(trackbar_name, window_name, minValue, maxValue, handler) + cv2.setTrackbarPos(trackbar_name, window_name, defaultValue) + + +def on_trackbar_change_lambda(visualizer_, value): + visualizer_.stereo(wls_lambda=value * 100) + + +def on_trackbar_change_sigma(visualizer_, value): + visualizer_.stereo(wls_sigma=value / float(10)) + + +with OakCamera() as oak: + stereo = oak.create_stereo('400p', fps=30) + stereo.config_postprocessing( + wls_filter=True, + wls_lambda=8000, + wls_sigma=1.5 + ) + + visualizer = oak.visualize(stereo.out.disparity) + + window_name = '0_disparity' + cv2.namedWindow(window_name) + Trackbar('Lambda', window_name, 0, 255, 80, partial(on_trackbar_change_lambda, visualizer)) + Trackbar('Sigma', window_name, 0, 100, 15, partial(on_trackbar_change_sigma, visualizer)) + + oak.start(blocking=True) diff --git a/gen2-wls-filter/sdk/requirements.txt b/gen2-wls-filter/sdk/requirements.txt new file mode 100644 index 000000000..f9581c25e --- /dev/null +++ b/gen2-wls-filter/sdk/requirements.txt @@ -0,0 +1,3 @@ +opencv-contrib-python +depthai +depthai-sdk \ No newline at end of file diff --git a/gen2-yolo/README.md b/gen2-yolo/README.md index 8b4e4aeb6..4aac93836 100644 --- a/gen2-yolo/README.md +++ b/gen2-yolo/README.md @@ -6,8 +6,9 @@ This repository contains code for various Yolo experiments: | Directory | Decoding | Version | Description | | :-------------: | :------: | :--------------------------: | ------------------------------------------------------------ | -| `car-detection` | device | V3-tiny, V4-tiny | Car detection using YoloV3-tiny and YoloV4-tiny with on-device decoding. | -| `device-decoding` | device | V3, V3-tiny, V4, V4-tiny, V5 | General object detection using any of the versions for which we support on-device decoding. **Use this if you want to see an example of how to run your own model!** | +| `main.py` | device | From https://tools.luxonis.com | Run your custom trained YOLO model that was converted using the tools.luxonis.com. Uses [DepthAI-SDK](https://docs.luxonis.com/projects/sdk/en/latest/) | +| `device-decoding` | device | V3, V3-tiny, V4, V4-tiny, V5 | General object detection using any of the versions for which we support on-device decoding. Uses [DepthAI-API]https://docs.luxonis.com/projects/api/en/latest/) | +| `car-detection` | device | V3-tiny, V4-tiny | Car detection using YoloV3-tiny and YoloV4-tiny with on-device decoding ([DepthAI-SDK](https://docs.luxonis.com/projects/sdk/en/latest/)). | | `host-decoding` | host | V5 | Object detection using YoloV5 and on-host decoding. | | `yolox` | host | X | Object detection without anchors using YOLOX-tiny with on-host decoding. | | `yolop` | host | P | Vehicle detection, road segmentation, and lane segmentation using YOLOP on OAK with on-host decoding. 
| diff --git a/gen2-yolo/car-detection/main.py b/gen2-yolo/car-detection/main.py index 4cef78de4..84d226aca 100644 --- a/gen2-yolo/car-detection/main.py +++ b/gen2-yolo/car-detection/main.py @@ -1,59 +1,25 @@ -from depthai_sdk import Previews, FPSHandler -from depthai_sdk.managers import PipelineManager, PreviewManager, BlobManager, NNetManager import depthai as dai import cv2 import argparse +from pathlib import Path +from depthai_sdk import OakCamera +import json -# parse arguments parser = argparse.ArgumentParser() parser.add_argument("-m", "--model", help="Provide model path for inference", - default='models/yolo_v3_tiny_openvino_2021.3_6shave.blob', type=str) + default='models/yolo_v4_tiny_openvino_2021.3_6shave.blob', type=str) parser.add_argument("-c", "--config", help="Provide config path for inference", default='yolo-tiny.json', type=str) -parser.add_argument("--height", help="Input shape height", default=320, type=int) -parser.add_argument("--width", help="Input shape width", default=512, type=int) args = parser.parse_args() -CONFIG_PATH = args.config -H, W = args.height, args.width +with OakCamera(replay='cars-tracking-above-01') as oak: + color = oak.create_camera('color') -# create pipeline manager and camera -pm = PipelineManager() -pm.createColorCam(previewSize=(W, H), xout=True) + nn = oak.create_nn(args.model, color, nnType='yolo') -# create yolo node -bm = BlobManager(blobPath=args.model) -nm = NNetManager(inputSize=(W, H), nnFamily="YOLO") -nm.readConfig(CONFIG_PATH) -nn = nm.createNN(pipeline=pm.pipeline, nodes=pm.nodes, source=Previews.color.name, - blobPath=bm.getBlob(shaves=6, openvinoVersion=pm.pipeline.getOpenVINOVersion())) -pm.addNn(nn) - -# initialize pipeline -with dai.Device(pm.pipeline) as device: - - fpsHandler = FPSHandler() - pv = PreviewManager(display=[Previews.color.name], fpsHandler=fpsHandler) - - pv.createQueues(device) - nm.createQueues(device) - - nnData = [] - - while True: - # parse outputs - pv.prepareFrames() - inNn = nm.outputQueue.tryGet() - - if inNn is not None: - # count FPS - fpsHandler.tick("color") - - nnData = nm.decode(inNn) - - nm.draw(pv, nnData) - pv.showFrames() - - if cv2.waitKey(1) == ord('q'): - break + with open(str(Path(args.config).resolve())) as file: + conf = json.load(file) + nn.config_yolo_from_metadata(conf['nn_config']['NN_specific_metadata']) + oak.visualize(nn.out.passthrough, fps=True) # 1080P -> 720P + oak.start(blocking=True) diff --git a/gen2-yolo/car-detection/requirements.txt b/gen2-yolo/car-detection/requirements.txt index ad5b004df..1feac6f19 100644 --- a/gen2-yolo/car-detection/requirements.txt +++ b/gen2-yolo/car-detection/requirements.txt @@ -1,5 +1,2 @@ -opencv-python -depthai==2.16.0.0 -depthai-sdk>=1.1.6 -numpy>=1.18.5 +depthai-sdk==1.9.1 gdown>=4.4 \ No newline at end of file diff --git a/gen2-yolo/device-decoding/README.md b/gen2-yolo/device-decoding/README.md index a508ed0de..f72c63009 100644 --- a/gen2-yolo/device-decoding/README.md +++ b/gen2-yolo/device-decoding/README.md @@ -2,7 +2,7 @@ ![Yolo-on-device](https://user-images.githubusercontent.com/56075061/144863222-a52be87e-b1f0-4a0a-b39b-f865bbb6e4a4.png) -This repository contains the code for running Yolo object detection with on-device decoding with DepthAI SDK (`main.py`) or DepthAI API (`main_api.py`) directly. 
Currently, the supported versions are: +This repository contains the code for running Yolo object detection with on-device decoding with [DepthAI SDK](https://docs.luxonis.com/projects/sdk/en/latest/) (`main.py`) or [DepthAI API](https://docs.luxonis.com/projects/api/en/latest/) (`main_api.py`) directly. Currently, the supported versions are: * YoloV3 & YoloV3-tiny, * YoloV4 & YoloV4-tiny, @@ -26,17 +26,34 @@ As the models have to be exported to OpenVINO IR in a certain way, we provide th ``` 2. Run the script ``` - python3 main.py -m <model_name> -c <config_json> + python3 main.py --config <config_json> ``` or ``` - python3 main_api.py -m <model_name> -c <config_json> + python3 main_api.py -m <model_name> --config <config_json> ``` where: * `<model_name>` is the **name of the model** from DepthAI model zoo (https://zoo.luxonis.com) or **relative path to the blob** file. Please check our model zoo to see which pre-trained models are available. * `<config_json>` is the **relative path** to the JSON with metadata (input shape, anchors, labels, ...) of the Yolo model. +#### Usage example + +After downloading the .zip file from [tools.luxonis.com](https://tools.luxonis.com), extract it to the `model` folder, so you have the JSON file at `./model/yolo.json`: + +``` +Mode LastWriteTime Length Name +---- ------------- ------ ---- +-a---- 11/4/2022 1:45 PM 28 .gitignore +-a---- 9/15/2022 4:54 AM 3525428 yolo.bin +-a---- 9/15/2022 4:54 AM 1001 yolo.json +-a---- 9/15/2022 4:54 AM 182902 yolo.xml +-a---- 9/15/2022 4:54 AM 3569472 yolo.4_6shave.blob +``` + +You can then run `python main.py --config model/yolo.json` to launch your object detection model on the OAK camera. To configure the pipeline, you can edit the `main.py` script (a condensed sketch follows below); the SDK documentation can be [found here](https://docs.luxonis.com/projects/sdk/en/latest/). + + ## JSONs We already provide some JSONs for common Yolo versions. You can edit them and set them up for your model, as described in the **next steps** section in the mentioned tutorials. In case you are changing some of the parameters in the tutorial, you should edit the corresponding parameters. In general, the settings in the JSON should follow the settings in the CFG of the model. For YoloV5, the default settings should be the same as for YoloV3.
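For orientation, the SDK flow used by `main.py` (its full diff follows below) reduces to a handful of calls. The following is a condensed, hand-written sketch, not part of the diff, with the argument parsing stripped out and the config path hard-coded to the `model/yolo.json` location assumed above:

```
from depthai_sdk import OakCamera

with OakCamera() as oak:
    color = oak.create_camera('color')
    # The JSON exported by tools.luxonis.com carries the model metadata (input shape, anchors, labels),
    # so it can be passed to create_nn directly; nnType='yolo' selects on-device YOLO decoding.
    nn = oak.create_nn('model/yolo.json', color, nnType='yolo')
    oak.visualize(nn, fps=True)
    oak.start(blocking=True)
```

The actual `main.py` additionally passes `spatial=True` to `create_nn`, which adds depth-based spatial coordinates on stereo-capable OAK devices.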
diff --git a/gen2-yolo/device-decoding/main.py b/gen2-yolo/device-decoding/main.py index 53a57b3c7..b4c133898 100644 --- a/gen2-yolo/device-decoding/main.py +++ b/gen2-yolo/device-decoding/main.py @@ -1,64 +1,13 @@ -from depthai_sdk import Previews, FPSHandler -from depthai_sdk.managers import PipelineManager, PreviewManager, BlobManager, NNetManager -import depthai as dai -import cv2 +from depthai_sdk import OakCamera, ArgsParser import argparse -from pathlib import Path # parse arguments parser = argparse.ArgumentParser() -parser.add_argument("-m", "--model", help="Provide model path for inference", - default='yolov4_tiny_coco_416x416', type=str) -parser.add_argument("-c", "--config", help="Provide config path for inference", - default='json/yolov4-tiny.json', type=str) -args = parser.parse_args() -CONFIG_PATH = args.config - -# create blob, NN, and preview managers -if Path(args.model).exists(): - # initialize blob manager with path to the blob - bm = BlobManager(blobPath=args.model) -else: - # initialize blob manager with the name of the model otherwise - bm = BlobManager(zooName=args.model) - -nm = NNetManager(nnFamily="YOLO", inputSize=4) -nm.readConfig(CONFIG_PATH) # this will also parse the correct input size - -pm = PipelineManager() -pm.createColorCam(previewSize=nm.inputSize, xout=True) - -# create preview manager -fpsHandler = FPSHandler() -pv = PreviewManager(display=[Previews.color.name], fpsHandler=fpsHandler) - -# create NN with managers -nn = nm.createNN(pipeline=pm.pipeline, nodes=pm.nodes, source=Previews.color.name, - blobPath=bm.getBlob(shaves=6, openvinoVersion=pm.pipeline.getOpenVINOVersion(), zooType="depthai")) -pm.addNn(nn) - -# initialize pipeline -with dai.Device(pm.pipeline) as device: - # create outputs - pv.createQueues(device) - nm.createQueues(device) - - nnData = [] - - while True: - - # parse outputs - pv.prepareFrames() - inNn = nm.outputQueue.tryGet() - - if inNn is not None: - nnData = nm.decode(inNn) - # count FPS - fpsHandler.tick("color") - - nm.draw(pv, nnData) - pv.showFrames() - - if cv2.waitKey(1) == ord('q'): - break - +parser.add_argument("-conf", "--config", help="Trained YOLO json config path", default='model/yolo.json', type=str) +args = ArgsParser.parseArgs(parser) + +with OakCamera(args=args) as oak: + color = oak.create_camera('color') + nn = oak.create_nn(args['config'], color, nnType='yolo', spatial=True) + oak.visualize(nn, fps=True) + oak.start(blocking=True) diff --git a/collecting-training-data/README.md b/outdated/collecting-training-data/README.md similarity index 100% rename from collecting-training-data/README.md rename to outdated/collecting-training-data/README.md diff --git a/coronamask/README.md b/outdated/coronamask/README.md similarity index 100% rename from coronamask/README.md rename to outdated/coronamask/README.md diff --git a/gaze-estimation/README.md b/outdated/gaze-estimation/README.md similarity index 100% rename from gaze-estimation/README.md rename to outdated/gaze-estimation/README.md diff --git a/imu-publishing/README.md b/outdated/imu-publishing/README.md similarity index 100% rename from imu-publishing/README.md rename to outdated/imu-publishing/README.md diff --git a/mjpeg-streaming/README.md b/outdated/mjpeg-streaming/README.md similarity index 100% rename from mjpeg-streaming/README.md rename to outdated/mjpeg-streaming/README.md diff --git a/pcl-projection-rgb/README.md b/outdated/pcl-projection-rgb/README.md similarity index 100% rename from pcl-projection-rgb/README.md rename to 
outdated/pcl-projection-rgb/README.md diff --git a/pedestrian-reidentification/README.md b/outdated/pedestrian-reidentification/README.md similarity index 100% rename from pedestrian-reidentification/README.md rename to outdated/pedestrian-reidentification/README.md diff --git a/people-counter/README.md b/outdated/people-counter/README.md similarity index 100% rename from people-counter/README.md rename to outdated/people-counter/README.md diff --git a/people-counter/requirements.txt b/outdated/people-counter/requirements.txt similarity index 100% rename from people-counter/requirements.txt rename to outdated/people-counter/requirements.txt diff --git a/people-tracker/README.md b/outdated/people-tracker/README.md similarity index 100% rename from people-tracker/README.md rename to outdated/people-tracker/README.md diff --git a/people-tracker/requirements.txt b/outdated/people-tracker/requirements.txt similarity index 100% rename from people-tracker/requirements.txt rename to outdated/people-tracker/requirements.txt diff --git a/point-cloud-projection/README.md b/outdated/point-cloud-projection/README.md similarity index 100% rename from point-cloud-projection/README.md rename to outdated/point-cloud-projection/README.md diff --git a/rtsp-streaming/README.md b/outdated/rtsp-streaming/README.md similarity index 100% rename from rtsp-streaming/README.md rename to outdated/rtsp-streaming/README.md diff --git a/stereo_on_host/README.md b/outdated/stereo_on_host/README.md similarity index 100% rename from stereo_on_host/README.md rename to outdated/stereo_on_host/README.md diff --git a/triangulation-3D-visualizer/README.md b/outdated/triangulation-3D-visualizer/README.md similarity index 100% rename from triangulation-3D-visualizer/README.md rename to outdated/triangulation-3D-visualizer/README.md diff --git a/two-stage-inference/README.md b/outdated/two-stage-inference/README.md similarity index 100% rename from two-stage-inference/README.md rename to outdated/two-stage-inference/README.md diff --git a/sdk_todo/sdk-efficient-det/README.md b/sdk_todo/sdk-efficient-det/README.md new file mode 100644 index 000000000..77239c8f2 --- /dev/null +++ b/sdk_todo/sdk-efficient-det/README.md @@ -0,0 +1,31 @@ +# EfficientDet + +You can read more about the EfficientDet model in [automl's repo](https://github.com/google/automl/tree/master/efficientdet). + +The NN model is taken from PINTOs [model-zoo](https://github.com/PINTO0309/PINTO_model_zoo/tree/main/018_EfficientDet). +In this experiment we have used `EfficientDet-lite0`, which is the most lightweight one. + +Instructions on how to compile the model yourself: +- Download the `lite0` zip from PINTO's [model-zoo](https://github.com/PINTO0309/PINTO_model_zoo/tree/main/018_EfficientDet) +- Navigate to `FP16/myriad` where you will find model in IR format (`.bin` and `.xml`) +- Compile the IR model into blob ([instructions here](https://docs.luxonis.com/en/latest/pages/model_conversion/)). I have used the online converter. **Note** here that the model's input layer is of type FP16 and you have to specify that as **MyriadX compile params**: `-ip FP16` + +## Demo + +[![Watch the demo](https://user-images.githubusercontent.com/18037362/117892266-4c5bb980-b2b0-11eb-9c0c-68f5da6c2759.gif)](https://www.youtube.com/watch?v=UHXWj9TNGrM) + +## Installation + +``` +python3 -m pip install -r requirements.txt +``` + +If you install requirements, you will use the [NN performance improvements](https://github.com/luxonis/depthai-python/pull/209) branch. 
This helps with boosting the NN FPS for ~10%. + +## Usage + +Run the application + +``` +python3 main.py +``` \ No newline at end of file diff --git a/sdk_todo/sdk-efficient-det/main.py b/sdk_todo/sdk-efficient-det/main.py new file mode 100644 index 000000000..41f704ad1 --- /dev/null +++ b/sdk_todo/sdk-efficient-det/main.py @@ -0,0 +1,64 @@ +import cv2 +import numpy as np +from depthai import NNData + +from depthai_sdk import OakCamera, Detections + +CONF_THRESHOLD = 0.4 +SHAPE = 320 +coco_90 = ["person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light", + "fire hydrant", "12", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow", + "elephant", "bear", "zebra", "giraffe", "26", "backpack", "umbrella", "29", "30", "handbag", "tie", + "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", + "skateboard", "surfboard", "tennis racket", "bottle", "45", "wine glass", "cup", "fork", "knife", "spoon", + "bowl", "banana", "apple", "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", + "chair", "couch", "potted plant", "bed", "66", "dining table", "68", "69", "toilet", "71", "tv", "laptop", + "mouse", "remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator", "83", + "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush"] + + +def decode(nn_data: NNData): + bb = np.array(nn_data.getLayerFp16('Identity')).reshape(25, 4) + label = nn_data.getLayerInt32('Identity_1') + conf = nn_data.getLayerFp16('Identity_2') + + dets = Detections(nn_data) + # for det in bb: + + +def callback(packet, visualizer): + frame = packet.frame + nn_data = packet.img_detections + + bb = np.array(nn_data.getLayerFp16('Identity')).reshape(25, 4) + label = nn_data.getLayerInt32('Identity_1') + conf = nn_data.getLayerFp16('Identity_2') + + for i in range(len(conf)): + if CONF_THRESHOLD < conf[i]: + bb_det = bb[i] + # Limit the bounding box to 0..1 + bb_det[bb_det > 1] = 1 + bb_det[bb_det < 0] = 0 + xy_min = (int(bb_det[1] * SHAPE), int(bb_det[0] * SHAPE)) + xy_max = (int(bb_det[3] * SHAPE), int(bb_det[2] * SHAPE)) + # Display detection's BB, label and confidence on the frame + cv2.rectangle(frame, xy_min, xy_max, (255, 0, 0), 2) + cv2.putText(frame, coco_90[label[i]], (xy_min[0] + 10, xy_min[1] + 20), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255) + cv2.putText(frame, f"{int(conf[i] * 100)}%", (xy_min[0] + 10, xy_min[1] + 40), cv2.FONT_HERSHEY_TRIPLEX, + 0.5, 255) + + cv2.imshow('EfficientDet', frame) + + +with OakCamera() as oak: + color = oak.create_camera('color', resolution='1080p') + color.node.setFp16(True) + + raise NotImplementedError('TODO: FP16 frames') + + nn = oak.create_nn('models/efficientdet_lite0_2021.3_6shaves.blob', color) + nn.config_nn(aspect_ratio_resize_mode='crop') + + oak.visualize(color, callback=callback) + oak.start(blocking=True) diff --git a/sdk_todo/sdk-efficient-det/models/efficientdet_lite0_2021.3_6shaves.blob b/sdk_todo/sdk-efficient-det/models/efficientdet_lite0_2021.3_6shaves.blob new file mode 100644 index 000000000..ab9fe987f Binary files /dev/null and b/sdk_todo/sdk-efficient-det/models/efficientdet_lite0_2021.3_6shaves.blob differ diff --git a/gen2-multiple-devices/multi-cam-calibration/requirements.txt b/sdk_todo/sdk-efficient-det/requirements.txt similarity index 63% rename from gen2-multiple-devices/multi-cam-calibration/requirements.txt rename to sdk_todo/sdk-efficient-det/requirements.txt index 
368c0d7a7..25bbe237e 100644 --- a/gen2-multiple-devices/multi-cam-calibration/requirements.txt +++ b/sdk_todo/sdk-efficient-det/requirements.txt @@ -1,4 +1,3 @@ opencv-python numpy -blobconverter==1.2.8 depthai==2.16.0.0 \ No newline at end of file diff --git a/sdk_todo/sdk-gaze-estimation/README.md b/sdk_todo/sdk-gaze-estimation/README.md new file mode 100644 index 000000000..29fe050e7 --- /dev/null +++ b/sdk_todo/sdk-gaze-estimation/README.md @@ -0,0 +1,46 @@ +[中文文档](README.zh-CN.md) + +# [Gen2] Gaze estimation + +This example demonstrates how to run 3 stage inference (3-series, 2-parallel) on DepthAI using Gen2 Pipeline Builder. + +[![Gaze Example Demo](https://user-images.githubusercontent.com/5244214/106155937-4fa7bb00-6181-11eb-8c23-21abe12f7fe4.gif)](https://user-images.githubusercontent.com/5244214/106155520-0f483d00-6181-11eb-8b95-a2cb73cc4bac.mp4) + + +Original OpenVINO demo, on which this example was made, is official [here](https://docs.openvinotoolkit.org/2021.1/omz_demos_gaze_estimation_demo_README.html) from Intel and implemented with the NCS2 with nice diagrams and explanation, [here](https://github.com/LCTyrell/Gaze_pointer_controller), from @LCTyrell. + +![graph](https://user-images.githubusercontent.com/32992551/103378235-de4fec00-4a9e-11eb-88b2-621180f7edef.jpeg) + +Figure: @LCTyrell + +## Pre-requisites + +Install requirements: +``` +python3 -m pip install -r requirements.txt +``` + +## Usage + +``` +usage: main.py [-h] [-nd] [-cam] [-vid VIDEO] + +optional arguments: + -h, --help show this help message and exit + -nd, --no-debug Prevent debug output + -cam, --camera Use DepthAI 4K RGB camera for inference (conflicts with -vid) + -vid VIDEO, --video VIDEO + Path to video file to be used for inference (conflicts with -cam) +``` + +To use with a video file, run the script with the following arguments + +``` +python3 main.py -vid ./demo.mp4 +``` + +To use with DepthAI 4K RGB camera, use instead + +``` +python3 main.py -cam +``` diff --git a/sdk_todo/sdk-gaze-estimation/README.zh-CN.md b/sdk_todo/sdk-gaze-estimation/README.zh-CN.md new file mode 100644 index 000000000..76552eaf2 --- /dev/null +++ b/sdk_todo/sdk-gaze-estimation/README.zh-CN.md @@ -0,0 +1,47 @@ +[英文文档](README.md) + +# [Gen2] 凝视估计 + +本示例演示了如何使用Gen2 Pipeline Builder在DepthAI上运行3阶段推理(3序列,2并行)。 + +[![Gaze Example Demo](https://user-images.githubusercontent.com/5244214/106155937-4fa7bb00-6181-11eb-8c23-21abe12f7fe4.gif)](https://user-images.githubusercontent.com/5244214/106155520-0f483d00-6181-11eb-8b95-a2cb73cc4bac.mp4) + + +原OpenVINO演示,在其上做这个例子,是官方[在这里](https://docs.openvinotoolkit.org/2021.1/omz_demos_gaze_estimation_demo_README.html), 从英特尔,并与NCS2实现漂亮的图表和说明, [在这里](https://github.com/LCTyrell/Gaze_pointer_controller) @LCTyrell。 + +![graph](https://user-images.githubusercontent.com/32992551/103378235-de4fec00-4a9e-11eb-88b2-621180f7edef.jpeg) + +图: @LCTyrell + +## 先决条件 + +1. 购买DepthAI模型(请参见 [shop.luxonis.com](https://shop.luxonis.com/)) +2. 
安装条件 + ```bash + python3 -m pip install -r requirements.txt + ``` + +## 用法 + +``` +用法: main.py [-h] [-nd] [-cam] [-vid VIDEO] + +可选参数: + -h, --help 显示此帮助消息并退出 + -nd, --no-debug 禁止调试输出 + -cam, --camera 使用DepthAI 4K RGB相机进行推理(与-vid冲突) + -vid VIDEO, --video VIDEO + 用于推理的视频文件的路径(与-cam冲突) +``` + +要与视频文件一起使用,请使用以下参数运行脚本 + +``` +python3 main.py -vid ./demo.mp4 +``` + +要与DepthAI 4K RGB相机一起使用,请改用 + +``` +python3 main.py -cam +``` diff --git a/sdk_todo/sdk-gaze-estimation/demo.mp4 b/sdk_todo/sdk-gaze-estimation/demo.mp4 new file mode 100644 index 000000000..df02b8d4e Binary files /dev/null and b/sdk_todo/sdk-gaze-estimation/demo.mp4 differ diff --git a/sdk_todo/sdk-gaze-estimation/main.py b/sdk_todo/sdk-gaze-estimation/main.py new file mode 100644 index 000000000..c2e7025e9 --- /dev/null +++ b/sdk_todo/sdk-gaze-estimation/main.py @@ -0,0 +1,33 @@ +import cv2 +import numpy as np + +from depthai_sdk import OakCamera, AspectRatioResizeMode +from depthai_sdk.oak_outputs.normalize_bb import NormalizeBoundingBox + +CONFIDENCE = 0.7 + + +def callback(packet, visualizer): + nn_data = packet.img_detections + bboxes = np.array(nn_data.getFirstLayerFp16()) + bboxes = bboxes.reshape((bboxes.size // 7, 7)) + + bboxes = bboxes[bboxes[:, 2] > CONFIDENCE][:, 3:7] + + for bbox in bboxes: + bbox = NormalizeBoundingBox((384, 384), AspectRatioResizeMode.LETTERBOX).normalize(packet.frame, bbox) + cv2.rectangle(packet.frame, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (232, 36, 87), 2) + + cv2.imshow('Gaze estimation', packet.frame) + + +with OakCamera() as oak: + color = oak.create_camera('color') + + raise NotImplementedError('TODO: Multiple inputs are not supported yet') + + face_detection = oak.create_nn('face-detection-retail-0004', color) + gaze_estimation = oak.create_nn('gaze-estimation-adas-0002', face_detection) + + oak.callback(gaze_estimation, callback=callback) + oak.start(blocking=True) diff --git a/sdk_todo/sdk-gaze-estimation/requirements.txt b/sdk_todo/sdk-gaze-estimation/requirements.txt new file mode 100644 index 000000000..5d101aa67 --- /dev/null +++ b/sdk_todo/sdk-gaze-estimation/requirements.txt @@ -0,0 +1,4 @@ +opencv-python==4.5.1.48 +imutils==0.5.4 +blobconverter==1.2.9 +depthai==2.16.0.0 \ No newline at end of file diff --git a/sdk_todo/sdk-human-machine-safety/README.md b/sdk_todo/sdk-human-machine-safety/README.md new file mode 100644 index 000000000..54e5714fa --- /dev/null +++ b/sdk_todo/sdk-human-machine-safety/README.md @@ -0,0 +1,26 @@ +# Gen2 Human-Machine safety + +This example demonstrates how to detect dangerous objects and calculate the distance to a human hand (palm). +It uses a MobileNet spatial detection network node to get spatial coordinates of dangerous objects and a palm +detection network to detect a human palm. The `palm_detection.py` script handles decoding of the [palm detection network](https://google.github.io/mediapipe/solutions/hands#palm-detection-model) and returns the bounding box of a detected palm. +Instead of sending the bounding box of the detected palm back to the device's `SpatialLocationCalculator`, this example +uses the `calc_spatials(self, bbox, depth)` function to calculate spatial coordinates on the host (using the bbox and the depth map). Once we have the spatial coordinates of both the dangerous object and the palm, we calculate the spatial distance between them; if it is below the `WARNING_DIST` threshold, a warning is shown.
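The `calc_spatials` helper itself is not part of this hunk, so the snippet below is only a minimal sketch of the host-side idea: take the valid depth inside the bounding box, then project the ROI centre to X/Y using the horizontal field of view. The FOV value, the millimetre depth frame and the `too_close` helper are illustrative assumptions, not the script's actual parameters:

```python
import math
import numpy as np

def calc_spatials(bbox, depth_frame, hfov_deg=71.9):
    """Rough host-side spatial estimate from a pixel bbox and a depth map (mm)."""
    xmin, ymin, xmax, ymax = bbox
    roi = depth_frame[ymin:ymax, xmin:xmax]
    valid = roi[roi > 0]                 # drop invalid (zero) depth pixels
    if valid.size == 0:
        return None
    z = float(np.median(valid))          # depth of the ROI, in mm

    # Pixel offset of the ROI centre from the image centre
    cx = (xmin + xmax) / 2 - depth_frame.shape[1] / 2
    cy = (ymin + ymax) / 2 - depth_frame.shape[0] / 2
    angle_per_px = math.radians(hfov_deg) / depth_frame.shape[1]
    x = z * math.tan(angle_per_px * cx)  # right of the camera centre
    y = -z * math.tan(angle_per_px * cy) # above the camera centre
    return x, y, z

# With spatials for both the palm and a dangerous object, the warning is a
# plain Euclidean distance check against the WARNING_DIST threshold:
def too_close(p1, p2, warning_dist=500):  # mm, illustrative value
    return math.dist(p1, p2) < warning_dist
```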
+ +## Demo: + +[![Watch the demo](https://user-images.githubusercontent.com/18037362/121198687-a1202f00-c872-11eb-949a-df9f1167494f.gif)](https://www.youtube.com/watch?v=BcjZLaCYGi4) + +## Pre-requisites + +Install requirements: +```bash +python3 -m pip install -r requirements.txt +``` + +## Usage + +```bash + python3 people-detection.py +``` + +> Press 'q' to exit the program. diff --git a/sdk_todo/sdk-human-machine-safety/anchors_palm.npy b/sdk_todo/sdk-human-machine-safety/anchors_palm.npy new file mode 100644 index 000000000..3ce25a3a2 Binary files /dev/null and b/sdk_todo/sdk-human-machine-safety/anchors_palm.npy differ diff --git a/sdk_todo/sdk-human-machine-safety/main.py b/sdk_todo/sdk-human-machine-safety/main.py new file mode 100644 index 000000000..e7f9ba3b4 --- /dev/null +++ b/sdk_todo/sdk-human-machine-safety/main.py @@ -0,0 +1,31 @@ +import blobconverter +import cv2 + +from depthai_sdk import OakCamera +from palm_detection import PalmDetection + +pd = PalmDetection() + + +def callback(sync_dict: dict): + palm_detection_packet = sync_dict['2_out;0_preview'] + mobilenet_packet = sync_dict['3_out;0_preview'] + + frame = pd.run_palm(palm_detection_packet.frame, palm_detection_packet.img_detections) + + cv2.imshow('Machine safety', frame) + + +with OakCamera() as oak: + color = oak.create_camera('color') + stereo = oak.create_stereo(resolution='800p') + + raise NotImplementedError('TODO') + + palm_nn_path = blobconverter.from_zoo(name='palm_detection_128x128', shaves=6, zoo_type='depthai') + palm_detection = oak.create_nn(palm_nn_path, color, spatial=stereo, nn_type='mobilenet') + + mobilenet = oak.create_nn('mobilenet-ssd', color, spatial=stereo) + + oak.sync([palm_detection.out.main, mobilenet.out.main], callback=callback) + oak.start(blocking=True) diff --git a/sdk_todo/sdk-human-machine-safety/palm_detection.py b/sdk_todo/sdk-human-machine-safety/palm_detection.py new file mode 100644 index 000000000..a66167a5c --- /dev/null +++ b/sdk_todo/sdk-human-machine-safety/palm_detection.py @@ -0,0 +1,163 @@ +import numpy as np + +class PalmDetection: + def run_palm(self, frame, nn_data): + """ + Each palm detection is a tensor consisting of 19 numbers: + - ymin, xmin, ymax, xmax + - x,y-coordinates for the 7 key_points + - confidence score + :return: + """ + if nn_data is None: + return + shape = (128, 128) + num_keypoints = 7 + min_score_thresh = 0.7 + anchors = np.load("anchors_palm.npy") + + # Run the neural network + results = self.to_tensor_result(nn_data) + + raw_box_tensor = results.get("regressors").reshape(-1, 896, 18) # regress + raw_score_tensor = results.get("classificators").reshape(-1, 896, 1) # classification + + detections = self.raw_to_detections(raw_box_tensor, raw_score_tensor, anchors, shape, num_keypoints) + + palm_coords = [ + self.frame_norm(frame, *obj[:4]) + for det in detections + for obj in det + if obj[-1] > min_score_thresh + ] + + palm_confs = [ + obj[-1] for det in detections for obj in det if obj[-1] > min_score_thresh + ] + + if len(palm_coords) == 0: + return + + return self.non_max_suppression( + boxes=np.concatenate(palm_coords).reshape(-1, 4), + probs=palm_confs, + overlapThresh=0.1, + ) + + def sigmoid(self, x): + return (1.0 + np.tanh(0.5 * x)) * 0.5 + + def decode_boxes(self, raw_boxes, anchors, shape, num_keypoints): + """ + Converts the predictions into actual coordinates using the anchor boxes. + Processes the entire batch at once. 
+ """ + boxes = np.zeros_like(raw_boxes) + x_scale, y_scale = shape + + x_center = raw_boxes[..., 0] / x_scale * anchors[:, 2] + anchors[:, 0] + y_center = raw_boxes[..., 1] / y_scale * anchors[:, 3] + anchors[:, 1] + + w = raw_boxes[..., 2] / x_scale * anchors[:, 2] + h = raw_boxes[..., 3] / y_scale * anchors[:, 3] + + boxes[..., 1] = y_center - h / 2.0 # xmin + boxes[..., 0] = x_center - w / 2.0 # ymin + boxes[..., 3] = y_center + h / 2.0 # xmax + boxes[..., 2] = x_center + w / 2.0 # ymax + + for k in range(num_keypoints): + offset = 4 + k * 2 + keypoint_x = raw_boxes[..., offset] / x_scale * anchors[:, 2] + anchors[:, 0] + keypoint_y = ( + raw_boxes[..., offset + 1] / y_scale * anchors[:, 3] + anchors[:, 1] + ) + boxes[..., offset] = keypoint_x + boxes[..., offset + 1] = keypoint_y + + return boxes + + def raw_to_detections(self, raw_box_tensor, raw_score_tensor, anchors_, shape, num_keypoints): + """ + + This function converts these two "raw" tensors into proper detections. + Returns a list of (num_detections, 17) tensors, one for each image in + the batch. + + This is based on the source code from: + mediapipe/calculators/tflite/tflite_tensors_to_detections_calculator.cc + mediapipe/calculators/tflite/tflite_tensors_to_detections_calculator.proto + """ + detection_boxes = self.decode_boxes(raw_box_tensor, anchors_, shape, num_keypoints) + detection_scores = self.sigmoid(raw_score_tensor).squeeze(-1) + output_detections = [] + for i in range(raw_box_tensor.shape[0]): + boxes = detection_boxes[i] + scores = np.expand_dims(detection_scores[i], -1) + output_detections.append(np.concatenate((boxes, scores), -1)) + return output_detections + + def non_max_suppression(self, boxes, probs=None, angles=None, overlapThresh=0.3): + if len(boxes) == 0: + return [], [] + + if boxes.dtype.kind == "i": + boxes = boxes.astype("float") + + pick = [] + + x1 = boxes[:, 0] + y1 = boxes[:, 1] + x2 = boxes[:, 2] + y2 = boxes[:, 3] + + area = (x2 - x1 + 1) * (y2 - y1 + 1) + idxs = y2 + + if probs is not None: + idxs = probs + + idxs = np.argsort(idxs) + + while len(idxs) > 0: + last = len(idxs) - 1 + i = idxs[last] + pick.append(i) + + xx1 = np.maximum(x1[i], x1[idxs[:last]]) + yy1 = np.maximum(y1[i], y1[idxs[:last]]) + xx2 = np.minimum(x2[i], x2[idxs[:last]]) + yy2 = np.minimum(y2[i], y2[idxs[:last]]) + + w = np.maximum(0, xx2 - xx1 + 1) + h = np.maximum(0, yy2 - yy1 + 1) + + overlap = (w * h) / area[idxs[:last]] + + idxs = np.delete( + idxs, np.concatenate(([last], np.where(overlap > overlapThresh)[0])) + ) + + if angles is not None: + return boxes[pick].astype("int"), angles[pick] + return boxes[pick].astype("int") + + def to_tensor_result(self, packet): + return { + name: np.array(packet.getLayerFp16(name)) + for name in [tensor.name for tensor in packet.getRaw().tensors] + } + + def frame_norm(self, frame, *xy_vals): + """ + nn data, being the bounding box locations, are in <0..1> range - + they need to be normalized with frame width/height + + :param frame: + :param xy_vals: the bounding box locations + :return: + """ + return ( + np.clip(np.array(xy_vals), 0, 1) + * np.array(frame.shape[:2] * (len(xy_vals) // 2))[::-1] + ).astype(int) diff --git a/sdk_todo/sdk-human-machine-safety/requirements.txt b/sdk_todo/sdk-human-machine-safety/requirements.txt new file mode 100644 index 000000000..1d08ed369 --- /dev/null +++ b/sdk_todo/sdk-human-machine-safety/requirements.txt @@ -0,0 +1,4 @@ +depthai==2.16.0.0 +blobconverter==1.3.0 +opencv-python +numpy \ No newline at end of file