diff --git a/bindings/python/src/pipeline/datatype/TransformDataBindings.cpp b/bindings/python/src/pipeline/datatype/TransformDataBindings.cpp
index 60ff9e0678..2ea3f57104 100644
--- a/bindings/python/src/pipeline/datatype/TransformDataBindings.cpp
+++ b/bindings/python/src/pipeline/datatype/TransformDataBindings.cpp
@@ -31,7 +31,19 @@ void bind_transformdata(pybind11::module& m, void* pCallstack) {
 
     // Metadata / raw
     transformData.def(py::init<>())
+        .def(py::init<double, double, double, double, double, double, double, std::string, std::string>(),
+             py::arg("x"),
+             py::arg("y"),
+             py::arg("z"),
+             py::arg("qx"),
+             py::arg("qy"),
+             py::arg("qz"),
+             py::arg("qw"),
+             py::arg("frameID"),
+             py::arg("parentFrameID"))
         .def("__repr__", &TransformData::str)
+        .def_readwrite("frameID", &TransformData::frameID)
+        .def_readwrite("parentFrameID", &TransformData::parentFrameID)
         .def("getTranslation", &TransformData::getTranslation, DOC(dai, TransformData, getTranslation))
         .def("getRotationEuler", &TransformData::getRotationEuler, DOC(dai, TransformData, getRotationEuler))
         .def("getQuaternion", &TransformData::getQuaternion, DOC(dai, TransformData, getQuaternion));
diff --git a/examples/python/Transforms/transform_example.py b/examples/python/Transforms/transform_example.py
new file mode 100644
index 0000000000..5263045c46
--- /dev/null
+++ b/examples/python/Transforms/transform_example.py
@@ -0,0 +1,109 @@
+#!/usr/bin/env python3
+
+from pathlib import Path
+import sys
+import cv2
+import depthai as dai
+import numpy as np
+import time
+
+modelDescription = dai.NNModelDescription("yolov6-nano")
+FPS = 30
+
+class SpatialVisualizer(dai.node.HostNode):
+    def __init__(self):
+        dai.node.HostNode.__init__(self)
+        self.sendProcessingToPipeline(True)
+    def build(self, depth:dai.Node.Output, detections: dai.Node.Output, rgb: dai.Node.Output):
+        self.link_args(depth, detections, rgb) # Must match the inputs to the process method
+
+    def process(self, depthPreview, detections, rgbPreview):
+        depthPreview = depthPreview.getCvFrame()
+        rgbPreview = rgbPreview.getCvFrame()
+        depthFrameColor = self.processDepthFrame(depthPreview)
+        self.displayResults(rgbPreview, depthFrameColor, detections.detections)
+
+    def processDepthFrame(self, depthFrame):
+        depth_downscaled = depthFrame[::4]
+        if np.all(depth_downscaled == 0):
+            min_depth = 0
+        else:
+            min_depth = np.percentile(depth_downscaled[depth_downscaled != 0], 1)
+        max_depth = np.percentile(depth_downscaled, 99)
+        depthFrameColor = np.interp(depthFrame, (min_depth, max_depth), (0, 255)).astype(np.uint8)
+        return cv2.applyColorMap(depthFrameColor, cv2.COLORMAP_HOT)
+
+    def displayResults(self, rgbFrame, depthFrameColor, detections):
+        height, width, _ = rgbFrame.shape
+        for detection in detections:
+            self.drawBoundingBoxes(depthFrameColor, detection)
+            self.drawDetections(rgbFrame, detection, width, height)
+
+        cv2.imshow("depth", depthFrameColor)
+        cv2.imshow("rgb", rgbFrame)
+        if cv2.waitKey(1) == ord('q'):
+            self.stopPipeline()
+
+    def drawBoundingBoxes(self, depthFrameColor, detection):
+        roiData = detection.boundingBoxMapping
+        roi = roiData.roi
+        roi = roi.denormalize(depthFrameColor.shape[1], depthFrameColor.shape[0])
+        topLeft = roi.topLeft()
+        bottomRight = roi.bottomRight()
+        cv2.rectangle(depthFrameColor, (int(topLeft.x), int(topLeft.y)), (int(bottomRight.x), int(bottomRight.y)), (255, 255, 255), 1)
+
+    def drawDetections(self, frame, detection, frameWidth, frameHeight):
+        x1 = int(detection.xmin * frameWidth)
+        x2 = int(detection.xmax * frameWidth)
+        y1 = int(detection.ymin * frameHeight)
+        y2 = int(detection.ymax * frameHeight)
+        try:
+            label = self.labelMap[detection.label] # Ensure labelMap is accessible
+        except IndexError:
+            label = detection.label
+        color = (255, 255, 255)
+        cv2.putText(frame, str(label), (x1 + 10, y1 + 20), cv2.FONT_HERSHEY_TRIPLEX, 0.5, color)
+        cv2.putText(frame, "{:.2f}".format(detection.confidence * 100), (x1 + 10, y1 + 35), cv2.FONT_HERSHEY_TRIPLEX, 0.5, color)
+        cv2.putText(frame, f"X: {int(detection.spatialCoordinates.x)} mm", (x1 + 10, y1 + 50), cv2.FONT_HERSHEY_TRIPLEX, 0.5, color)
+        cv2.putText(frame, f"Y: {int(detection.spatialCoordinates.y)} mm", (x1 + 10, y1 + 65), cv2.FONT_HERSHEY_TRIPLEX, 0.5, color)
+        cv2.putText(frame, f"Z: {int(detection.spatialCoordinates.z)} mm", (x1 + 10, y1 + 80), cv2.FONT_HERSHEY_TRIPLEX, 0.5, color)
+        cv2.rectangle(frame, (x1, y1), (x2, y2), color, 1)
+
+    def getDetectionInGlobalFrame(self, detection: dai.SpatialImgDetection):
+        detTransform = dai.TransformData(detection.spatialCoordinates.x, detection.spatialCoordinates.y, detection.spatialCoordinates.z, "detections", "left_camera_frame")
+        globalTransHandler = self.getParentPipeline().getGlobalTransformHandler()
+        globalTransHandler.addTransform(detTransform)
+        # detTransformWorld = globalTransHandler.lookupTransform(detTransform, "world")
+        detTransformWorld = globalTransHandler.lookupTransform("detections", "world")
+
+# Creates the pipeline and a default device implicitly
+with dai.Pipeline() as p:
+    # Define sources and outputs
+    camRgb = p.create(dai.node.Camera).build(dai.CameraBoardSocket.CAM_A)
+    monoLeft = p.create(dai.node.Camera).build(dai.CameraBoardSocket.CAM_B)
+    monoRight = p.create(dai.node.Camera).build(dai.CameraBoardSocket.CAM_C)
+
+    stereo = p.create(dai.node.StereoDepth)
+    spatialDetectionNetwork = p.create(dai.node.SpatialDetectionNetwork).build(camRgb, stereo, modelDescription, fps=FPS)
+    visualizer = p.create(SpatialVisualizer)
+
+    # setting node configs
+    platform = p.getDefaultDevice().getPlatform()
+    if platform == dai.Platform.RVC2:
+        # For RVC2, width must be divisible by 16
+        stereo.setOutputSize(640, 400)
+
+    spatialDetectionNetwork.input.setBlocking(False)
+    spatialDetectionNetwork.setBoundingBoxScaleFactor(0.5)
+    spatialDetectionNetwork.setDepthLowerThreshold(100)
+    spatialDetectionNetwork.setDepthUpperThreshold(5000)
+
+    # Linking
+    monoLeft.requestOutput((640, 400)).link(stereo.left)
+    monoRight.requestOutput((640, 400)).link(stereo.right)
+    visualizer.labelMap = spatialDetectionNetwork.getClasses()
+
+    visualizer.build(stereo.depth, spatialDetectionNetwork.out, spatialDetectionNetwork.passthrough)
+
+
+    p.run()
diff --git a/include/depthai/pipeline/GlobalTransformHandler.hpp b/include/depthai/pipeline/GlobalTransformHandler.hpp
new file mode 100644
index 0000000000..2fb9104af4
--- /dev/null
+++ b/include/depthai/pipeline/GlobalTransformHandler.hpp
@@ -0,0 +1,52 @@
+#pragma once
+
+#include <memory>
+
+#include "depthai/common/Timestamp.hpp"
+#include "depthai/pipeline/datatype/TransformData.hpp"
+
+namespace dai {
+
+namespace transforms {
+
+enum class DaiCoordinateSystem { RDF = 0, FLU };
+
+
+class DeviceTransformHandler {
+   public:
+    DeviceTransformHandler(const DeviceTransformHandler&) = default;
+    DeviceTransformHandler(DeviceTransformHandler&&) = default;
+    DeviceTransformHandler& operator=(const DeviceTransformHandler&) = default;
+    DeviceTransformHandler& operator=(DeviceTransformHandler&&) = default;
+    void initializeDeviceTransformTree();
+    void setCoordinateSystem(DaiCoordinateSystem system);
+   private:
+    DaiCoordinateSystem coordSys;
+};
+
+class GlobalTransformHandler {
+   public:
+
+    GlobalTransformHandler(const GlobalTransformHandler&) = default;
+    GlobalTransformHandler(GlobalTransformHandler&&) = default;
+    GlobalTransformHandler& operator=(const GlobalTransformHandler&) = default;
+    GlobalTransformHandler& operator=(GlobalTransformHandler&&) = default;
+    void addTransform(std::shared_ptr<TransformData> transform);
+    void addStaticTransform(std::shared_ptr<TransformData> transform);
+    void removeTransform(const std::string& transformName);
+    std::shared_ptr<TransformData> lookupTransform(std::shared_ptr<TransformData> from,
+                                                   std::shared_ptr<TransformData> to,
+                                                   dai::Timestamp maxInterval);
+    std::shared_ptr<TransformData> lookupTransform(const std::string& from, const std::string& to, dai::Timestamp maxInterval);
+    std::shared_ptr<TransformData> lookupTransform(std::shared_ptr<TransformData> from, const std::string& to, dai::Timestamp maxInterval);
+    std::shared_ptr<TransformData> lookupTransform(const std::string& from, std::shared_ptr<TransformData> to, dai::Timestamp maxInterval);
+    std::vector<std::shared_ptr<TransformData>> getTransformTree();
+    void setCoordinateSystem(DaiCoordinateSystem system);
+
+   private:
+    std::vector<std::shared_ptr<TransformData>> transforms;
+    std::vector<std::shared_ptr<TransformData>> staticTransforms;
+    DaiCoordinateSystem coordSys;
+};
+}  // namespace transforms
+}  // namespace dai
diff --git a/include/depthai/pipeline/datatype/TransformData.hpp b/include/depthai/pipeline/datatype/TransformData.hpp
index b27d6edb69..6e73944295 100644
--- a/include/depthai/pipeline/datatype/TransformData.hpp
+++ b/include/depthai/pipeline/datatype/TransformData.hpp
@@ -22,10 +22,11 @@ class TransformData : public Buffer {
      * Construct TransformData message.
      */
     TransformData();
-    TransformData(const Transform& transform);
-    TransformData(const std::array<std::array<double, 4>, 4>& data);
-    TransformData(double x, double y, double z, double qx, double qy, double qz, double qw);
-    TransformData(double x, double y, double z, double roll, double pitch, double yaw);
+    TransformData(const Transform& transform, const std::string& frameId = "", const std::string& parentFrameID = "");
+    TransformData(const std::array<std::array<double, 4>, 4>& data, const std::string& frameId = "", const std::string& parentFrameID = "");
+    TransformData(
+        double x, double y, double z, double qx, double qy, double qz, double qw, const std::string& frameID = "", const std::string& parentFrameID = "");
+    TransformData(double x, double y, double z, double roll, double pitch, double yaw, const std::string& frameID = "", const std::string& parentFrameID = "");
 
 #ifdef DEPTHAI_HAVE_RTABMAP_SUPPORT
     TransformData(const rtabmap::Transform& transformRTABMap);
@@ -35,6 +36,8 @@ class TransformData : public Buffer {
 
     /// Transform
     Transform transform;
+    std::string frameID;
+    std::string parentFrameID;
 
     void serialize(std::vector<std::uint8_t>& metadata, DatatypeEnum& datatype) const override {
         metadata = utility::serialize(*this);
@@ -45,7 +48,7 @@ class TransformData : public Buffer {
     Point3d getRotationEuler() const;
     Quaterniond getQuaternion() const;
 
-    DEPTHAI_SERIALIZE(TransformData, Buffer::sequenceNum, Buffer::ts, Buffer::tsDevice, transform);
+    DEPTHAI_SERIALIZE(TransformData, Buffer::sequenceNum, Buffer::ts, Buffer::tsDevice, transform, frameID, parentFrameID);
 };
 
 }  // namespace dai
diff --git a/src/pipeline/datatype/TransformData.cpp b/src/pipeline/datatype/TransformData.cpp
index ce18770e93..2bcb213ea3 100644
--- a/src/pipeline/datatype/TransformData.cpp
+++ b/src/pipeline/datatype/TransformData.cpp
@@ -9,9 +9,13 @@
 namespace dai {
 
 TransformData::TransformData() {}
-TransformData::TransformData(const Transform& transform) : transform(transform) {}
-TransformData::TransformData(const std::array<std::array<double, 4>, 4>& data) : transform({data}) {}
-TransformData::TransformData(double x, double y, double z, double qx, double qy, double qz, double qw) {
+TransformData::TransformData(const Transform& transform, const std::string& frameID, const std::string& parentFrameID)
+    : transform(transform), frameID(frameID), parentFrameID(parentFrameID) {}
+TransformData::TransformData(const std::array<std::array<double, 4>, 4>& data, const std::string& frameID, const std::string& parentFrameID)
+    : transform({data}), frameID(frameID), parentFrameID(parentFrameID) {}
+TransformData::TransformData(
+    double x, double y, double z, double qx, double qy, double qz, double qw, const std::string& frameID, const std::string& parentFrameID)
+    : frameID(frameID), parentFrameID(parentFrameID) {
     // x,y,z,qx,qy,qz,qw to homography matrix
     double n = 1.0 / sqrt(qx * qx + qy * qy + qz * qz + qw * qw);
     qx *= n;
@@ -23,7 +27,8 @@ TransformData::TransformData(double x, double y, double z, double qx, double qy,
                           {2.0 * qx * qz - 2.0 * qy * qw, 2.0 * qy * qz + 2.0 * qx * qw, 1.0 - 2.0 * qx * qx - 2.0 * qy * qy, z},
                           {0.0, 0.0, 0.0, 1.0}}};
 }
-TransformData::TransformData(double x, double y, double z, double roll, double pitch, double yaw) {
+TransformData::TransformData(double x, double y, double z, double roll, double pitch, double yaw, const std::string& frameID, const std::string& parentFrameID)
+    : frameID(frameID), parentFrameID(parentFrameID) {
     // x,y,z,r,p,yw to homography matrix
     double cr = cos(roll), sr = sin(roll);
     double cp = cos(pitch), sp = sin(pitch);