Commit 2261aca: Implement suggestions
Parent: c04833a

13 files changed: +67 additions, −215 deletions

bindings/python/src/pipeline/CommonBindings.cpp

Lines changed: 15 additions & 11 deletions
@@ -100,16 +100,22 @@ void CommonBindings::bind(pybind11::module& m, void* pCallstack) {
     ///////////////////////////////////////////////////////////////////////

     keypoint.def(py::init<>())
-        .def(py::init<Point3f, float, uint32_t>(), py::arg("coordinates"), py::arg("confidence") = 0.f, py::arg("label") = 0)
-        .def(py::init<Point2f, float, uint32_t>(), py::arg("coordinates"), py::arg("confidence") = 0.f, py::arg("label") = 0)
-        .def(py::init<float, float, float, float, uint32_t>(), py::arg("x"), py::arg("y"), py::arg("z"), py::arg("confidence") = 0.f, py::arg("label") = 0)
-        .def_readwrite("imageCoordinates", &Keypoint::imageCoordinates)
-        .def_readwrite("confidence", &Keypoint::confidence)
-        .def_readwrite("label", &Keypoint::label);
+        .def(py::init<Point3f, float, uint32_t>(), py::arg("coordinates"), py::arg("confidence") = 0.f, py::arg("label") = 0, DOC(dai, Keypoint, Keypoint))
+        .def(py::init<Point2f, float, uint32_t>(), py::arg("coordinates"), py::arg("confidence") = 0.f, py::arg("label") = 0, DOC(dai, Keypoint, Keypoint))
+        .def(py::init<float, float, float, float, uint32_t>(),
+             py::arg("x"),
+             py::arg("y"),
+             py::arg("z"),
+             py::arg("confidence") = 0.f,
+             py::arg("label") = 0,
+             DOC(dai, Keypoint, Keypoint))
+        .def_readwrite("imageCoordinates", &Keypoint::imageCoordinates, DOC(dai, Keypoint, imageCoordinates))
+        .def_readwrite("confidence", &Keypoint::confidence, DOC(dai, Keypoint, confidence))
+        .def_readwrite("label", &Keypoint::label, DOC(dai, Keypoint, label));

     keypointsList.def(py::init<>())
-        .def(py::init<std::vector<Keypoint>, std::vector<Edge>>(), py::arg("keypoints"), py::arg("edges"))
-        .def(py::init<std::vector<Keypoint>>(), py::arg("keypoints"))
+        .def(py::init<std::vector<Keypoint>, std::vector<Edge>>(), py::arg("keypoints"), py::arg("edges"), DOC(dai, KeypointsListT, KeypointsListT))
+        .def(py::init<std::vector<Keypoint>>(), py::arg("keypoints"), DOC(dai, KeypointsListT, KeypointsListT))
         .def(
             "setKeypoints",
             [](KeypointsList& self, const std::vector<Keypoint>& kps) { self.Base::setKeypoints(kps); },

@@ -422,14 +428,12 @@ void CommonBindings::bind(pybind11::module& m, void* pCallstack) {
         .def_readwrite("anchors", &DetectionParserOptions::anchors)
         .def_readwrite("anchorMasks", &DetectionParserOptions::anchorMasks)
         .def_readwrite("iouThreshold", &DetectionParserOptions::iouThreshold)
-        .def_readwrite("inputWidth", &DetectionParserOptions::inputWidth)
-        .def_readwrite("inputHeight", &DetectionParserOptions::inputHeight)
         .def_readwrite("decodingFamily", &DetectionParserOptions::decodingFamily)
         .def_readwrite("keypointEdges", &DetectionParserOptions::keypointEdges)
         .def_readwrite("anchorsV2", &DetectionParserOptions::anchorsV2)
         .def_readwrite("decodeKeypoints", &DetectionParserOptions::decodeKeypoints)
         .def_readwrite("numKeypoints", &DetectionParserOptions::nKeypoints)
-        .def_readwrite("outputNames", &DetectionParserOptions::outputNames);
+        .def_readwrite("outputNames", &DetectionParserOptions::outputNamesToUse);

     cameraExposureOffset.value("START", CameraExposureOffset::START).value("MIDDLE", CameraExposureOffset::MIDDLE).value("END", CameraExposureOffset::END);
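
The rebound constructors mirror the C++ dai::Keypoint class one-to-one, so the same signatures are usable from C++. A minimal sketch, assuming the usual depthai umbrella header:

#include <depthai/depthai.hpp>  // assumed umbrella header

int main() {
    // 3D-coordinate constructor, matching py::init<Point3f, float, uint32_t>
    dai::Keypoint kp(dai::Point3f(0.4f, 0.2f, 1.5f), /*confidence=*/0.9f, /*label=*/3);
    // Fields stay read-write; this commit only attaches DOC() strings to them.
    kp.confidence = 0.95f;
    return 0;
}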

bindings/python/src/pipeline/datatype/ImgDetectionsBindings.cpp

Lines changed: 4 additions & 5 deletions
@@ -125,13 +125,12 @@ void bind_imgdetections(pybind11::module& m, void* pCallstack) {
         .def("setTransformation", [](ImgDetections& msg, const std::optional<ImgTransformation>& transformation) { msg.transformation = transformation; })
         .def("getSegmentationMaskWidth", &ImgDetections::getSegmentationMaskWidth, DOC(dai, ImgDetectionsT, getSegmentationMaskWidth))
         .def("getSegmentationMaskHeight", &ImgDetections::getSegmentationMaskHeight, DOC(dai, ImgDetectionsT, getSegmentationMaskHeight))
-        .def("setMask", &ImgDetections::setMask, py::arg("mask"), py::arg("width"), py::arg("height"), DOC(dai, ImgDetectionsT, setMask))
+        .def(
+            "setMask", &ImgDetections::setSegmentationMask, py::arg("mask"), py::arg("width"), py::arg("height"), DOC(dai, ImgDetectionsT, setSegmentationMask))
         .def("getMaskData", &ImgDetections::getMaskData, DOC(dai, ImgDetectionsT, getMaskData))
-        .def("getSegmentationMaskAsImgFrame", &ImgDetections::getSegmentationMaskAsImgFrame, DOC(dai, ImgDetectionsT, getSegmentationMaskAsImgFrame))
+        .def("getSegmentationMaskAsImgFrame", &ImgDetections::getSegmentationMask, DOC(dai, ImgDetectionsT, getSegmentationMask))
 #ifdef DEPTHAI_HAVE_OPENCV_SUPPORT
-        .def(
-            "getSegmentationMask", [](ImgDetections& self) { return self.getSegmentationMask(false); }, DOC(dai, ImgDetectionsT, getSegmentationMask))
-        .def("setSegmentationMask", &ImgDetections::setSegmentationMask, py::arg("mask"), DOC(dai, ImgDetectionsT, setSegmentationMask))
+        .def("setSegmentationMask", &ImgDetections::setCvSegmentationMask, py::arg("mask"), DOC(dai, ImgDetectionsT, setCvSegmentationMask))
         .def(
             "getCvSegmentationMask",
             [](ImgDetections& self) { return self.getCvSegmentationMask(&g_numpyAllocator); },

bindings/python/src/pipeline/node/DetectionParserBindings.cpp

Lines changed: 1 addition & 1 deletion
@@ -68,7 +68,7 @@ void bind_detectionparser(pybind11::module& m, void* pCallstack) {
         .def("setSubtype", &DetectionParser::setSubtype, py::arg("subtype"), DOC(dai, node, DetectionParser, setSubtype))
         .def("setDecodeKeypoints", &DetectionParser::setDecodeKeypoints, py::arg("decode"), DOC(dai, node, DetectionParser, setDecodeKeypoints))
         .def("setDecodeSegmentation", &DetectionParser::setDecodeSegmentation, py::arg("decode"), DOC(dai, node, DetectionParser, setDecodeSegmentation))
-        .def("setNKeypoints", &DetectionParser::setNKeypoints, py::arg("nKeypoints"), DOC(dai, node, DetectionParser, setNKeypoints))
+        .def("setNumKeypoints", &DetectionParser::setNumKeypoints, py::arg("numKeypoints"), DOC(dai, node, DetectionParser, setNumKeypoints))
         .def("setClasses", &DetectionParser::setClasses, py::arg("classes"), DOC(dai, node, DetectionParser, setClasses))
         .def("setStrides", &DetectionParser::setStrides, py::arg("strides"), DOC(dai, node, DetectionParser, setStrides))
         .def("setKeypointEdges", &DetectionParser::setKeypointEdges, py::arg("edges"), DOC(dai, node, DetectionParser, setKeypointEdges))

examples/cpp/DetectionNetwork/RVC4/detection_and_keypoints.cpp

Lines changed: 1 addition & 2 deletions
@@ -24,7 +24,6 @@ int main() {
     dai::NNModelDescription modelDescription;
     modelDescription.model = "luxonis/yolov8-large-pose-estimation:coco-640x352:1868e39";
     detectionNetwork->build(cameraNode, modelDescription);
-    auto labelMap = detectionNetwork->getClasses();

     // Create output queues
     auto qRgb = detectionNetwork->passthrough.createOutputQueue();

@@ -62,7 +61,7 @@ int main() {
         auto bbox = frameNorm(frame, dai::Point2f(detection.xmin, detection.ymin), dai::Point2f(detection.xmax, detection.ymax));

         // Draw label
-        cv::putText(frame, labelMap.value()[detection.label], cv::Point(bbox.x + 10, bbox.y + 20), cv::FONT_HERSHEY_TRIPLEX, 0.5, textColor);
+        cv::putText(frame, detection.labelName, cv::Point(bbox.x + 10, bbox.y + 20), cv::FONT_HERSHEY_TRIPLEX, 0.5, textColor);

         // Draw confidence
         cv::putText(frame,
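
The examples now read the class name straight off each detection instead of indexing a label map fetched via getClasses(). The pattern in isolation, as a sketch with a hypothetical helper:

#include <iostream>

#include <depthai/depthai.hpp>  // assumed umbrella header

// Hypothetical helper: print each detection using its own labelName.
void printDetections(const dai::ImgDetections& dets) {
    for(const auto& d : dets.detections) {
        // labelName travels with the message, so there is no
        // std::optional label map to fetch and unwrap.
        std::cout << d.labelName << " (" << d.confidence << ")\n";
    }
}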

examples/cpp/DetectionNetwork/RVC4/detection_and_segmentation.cpp

Lines changed: 18 additions & 16 deletions
@@ -4,6 +4,7 @@
 #include <cstddef>
 #include <cstdio>
 #include <iostream>
+#include <map>
 #include <opencv2/core.hpp>
 #include <opencv2/opencv.hpp>

@@ -29,7 +30,6 @@ int main() {
     dai::NNModelDescription modelDescription;
     modelDescription.model = "luxonis/yolov8-instance-segmentation-large:coco-640x480";
     detectionNetwork->build(cameraNode, modelDescription);
-    auto labelMap = detectionNetwork->getClasses();

     // Create output queues
     auto qRgb = detectionNetwork->passthrough.createOutputQueue();

@@ -65,24 +65,30 @@ int main() {
         if(inDet != nullptr) {
             counter++;

-            // Get all labels as sorted list
             auto labels = std::set<int>();
+            std::map<int, std::string> labelNameByIndex;
             for(const auto& detection : inDet->detections) {
                 labels.insert(detection.label);
+                labelNameByIndex.emplace(detection.label, detection.labelName);
+            }
+
+            std::vector<std::string> labelNames;
+            labelNames.reserve(labelNameByIndex.size());
+            for(const auto& label : labels) {
+                const auto it = labelNameByIndex.find(label);
+                if(it != labelNameByIndex.end()) {
+                    labelNames.push_back(it->second);
+                }
             }
             std::list<int> labelsList(labels.begin(), labels.end());
             labelsList.sort();
             std::vector<int> labelsVector(labelsList.begin(), labelsList.end());

-            std::vector<std::string> labelMaps;
-            for(const auto& label : labelsList) {
-                labelMaps.push_back(labelMap->at(label));
-            };
             cv::putText(sidePanel, "Press index to filter by class:", cv::Point(10, 20), cv::FONT_HERSHEY_TRIPLEX, 0.7, cv::Scalar(0, 0, 0), 1);

-            for(size_t i = 0; i < labelMaps.size(); i++) {
+            for(size_t i = 0; i < labelNames.size(); i++) {
                 cv::putText(sidePanel,
-                            std::to_string(i) + " - " + labelMaps[i],
+                            std::to_string(i + 1) + " - " + labelNames[i],
                             cv::Point(10, 40 + static_cast<int>(i) * 20),
                             cv::FONT_HERSHEY_TRIPLEX,
                             0.7,

@@ -98,7 +104,7 @@ int main() {
         } else if(key >= '1' && key <= '9') {
             int index = key - '1';
             if(index < static_cast<int>(labelsList.size())) {
-                std::printf("Filtering by label: %s\n", labelMaps[index].c_str());
+                std::printf("Filtering by label: %s\n", labelNames[index].c_str());
                 filteredLabel = labelsVector[index];
             }
         }

@@ -111,7 +117,7 @@ int main() {
         std::optional<cv::Mat> segmentationMask;

         if(filteredLabel == -1) {
-            segmentationMask = inDet->getSegmentationMask();
+            segmentationMask = inDet->getCvSegmentationMask();
         } else {
             segmentationMask = inDet->getCvSegmentationMaskByClass(filteredLabel);
             detections.erase(

@@ -137,7 +143,7 @@ int main() {
         auto bbox = frameNorm(frame, dai::Point2f(detection.xmin, detection.ymin), dai::Point2f(detection.xmax, detection.ymax));

         // Draw label
-        cv::putText(frame, labelMap.value()[detection.label], cv::Point(bbox.x + 10, bbox.y + 20), cv::FONT_HERSHEY_TRIPLEX, 0.7, textColor);
+        cv::putText(frame, detection.labelName, cv::Point(bbox.x + 10, bbox.y + 20), cv::FONT_HERSHEY_TRIPLEX, 0.7, textColor);

         // Draw confidence
         cv::putText(frame,

@@ -155,13 +161,9 @@ int main() {
             // cv::imshow("side panel", sidePanel);
             // Show the frame
             cv::imshow("rgb", frame);
-
-            auto currentTime = std::chrono::steady_clock::now();
-            float fps = counter / std::chrono::duration<float>(currentTime - startTime).count();
-            std::cout << "FPS: " << fps << std::endl;
            }
        }
    }

    return 0;
-}
+}
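
Side note on the new label collection: since std::map already keeps keys sorted and de-duplicated, the set/list/sort combination above could be collapsed further. A self-contained sketch of the idea (the Detection struct is a stand-in, not the dai type):

#include <map>
#include <string>
#include <utility>
#include <vector>

// Stand-in for dai::ImgDetection; only the two fields used here.
struct Detection {
    int label;
    std::string labelName;
};

// One pass over detections yields (label index, label name) pairs in
// ascending label order -- the same effect as the set + map + sort above.
std::vector<std::pair<int, std::string>> collectLabels(const std::vector<Detection>& detections) {
    std::map<int, std::string> byIndex;
    for(const auto& d : detections) {
        byIndex.emplace(d.label, d.labelName);  // first name seen wins
    }
    return {byIndex.begin(), byIndex.end()};
}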

examples/python/DetectionNetwork/RVC4/detection_and_keypoints.py

Lines changed: 1 addition & 1 deletion
@@ -37,7 +37,7 @@ def displayFrame(name, frame):
     )
     cv2.putText(
         frame,
-        labelMap[detection.label],
+        detection.labelName,
         (bbox[0] + 10, bbox[1] + 20),
         cv2.FONT_HERSHEY_TRIPLEX,
         0.5,

examples/python/DetectionNetwork/RVC4/detection_and_segmentation.py

Lines changed: 2 additions & 2 deletions
@@ -10,7 +10,7 @@
 cameraNode = pipeline.create(dai.node.Camera).build()
 detectionNetwork = pipeline.create(dai.node.DetectionNetwork).build(cameraNode, dai.NNModelDescription("luxonis/yolov8-instance-segmentation-large:coco-640x480"))
 labelMap = detectionNetwork.getClasses()
-
+assert labelMap is not None
 qRgb = detectionNetwork.passthrough.createOutputQueue()
 qDet = detectionNetwork.out.createOutputQueue()

@@ -37,7 +37,7 @@ def displayFrame(frame):
     )
     cv2.putText(
         frame,
-        labelMap[detection.label],
+        detection.labelName,
         (bbox[0] + 10, bbox[1] + 20),
         cv2.FONT_HERSHEY_TRIPLEX,
         0.7,

include/depthai/common/DetectionParserOptions.hpp

Lines changed: 2 additions & 6 deletions
@@ -20,8 +20,6 @@ struct DetectionParserOptions {
     DetectionNetworkType nnFamily;
     std::string subtype;
     float confidenceThreshold;
-    int inputWidth;
-    int inputHeight;

     /// YOLO specific network properties
     YoloDecodingFamily decodingFamily = YoloDecodingFamily::TLBR;  // top left bottom right anchor free

@@ -35,7 +33,7 @@ struct DetectionParserOptions {
     std::vector<int> strides = {8, 16, 32};
     std::vector<float> anchors;
     std::map<std::string, std::vector<int>> anchorMasks;
-    std::vector<std::string> outputNames;
+    std::vector<std::string> outputNamesToUse;
     /// see YoloDetectionNetwork::setAnchors() for format
     std::vector<std::vector<std::vector<float>>> anchorsV2;
     float iouThreshold;

@@ -46,8 +44,6 @@ DEPTHAI_SERIALIZE_EXT(DetectionParserOptions,
                       nnFamily,
                       subtype,
                       confidenceThreshold,
-                      inputWidth,
-                      inputHeight,
                       decodingFamily,
                       decodeKeypoints,
                       decodeSegmentation,

@@ -58,7 +54,7 @@ DEPTHAI_SERIALIZE_EXT(DetectionParserOptions,
                       strides,
                       anchors,
                       anchorMasks,
-                      outputNames,
+                      outputNamesToUse,
                       anchorsV2,
                       iouThreshold,
                       keypointEdges);
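
Note the asymmetry this leaves: C++ code uses the renamed member directly, while the property in CommonBindings.cpp above is still exposed to Python as "outputNames". A minimal sketch (the output-name strings are made up for illustration):

#include <depthai/common/DetectionParserOptions.hpp>

int main() {
    dai::DetectionParserOptions opts{};
    // Renamed member; Python still sees it as the "outputNames" property.
    opts.outputNamesToUse = {"output1_yolov8", "output2_yolov8"};  // hypothetical names
    return 0;
}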

include/depthai/pipeline/datatype/ImgDetectionsT.hpp

Lines changed: 3 additions & 79 deletions
@@ -8,15 +8,6 @@
 #include "depthai/pipeline/datatype/Buffer.hpp"
 #include "depthai/pipeline/datatype/ImgFrame.hpp"

-#ifdef DEPTHAI_XTENSOR_SUPPORT
-    #include <xtensor/containers/xadapt.hpp>
-    #include <xtensor/containers/xbuffer_adaptor.hpp>
-    #include <xtensor/core/xlayout.hpp>
-    #include <xtensor/core/xmath.hpp>
-    #include <xtensor/core/xtensor_forward.hpp>
-
-#endif
-
 #ifdef DEPTHAI_HAVE_OPENCV_SUPPORT
 #include <opencv2/core/mat.hpp>
 #include <opencv2/opencv.hpp>

@@ -39,42 +30,6 @@ class ImgDetectionsT : public Buffer {
     ImgDetectionsT() = default;
     virtual ~ImgDetectionsT() = default;

-    // Iterator support
-    using value_type = DetectionT;
-    using iterator = typename std::vector<DetectionT>::iterator;
-    using const_iterator = typename std::vector<DetectionT>::const_iterator;
-
-    iterator begin() noexcept {
-        return detections.begin();
-    }
-    iterator end() noexcept {
-        return detections.end();
-    }
-    const_iterator begin() const noexcept {
-        return detections.begin();
-    }
-    const_iterator end() const noexcept {
-        return detections.end();
-    }
-    const_iterator cbegin() const noexcept {
-        return detections.cbegin();
-    }
-    const_iterator cend() const noexcept {
-        return detections.cend();
-    }
-    bool empty() const noexcept {
-        return detections.empty();
-    }
-    size_t size() const noexcept {
-        return detections.size();
-    }
-    value_type& operator[](size_t i) {
-        return detections[i];
-    }
-    const value_type& operator[](size_t i) const {
-        return detections[i];
-    }
-
     /*
      * Common API
      */

@@ -93,38 +48,14 @@ class ImgDetectionsT : public Buffer {
      * Sets the segmentation mask from a vector of bytes, along with width and height.
      * The size of the vector must be equal to width * height.
      */
-    void setMask(const std::vector<std::uint8_t>& mask, size_t width, size_t height);
+    void setSegmentationMask(const std::vector<std::uint8_t>& mask, size_t width, size_t height);

     /*
      * Returns a copy of the segmentation mask data as a vector of bytes. If mask data is not set, returns std::nullopt.
      */
     std::optional<std::vector<std::uint8_t>> getMaskData() const;

-    std::optional<dai::ImgFrame> getSegmentationMaskAsImgFrame() const;
-
-    // Optional - xtensor support
-#ifdef DEPTHAI_XTENSOR_SUPPORT
-    /**
-     * @note This API only available if xtensor support is enabled
-     */
-    using XArray2D = xt::xtensor<std::uint8_t, 2, xt::layout_type::row_major>;
-
-    /**
-     * Returns a copy of the segmentation mask data as a 2D array. If mask data is not set, returns std::nullopt.
-     */
-    std::optional<XArray2D> getTensorSegmentationMask() const;
-
-    /**
-     * Sets the segmentation mask from a 2D xtensor array.
-     */
-    ImgDetectionsT& setTensorSegmentationMask(XArray2D mask);
-
-    /*
-     * Returns a binary mask where pixels belonging to the instance index are set to 1, others to 0. If mask data is not set, returns std::nullopt.
-     */
-    std::optional<XArray2D> getTensorSegmentationMaskByIndex(uint8_t index) const;
-
-#endif
+    std::optional<dai::ImgFrame> getSegmentationMask() const;

     // Optional - OpenCV support
 #ifdef DEPTHAI_HAVE_OPENCV_SUPPORT

@@ -137,14 +68,7 @@ class ImgDetectionsT : public Buffer {
      *
      * @param frame Input cv::Mat frame from which to copy the data
      */
-    ImgDetectionsT& setSegmentationMask(cv::Mat mask);
-
-    /**
-     * Retrieves data as cv::Mat with specified width, height and type. If mask data is not set, returns std::nullopt.
-     *
-     * @param copy If false only a reference to data is made, otherwise a copy
-     */
-    std::optional<cv::Mat> getSegmentationMask(bool copy = false);
+    void setCvSegmentationMask(cv::Mat mask);

     /**
      * Retrieves data as cv::Mat with specified width and height. If mask data is not set, returns std::nullopt.
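
Taken together, the mask API after this commit is setSegmentationMask(bytes, width, height), getMaskData(), and getSegmentationMask() returning an ImgFrame, plus the cv-prefixed pair under OpenCV support; the xtensor variants and the iterator/indexing sugar are gone. A hedged usage sketch, assuming ImgDetections is the concrete alias of ImgDetectionsT:

#include <cstdint>
#include <optional>
#include <vector>

#include <depthai/depthai.hpp>  // assumed umbrella header

int main() {
    dai::ImgDetections dets;

    // Byte-buffer path (always available): one byte per pixel, row-major.
    const int width = 4, height = 2;
    std::vector<std::uint8_t> mask(static_cast<size_t>(width * height), 0);
    dets.setSegmentationMask(mask, width, height);  // was setMask()

    // Was getSegmentationMaskAsImgFrame(); std::nullopt when no mask is set.
    std::optional<dai::ImgFrame> frame = dets.getSegmentationMask();

#ifdef DEPTHAI_HAVE_OPENCV_SUPPORT
    // OpenCV path: was the setSegmentationMask(cv::Mat) overload.
    cv::Mat cvMask(height, width, CV_8UC1, cv::Scalar(0));
    dets.setCvSegmentationMask(cvMask);
#endif
    return 0;
}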

include/depthai/pipeline/node/DetectionParser.hpp

Lines changed: 1 addition & 1 deletion
@@ -194,7 +194,7 @@ class DetectionParser : public DeviceNodeCRTP<DeviceNode, DetectionParser, Detec
     /**
      * Set number of keypoints to decode. Automatically enables keypoints decoding.
      */
-    void setNKeypoints(int nKeypoints);
+    void setNumKeypoints(int numKeypoints);

     /**
      * Set strides for yolo models