-
Notifications
You must be signed in to change notification settings - Fork 19
CVS-160560 nanobindings for keypoint detection and segmentation #258
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 13 commits
18e2551
6e8dca8
9ec7c68
6cbfc27
6c78d65
745fdcf
d706107
5ce76c3
b54f769
00f8593
bafaa5f
ecef0fa
752647b
605ab88
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,86 @@ | ||
| /* | ||
| * Copyright (C) 2025 Intel Corporation | ||
| * SPDX-License-Identifier: Apache-2.0 | ||
| */ | ||
| #include <nanobind/ndarray.h> | ||
| #include <nanobind/operators.h> | ||
| #include <nanobind/stl/map.h> | ||
| #include <nanobind/stl/string.h> | ||
| #include <nanobind/stl/unique_ptr.h> | ||
| #include <nanobind/stl/vector.h> | ||
|
|
||
| #include "models/anomaly_model.h" | ||
| #include "models/results.h" | ||
| #include "py_utils.hpp" | ||
|
|
||
| namespace pyutils = vision::nanobind::utils; | ||
|
|
||
| void init_anomaly_detection(nb::module_& m) { | ||
| nb::class_<AnomalyModel, ImageModel>(m, "AnomalyDetection") | ||
| .def_static( | ||
| "create_model", | ||
| [](const std::string& model_path, | ||
| const std::map<std::string, nb::object>& configuration, | ||
| bool preload, | ||
| const std::string& device) { | ||
| auto ov_any_config = ov::AnyMap(); | ||
| for (const auto& item : configuration) { | ||
| ov_any_config[item.first] = pyutils::py_object_to_any(item.second, item.first); | ||
| } | ||
|
|
||
| return AnomalyModel::create_model(model_path, ov_any_config, preload, device); | ||
| }, | ||
| nb::arg("model_path"), | ||
| nb::arg("configuration") = ov::AnyMap({}), | ||
| nb::arg("preload") = true, | ||
| nb::arg("device") = "AUTO") | ||
|
|
||
| .def("__call__", | ||
| [](AnomalyModel& self, const nb::ndarray<>& input) { | ||
| return self.infer(pyutils::wrap_np_mat(input)); | ||
| }) | ||
| .def("infer_batch", | ||
| [](AnomalyModel& self, const std::vector<nb::ndarray<>> inputs) { | ||
| std::vector<ImageInputData> input_mats; | ||
| input_mats.reserve(inputs.size()); | ||
|
|
||
| for (const auto& input : inputs) { | ||
| input_mats.push_back(pyutils::wrap_np_mat(input)); | ||
| } | ||
|
|
||
| return self.inferBatch(input_mats); | ||
| }) | ||
| .def_prop_ro_static("__model__", [](nb::object) { | ||
| return AnomalyModel::ModelType; | ||
| }); | ||
|
|
||
| nb::class_<AnomalyResult, ResultBase>(m, "AnomalyResult") | ||
| .def(nb::init<int64_t, std::shared_ptr<MetaData>>(), nb::arg("frameId") = -1, nb::arg("metaData") = nullptr) | ||
| .def_prop_ro( | ||
| "anomaly_map", | ||
| [](AnomalyResult& r) { | ||
| return nb::ndarray<uint8_t, nb::numpy, nb::c_contig>(r.anomaly_map.data, | ||
| {static_cast<size_t>(r.anomaly_map.rows), | ||
| static_cast<size_t>(r.anomaly_map.cols), | ||
| static_cast<size_t>(r.anomaly_map.channels())}); | ||
| }, | ||
| nb::rv_policy::reference_internal) | ||
| .def_prop_ro( | ||
| "pred_boxes", | ||
| [](AnomalyResult& r) { | ||
| return nb::ndarray<int, nb::numpy, nb::c_contig>(r.pred_boxes.data(), | ||
| {static_cast<size_t>(r.pred_boxes.size()), 4}); | ||
| }, | ||
| nb::rv_policy::reference_internal) | ||
| .def_ro("pred_label", &AnomalyResult::pred_label) | ||
| .def_prop_ro( | ||
| "pred_mask", | ||
| [](AnomalyResult& r) { | ||
| return nb::ndarray<uint8_t, nb::numpy, nb::c_contig>(r.pred_mask.data, | ||
| {static_cast<size_t>(r.pred_mask.rows), | ||
| static_cast<size_t>(r.pred_mask.cols), | ||
| static_cast<size_t>(r.pred_mask.channels())}); | ||
| }, | ||
| nb::rv_policy::reference_internal) | ||
| .def_ro("pred_score", &AnomalyResult::pred_score); | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,72 @@ | ||
| /* | ||
| * Copyright (C) 2025 Intel Corporation | ||
| * SPDX-License-Identifier: Apache-2.0 | ||
| */ | ||
|
|
||
| #include <nanobind/ndarray.h> | ||
| #include <nanobind/operators.h> | ||
| #include <nanobind/stl/map.h> | ||
| #include <nanobind/stl/string.h> | ||
| #include <nanobind/stl/unique_ptr.h> | ||
| #include <nanobind/stl/vector.h> | ||
|
|
||
| #include "models/instance_segmentation.h" | ||
| #include "models/results.h" | ||
| #include "py_utils.hpp" | ||
|
|
||
| namespace pyutils = vision::nanobind::utils; | ||
|
|
||
| void init_instance_segmentation(nb::module_& m) { | ||
| nb::class_<MaskRCNNModel, ImageModel>(m, "MaskRCNNModel") | ||
| .def_static( | ||
| "create_model", | ||
| [](const std::string& model_path, | ||
| const std::map<std::string, nb::object>& configuration, | ||
| bool preload, | ||
| const std::string& device) { | ||
| auto ov_any_config = ov::AnyMap(); | ||
| for (const auto& item : configuration) { | ||
| ov_any_config[item.first] = pyutils::py_object_to_any(item.second, item.first); | ||
| } | ||
|
|
||
| return MaskRCNNModel::create_model(model_path, ov_any_config, preload, device); | ||
| }, | ||
| nb::arg("model_path"), | ||
| nb::arg("configuration") = ov::AnyMap({}), | ||
| nb::arg("preload") = true, | ||
| nb::arg("device") = "AUTO") | ||
|
|
||
| .def("__call__", | ||
| [](MaskRCNNModel& self, const nb::ndarray<>& input) { | ||
| return self.infer(pyutils::wrap_np_mat(input)); | ||
| }) | ||
| .def("infer_batch", | ||
| [](MaskRCNNModel& self, const std::vector<nb::ndarray<>> inputs) { | ||
| std::vector<ImageInputData> input_mats; | ||
| input_mats.reserve(inputs.size()); | ||
|
|
||
| for (const auto& input : inputs) { | ||
| input_mats.push_back(pyutils::wrap_np_mat(input)); | ||
| } | ||
|
|
||
| return self.inferBatch(input_mats); | ||
| }) | ||
| .def_prop_ro_static("__model__", [](nb::object) { | ||
| return MaskRCNNModel::ModelType; | ||
| }); | ||
|
|
||
| nb::class_<InstanceSegmentationResult, ResultBase>(m, "InstanceSegmentationResult") | ||
| .def(nb::init<int64_t, std::shared_ptr<MetaData>>(), nb::arg("frameId") = -1, nb::arg("metaData") = nullptr) | ||
| .def_prop_ro( | ||
| "feature_vector", | ||
| [](InstanceSegmentationResult& r) { | ||
| if (!r.feature_vector) { | ||
| return nb::ndarray<float, nb::numpy, nb::c_contig>(); | ||
| } | ||
|
|
||
| return nb::ndarray<float, nb::numpy, nb::c_contig>(r.feature_vector.data(), | ||
| r.feature_vector.get_shape().size(), | ||
| r.feature_vector.get_shape().data()); | ||
| }, | ||
| nb::rv_policy::reference_internal); | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,81 @@ | ||
| /* | ||
| * Copyright (C) 2025 Intel Corporation | ||
| * SPDX-License-Identifier: Apache-2.0 | ||
| */ | ||
| #include <nanobind/ndarray.h> | ||
| #include <nanobind/operators.h> | ||
| #include <nanobind/stl/map.h> | ||
| #include <nanobind/stl/string.h> | ||
| #include <nanobind/stl/unique_ptr.h> | ||
| #include <nanobind/stl/vector.h> | ||
|
|
||
| #include "models/keypoint_detection.h" | ||
| #include "models/results.h" | ||
| #include "py_utils.hpp" | ||
|
|
||
| namespace pyutils = vision::nanobind::utils; | ||
|
|
||
| void init_keypoint_detection(nb::module_& m) { | ||
| nb::class_<KeypointDetectionModel, ImageModel>(m, "KeypointDetectionModel") | ||
| .def_static( | ||
| "create_model", | ||
| [](const std::string& model_path, | ||
| const std::map<std::string, nb::object>& configuration, | ||
| bool preload, | ||
| const std::string& device) { | ||
| auto ov_any_config = ov::AnyMap(); | ||
| for (const auto& item : configuration) { | ||
| ov_any_config[item.first] = pyutils::py_object_to_any(item.second, item.first); | ||
| } | ||
|
|
||
| return KeypointDetectionModel::create_model(model_path, ov_any_config, preload, device); | ||
| }, | ||
| nb::arg("model_path"), | ||
| nb::arg("configuration") = ov::AnyMap({}), | ||
| nb::arg("preload") = true, | ||
| nb::arg("device") = "AUTO") | ||
|
|
||
| .def("__call__", | ||
| [](KeypointDetectionModel& self, const nb::ndarray<>& input) { | ||
| return self.infer(pyutils::wrap_np_mat(input)); | ||
| }) | ||
| .def("infer_batch", | ||
| [](KeypointDetectionModel& self, const std::vector<nb::ndarray<>> inputs) { | ||
| std::vector<ImageInputData> input_mats; | ||
| input_mats.reserve(inputs.size()); | ||
|
|
||
| for (const auto& input : inputs) { | ||
| input_mats.push_back(pyutils::wrap_np_mat(input)); | ||
| } | ||
|
|
||
| return self.inferBatch(input_mats); | ||
| }) | ||
| .def_prop_ro_static("__model__", [](nb::object) { | ||
| return KeypointDetectionModel::ModelType; | ||
| }); | ||
|
|
||
| nb::class_<KeypointDetectionResult, ResultBase>(m, "KeypointDetectionResult") | ||
| .def(nb::init<int64_t, std::shared_ptr<MetaData>>(), nb::arg("frameId") = -1, nb::arg("metaData") = nullptr) | ||
| .def_prop_ro( | ||
| "keypoints", | ||
| [](const KeypointDetectionResult& result) { | ||
| if (!result.poses.empty()) { | ||
| return nb::ndarray<float, nb::numpy, nb::c_contig>( | ||
| const_cast<void*>(static_cast<const void*>(result.poses[0].keypoints.data())), | ||
| {static_cast<size_t>(result.poses[0].keypoints.size()), 2}); | ||
| } | ||
| return nb::ndarray<float, nb::numpy, nb::c_contig>(); | ||
| }, | ||
| nb::rv_policy::reference_internal) | ||
| .def_prop_ro( | ||
| "scores", | ||
| [](const KeypointDetectionResult& result) { | ||
| if (!result.poses.empty()) { | ||
| return nb::ndarray<float, nb::numpy, nb::c_contig>( | ||
| const_cast<void*>(static_cast<const void*>(result.poses[0].scores.data())), | ||
| {static_cast<size_t>(result.poses[0].scores.size())}); | ||
| } | ||
| return nb::ndarray<float, nb::numpy, nb::c_contig>(); | ||
| }, | ||
| nb::rv_policy::reference_internal); | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,103 @@ | ||
| /* | ||
| * Copyright (C) 2025 Intel Corporation | ||
| * SPDX-License-Identifier: Apache-2.0 | ||
| */ | ||
|
|
||
| #include <nanobind/ndarray.h> | ||
| #include <nanobind/operators.h> | ||
| #include <nanobind/stl/map.h> | ||
| #include <nanobind/stl/string.h> | ||
| #include <nanobind/stl/unique_ptr.h> | ||
| #include <nanobind/stl/vector.h> | ||
|
|
||
| #include "models/results.h" | ||
| #include "models/segmentation_model.h" | ||
| #include "py_utils.hpp" | ||
|
|
||
| namespace pyutils = vision::nanobind::utils; | ||
|
|
||
| void init_segmentation(nb::module_& m) { | ||
| nb::class_<SegmentationModel, ImageModel>(m, "SegmentationModel") | ||
| .def_static( | ||
| "create_model", | ||
| [](const std::string& model_path, | ||
| const std::map<std::string, nb::object>& configuration, | ||
| bool preload, | ||
| const std::string& device) { | ||
| auto ov_any_config = ov::AnyMap(); | ||
| for (const auto& item : configuration) { | ||
| ov_any_config[item.first] = pyutils::py_object_to_any(item.second, item.first); | ||
| } | ||
|
|
||
| return SegmentationModel::create_model(model_path, ov_any_config, preload, device); | ||
| }, | ||
| nb::arg("model_path"), | ||
| nb::arg("configuration") = ov::AnyMap({}), | ||
| nb::arg("preload") = true, | ||
| nb::arg("device") = "AUTO") | ||
|
|
||
| .def("__call__", | ||
| [](SegmentationModel& self, const nb::ndarray<>& input) { | ||
| return self.infer(pyutils::wrap_np_mat(input)); | ||
| }) | ||
| .def("infer_batch", | ||
| [](SegmentationModel& self, const std::vector<nb::ndarray<>> inputs) { | ||
| std::vector<ImageInputData> input_mats; | ||
| input_mats.reserve(inputs.size()); | ||
|
|
||
| for (const auto& input : inputs) { | ||
| input_mats.push_back(pyutils::wrap_np_mat(input)); | ||
| } | ||
|
|
||
| return self.inferBatch(input_mats); | ||
| }) | ||
| .def_prop_ro_static("__model__", [](nb::object) { | ||
| return SegmentationModel::ModelType; | ||
| }); | ||
|
|
||
| nb::class_<ImageResult, ResultBase>(m, "ImageResult") | ||
| .def(nb::init<int64_t, std::shared_ptr<MetaData>>(), nb::arg("frameId") = -1, nb::arg("metaData") = nullptr) | ||
| .def_prop_ro( | ||
| "resultImage", | ||
| [](ImageResult& r) { | ||
| return nb::ndarray<uint8_t, nb::numpy, nb::c_contig>(r.resultImage.data, | ||
| {static_cast<size_t>(r.resultImage.rows), | ||
| static_cast<size_t>(r.resultImage.cols), | ||
| static_cast<size_t>(r.resultImage.channels())}); | ||
| }, | ||
| nb::rv_policy::reference_internal) | ||
| .def_prop_ro( | ||
| "feature_vector", | ||
| [](ResultBase& r) { | ||
| ImageResultWithSoftPrediction ir = r.asRef<ImageResultWithSoftPrediction>(); | ||
| if (!ir.feature_vector) { | ||
| return nb::ndarray<float, nb::numpy, nb::c_contig>(); | ||
| } | ||
|
|
||
| return nb::ndarray<float, nb::numpy, nb::c_contig>(ir.feature_vector.data(), | ||
| ir.feature_vector.get_shape().size(), | ||
| ir.feature_vector.get_shape().data()); | ||
| }, | ||
| nb::rv_policy::reference_internal) | ||
| .def_prop_ro( | ||
| "soft_prediction", | ||
| [](ResultBase& r) { | ||
| ImageResultWithSoftPrediction ir = r.asRef<ImageResultWithSoftPrediction>(); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Would that work if the result entity is actually an
|
||
| return nb::ndarray<float, nb::numpy, nb::c_contig>( | ||
| ir.soft_prediction.data, | ||
| {static_cast<size_t>(ir.soft_prediction.rows), | ||
| static_cast<size_t>(ir.soft_prediction.cols), | ||
| static_cast<size_t>(ir.soft_prediction.channels())}); | ||
| }, | ||
| nb::rv_policy::reference_internal) | ||
| .def_prop_ro( | ||
| "saliency_map", | ||
| [](ResultBase& r) { | ||
| ImageResultWithSoftPrediction ir = r.asRef<ImageResultWithSoftPrediction>(); | ||
| return nb::ndarray<float, nb::numpy, nb::c_contig>(ir.saliency_map.data, | ||
| {static_cast<size_t>(ir.saliency_map.rows), | ||
| static_cast<size_t>(ir.saliency_map.cols), | ||
| static_cast<size_t>(ir.saliency_map.channels())}); | ||
| }, | ||
| nb::rv_policy::reference_internal); | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The instance segmentation result also contains a saliency map as a vector of
`cv::Mat`. Perhaps it's not that easy to expose, but there is a chance a vector of `nb::ndarray<uint8_t, ...>` would work here.