86 changes: 86 additions & 0 deletions src/cpp/py_bindings/py_anomaly.cpp
@@ -0,0 +1,86 @@
/*
* Copyright (C) 2025 Intel Corporation
* SPDX-License-Identifier: Apache-2.0
*/
#include <nanobind/ndarray.h>
#include <nanobind/operators.h>
#include <nanobind/stl/map.h>
#include <nanobind/stl/string.h>
#include <nanobind/stl/unique_ptr.h>
#include <nanobind/stl/vector.h>

#include "models/anomaly_model.h"
#include "models/results.h"
#include "py_utils.hpp"

namespace pyutils = vision::nanobind::utils;

void init_anomaly_detection(nb::module_& m) {
nb::class_<AnomalyModel, ImageModel>(m, "AnomalyDetection")
.def_static(
"create_model",
[](const std::string& model_path,
const std::map<std::string, nb::object>& configuration,
bool preload,
const std::string& device) {
auto ov_any_config = ov::AnyMap();
for (const auto& item : configuration) {
ov_any_config[item.first] = pyutils::py_object_to_any(item.second, item.first);
}

return AnomalyModel::create_model(model_path, ov_any_config, preload, device);
},
nb::arg("model_path"),
nb::arg("configuration") = ov::AnyMap({}),
nb::arg("preload") = true,
nb::arg("device") = "AUTO")

.def("__call__",
[](AnomalyModel& self, const nb::ndarray<>& input) {
return self.infer(pyutils::wrap_np_mat(input));
})
.def("infer_batch",
[](AnomalyModel& self, const std::vector<nb::ndarray<>> inputs) {
std::vector<ImageInputData> input_mats;
input_mats.reserve(inputs.size());

for (const auto& input : inputs) {
input_mats.push_back(pyutils::wrap_np_mat(input));
}

return self.inferBatch(input_mats);
})
.def_prop_ro_static("__model__", [](nb::object) {
return AnomalyModel::ModelType;
});

nb::class_<AnomalyResult, ResultBase>(m, "AnomalyResult")
.def(nb::init<int64_t, std::shared_ptr<MetaData>>(), nb::arg("frameId") = -1, nb::arg("metaData") = nullptr)
.def_prop_ro(
"anomaly_map",
[](AnomalyResult& r) {
return nb::ndarray<uint8_t, nb::numpy, nb::c_contig>(r.anomaly_map.data,
{static_cast<size_t>(r.anomaly_map.rows),
static_cast<size_t>(r.anomaly_map.cols),
static_cast<size_t>(r.anomaly_map.channels())});
},
nb::rv_policy::reference_internal)
.def_prop_ro(
"pred_boxes",
[](AnomalyResult& r) {
return nb::ndarray<int, nb::numpy, nb::c_contig>(r.pred_boxes.data(),
{static_cast<size_t>(r.pred_boxes.size()), 4});
},
nb::rv_policy::reference_internal)
.def_ro("pred_label", &AnomalyResult::pred_label)
.def_prop_ro(
"pred_mask",
[](AnomalyResult& r) {
return nb::ndarray<uint8_t, nb::numpy, nb::c_contig>(r.pred_mask.data,
{static_cast<size_t>(r.pred_mask.rows),
static_cast<size_t>(r.pred_mask.cols),
static_cast<size_t>(r.pred_mask.channels())});
},
nb::rv_policy::reference_internal)
.def_ro("pred_score", &AnomalyResult::pred_score);
}
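
For context on what these definitions expose, here is a minimal Python usage sketch. The import name `vision` and the model path `anomaly.xml` are placeholders, not confirmed by this PR; the class, argument, and property names come from the bindings above.

```python
import numpy as np
import vision  # placeholder import name for the compiled nanobind extension

# create_model mirrors the bound static factory; configuration entries are
# converted to ov::Any values via py_object_to_any.
model = vision.AnomalyDetection.create_model(
    "anomaly.xml",        # placeholder model path
    configuration={},     # optional settings forwarded as an ov::AnyMap
    preload=True,
    device="AUTO",
)

image = np.zeros((256, 256, 3), dtype=np.uint8)  # HWC uint8 frame
result = model(image)                     # __call__ -> infer()
results = model.infer_batch([image, image])

print(result.pred_label, result.pred_score)
print(result.anomaly_map.shape)  # (rows, cols, channels), uint8
print(result.pred_boxes.shape)   # (num_boxes, 4), int
```

Because the array properties use nb::rv_policy::reference_internal, the returned NumPy arrays are views that keep the parent result object alive, not copies.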
src/cpp/py_bindings/py_classification.cpp
@@ -20,7 +20,7 @@ void init_classification(nb::module_& m) {
     nb::class_<ClassificationResult::Classification>(m, "Classification")
         .def(nb::init<unsigned int, const std::string, float>())
         .def_rw("id", &ClassificationResult::Classification::id)
-        .def_rw("label", &ClassificationResult::Classification::label)
+        .def_rw("name", &ClassificationResult::Classification::label)
         .def_rw("score", &ClassificationResult::Classification::score);
 
     nb::class_<ClassificationResult, ResultBase>(m, "ClassificationResult")
@@ -39,6 +39,18 @@ void init_classification(nb::module_& m) {
                                                                    r.feature_vector.get_shape().data());
             },
             nb::rv_policy::reference_internal)
+        .def_prop_ro(
+            "raw_scores",
+            [](ClassificationResult& r) {
+                if (!r.raw_scores) {
+                    return nb::ndarray<float, nb::numpy, nb::c_contig>();
+                }
+
+                return nb::ndarray<float, nb::numpy, nb::c_contig>(r.raw_scores.data(),
+                                                                   r.raw_scores.get_shape().size(),
+                                                                   r.raw_scores.get_shape().data());
+            },
+            nb::rv_policy::reference_internal)
         .def_prop_ro(
             "saliency_map",
             [](ClassificationResult& r) {
@@ -75,14 +87,18 @@ void init_classification(nb::module_& m) {
              [](ClassificationModel& self, const nb::ndarray<>& input) {
                  return self.infer(pyutils::wrap_np_mat(input));
              })
-        .def("infer_batch", [](ClassificationModel& self, const std::vector<nb::ndarray<>> inputs) {
-            std::vector<ImageInputData> input_mats;
-            input_mats.reserve(inputs.size());
+        .def("infer_batch",
+             [](ClassificationModel& self, const std::vector<nb::ndarray<>> inputs) {
+                 std::vector<ImageInputData> input_mats;
+                 input_mats.reserve(inputs.size());
 
-            for (const auto& input : inputs) {
-                input_mats.push_back(pyutils::wrap_np_mat(input));
-            }
+                 for (const auto& input : inputs) {
+                     input_mats.push_back(pyutils::wrap_np_mat(input));
+                 }
 
-            return self.inferBatch(input_mats);
-        })
+                 return self.inferBatch(input_mats);
+             })
         .def_prop_ro_static("__model__", [](nb::object) {
             return ClassificationModel::ModelType;
         });
 }
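
A short sketch of the user-visible effect of this diff: the Classification field previously exposed as `label` is now `name`, and ClassificationResult gains a `raw_scores` view. The module name and model path below are placeholders.

```python
import numpy as np
import vision  # placeholder import name

# The rename only affects the Python attribute; it still maps to the
# C++ `label` member.
c = vision.Classification(1, "cat", 0.98)
print(c.id, c.name, c.score)

model = vision.ClassificationModel.create_model("classifier.xml")  # placeholder path
result = model(np.zeros((224, 224, 3), dtype=np.uint8))

scores = result.raw_scores  # empty float array when the model emits no raw scores
if scores.size:
    print(scores.shape, scores.dtype)
```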
72 changes: 72 additions & 0 deletions src/cpp/py_bindings/py_instance_segmentation.cpp
@@ -0,0 +1,72 @@
/*
* Copyright (C) 2025 Intel Corporation
* SPDX-License-Identifier: Apache-2.0
*/

#include <nanobind/ndarray.h>
#include <nanobind/operators.h>
#include <nanobind/stl/map.h>
#include <nanobind/stl/string.h>
#include <nanobind/stl/unique_ptr.h>
#include <nanobind/stl/vector.h>

#include "models/instance_segmentation.h"
#include "models/results.h"
#include "py_utils.hpp"

namespace pyutils = vision::nanobind::utils;

void init_instance_segmentation(nb::module_& m) {
nb::class_<MaskRCNNModel, ImageModel>(m, "MaskRCNNModel")
.def_static(
"create_model",
[](const std::string& model_path,
const std::map<std::string, nb::object>& configuration,
bool preload,
const std::string& device) {
auto ov_any_config = ov::AnyMap();
for (const auto& item : configuration) {
ov_any_config[item.first] = pyutils::py_object_to_any(item.second, item.first);
}

return MaskRCNNModel::create_model(model_path, ov_any_config, preload, device);
},
nb::arg("model_path"),
nb::arg("configuration") = ov::AnyMap({}),
nb::arg("preload") = true,
nb::arg("device") = "AUTO")

.def("__call__",
[](MaskRCNNModel& self, const nb::ndarray<>& input) {
return self.infer(pyutils::wrap_np_mat(input));
})
.def("infer_batch",
[](MaskRCNNModel& self, const std::vector<nb::ndarray<>> inputs) {
std::vector<ImageInputData> input_mats;
input_mats.reserve(inputs.size());

for (const auto& input : inputs) {
input_mats.push_back(pyutils::wrap_np_mat(input));
}

return self.inferBatch(input_mats);
})
.def_prop_ro_static("__model__", [](nb::object) {
return MaskRCNNModel::ModelType;
});

nb::class_<InstanceSegmentationResult, ResultBase>(m, "InstanceSegmentationResult")
.def(nb::init<int64_t, std::shared_ptr<MetaData>>(), nb::arg("frameId") = -1, nb::arg("metaData") = nullptr)
.def_prop_ro(
"feature_vector",
[Review comment, Member] Iseg result also contains a saliency map as a vector of cv::Mat. Perhaps it's not that easy to expose, but there is a chance a vector of nb::ndarray<uint8_t, ...> would work here.
[](InstanceSegmentationResult& r) {
if (!r.feature_vector) {
return nb::ndarray<float, nb::numpy, nb::c_contig>();
}

return nb::ndarray<float, nb::numpy, nb::c_contig>(r.feature_vector.data(),
r.feature_vector.get_shape().size(),
r.feature_vector.get_shape().data());
},
nb::rv_policy::reference_internal);
}
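
A hedged usage sketch for the MaskRCNNModel binding; `vision` and `maskrcnn.xml` are placeholder names. As the review comment above notes, the saliency map (a vector of cv::Mat) is not yet exposed, so only `feature_vector` is shown.

```python
import numpy as np
import vision  # placeholder import name

model = vision.MaskRCNNModel.create_model("maskrcnn.xml", device="AUTO")  # placeholder path
result = model(np.zeros((480, 640, 3), dtype=np.uint8))  # InstanceSegmentationResult

fv = result.feature_vector  # empty float array when no feature vector was produced
if fv.size:
    print(fv.shape)
```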
81 changes: 81 additions & 0 deletions src/cpp/py_bindings/py_keypoint_detection.cpp
@@ -0,0 +1,81 @@
/*
* Copyright (C) 2025 Intel Corporation
* SPDX-License-Identifier: Apache-2.0
*/
#include <nanobind/ndarray.h>
#include <nanobind/operators.h>
#include <nanobind/stl/map.h>
#include <nanobind/stl/string.h>
#include <nanobind/stl/unique_ptr.h>
#include <nanobind/stl/vector.h>

#include "models/keypoint_detection.h"
#include "models/results.h"
#include "py_utils.hpp"

namespace pyutils = vision::nanobind::utils;

void init_keypoint_detection(nb::module_& m) {
nb::class_<KeypointDetectionModel, ImageModel>(m, "KeypointDetectionModel")
.def_static(
"create_model",
[](const std::string& model_path,
const std::map<std::string, nb::object>& configuration,
bool preload,
const std::string& device) {
auto ov_any_config = ov::AnyMap();
for (const auto& item : configuration) {
ov_any_config[item.first] = pyutils::py_object_to_any(item.second, item.first);
}

return KeypointDetectionModel::create_model(model_path, ov_any_config, preload, device);
},
nb::arg("model_path"),
nb::arg("configuration") = ov::AnyMap({}),
nb::arg("preload") = true,
nb::arg("device") = "AUTO")

.def("__call__",
[](KeypointDetectionModel& self, const nb::ndarray<>& input) {
return self.infer(pyutils::wrap_np_mat(input));
})
.def("infer_batch",
[](KeypointDetectionModel& self, const std::vector<nb::ndarray<>> inputs) {
std::vector<ImageInputData> input_mats;
input_mats.reserve(inputs.size());

for (const auto& input : inputs) {
input_mats.push_back(pyutils::wrap_np_mat(input));
}

return self.inferBatch(input_mats);
})
.def_prop_ro_static("__model__", [](nb::object) {
return KeypointDetectionModel::ModelType;
});

nb::class_<KeypointDetectionResult, ResultBase>(m, "KeypointDetectionResult")
.def(nb::init<int64_t, std::shared_ptr<MetaData>>(), nb::arg("frameId") = -1, nb::arg("metaData") = nullptr)
.def_prop_ro(
"keypoints",
[](const KeypointDetectionResult& result) {
if (!result.poses.empty()) {
return nb::ndarray<float, nb::numpy, nb::c_contig>(
const_cast<void*>(static_cast<const void*>(result.poses[0].keypoints.data())),
{static_cast<size_t>(result.poses[0].keypoints.size()), 2});
}
return nb::ndarray<float, nb::numpy, nb::c_contig>();
},
nb::rv_policy::reference_internal)
.def_prop_ro(
"scores",
[](const KeypointDetectionResult& result) {
if (!result.poses.empty()) {
return nb::ndarray<float, nb::numpy, nb::c_contig>(
const_cast<void*>(static_cast<const void*>(result.poses[0].scores.data())),
{static_cast<size_t>(result.poses[0].scores.size())});
}
return nb::ndarray<float, nb::numpy, nb::c_contig>();
},
nb::rv_policy::reference_internal);
}
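
Worth noting: these bindings surface only `poses[0]`, i.e. the first detected pose. A minimal sketch, with placeholder module and path names:

```python
import numpy as np
import vision  # placeholder import name

model = vision.KeypointDetectionModel.create_model("pose.xml")  # placeholder path
result = model(np.zeros((256, 192, 3), dtype=np.uint8))

# For the first pose, keypoints has shape (K, 2) and scores has shape (K,);
# both come back empty when no pose was detected.
for (x, y), score in zip(result.keypoints, result.scores):
    print(x, y, score)
```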
103 changes: 103 additions & 0 deletions src/cpp/py_bindings/py_segmentation.cpp
@@ -0,0 +1,103 @@
/*
* Copyright (C) 2025 Intel Corporation
* SPDX-License-Identifier: Apache-2.0
*/

#include <nanobind/ndarray.h>
#include <nanobind/operators.h>
#include <nanobind/stl/map.h>
#include <nanobind/stl/string.h>
#include <nanobind/stl/unique_ptr.h>
#include <nanobind/stl/vector.h>

#include "models/results.h"
#include "models/segmentation_model.h"
#include "py_utils.hpp"

namespace pyutils = vision::nanobind::utils;

void init_segmentation(nb::module_& m) {
nb::class_<SegmentationModel, ImageModel>(m, "SegmentationModel")
.def_static(
"create_model",
[](const std::string& model_path,
const std::map<std::string, nb::object>& configuration,
bool preload,
const std::string& device) {
auto ov_any_config = ov::AnyMap();
for (const auto& item : configuration) {
ov_any_config[item.first] = pyutils::py_object_to_any(item.second, item.first);
}

return SegmentationModel::create_model(model_path, ov_any_config, preload, device);
},
nb::arg("model_path"),
nb::arg("configuration") = ov::AnyMap({}),
nb::arg("preload") = true,
nb::arg("device") = "AUTO")

.def("__call__",
[](SegmentationModel& self, const nb::ndarray<>& input) {
return self.infer(pyutils::wrap_np_mat(input));
})
.def("infer_batch",
[](SegmentationModel& self, const std::vector<nb::ndarray<>> inputs) {
std::vector<ImageInputData> input_mats;
input_mats.reserve(inputs.size());

for (const auto& input : inputs) {
input_mats.push_back(pyutils::wrap_np_mat(input));
}

return self.inferBatch(input_mats);
})
.def_prop_ro_static("__model__", [](nb::object) {
return SegmentationModel::ModelType;
});

nb::class_<ImageResult, ResultBase>(m, "ImageResult")
.def(nb::init<int64_t, std::shared_ptr<MetaData>>(), nb::arg("frameId") = -1, nb::arg("metaData") = nullptr)
.def_prop_ro(
"resultImage",
[](ImageResult& r) {
return nb::ndarray<uint8_t, nb::numpy, nb::c_contig>(r.resultImage.data,
{static_cast<size_t>(r.resultImage.rows),
static_cast<size_t>(r.resultImage.cols),
static_cast<size_t>(r.resultImage.channels())});
},
nb::rv_policy::reference_internal)
.def_prop_ro(
"feature_vector",
[](ResultBase& r) {
ImageResultWithSoftPrediction ir = r.asRef<ImageResultWithSoftPrediction>();
if (!ir.feature_vector) {
return nb::ndarray<float, nb::numpy, nb::c_contig>();
}

return nb::ndarray<float, nb::numpy, nb::c_contig>(ir.feature_vector.data(),
ir.feature_vector.get_shape().size(),
ir.feature_vector.get_shape().data());
},
nb::rv_policy::reference_internal)
.def_prop_ro(
"soft_prediction",
[](ResultBase& r) {
ImageResultWithSoftPrediction ir = r.asRef<ImageResultWithSoftPrediction>();
[Review comment, Member] Would that work if the result entity is actually an ImageResult? In the sseg wrapper we currently have the following logic:

    if (return_soft_prediction) {
        // return a ptr to ImageResultWithSoftPrediction
    } else {
        // return a ptr to ImageResult
    }

The infer() method's return type is a ptr to ImageResult. That design looks weird and is overall inconvenient, because we always need to check whether the conversion to ImageResultWithSoftPrediction succeeded. The related refactoring can be done in a subsequent PR; here we just need to be sure that both cases, return_soft_prediction == true and false, are handled correctly.

return nb::ndarray<float, nb::numpy, nb::c_contig>(
ir.soft_prediction.data,
{static_cast<size_t>(ir.soft_prediction.rows),
static_cast<size_t>(ir.soft_prediction.cols),
static_cast<size_t>(ir.soft_prediction.channels())});
},
nb::rv_policy::reference_internal)
.def_prop_ro(
"saliency_map",
[](ResultBase& r) {
ImageResultWithSoftPrediction ir = r.asRef<ImageResultWithSoftPrediction>();
return nb::ndarray<float, nb::numpy, nb::c_contig>(ir.saliency_map.data,
{static_cast<size_t>(ir.saliency_map.rows),
static_cast<size_t>(ir.saliency_map.cols),
static_cast<size_t>(ir.saliency_map.channels())});
},
nb::rv_policy::reference_internal);
}
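
To make the review discussion concrete, a hedged Python-side sketch with placeholder module and path names. `resultImage` is always available on an ImageResult, whereas `feature_vector`, `soft_prediction`, and `saliency_map` go through asRef<ImageResultWithSoftPrediction>() and are therefore only safe to read when the model was configured to return soft predictions; the `return_soft_prediction` option name is taken from the review thread, not verified here.

```python
import numpy as np
import vision  # placeholder import name

model = vision.SegmentationModel.create_model(
    "segmenter.xml",  # placeholder path
    configuration={"return_soft_prediction": True},  # assumed option name
)
result = model(np.zeros((512, 512, 3), dtype=np.uint8))

print(result.resultImage.shape)      # hard prediction: (rows, cols, channels) uint8
print(result.soft_prediction.shape)  # valid only with soft predictions enabled
```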