From 8b5add16d2e4a35524a2f0472f5504c1547d5881 Mon Sep 17 00:00:00 2001 From: Ronald Hecker Date: Wed, 23 Apr 2025 16:05:36 +0200 Subject: [PATCH 01/16] Wrap Result in Scene class This lets the virtual overrides avoid dealing with pointer logic. This is the first step. The next step should be to consolidate every model type to Scene output without a sub-ResultBase. This can now be done gradually. --- src/cpp/models/include/models/anomaly_model.h | 6 +-- src/cpp/models/include/models/base_model.h | 10 ++-- .../include/models/classification_model.h | 12 ++--- .../models/include/models/detection_model.h | 4 +- .../include/models/detection_model_ssd.h | 6 +-- .../include/models/detection_model_yolo.h | 5 +- .../models/detection_model_yolov3_onnx.h | 2 +- .../include/models/detection_model_yolox.h | 2 +- .../include/models/instance_segmentation.h | 6 +-- .../include/models/keypoint_detection.h | 6 +-- src/cpp/models/include/models/results.h | 19 +++++++ .../include/models/segmentation_model.h | 6 +-- src/cpp/models/src/anomaly_model.cpp | 25 ++++------ src/cpp/models/src/base_model.cpp | 26 ++++------ src/cpp/models/src/classification_model.cpp | 50 +++++++++---------- src/cpp/models/src/detection_model.cpp | 15 ++---- src/cpp/models/src/detection_model_ssd.cpp | 27 +++++----- src/cpp/models/src/detection_model_yolo.cpp | 20 +++++--- .../src/detection_model_yolov3_onnx.cpp | 9 ++-- src/cpp/models/src/detection_model_yolox.cpp | 8 +-- src/cpp/models/src/instance_segmentation.cpp | 27 +++++----- src/cpp/models/src/keypoint_detection.cpp | 23 ++++----- src/cpp/models/src/segmentation_model.cpp | 29 +++++------ src/cpp/tilers/include/tilers/detection.h | 8 +-- .../include/tilers/instance_segmentation.h | 8 +-- .../include/tilers/semantic_segmentation.h | 6 +-- src/cpp/tilers/include/tilers/tiler_base.h | 10 ++-- src/cpp/tilers/src/detection.cpp | 32 ++++++------ src/cpp/tilers/src/instance_segmentation.cpp | 31 ++++++------ src/cpp/tilers/src/semantic_segmentation.cpp | 34 
++++++------- src/cpp/tilers/src/tiler_base.cpp | 10 ++-- tests/cpp/accuracy/test_YOLOv8.cpp | 2 +- tests/cpp/accuracy/test_accuracy.cpp | 26 +++++----- 33 files changed, 249 insertions(+), 261 deletions(-) diff --git a/src/cpp/models/include/models/anomaly_model.h b/src/cpp/models/include/models/anomaly_model.h index 1cc5be22..6dd55d91 100644 --- a/src/cpp/models/include/models/anomaly_model.h +++ b/src/cpp/models/include/models/anomaly_model.h @@ -23,9 +23,9 @@ class AnomalyModel : public BaseModel { const std::string& device = "AUTO"); static std::unique_ptr create_model(std::shared_ptr& adapter); - virtual std::unique_ptr infer(const ImageInputData& inputData); - virtual std::vector> inferBatch(const std::vector& inputImgs); - std::unique_ptr postprocess(InferenceResult& infResult) override; + virtual std::unique_ptr infer(const ImageInputData& inputData); + virtual std::vector> inferBatch(const std::vector& inputImgs); + std::unique_ptr postprocess(InferenceResult& infResult) override; friend std::ostream& operator<<(std::ostream& os, std::unique_ptr& model); diff --git a/src/cpp/models/include/models/base_model.h b/src/cpp/models/include/models/base_model.h index 85131805..383bc865 100644 --- a/src/cpp/models/include/models/base_model.h +++ b/src/cpp/models/include/models/base_model.h @@ -38,7 +38,7 @@ class BaseModel { BaseModel(std::shared_ptr& adapter, const ov::AnyMap& configuration = {}); virtual std::shared_ptr preprocess(const InputData& inputData, InferenceInput& input); - virtual std::unique_ptr postprocess(InferenceResult& infResult) = 0; + virtual std::unique_ptr postprocess(InferenceResult& infResult) = 0; void load(ov::Core& core, const std::string& device, size_t num_infer_requests = 1); @@ -49,7 +49,7 @@ class BaseModel { virtual void awaitAll(); virtual void awaitAny(); virtual void setCallback( - std::function, const ov::AnyMap& callback_args)> callback); + std::function, const ov::AnyMap& callback_args)> callback); std::shared_ptr 
getModel(); std::shared_ptr getInferenceAdapter(); @@ -67,8 +67,8 @@ class BaseModel { const std::vector& scale, const std::type_info& dtype = typeid(int)); virtual void inferAsync(const ImageInputData& inputData, const ov::AnyMap& callback_args = {}); - std::unique_ptr inferImage(const ImageInputData& inputData); - std::vector> inferBatchImage(const std::vector& inputData); + std::unique_ptr inferImage(const ImageInputData& inputData); + std::vector> inferBatchImage(const std::vector& inputData); protected: RESIZE_MODE selectResizeMode(const std::string& resize_type); @@ -104,5 +104,5 @@ class BaseModel { std::shared_ptr inferenceAdapter; std::map inputsLayouts; ov::Layout getInputLayout(const ov::Output& input); - std::function, const ov::AnyMap&)> lastCallback; + std::function, const ov::AnyMap&)> lastCallback; }; diff --git a/src/cpp/models/include/models/classification_model.h b/src/cpp/models/include/models/classification_model.h index 88ac03bc..529d5e13 100644 --- a/src/cpp/models/include/models/classification_model.h +++ b/src/cpp/models/include/models/classification_model.h @@ -99,10 +99,10 @@ class ClassificationModel : public BaseModel { const std::string& device = "AUTO"); static std::unique_ptr create_model(std::shared_ptr& adapter); - std::unique_ptr postprocess(InferenceResult& infResult) override; + std::unique_ptr postprocess(InferenceResult& infResult) override; - virtual std::unique_ptr infer(const ImageInputData& inputData); - virtual std::vector> inferBatch(const std::vector& inputImgs); + virtual std::unique_ptr infer(const ImageInputData& inputData); + virtual std::vector> inferBatch(const std::vector& inputImgs); static std::string ModelType; protected: @@ -119,8 +119,8 @@ class ClassificationModel : public BaseModel { void init_from_config(const ov::AnyMap& top_priority, const ov::AnyMap& mid_priority); void prepareInputsOutputs(std::shared_ptr& model) override; void updateModelInfo() override; - std::unique_ptr 
get_multilabel_predictions(InferenceResult& infResult, bool add_raw_scores); - std::unique_ptr get_multiclass_predictions(InferenceResult& infResult, bool add_raw_scores); - std::unique_ptr get_hierarchical_predictions(InferenceResult& infResult, bool add_raw_scores); + std::unique_ptr get_multilabel_predictions(InferenceResult& infResult, bool add_raw_scores); + std::unique_ptr get_multiclass_predictions(InferenceResult& infResult, bool add_raw_scores); + std::unique_ptr get_hierarchical_predictions(InferenceResult& infResult, bool add_raw_scores); ov::Tensor reorder_saliency_maps(const ov::Tensor&); }; diff --git a/src/cpp/models/include/models/detection_model.h b/src/cpp/models/include/models/detection_model.h index 16ba8cf8..9ec7ab8f 100644 --- a/src/cpp/models/include/models/detection_model.h +++ b/src/cpp/models/include/models/detection_model.h @@ -25,8 +25,8 @@ class DetectionModel : public BaseModel { const std::string& device = "AUTO"); static std::unique_ptr create_model(std::shared_ptr& adapter); - virtual std::unique_ptr infer(const ImageInputData& inputData); - virtual std::vector> inferBatch(const std::vector& inputImgs); + virtual std::unique_ptr infer(const ImageInputData& inputData); + virtual std::vector> inferBatch(const std::vector& inputImgs); protected: float confidence_threshold = 0.5f; diff --git a/src/cpp/models/include/models/detection_model_ssd.h b/src/cpp/models/include/models/detection_model_ssd.h index acb3060f..188301c5 100644 --- a/src/cpp/models/include/models/detection_model_ssd.h +++ b/src/cpp/models/include/models/detection_model_ssd.h @@ -25,12 +25,12 @@ class ModelSSD : public DetectionModel { public: using DetectionModel::DetectionModel; std::shared_ptr preprocess(const InputData& inputData, InferenceInput& input) override; - std::unique_ptr postprocess(InferenceResult& infResult) override; + std::unique_ptr postprocess(InferenceResult& infResult) override; static std::string ModelType; protected: - std::unique_ptr 
postprocessSingleOutput(InferenceResult& infResult); - std::unique_ptr postprocessMultipleOutputs(InferenceResult& infResult); + std::unique_ptr postprocessSingleOutput(InferenceResult& infResult); + std::unique_ptr postprocessMultipleOutputs(InferenceResult& infResult); void prepareInputsOutputs(std::shared_ptr& model) override; void prepareSingleOutput(std::shared_ptr& model); void prepareMultipleOutputs(std::shared_ptr& model); diff --git a/src/cpp/models/include/models/detection_model_yolo.h b/src/cpp/models/include/models/detection_model_yolo.h index 56055588..40ffad20 100644 --- a/src/cpp/models/include/models/detection_model_yolo.h +++ b/src/cpp/models/include/models/detection_model_yolo.h @@ -18,7 +18,6 @@ struct DetectedObject; struct InferenceResult; -struct ResultBase; class ModelYolo : public DetectionModelExt { protected: @@ -46,7 +45,7 @@ class ModelYolo : public DetectionModelExt { ModelYolo(std::shared_ptr& model, const ov::AnyMap& configuration); ModelYolo(std::shared_ptr& adapter); - std::unique_ptr postprocess(InferenceResult& infResult) override; + std::unique_ptr postprocess(InferenceResult& infResult) override; protected: void prepareInputsOutputs(std::shared_ptr& model) override; @@ -82,7 +81,7 @@ class YOLOv5 : public DetectionModelExt { public: YOLOv5(std::shared_ptr& model, const ov::AnyMap& configuration); YOLOv5(std::shared_ptr& adapter); - std::unique_ptr postprocess(InferenceResult& infResult) override; + std::unique_ptr postprocess(InferenceResult& infResult) override; static std::string ModelType; }; diff --git a/src/cpp/models/include/models/detection_model_yolov3_onnx.h b/src/cpp/models/include/models/detection_model_yolov3_onnx.h index 9dead24d..a11c900a 100644 --- a/src/cpp/models/include/models/detection_model_yolov3_onnx.h +++ b/src/cpp/models/include/models/detection_model_yolov3_onnx.h @@ -17,7 +17,7 @@ class ModelYoloV3ONNX : public DetectionModel { ModelYoloV3ONNX(std::shared_ptr& adapter); using 
DetectionModel::DetectionModel; - std::unique_ptr postprocess(InferenceResult& infResult) override; + std::unique_ptr postprocess(InferenceResult& infResult) override; std::shared_ptr preprocess(const InputData& inputData, InferenceInput& input) override; protected: diff --git a/src/cpp/models/include/models/detection_model_yolox.h b/src/cpp/models/include/models/detection_model_yolox.h index bc747ee5..1849ba68 100644 --- a/src/cpp/models/include/models/detection_model_yolox.h +++ b/src/cpp/models/include/models/detection_model_yolox.h @@ -17,7 +17,7 @@ class ModelYoloX : public DetectionModelExt { ModelYoloX(std::shared_ptr& adapter); using DetectionModelExt::DetectionModelExt; - std::unique_ptr postprocess(InferenceResult& infResult) override; + std::unique_ptr postprocess(InferenceResult& infResult) override; std::shared_ptr preprocess(const InputData& inputData, InferenceInput& input) override; static std::string ModelType; diff --git a/src/cpp/models/include/models/instance_segmentation.h b/src/cpp/models/include/models/instance_segmentation.h index c6cadce7..21c94690 100644 --- a/src/cpp/models/include/models/instance_segmentation.h +++ b/src/cpp/models/include/models/instance_segmentation.h @@ -30,10 +30,10 @@ class MaskRCNNModel : public BaseModel { const std::string& device = "AUTO"); static std::unique_ptr create_model(std::shared_ptr& adapter); - std::unique_ptr postprocess(InferenceResult& infResult) override; + std::unique_ptr postprocess(InferenceResult& infResult) override; - virtual std::unique_ptr infer(const ImageInputData& inputData); - virtual std::vector> inferBatch( + virtual std::unique_ptr infer(const ImageInputData& inputData); + virtual std::vector> inferBatch( const std::vector& inputImgs); static std::string ModelType; bool postprocess_semantic_masks = true; diff --git a/src/cpp/models/include/models/keypoint_detection.h b/src/cpp/models/include/models/keypoint_detection.h index 15d21cba..6fbea292 100644 --- 
a/src/cpp/models/include/models/keypoint_detection.h +++ b/src/cpp/models/include/models/keypoint_detection.h @@ -29,10 +29,10 @@ class KeypointDetectionModel : public BaseModel { const std::string& device = "AUTO"); static std::unique_ptr create_model(std::shared_ptr& adapter); - std::unique_ptr postprocess(InferenceResult& infResult) override; + std::unique_ptr postprocess(InferenceResult& infResult) override; - virtual std::unique_ptr infer(const ImageInputData& inputData); - virtual std::vector> inferBatch( + virtual std::unique_ptr infer(const ImageInputData& inputData); + virtual std::vector> inferBatch( const std::vector& inputImgs); static std::string ModelType; diff --git a/src/cpp/models/include/models/results.h b/src/cpp/models/include/models/results.h index 1a648723..d87849b5 100644 --- a/src/cpp/models/include/models/results.h +++ b/src/cpp/models/include/models/results.h @@ -16,6 +16,7 @@ #include "internal_model_data.h" struct MetaData; + struct ResultBase { ResultBase(int64_t frameId = -1, const std::shared_ptr& metaData = nullptr) : frameId(frameId), @@ -377,3 +378,21 @@ struct KeypointDetectionResult : public ResultBase { : ResultBase(frameId, metaData) {} std::vector poses; }; + + +class Scene { +public: + Scene(int64_t frameId = -1, const std::shared_ptr& metaData = nullptr) + : frameId(frameId), + metaData(metaData) {} + + int64_t frameId; + std::shared_ptr metaData; + + std::unique_ptr detection_result; + std::unique_ptr classification_result; + std::unique_ptr keypoint_detection_result; + std::unique_ptr anomaly_result; + std::unique_ptr instance_segmentation_result; + std::unique_ptr image_result; +}; diff --git a/src/cpp/models/include/models/segmentation_model.h b/src/cpp/models/include/models/segmentation_model.h index 922828f7..d802e026 100644 --- a/src/cpp/models/include/models/segmentation_model.h +++ b/src/cpp/models/include/models/segmentation_model.h @@ -31,10 +31,10 @@ class SegmentationModel : public BaseModel { const std::string& 
device = "AUTO"); static std::unique_ptr create_model(std::shared_ptr& adapter); - std::unique_ptr postprocess(InferenceResult& infResult) override; + std::unique_ptr postprocess(InferenceResult& infResult) override; - virtual std::unique_ptr infer(const ImageInputData& inputData); - virtual std::vector> inferBatch(const std::vector& inputImgs); + virtual std::unique_ptr infer(const ImageInputData& inputData); + virtual std::vector> inferBatch(const std::vector& inputImgs); static std::string ModelType; std::vector getContours(const ImageResultWithSoftPrediction& imageResult); diff --git a/src/cpp/models/src/anomaly_model.cpp b/src/cpp/models/src/anomaly_model.cpp index eeccf08a..1cd1a9e8 100644 --- a/src/cpp/models/src/anomaly_model.cpp +++ b/src/cpp/models/src/anomaly_model.cpp @@ -38,23 +38,15 @@ AnomalyModel::AnomalyModel(std::shared_ptr& adapter, const ov: init_from_config(configuration, adapter->getModelConfig()); } -std::unique_ptr AnomalyModel::infer(const ImageInputData& inputData) { - auto result = BaseModel::inferImage(inputData); - - return std::unique_ptr(static_cast(result.release())); +std::unique_ptr AnomalyModel::infer(const ImageInputData& inputData) { + return BaseModel::inferImage(inputData); } -std::vector> AnomalyModel::inferBatch(const std::vector& inputImgs) { - auto results = BaseModel::inferBatchImage(inputImgs); - std::vector> anoResults; - anoResults.reserve(results.size()); - for (auto& result : results) { - anoResults.emplace_back(static_cast(result.release())); - } - return anoResults; +std::vector> AnomalyModel::inferBatch(const std::vector& inputImgs) { + return BaseModel::inferBatchImage(inputImgs); } -std::unique_ptr AnomalyModel::postprocess(InferenceResult& infResult) { +std::unique_ptr AnomalyModel::postprocess(InferenceResult& infResult) { ov::Tensor predictions = infResult.outputsData[outputNames[0]]; const auto& inputImgSize = infResult.internalModelData->asRef(); @@ -95,13 +87,16 @@ std::unique_ptr 
AnomalyModel::postprocess(InferenceResult& infResult pred_boxes = getBoxes(pred_mask); } - AnomalyResult* result = new AnomalyResult(infResult.frameId, infResult.metaData); + auto scene = std::make_unique(infResult.frameId, infResult.metaData); + auto result = std::make_unique(infResult.frameId, infResult.metaData); result->anomaly_map = std::move(anomaly_map); result->pred_score = pred_score; result->pred_label = std::move(pred_label); result->pred_mask = std::move(pred_mask); result->pred_boxes = std::move(pred_boxes); - return std::unique_ptr(result); + + scene->anomaly_result = std::move(result); + return scene; } cv::Mat AnomalyModel::normalize(cv::Mat& tensor, float threshold) { diff --git a/src/cpp/models/src/base_model.cpp b/src/cpp/models/src/base_model.cpp index 6fd83d73..8db177ff 100644 --- a/src/cpp/models/src/base_model.cpp +++ b/src/cpp/models/src/base_model.cpp @@ -25,10 +25,10 @@ namespace { class TmpCallbackSetter { public: BaseModel* model; - std::function, const ov::AnyMap&)> last_callback; + std::function, const ov::AnyMap&)> last_callback; TmpCallbackSetter(BaseModel* model_, - std::function, const ov::AnyMap&)> tmp_callback, - std::function, const ov::AnyMap&)> last_callback_) + std::function, const ov::AnyMap&)> tmp_callback, + std::function, const ov::AnyMap&)> last_callback_) : model(model_), last_callback(last_callback_) { model->setCallback(tmp_callback); @@ -37,7 +37,7 @@ class TmpCallbackSetter { if (last_callback) { model->setCallback(last_callback); } else { - model->setCallback([](std::unique_ptr, const ov::AnyMap&) {}); + model->setCallback([](std::unique_ptr, const ov::AnyMap&) {}); } } }; @@ -106,7 +106,7 @@ void BaseModel::awaitAny() { } void BaseModel::setCallback( - std::function, const ov::AnyMap& callback_args)> callback) { + std::function, const ov::AnyMap& callback_args)> callback) { lastCallback = callback; inferenceAdapter->setCallback([this, callback](ov::InferRequest request, CallbackData args) { InferenceResult result; 
@@ -121,9 +121,7 @@ void BaseModel::setCallback( if (model_data_iter != args->end()) { result.internalModelData = std::move(model_data_iter->second.as>()); } - auto retVal = this->postprocess(result); - *retVal = static_cast(result); - callback(std::move(retVal), args ? *args : ov::AnyMap()); + callback(std::move(this->postprocess(result)), args ? *args : ov::AnyMap()); }); } @@ -215,7 +213,7 @@ BaseModel::BaseModel(std::shared_ptr& adapter, const ov::AnyMa init_from_config(configuration, adapter->getModelConfig()); } -std::unique_ptr BaseModel::inferImage(const ImageInputData& inputData) { +std::unique_ptr BaseModel::inferImage(const ImageInputData& inputData) { InferenceInput inputs; InferenceResult result; auto internalModelData = this->preprocess(inputData, inputs); @@ -223,21 +221,19 @@ std::unique_ptr BaseModel::inferImage(const ImageInputData& inputDat result.outputsData = inferenceAdapter->infer(inputs); result.internalModelData = std::move(internalModelData); - auto retVal = this->postprocess(result); - *retVal = static_cast(result); - return retVal; + return this->postprocess(result); } -std::vector> BaseModel::inferBatchImage(const std::vector& inputImgs) { +std::vector> BaseModel::inferBatchImage(const std::vector& inputImgs) { std::vector> inputData; inputData.reserve(inputImgs.size()); for (const auto& img : inputImgs) { inputData.push_back(img); } - auto results = std::vector>(inputData.size()); + auto results = std::vector>(inputData.size()); auto setter = TmpCallbackSetter( this, - [&](std::unique_ptr result, const ov::AnyMap& callback_args) { + [&](std::unique_ptr result, const ov::AnyMap& callback_args) { size_t id = callback_args.find("id")->second.as(); results[id] = std::move(result); }, diff --git a/src/cpp/models/src/classification_model.cpp b/src/cpp/models/src/classification_model.cpp index a9d281e1..5ca38cfa 100644 --- a/src/cpp/models/src/classification_model.cpp +++ b/src/cpp/models/src/classification_model.cpp @@ -293,8 +293,8 @@ 
std::unique_ptr ClassificationModel::create_model(std::shar return classifier; } -std::unique_ptr ClassificationModel::postprocess(InferenceResult& infResult) { - std::unique_ptr result; +std::unique_ptr ClassificationModel::postprocess(InferenceResult& infResult) { + std::unique_ptr result; if (multilabel) { result = get_multilabel_predictions(infResult, output_raw_scores); } else if (hierarchical) { @@ -303,7 +303,7 @@ std::unique_ptr ClassificationModel::postprocess(InferenceResult& in result = get_multiclass_predictions(infResult, output_raw_scores); } - ClassificationResult* cls_res = static_cast(result.get()); + auto& cls_res = result->classification_result; auto saliency_map_iter = infResult.outputsData.find(saliency_map_name); if (saliency_map_iter != infResult.outputsData.end()) { cls_res->saliency_map = std::move(saliency_map_iter->second); @@ -313,16 +313,17 @@ std::unique_ptr ClassificationModel::postprocess(InferenceResult& in if (feature_vector_iter != infResult.outputsData.end()) { cls_res->feature_vector = std::move(feature_vector_iter->second); } + return result; } -std::unique_ptr ClassificationModel::get_multilabel_predictions(InferenceResult& infResult, +std::unique_ptr ClassificationModel::get_multilabel_predictions(InferenceResult& infResult, bool add_raw_scores) { const ov::Tensor& logitsTensor = infResult.outputsData.find(outputNames[0])->second; const float* logitsPtr = logitsTensor.data(); - ClassificationResult* result = new ClassificationResult(infResult.frameId, infResult.metaData); - auto retVal = std::unique_ptr(result); + auto scene = std::make_unique(infResult.frameId, infResult.metaData); + auto result = std::make_unique(infResult.frameId, infResult.metaData); auto raw_scores = ov::Tensor(); float* raw_scoresPtr = nullptr; @@ -343,12 +344,14 @@ std::unique_ptr ClassificationModel::get_multilabel_predictions(Infe } } - return retVal; + scene->classification_result = std::move(result); + return scene; } -std::unique_ptr 
ClassificationModel::get_hierarchical_predictions(InferenceResult& infResult, +std::unique_ptr ClassificationModel::get_hierarchical_predictions(InferenceResult& infResult, bool add_raw_scores) { - ClassificationResult* result = new ClassificationResult(infResult.frameId, infResult.metaData); + auto scene = std::make_unique(infResult.frameId, infResult.metaData); + auto result = std::make_unique(infResult.frameId, infResult.metaData); const ov::Tensor& logitsTensor = infResult.outputsData.find(outputNames[0])->second; float* logitsPtr = logitsTensor.data(); @@ -396,13 +399,13 @@ std::unique_ptr ClassificationModel::get_hierarchical_predictions(In auto resolved_labels = resolver->resolve_labels(predicted_labels, predicted_scores); - auto retVal = std::unique_ptr(result); result->topLabels.reserve(resolved_labels.size()); for (const auto& label : resolved_labels) { result->topLabels.emplace_back(hierarchical_info.label_to_idx[label.first], label.first, label.second); } - return retVal; + scene->classification_result = std::move(result); + return scene; } ov::Tensor ClassificationModel::reorder_saliency_maps(const ov::Tensor& source_maps) { @@ -426,16 +429,15 @@ ov::Tensor ClassificationModel::reorder_saliency_maps(const ov::Tensor& source_m return reordered_maps; } -std::unique_ptr ClassificationModel::get_multiclass_predictions(InferenceResult& infResult, +std::unique_ptr ClassificationModel::get_multiclass_predictions(InferenceResult& infResult, bool add_raw_scores) { const ov::Tensor& indicesTensor = infResult.outputsData.find(indices_name)->second; const int* indicesPtr = indicesTensor.data(); const ov::Tensor& scoresTensor = infResult.outputsData.find(scores_name)->second; const float* scoresPtr = scoresTensor.data(); - ClassificationResult* result = new ClassificationResult(infResult.frameId, infResult.metaData); - auto retVal = std::unique_ptr(result); - + auto scene = std::make_unique(infResult.frameId, infResult.metaData); + auto result = 
std::make_unique(infResult.frameId, infResult.metaData); if (add_raw_scores) { const ov::Tensor& logitsTensor = infResult.outputsData.find(raw_scores_name)->second; result->raw_scores = ov::Tensor(logitsTensor.get_element_type(), logitsTensor.get_shape()); @@ -452,7 +454,8 @@ std::unique_ptr ClassificationModel::get_multiclass_predictions(Infe result->topLabels.emplace_back(ind, labels[ind], scoresPtr[i]); } - return retVal; + scene->classification_result = std::move(result); + return scene; } void ClassificationModel::prepareInputsOutputs(std::shared_ptr& model) { @@ -535,20 +538,13 @@ void ClassificationModel::prepareInputsOutputs(std::shared_ptr& model append_xai_names(model->outputs(), outputNames); } -std::unique_ptr ClassificationModel::infer(const ImageInputData& inputData) { - auto result = BaseModel::inferImage(inputData); - return std::unique_ptr(static_cast(result.release())); +std::unique_ptr ClassificationModel::infer(const ImageInputData& inputData) { + return BaseModel::inferImage(inputData); } -std::vector> ClassificationModel::inferBatch( +std::vector> ClassificationModel::inferBatch( const std::vector& inputImgs) { - auto results = BaseModel::inferBatchImage(inputImgs); - std::vector> clsResults; - clsResults.reserve(results.size()); - for (auto& result : results) { - clsResults.emplace_back(static_cast(result.release())); - } - return clsResults; + return BaseModel::inferBatchImage(inputImgs); } HierarchicalConfig::HierarchicalConfig(const std::string& json_repr) { diff --git a/src/cpp/models/src/detection_model.cpp b/src/cpp/models/src/detection_model.cpp index 6b55eeba..7f3ea895 100644 --- a/src/cpp/models/src/detection_model.cpp +++ b/src/cpp/models/src/detection_model.cpp @@ -102,17 +102,10 @@ std::unique_ptr DetectionModel::create_model(std::shared_ptr DetectionModel::infer(const ImageInputData& inputData) { - auto result = BaseModel::inferImage(inputData); - return std::unique_ptr(static_cast(result.release())); +std::unique_ptr 
DetectionModel::infer(const ImageInputData& inputData) { + return BaseModel::inferImage(inputData); } -std::vector> DetectionModel::inferBatch(const std::vector& inputImgs) { - auto results = BaseModel::inferBatchImage(inputImgs); - std::vector> detResults; - detResults.reserve(results.size()); - for (auto& result : results) { - detResults.emplace_back(static_cast(result.release())); - } - return detResults; +std::vector> DetectionModel::inferBatch(const std::vector& inputImgs) { + return BaseModel::inferBatchImage(inputImgs); } diff --git a/src/cpp/models/src/detection_model_ssd.cpp b/src/cpp/models/src/detection_model_ssd.cpp index f6f7c818..0eb968cf 100644 --- a/src/cpp/models/src/detection_model_ssd.cpp +++ b/src/cpp/models/src/detection_model_ssd.cpp @@ -93,30 +93,29 @@ std::shared_ptr ModelSSD::preprocess(const InputData& inputDa return DetectionModel::preprocess(inputData, input); } -std::unique_ptr ModelSSD::postprocess(InferenceResult& infResult) { - std::unique_ptr result = filterOutXai(outputNames).size() > 1 ? postprocessMultipleOutputs(infResult) +std::unique_ptr ModelSSD::postprocess(InferenceResult& infResult) { + std::unique_ptr result = filterOutXai(outputNames).size() > 1 ? 
postprocessMultipleOutputs(infResult) : postprocessSingleOutput(infResult); - DetectionResult* cls_res = static_cast(result.get()); auto saliency_map_iter = infResult.outputsData.find(saliency_map_name); if (saliency_map_iter != infResult.outputsData.end()) { - cls_res->saliency_map = std::move(saliency_map_iter->second); + result->detection_result->saliency_map = std::move(saliency_map_iter->second); } auto feature_vector_iter = infResult.outputsData.find(feature_vector_name); if (feature_vector_iter != infResult.outputsData.end()) { - cls_res->feature_vector = std::move(feature_vector_iter->second); + result->detection_result->feature_vector = std::move(feature_vector_iter->second); } return result; } -std::unique_ptr ModelSSD::postprocessSingleOutput(InferenceResult& infResult) { +std::unique_ptr ModelSSD::postprocessSingleOutput(InferenceResult& infResult) { const std::vector namesWithoutXai = filterOutXai(outputNames); assert(namesWithoutXai.size() == 1); const ov::Tensor& detectionsTensor = infResult.outputsData[namesWithoutXai[0]]; NumAndStep numAndStep = fromSingleOutput(detectionsTensor.get_shape()); const float* detections = detectionsTensor.data(); - DetectionResult* result = new DetectionResult(infResult.frameId, infResult.metaData); - auto retVal = std::unique_ptr(result); + auto scene = std::make_unique(infResult.frameId, infResult.metaData); + auto result = std::make_unique(infResult.frameId, infResult.metaData); const auto& internalData = infResult.internalModelData->asRef(); float floatInputImgWidth = float(internalData.inputImgWidth), @@ -168,10 +167,11 @@ std::unique_ptr ModelSSD::postprocessSingleOutput(InferenceResult& i } } - return retVal; + scene->detection_result = std::move(result); + return scene; } -std::unique_ptr ModelSSD::postprocessMultipleOutputs(InferenceResult& infResult) { +std::unique_ptr ModelSSD::postprocessMultipleOutputs(InferenceResult& infResult) { const std::vector namesWithoutXai = filterOutXai(outputNames); const float* 
boxes = infResult.outputsData[namesWithoutXai[0]].data(); NumAndStep numAndStep = fromMultipleOutputs(infResult.outputsData[namesWithoutXai[0]].get_shape()); @@ -179,8 +179,8 @@ std::unique_ptr ModelSSD::postprocessMultipleOutputs(InferenceResult const float* scores = namesWithoutXai.size() > 2 ? infResult.outputsData[namesWithoutXai[2]].data() : nullptr; - DetectionResult* result = new DetectionResult(infResult.frameId, infResult.metaData); - auto retVal = std::unique_ptr(result); + auto scene = std::make_unique(infResult.frameId, infResult.metaData); + auto result = std::make_unique(infResult.frameId, infResult.metaData); const auto& internalData = infResult.internalModelData->asRef(); float floatInputImgWidth = float(internalData.inputImgWidth), @@ -232,7 +232,8 @@ std::unique_ptr ModelSSD::postprocessMultipleOutputs(InferenceResult } } - return retVal; + scene->detection_result = std::move(result); + return scene; } void ModelSSD::prepareInputsOutputs(std::shared_ptr& model) { diff --git a/src/cpp/models/src/detection_model_yolo.cpp b/src/cpp/models/src/detection_model_yolo.cpp index 1698b8e6..49e83f8a 100644 --- a/src/cpp/models/src/detection_model_yolo.cpp +++ b/src/cpp/models/src/detection_model_yolo.cpp @@ -257,8 +257,9 @@ void ModelYolo::prepareInputsOutputs(std::shared_ptr& model) { } } -std::unique_ptr ModelYolo::postprocess(InferenceResult& infResult) { - DetectionResult* result = new DetectionResult(infResult.frameId, infResult.metaData); +std::unique_ptr ModelYolo::postprocess(InferenceResult& infResult) { + auto scene = std::make_unique(infResult.frameId, infResult.metaData); + auto result = std::make_unique(infResult.frameId, infResult.metaData); std::vector objects; // Parsing outputs @@ -308,7 +309,9 @@ std::unique_ptr ModelYolo::postprocess(InferenceResult& infResult) { } } - return std::unique_ptr(result); + scene->detection_result = std::move(result); + + return scene; } void ModelYolo::parseYOLOOutput(const std::string& output_name, @@ -566,7 
+569,7 @@ YOLOv5::YOLOv5(std::shared_ptr& adapter) : DetectionModelExt(a init_from_config(adapter->getModelConfig(), ov::AnyMap{}); } -std::unique_ptr YOLOv5::postprocess(InferenceResult& infResult) { +std::unique_ptr YOLOv5::postprocess(InferenceResult& infResult) { if (1 != infResult.outputsData.size()) { throw std::runtime_error("YOLO: expect 1 output"); } @@ -609,8 +612,9 @@ std::unique_ptr YOLOv5::postprocess(InferenceResult& infResult) { } else { keep = multiclass_nms(boxes_with_class, confidences, iou_threshold, includeBoundaries, keep_top_k); } - DetectionResult* result = new DetectionResult(infResult.frameId, infResult.metaData); - auto base = std::unique_ptr(result); + auto scene = std::make_unique(infResult.frameId, infResult.metaData); + auto result = std::make_unique(infResult.frameId, infResult.metaData); + const auto& internalData = infResult.internalModelData->asRef(); float floatInputImgWidth = float(internalData.inputImgWidth), floatInputImgHeight = float(internalData.inputImgHeight); @@ -636,7 +640,9 @@ std::unique_ptr YOLOv5::postprocess(InferenceResult& infResult) { desc.label = getLabelName(desc.labelID); result->objects.push_back(desc); } - return base; + scene->detection_result = std::move(result); + + return scene; } std::string YOLOv8::ModelType = "YOLOv8"; diff --git a/src/cpp/models/src/detection_model_yolov3_onnx.cpp b/src/cpp/models/src/detection_model_yolov3_onnx.cpp index 68830220..f9ccf57a 100644 --- a/src/cpp/models/src/detection_model_yolov3_onnx.cpp +++ b/src/cpp/models/src/detection_model_yolov3_onnx.cpp @@ -116,7 +116,7 @@ float getScore(const ov::Tensor& scoresTensor, size_t classInd, size_t boxInd) { } } // namespace -std::unique_ptr ModelYoloV3ONNX::postprocess(InferenceResult& infResult) { +std::unique_ptr ModelYoloV3ONNX::postprocess(InferenceResult& infResult) { // Get info about input image const auto imgWidth = infResult.internalModelData->asRef().inputImgWidth; const auto imgHeight = 
infResult.internalModelData->asRef().inputImgHeight; @@ -133,7 +133,8 @@ std::unique_ptr ModelYoloV3ONNX::postprocess(InferenceResult& infRes const auto boxShape = boxes.get_shape(); // Generate detection results - DetectionResult* result = new DetectionResult(infResult.frameId, infResult.metaData); + auto scene = std::make_unique(infResult.frameId, infResult.metaData); + auto result = std::make_unique(infResult.frameId, infResult.metaData); size_t numberOfBoxes = indicesShape.size() == 3 ? indicesShape[1] : indicesShape[0]; size_t indicesStride = indicesShape.size() == 3 ? indicesShape[2] : indicesShape[1]; @@ -170,5 +171,7 @@ std::unique_ptr ModelYoloV3ONNX::postprocess(InferenceResult& infRes } } - return std::unique_ptr(result); + scene->detection_result = std::move(result); + + return scene; } diff --git a/src/cpp/models/src/detection_model_yolox.cpp b/src/cpp/models/src/detection_model_yolox.cpp index 3c4df1fe..421e4e32 100644 --- a/src/cpp/models/src/detection_model_yolox.cpp +++ b/src/cpp/models/src/detection_model_yolox.cpp @@ -126,7 +126,7 @@ std::shared_ptr ModelYoloX::preprocess(const InputData& input return std::make_shared(origImg.cols, origImg.rows, scale, scale); } -std::unique_ptr ModelYoloX::postprocess(InferenceResult& infResult) { +std::unique_ptr ModelYoloX::postprocess(InferenceResult& infResult) { // Get metadata about input image shape and scale const auto& scale = infResult.internalModelData->asRef(); @@ -136,7 +136,8 @@ std::unique_ptr ModelYoloX::postprocess(InferenceResult& infResult) float* outputPtr = output.data(); // Generate detection results - DetectionResult* result = new DetectionResult(infResult.frameId, infResult.metaData); + auto scene = std::make_unique(infResult.frameId, infResult.metaData); + auto result = std::make_unique(infResult.frameId, infResult.metaData); // Update coordinates according to strides for (size_t box_index = 0; box_index < expandedStrides.size(); ++box_index) { @@ -200,5 +201,6 @@ std::unique_ptr 
ModelYoloX::postprocess(InferenceResult& infResult) result->objects.push_back(obj); } - return std::unique_ptr(result); + scene->detection_result = std::move(result); + return scene; } diff --git a/src/cpp/models/src/instance_segmentation.cpp b/src/cpp/models/src/instance_segmentation.cpp index 384fb057..5af1bf3a 100644 --- a/src/cpp/models/src/instance_segmentation.cpp +++ b/src/cpp/models/src/instance_segmentation.cpp @@ -281,7 +281,7 @@ void MaskRCNNModel::prepareInputsOutputs(std::shared_ptr& model) { append_xai_names(model->outputs(), outputNames); } -std::unique_ptr MaskRCNNModel::postprocess(InferenceResult& infResult) { +std::unique_ptr MaskRCNNModel::postprocess(InferenceResult& infResult) { const auto& internalData = infResult.internalModelData->asRef(); float floatInputImgWidth = float(internalData.inputImgWidth), floatInputImgHeight = float(internalData.inputImgHeight); @@ -300,8 +300,10 @@ std::unique_ptr MaskRCNNModel::postprocess(InferenceResult& infResul size_t objectSize = lbm.boxes.get_shape().back(); float* const masks = lbm.masks.data(); const cv::Size& masks_size{int(lbm.masks.get_shape()[3]), int(lbm.masks.get_shape()[2])}; - InstanceSegmentationResult* result = new InstanceSegmentationResult(infResult.frameId, infResult.metaData); - auto retVal = std::unique_ptr(result); + + auto scene = std::make_unique(infResult.frameId, infResult.metaData); + auto result = std::make_unique(infResult.frameId, infResult.metaData); + std::vector> saliency_maps; bool has_feature_vector_name = std::find(outputNames.begin(), outputNames.end(), feature_vector_name) != outputNames.end(); @@ -355,21 +357,16 @@ std::unique_ptr MaskRCNNModel::postprocess(InferenceResult& infResul if (has_feature_vector_name) { result->feature_vector = std::move(infResult.outputsData[feature_vector_name]); } - return retVal; + + scene->instance_segmentation_result = std::move(result); + return scene; } -std::unique_ptr MaskRCNNModel::infer(const ImageInputData& inputData) { - auto 
result = BaseModel::inferImage(inputData); - return std::unique_ptr(static_cast(result.release())); +std::unique_ptr MaskRCNNModel::infer(const ImageInputData& inputData) { + return BaseModel::inferImage(inputData); } -std::vector> MaskRCNNModel::inferBatch( +std::vector> MaskRCNNModel::inferBatch( const std::vector& inputImgs) { - auto results = BaseModel::inferBatchImage(inputImgs); - std::vector> isegResults; - isegResults.reserve(results.size()); - for (auto& result : results) { - isegResults.emplace_back(static_cast(result.release())); - } - return isegResults; + return BaseModel::inferBatchImage(inputImgs); } diff --git a/src/cpp/models/src/keypoint_detection.cpp b/src/cpp/models/src/keypoint_detection.cpp index 4fbe778c..9554cbb4 100644 --- a/src/cpp/models/src/keypoint_detection.cpp +++ b/src/cpp/models/src/keypoint_detection.cpp @@ -207,8 +207,9 @@ void KeypointDetectionModel::prepareInputsOutputs(std::shared_ptr& mo } } -std::unique_ptr KeypointDetectionModel::postprocess(InferenceResult& infResult) { - KeypointDetectionResult* result = new KeypointDetectionResult(infResult.frameId, infResult.metaData); +std::unique_ptr KeypointDetectionModel::postprocess(InferenceResult& infResult) { + auto scene = std::make_unique(infResult.frameId, infResult.metaData); + auto result = std::make_unique(infResult.frameId, infResult.metaData); const ov::Tensor& pred_x_tensor = infResult.outputsData.find(outputNames[0])->second; size_t shape_offset = pred_x_tensor.get_shape().size() == 3 ? 
1 : 0; @@ -246,21 +247,15 @@ std::unique_ptr KeypointDetectionModel::postprocess(InferenceResult& result->poses.emplace_back( decode_simcc(pred_x_mat, pred_y_mat, {inverted_scale_x, inverted_scale_y}, {pad_left, pad_top}, apply_softmax)); - return std::unique_ptr(result); + scene->keypoint_detection_result = std::move(result); + return scene; } -std::unique_ptr KeypointDetectionModel::infer(const ImageInputData& inputData) { - auto result = BaseModel::inferImage(inputData); - return std::unique_ptr(static_cast(result.release())); +std::unique_ptr KeypointDetectionModel::infer(const ImageInputData& inputData) { + return BaseModel::inferImage(inputData); } -std::vector> KeypointDetectionModel::inferBatch( +std::vector> KeypointDetectionModel::inferBatch( const std::vector& inputImgs) { - auto results = BaseModel::inferBatchImage(inputImgs); - std::vector> kpDetResults; - kpDetResults.reserve(results.size()); - for (auto& result : results) { - kpDetResults.emplace_back(static_cast(result.release())); - } - return kpDetResults; + return BaseModel::inferBatchImage(inputImgs); } diff --git a/src/cpp/models/src/segmentation_model.cpp b/src/cpp/models/src/segmentation_model.cpp index deea1c87..19ad5158 100644 --- a/src/cpp/models/src/segmentation_model.cpp +++ b/src/cpp/models/src/segmentation_model.cpp @@ -219,7 +219,7 @@ void SegmentationModel::prepareInputsOutputs(std::shared_ptr& model) } } -std::unique_ptr SegmentationModel::postprocess(InferenceResult& infResult) { +std::unique_ptr SegmentationModel::postprocess(InferenceResult& infResult) { const auto& inputImgSize = infResult.internalModelData->asRef(); const auto& outputName = outputNames[0] == feature_vector_name ? 
outputNames[1] : outputNames[0]; const auto& outTensor = infResult.outputsData[outputName]; @@ -259,6 +259,7 @@ std::unique_ptr SegmentationModel::postprocess(InferenceResult& infR 0.0, cv::INTER_NEAREST); + auto scene = std::make_unique(infResult.frameId, infResult.metaData); if (return_soft_prediction) { ImageResultWithSoftPrediction* result = new ImageResultWithSoftPrediction(infResult.frameId, infResult.metaData); @@ -275,12 +276,13 @@ std::unique_ptr SegmentationModel::postprocess(InferenceResult& infR result->saliency_map = get_activation_map(soft_prediction); result->feature_vector = iter->second; } - return std::unique_ptr(result); + scene->image_result = std::unique_ptr(result); + } else { + auto result = std::make_unique(infResult.frameId, infResult.metaData); + result->resultImage = hard_prediction; + scene->image_result = std::move(result); } - - ImageResult* result = new ImageResult(infResult.frameId, infResult.metaData); - result->resultImage = hard_prediction; - return std::unique_ptr(result); + return scene; } std::vector SegmentationModel::getContours(const ImageResultWithSoftPrediction& imageResult) { @@ -315,17 +317,10 @@ std::vector SegmentationModel::getContours(const ImageResultWithSoftPre return combined_contours; } -std::unique_ptr SegmentationModel::infer(const ImageInputData& inputData) { - auto result = BaseModel::inferImage(inputData); - return std::unique_ptr(static_cast(result.release())); +std::unique_ptr SegmentationModel::infer(const ImageInputData& inputData) { + return BaseModel::inferImage(inputData); } -std::vector> SegmentationModel::inferBatch(const std::vector& inputImgs) { - auto results = BaseModel::inferBatchImage(inputImgs); - std::vector> segResults; - segResults.reserve(results.size()); - for (auto& result : results) { - segResults.emplace_back(static_cast(result.release())); - } - return segResults; +std::vector> SegmentationModel::inferBatch(const std::vector& inputImgs) { + return 
BaseModel::inferBatchImage(inputImgs); } diff --git a/src/cpp/tilers/include/tilers/detection.h b/src/cpp/tilers/include/tilers/detection.h index 8fde112b..07525681 100644 --- a/src/cpp/tilers/include/tilers/detection.h +++ b/src/cpp/tilers/include/tilers/detection.h @@ -15,14 +15,14 @@ class DetectionTiler : public TilerBase { ExecutionMode exec_mode = ExecutionMode::sync); virtual ~DetectionTiler() = default; - virtual std::unique_ptr run(const ImageInputData& inputData); + virtual std::unique_ptr run(const ImageInputData& inputData); protected: - virtual std::unique_ptr postprocess_tile(std::unique_ptr, const cv::Rect&); - virtual std::unique_ptr merge_results(const std::vector>&, + virtual std::unique_ptr postprocess_tile(std::unique_ptr, const cv::Rect&); + virtual std::unique_ptr merge_results(const std::vector>&, const cv::Size&, const std::vector&); - ov::Tensor merge_saliency_maps(const std::vector>&, + ov::Tensor merge_saliency_maps(const std::vector>&, const cv::Size&, const std::vector&); diff --git a/src/cpp/tilers/include/tilers/instance_segmentation.h b/src/cpp/tilers/include/tilers/instance_segmentation.h index 3ca20dcb..43574e82 100644 --- a/src/cpp/tilers/include/tilers/instance_segmentation.h +++ b/src/cpp/tilers/include/tilers/instance_segmentation.h @@ -14,17 +14,17 @@ class InstanceSegmentationTiler : public TilerBase { InstanceSegmentationTiler(std::shared_ptr model, const ov::AnyMap& configuration, ExecutionMode exec_mode = ExecutionMode::sync); - virtual std::unique_ptr run(const ImageInputData& inputData); + virtual std::unique_ptr run(const ImageInputData& inputData); virtual ~InstanceSegmentationTiler() = default; bool postprocess_semantic_masks = true; protected: - virtual std::unique_ptr postprocess_tile(std::unique_ptr, const cv::Rect&); - virtual std::unique_ptr merge_results(const std::vector>&, + virtual std::unique_ptr postprocess_tile(std::unique_ptr, const cv::Rect&); + virtual std::unique_ptr merge_results(const std::vector>&, 
const cv::Size&, const std::vector&); - std::vector> merge_saliency_maps(const std::vector>&, + std::vector> merge_saliency_maps(const std::vector>&, const cv::Size&, const std::vector&); diff --git a/src/cpp/tilers/include/tilers/semantic_segmentation.h b/src/cpp/tilers/include/tilers/semantic_segmentation.h index 4c9b9d1d..a9e3a951 100644 --- a/src/cpp/tilers/include/tilers/semantic_segmentation.h +++ b/src/cpp/tilers/include/tilers/semantic_segmentation.h @@ -14,12 +14,12 @@ class SemanticSegmentationTiler : public TilerBase { SemanticSegmentationTiler(std::shared_ptr model, const ov::AnyMap& configuration, ExecutionMode exec_mode = ExecutionMode::sync); - virtual std::unique_ptr run(const ImageInputData& inputData); + virtual std::unique_ptr run(const ImageInputData& inputData); virtual ~SemanticSegmentationTiler() = default; protected: - virtual std::unique_ptr postprocess_tile(std::unique_ptr, const cv::Rect&); - virtual std::unique_ptr merge_results(const std::vector>&, + virtual std::unique_ptr postprocess_tile(std::unique_ptr, const cv::Rect&); + virtual std::unique_ptr merge_results(const std::vector>&, const cv::Size&, const std::vector&); diff --git a/src/cpp/tilers/include/tilers/tiler_base.h b/src/cpp/tilers/include/tilers/tiler_base.h index 3fb45d1e..0df812c8 100644 --- a/src/cpp/tilers/include/tilers/tiler_base.h +++ b/src/cpp/tilers/include/tilers/tiler_base.h @@ -27,14 +27,14 @@ class TilerBase { virtual ~TilerBase() = default; protected: - virtual std::unique_ptr run_impl(const ImageInputData& inputData); + virtual std::unique_ptr run_impl(const ImageInputData& inputData); std::vector tile(const cv::Size&); std::vector filter_tiles(const cv::Mat&, const std::vector&); - std::unique_ptr predict_sync(const cv::Mat&, const std::vector&); - std::unique_ptr predict_async(const cv::Mat&, const std::vector&); + std::unique_ptr predict_sync(const cv::Mat&, const std::vector&); + std::unique_ptr predict_async(const cv::Mat&, const std::vector&); cv::Mat 
crop_tile(const cv::Mat&, const cv::Rect&); - virtual std::unique_ptr postprocess_tile(std::unique_ptr, const cv::Rect&) = 0; - virtual std::unique_ptr merge_results(const std::vector>&, + virtual std::unique_ptr postprocess_tile(std::unique_ptr, const cv::Rect&) = 0; + virtual std::unique_ptr merge_results(const std::vector>&, const cv::Size&, const std::vector&) = 0; diff --git a/src/cpp/tilers/src/detection.cpp b/src/cpp/tilers/src/detection.cpp index ec248664..72052e39 100644 --- a/src/cpp/tilers/src/detection.cpp +++ b/src/cpp/tilers/src/detection.cpp @@ -42,9 +42,9 @@ DetectionTiler::DetectionTiler(const std::shared_ptr& _model, max_pred_number = get_from_any_maps("max_pred_number", configuration, extra_config, max_pred_number); } -std::unique_ptr DetectionTiler::postprocess_tile(std::unique_ptr tile_result, +std::unique_ptr DetectionTiler::postprocess_tile(std::unique_ptr tile_result, const cv::Rect& coord) { - DetectionResult* det_res = static_cast(tile_result.get()); + auto& det_res = tile_result->detection_result; for (auto& det : det_res->objects) { det.x += coord.x; det.y += coord.y; @@ -66,19 +66,18 @@ std::unique_ptr DetectionTiler::postprocess_tile(std::unique_ptr DetectionTiler::merge_results(const std::vector>& tiles_results, +std::unique_ptr DetectionTiler::merge_results(const std::vector>& tiles_results, const cv::Size& image_size, const std::vector& tile_coords) { - DetectionResult* result = new DetectionResult(); - auto retVal = std::unique_ptr(result); + auto result = std::make_unique(); + auto scene = std::make_unique(); std::vector all_detections; std::vector> all_detections_refs; std::vector all_scores; for (const auto& result : tiles_results) { - DetectionResult* det_res = static_cast(result.get()); - for (auto& det : det_res->objects) { + for (auto& det : result->detection_result->objects) { all_detections.emplace_back(det.x, det.y, det.x + det.width, det.y + det.height, det.labelID); all_scores.push_back(det.confidence); 
all_detections_refs.push_back(det); @@ -93,7 +92,7 @@ std::unique_ptr DetectionTiler::merge_results(const std::vector(tiles_results.begin()->get()); + auto& det_res = tiles_results.begin()->get()->detection_result; if (det_res->feature_vector) { result->feature_vector = ov::Tensor(det_res->feature_vector.get_element_type(), det_res->feature_vector.get_shape()); @@ -110,8 +109,7 @@ std::unique_ptr DetectionTiler::merge_results(const std::vector(result.get()); - const float* current_feature_ptr = det_res->feature_vector.data(); + const float* current_feature_ptr = result->detection_result->feature_vector.data(); for (size_t i = 0; i < feature_size; ++i) { feature_ptr[i] += current_feature_ptr[i]; @@ -123,17 +121,18 @@ std::unique_ptr DetectionTiler::merge_results(const std::vectordetection_result = std::move(result); + + return scene; } -ov::Tensor DetectionTiler::merge_saliency_maps(const std::vector>& tiles_results, +ov::Tensor DetectionTiler::merge_saliency_maps(const std::vector>& tiles_results, const cv::Size& image_size, const std::vector& tile_coords) { std::vector all_saliency_maps; all_saliency_maps.reserve(tiles_results.size()); for (const auto& result : tiles_results) { - auto det_res = static_cast(result.get()); - all_saliency_maps.push_back(det_res->saliency_map); + all_saliency_maps.push_back(result->detection_result->saliency_map); } ov::Tensor image_saliency_map; @@ -219,7 +218,6 @@ ov::Tensor DetectionTiler::merge_saliency_maps(const std::vector DetectionTiler::run(const ImageInputData& inputData) { - auto result = this->run_impl(inputData); - return std::unique_ptr(static_cast(result.release())); +std::unique_ptr DetectionTiler::run(const ImageInputData& inputData) { + return this->run_impl(inputData); } diff --git a/src/cpp/tilers/src/instance_segmentation.cpp b/src/cpp/tilers/src/instance_segmentation.cpp index 211a4761..27454d96 100644 --- a/src/cpp/tilers/src/instance_segmentation.cpp +++ b/src/cpp/tilers/src/instance_segmentation.cpp @@ -49,15 
+49,14 @@ InstanceSegmentationTiler::InstanceSegmentationTiler(std::shared_ptr max_pred_number = get_from_any_maps("max_pred_number", configuration, extra_config, max_pred_number); } -std::unique_ptr InstanceSegmentationTiler::run(const ImageInputData& inputData) { +std::unique_ptr InstanceSegmentationTiler::run(const ImageInputData& inputData) { auto setter = MaskRCNNModelParamsSetter(model); - auto result = this->run_impl(inputData); - return std::unique_ptr(static_cast(result.release())); + return this->run_impl(inputData); } -std::unique_ptr InstanceSegmentationTiler::postprocess_tile(std::unique_ptr tile_result, +std::unique_ptr InstanceSegmentationTiler::postprocess_tile(std::unique_ptr tile_result, const cv::Rect& coord) { - auto* iseg_res = static_cast(tile_result.get()); + auto& iseg_res = tile_result->instance_segmentation_result; for (auto& det : iseg_res->segmentedObjects) { det.x += coord.x; det.y += coord.y; @@ -73,20 +72,19 @@ std::unique_ptr InstanceSegmentationTiler::postprocess_tile(std::uni return tile_result; } -std::unique_ptr InstanceSegmentationTiler::merge_results( - const std::vector>& tiles_results, +std::unique_ptr InstanceSegmentationTiler::merge_results( + const std::vector>& tiles_results, const cv::Size& image_size, const std::vector& tile_coords) { - auto* result = new InstanceSegmentationResult(); - auto retVal = std::unique_ptr(result); + auto scene = std::make_unique(); + auto result = std::make_unique(); std::vector all_detections; std::vector> all_detections_ptrs; std::vector all_scores; for (const auto& result : tiles_results) { - auto* iseg_res = static_cast(result.get()); - for (auto& det : iseg_res->segmentedObjects) { + for (auto& det : result->instance_segmentation_result->segmentedObjects) { all_detections.emplace_back(det.x, det.y, det.x + det.width, det.y + det.height, det.labelID); all_scores.push_back(det.confidence); all_detections_ptrs.push_back(det); @@ -107,7 +105,7 @@ std::unique_ptr 
InstanceSegmentationTiler::merge_results( } if (tiles_results.size()) { - auto* iseg_res = static_cast(tiles_results.begin()->get()); + auto& iseg_res = tiles_results.begin()->get()->instance_segmentation_result; if (iseg_res->feature_vector) { result->feature_vector = ov::Tensor(iseg_res->feature_vector.get_element_type(), iseg_res->feature_vector.get_shape()); @@ -121,7 +119,7 @@ std::unique_ptr InstanceSegmentationTiler::merge_results( std::fill(feature_ptr, feature_ptr + feature_size, 0.f); for (const auto& result : tiles_results) { - auto* iseg_res = static_cast(result.get()); + auto& iseg_res = result->instance_segmentation_result; const float* current_feature_ptr = iseg_res->feature_vector.data(); for (size_t i = 0; i < feature_size; ++i) { @@ -136,17 +134,18 @@ std::unique_ptr InstanceSegmentationTiler::merge_results( result->saliency_map = merge_saliency_maps(tiles_results, image_size, tile_coords); - return retVal; + scene->instance_segmentation_result = std::move(result); + return scene; } std::vector> InstanceSegmentationTiler::merge_saliency_maps( - const std::vector>& tiles_results, + const std::vector>& tiles_results, const cv::Size& image_size, const std::vector& tile_coords) { std::vector>> all_saliecy_maps; all_saliecy_maps.reserve(tiles_results.size()); for (const auto& result : tiles_results) { - auto det_res = static_cast(result.get()); + auto& det_res = result->instance_segmentation_result; all_saliecy_maps.push_back(det_res->saliency_map); } diff --git a/src/cpp/tilers/src/semantic_segmentation.cpp b/src/cpp/tilers/src/semantic_segmentation.cpp index 6a8efc89..ac7ff5b7 100644 --- a/src/cpp/tilers/src/semantic_segmentation.cpp +++ b/src/cpp/tilers/src/semantic_segmentation.cpp @@ -50,15 +50,13 @@ SemanticSegmentationTiler::SemanticSegmentationTiler(std::shared_ptr get_from_any_maps("return_soft_prediction", configuration, extra_config, return_soft_prediction); } -std::unique_ptr SemanticSegmentationTiler::run(const ImageInputData& inputData) { 
- auto result = this->run_impl(inputData); - return std::unique_ptr( - static_cast(result.release())); +std::unique_ptr SemanticSegmentationTiler::run(const ImageInputData& inputData) { + return this->run_impl(inputData); } -std::unique_ptr SemanticSegmentationTiler::postprocess_tile(std::unique_ptr tile_result, +std::unique_ptr SemanticSegmentationTiler::postprocess_tile(std::unique_ptr tile_result, const cv::Rect&) { - ImageResultWithSoftPrediction* soft = dynamic_cast(tile_result.get()); + ImageResultWithSoftPrediction* soft = dynamic_cast(tile_result->image_result.get()); if (!soft) { throw std::runtime_error( "SemanticSegmentationTiler requires the underlying model to return ImageResultWithSoftPrediction"); @@ -66,22 +64,22 @@ std::unique_ptr SemanticSegmentationTiler::postprocess_tile(std::uni return tile_result; } -std::unique_ptr SemanticSegmentationTiler::merge_results( - const std::vector>& tiles_results, +std::unique_ptr SemanticSegmentationTiler::merge_results( + const std::vector>& tiles_results, const cv::Size& image_size, const std::vector& tile_coords) { if (tiles_results.empty()) { - return std::unique_ptr(new ImageResultWithSoftPrediction()); + return std::make_unique(); } cv::Mat voting_mask(cv::Size(image_size.width, image_size.height), CV_32SC1, cv::Scalar(0)); - auto* sseg_res = static_cast(tiles_results[0].get()); + auto* sseg_res = static_cast(tiles_results[0]->image_result.get()); cv::Mat merged_soft_prediction(cv::Size(image_size.width, image_size.height), CV_32FC(sseg_res->soft_prediction.channels()), cv::Scalar(0)); for (size_t i = 0; i < tiles_results.size(); ++i) { - auto* sseg_res = static_cast(tiles_results[i].get()); + auto* sseg_res = static_cast(tiles_results[i]->image_result.get()); voting_mask(tile_coords[i]) += 1; merged_soft_prediction(tile_coords[i]) += sseg_res->soft_prediction; } @@ -91,16 +89,12 @@ std::unique_ptr SemanticSegmentationTiler::merge_results( cv::Mat hard_prediction = 
create_hard_prediction_from_soft_prediction(merged_soft_prediction, soft_threshold, blur_strength); - std::unique_ptr retVal; + auto scene = std::make_unique(); + auto result = std::make_unique(); + result->resultImage = hard_prediction; if (return_soft_prediction) { - auto* result = new ImageResultWithSoftPrediction(); - retVal = std::unique_ptr(result); result->soft_prediction = merged_soft_prediction; - result->resultImage = hard_prediction; - } else { - auto* result = new ImageResult(); - retVal = std::unique_ptr(result); - result->resultImage = hard_prediction; } - return retVal; + scene->image_result = std::move(result); + return scene; } diff --git a/src/cpp/tilers/src/tiler_base.cpp b/src/cpp/tilers/src/tiler_base.cpp index 6d979dea..e56ed226 100644 --- a/src/cpp/tilers/src/tiler_base.cpp +++ b/src/cpp/tilers/src/tiler_base.cpp @@ -68,8 +68,8 @@ std::vector TilerBase::filter_tiles(const cv::Mat&, const std::vector< return coords; } -std::unique_ptr TilerBase::predict_sync(const cv::Mat& image, const std::vector& tile_coords) { - std::vector> tile_results; +std::unique_ptr TilerBase::predict_sync(const cv::Mat& image, const std::vector& tile_coords) { + std::vector> tile_results; for (const auto& coord : tile_coords) { auto tile_img = crop_tile(image, coord); @@ -81,7 +81,7 @@ std::unique_ptr TilerBase::predict_sync(const cv::Mat& image, const return merge_results(tile_results, image.size(), tile_coords); } -std::unique_ptr TilerBase::predict_async(const cv::Mat& image, const std::vector& tile_coords) { +std::unique_ptr TilerBase::predict_async(const cv::Mat& image, const std::vector& tile_coords) { std::vector input_data; input_data.reserve(tile_coords.size()); @@ -90,7 +90,7 @@ std::unique_ptr TilerBase::predict_async(const cv::Mat& image, const input_data.push_back(ImageInputData(tile_img.clone())); } - std::vector> tile_results; + std::vector> tile_results; auto tile_predictions = model->inferBatchImage(input_data); for (size_t i = 0; i < 
tile_predictions.size(); ++i) { auto tile_result = postprocess_tile(std::move(tile_predictions[i]), tile_coords[i]); @@ -103,7 +103,7 @@ cv::Mat TilerBase::crop_tile(const cv::Mat& image, const cv::Rect& coord) { return cv::Mat(image, coord); } -std::unique_ptr TilerBase::run_impl(const ImageInputData& inputData) { +std::unique_ptr TilerBase::run_impl(const ImageInputData& inputData) { auto& image = inputData.inputImage; auto tile_coords = tile(image.size()); tile_coords = filter_tiles(image, tile_coords); diff --git a/tests/cpp/accuracy/test_YOLOv8.cpp b/tests/cpp/accuracy/test_YOLOv8.cpp index c4ee90bd..881812f9 100644 --- a/tests/cpp/accuracy/test_YOLOv8.cpp +++ b/tests/cpp/accuracy/test_YOLOv8.cpp @@ -60,7 +60,7 @@ TEST_P(AccuracySuit, TestDetector) { EXPECT_EQ(ss.str(), string{*cached_model(param.model_name) ->infer(cv::imread(data() + "/coco128/images/train2017/" + param.refpath.stem().string() + - ".jpg"))}); + ".jpg"))->detection_result}); } INSTANTIATE_TEST_SUITE_P(YOLOv8, AccuracySuit, testing::ValuesIn([] { diff --git a/tests/cpp/accuracy/test_accuracy.cpp b/tests/cpp/accuracy/test_accuracy.cpp index 615ebc43..ed5a3a17 100644 --- a/tests/cpp/accuracy/test_accuracy.cpp +++ b/tests/cpp/accuracy/test_accuracy.cpp @@ -159,7 +159,7 @@ TEST_P(ModelParameterizedTest, AccuracyTest) { throw std::runtime_error{"Failed to read the image"}; } - std::unique_ptr result; + std::unique_ptr result; if (modelData.tiler == "DetectionTiler") { auto tiler = DetectionTiler(std::move(model), {}); if (modelData.input_res.height > 0 && modelData.input_res.width > 0) { @@ -169,7 +169,7 @@ TEST_P(ModelParameterizedTest, AccuracyTest) { } else { result = model->infer(image); } - EXPECT_EQ(std::string{*result}, modelData.testData[i].reference[0]); + EXPECT_EQ(std::string{*result->detection_result}, modelData.testData[i].reference[0]); } } } else if (modelData.type == "ClassificationModel") { @@ -183,7 +183,7 @@ TEST_P(ModelParameterizedTest, AccuracyTest) { throw 
std::runtime_error{"Failed to read the image"}; } auto result = model->infer(image); - EXPECT_EQ(std::string{*result}, modelData.testData[i].reference[0]); + EXPECT_EQ(std::string{*result->classification_result}, modelData.testData[i].reference[0]); } } } else if (modelData.type == "SegmentationModel") { @@ -197,7 +197,7 @@ TEST_P(ModelParameterizedTest, AccuracyTest) { throw std::runtime_error{"Failed to read the image"}; } - std::unique_ptr pred; + std::unique_ptr pred; if (modelData.tiler == "SemanticSegmentationTiler") { auto tiler = SemanticSegmentationTiler(std::move(model), {}); if (modelData.input_res.height > 0 && modelData.input_res.width > 0) { @@ -208,7 +208,7 @@ TEST_P(ModelParameterizedTest, AccuracyTest) { pred = model->infer(image); } - ImageResultWithSoftPrediction* soft = dynamic_cast(pred.get()); + ImageResultWithSoftPrediction* soft = dynamic_cast(pred->image_result.get()); if (soft) { const std::vector& contours = model->getContours(*soft); std::stringstream ss; @@ -218,7 +218,7 @@ TEST_P(ModelParameterizedTest, AccuracyTest) { } ASSERT_EQ(ss.str(), modelData.testData[i].reference[0]); } else { - ASSERT_EQ(std::string{*pred}, modelData.testData[i].reference[0]); + ASSERT_EQ(std::string{*pred->image_result}, modelData.testData[i].reference[0]); } } } @@ -233,7 +233,7 @@ TEST_P(ModelParameterizedTest, AccuracyTest) { throw std::runtime_error{"Failed to read the image"}; } - std::unique_ptr result; + std::unique_ptr result; if (modelData.tiler == "InstanceSegmentationTiler") { auto tiler = InstanceSegmentationTiler(std::move(model), {}); if (modelData.input_res.height > 0 && modelData.input_res.width > 0) { @@ -245,20 +245,20 @@ TEST_P(ModelParameterizedTest, AccuracyTest) { } const std::vector& withRects = - add_rotated_rects(result->segmentedObjects); + add_rotated_rects(result->instance_segmentation_result->segmentedObjects); std::stringstream ss; for (const SegmentedObjectWithRects& obj : withRects) { ss << obj << "; "; } size_t filled = 0; - 
for (const cv::Mat_& cls_map : result->saliency_map) { + for (const cv::Mat_& cls_map : result->instance_segmentation_result->saliency_map) { if (cls_map.data) { ++filled; } } ss << filled << "; "; try { - ss << result->feature_vector.get_shape(); + ss << result->instance_segmentation_result->feature_vector.get_shape(); } catch (ov::Exception&) { ss << "[0]"; } @@ -266,7 +266,7 @@ TEST_P(ModelParameterizedTest, AccuracyTest) { try { // getContours() assumes each instance generates only one contour. // That doesn't hold for some models - for (const Contour& contour : getContours(result->segmentedObjects)) { + for (const Contour& contour : getContours(result->instance_segmentation_result->segmentedObjects)) { ss << contour << "; "; } } catch (const std::runtime_error&) { @@ -285,7 +285,7 @@ TEST_P(ModelParameterizedTest, AccuracyTest) { throw std::runtime_error{"Failed to read the image"}; } auto result = model->infer(image); - EXPECT_EQ(std::string{*result}, modelData.testData[i].reference[0]); + EXPECT_EQ(std::string{*result->anomaly_result}, modelData.testData[i].reference[0]); } } } else if (modelData.type == "KeypointDetectionModel") { @@ -303,7 +303,7 @@ TEST_P(ModelParameterizedTest, AccuracyTest) { throw std::runtime_error{"Failed to read the image"}; } auto result = model->infer(image); - EXPECT_EQ(std::string{(*result).poses[0]}, modelData.testData[i].reference[0]); + EXPECT_EQ(std::string{(*result->keypoint_detection_result).poses[0]}, modelData.testData[i].reference[0]); } } } From 110886bf6fb198e9a23f39bc7091d0066174f436 Mon Sep 17 00:00:00 2001 From: Ronald Hecker Date: Thu, 24 Apr 2025 15:32:15 +0200 Subject: [PATCH 02/16] Rework Detection to new scene system Refactored quite a bit for the tiler since saliency maps are cv::Mats now. 
--- .../include/models/detection_model_yolo.h | 5 +- src/cpp/models/include/models/results.h | 66 ++++++- src/cpp/models/src/detection_model_ssd.cpp | 78 ++++---- src/cpp/models/src/detection_model_yolo.cpp | 59 +++--- .../src/detection_model_yolov3_onnx.cpp | 10 +- src/cpp/models/src/detection_model_yolox.cpp | 12 +- src/cpp/tilers/include/tilers/detection.h | 2 +- src/cpp/tilers/src/detection.cpp | 173 ++++++------------ tests/cpp/accuracy/test_YOLOv8.cpp | 2 +- tests/cpp/accuracy/test_accuracy.cpp | 2 +- 10 files changed, 199 insertions(+), 210 deletions(-) diff --git a/src/cpp/models/include/models/detection_model_yolo.h b/src/cpp/models/include/models/detection_model_yolo.h index 40ffad20..491ee50b 100644 --- a/src/cpp/models/include/models/detection_model_yolo.h +++ b/src/cpp/models/include/models/detection_model_yolo.h @@ -16,7 +16,6 @@ #include "models/detection_model_ext.h" -struct DetectedObject; struct InferenceResult; class ModelYolo : public DetectionModelExt { @@ -56,10 +55,10 @@ class ModelYolo : public DetectionModelExt { const unsigned long resized_im_w, const unsigned long original_im_h, const unsigned long original_im_w, - std::vector& objects); + std::vector& objects); static int calculateEntryIndex(int entriesNum, int lcoords, size_t lclasses, int location, int entry); - static double intersectionOverUnion(const DetectedObject& o1, const DetectedObject& o2); + static double intersectionOverUnion(const Box& o1, const Box& o2); std::map regions; float iou_threshold; diff --git a/src/cpp/models/include/models/results.h b/src/cpp/models/include/models/results.h index d87849b5..6000698d 100644 --- a/src/cpp/models/include/models/results.h +++ b/src/cpp/models/include/models/results.h @@ -379,6 +379,42 @@ struct KeypointDetectionResult : public ResultBase { std::vector poses; }; +class Label { +public: + Label(std::string id, std::string name, float score): id(id), name(name), score(score) {} + + std::string id; + std::string name; + float score; + + 
friend std::ostream& operator<< (std::ostream& os, const Label& label) { + return os << label.id << " (" << label.name << "): " << std::fixed << std::setprecision(3) << label.score << "; "; + } +}; + +class Box { +public: + Box(cv::Rect shape, std::vector