Remove input shape as input for the tasks

RHeckerIntel · RHeckerIntel · commit 61e53f54a60b · 2025-06-17T06:47:36.000+02:00
orig_width and orig_height are stored in rt_info anyway.
Serialization is a separate step and it _should_ output the same model.
diff --git a/src/cpp/include/tasks/classification.h b/src/cpp/include/tasks/classification.h
@@ -19,9 +19,8 @@ class Classification {
     std::shared_ptr<InferenceAdapter> adapter;
     VisionPipeline<ClassificationResult> pipeline;
 
-    Classification(std::shared_ptr<InferenceAdapter> adapter, cv::Size input_shape)
-        : adapter(adapter),
-          input_shape(input_shape) {
+    Classification(std::shared_ptr<InferenceAdapter> adapter)
+        : adapter(adapter) {
         pipeline = VisionPipeline<ClassificationResult>(
             adapter,
             [&](cv::Mat image) {
@@ -56,7 +55,7 @@ class Classification {
         }
     }
 
-    static cv::Size serialize(std::shared_ptr<ov::Model>& ov_model);
+    static void serialize(std::shared_ptr<ov::Model>& ov_model);
     static Classification load(const std::string& model_path);
 
     ClassificationResult infer(cv::Mat image);
diff --git a/src/cpp/include/tasks/detection/ssd.h b/src/cpp/include/tasks/detection/ssd.h
@@ -28,15 +28,19 @@ class SSD {
 public:
     std::shared_ptr<InferenceAdapter> adapter;
 
-    SSD(std::shared_ptr<InferenceAdapter> adapter, cv::Size input_shape) : adapter(adapter), input_shape(input_shape) {
+    SSD(std::shared_ptr<InferenceAdapter> adapter) : adapter(adapter) {
         auto config = adapter->getModelConfig();
         labels = utils::get_from_any_maps("labels", config, {}, labels);
         confidence_threshold = utils::get_from_any_maps("confidence_threshold", config, {}, confidence_threshold);
+        // input_shape is no longer a constructor argument, so it must not appear in the initializer list
+        // (that would self-initialize the member). Presumably the last argument is the fallback value - TODO confirm.
+        input_shape.width = utils::get_from_any_maps("orig_width", config, {}, input_shape.width);
+        input_shape.height = utils::get_from_any_maps("orig_height", config, {}, input_shape.height);
     }
     std::map<std::string, ov::Tensor> preprocess(cv::Mat);
     DetectionResult postprocess(InferenceResult& infResult);
 
-    static cv::Size serialize(std::shared_ptr<ov::Model> ov_model);
+    static void serialize(std::shared_ptr<ov::Model> ov_model);
 
     SSDOutputMode output_mode;
 
diff --git a/src/cpp/include/tasks/instance_segmentation.h b/src/cpp/include/tasks/instance_segmentation.h
@@ -18,9 +18,8 @@ class InstanceSegmentation {
     std::shared_ptr<InferenceAdapter> adapter;
     VisionPipeline<InstanceSegmentationResult> pipeline;
 
-    InstanceSegmentation(std::shared_ptr<InferenceAdapter> adapter, cv::Size input_shape)
-        : adapter(adapter),
-          input_shape(input_shape) {
+    InstanceSegmentation(std::shared_ptr<InferenceAdapter> adapter)
+        : adapter(adapter) {
         pipeline = VisionPipeline<InstanceSegmentationResult>(
             adapter,
             [&](cv::Mat image) {
@@ -33,9 +32,12 @@ class InstanceSegmentation {
         auto config = adapter->getModelConfig();
         labels = utils::get_from_any_maps("labels", config, {}, labels);
         confidence_threshold = utils::get_from_any_maps("confidence_threshold", config, {}, confidence_threshold);
+        // Each dimension uses its own current value as the last argument (presumably the fallback - TODO confirm).
+        input_shape.width = utils::get_from_any_maps("orig_width", config, {}, input_shape.width);
+        input_shape.height = utils::get_from_any_maps("orig_height", config, {}, input_shape.height);
     }
 
-    static cv::Size serialize(std::shared_ptr<ov::Model>& ov_model);
+    static void serialize(std::shared_ptr<ov::Model>& ov_model);
     static InstanceSegmentation load(const std::string& model_path);
 
     InstanceSegmentationResult infer(cv::Mat image);
diff --git a/src/cpp/include/tasks/semantic_segmentation.h b/src/cpp/include/tasks/semantic_segmentation.h
@@ -33,7 +33,7 @@ class SemanticSegmentation {
         blur_strength = utils::get_from_any_maps("blur_strength", config, {}, blur_strength);
     }
 
-    static cv::Size serialize(std::shared_ptr<ov::Model>& ov_model);
+    static void serialize(std::shared_ptr<ov::Model>& ov_model);
     static SemanticSegmentation load(const std::string& model_path);
 
     std::map<std::string, ov::Tensor> preprocess(cv::Mat);
diff --git a/src/cpp/src/tasks/classification.cpp b/src/cpp/src/tasks/classification.cpp
@@ -82,7 +82,7 @@ std::vector<size_t> get_non_xai_output_indices(const std::vector<ov::Output<ov::
 }
 }  // namespace
 
-cv::Size Classification::serialize(std::shared_ptr<ov::Model>& ov_model) {
+void Classification::serialize(std::shared_ptr<ov::Model>& ov_model) {
     // --------------------------- Configure input & output -------------------------------------------------
     // --------------------------- Prepare input  ------------------------------------------------------
     auto config = ov_model->has_rt_info("model_info") ? ov_model->get_rt_info<ov::AnyMap>("model_info") : ov::AnyMap{};
@@ -171,7 +171,9 @@ cv::Size Classification::serialize(std::shared_ptr<ov::Model>& ov_model) {
     if (multiclass) {
         addOrFindSoftmaxAndTopkOutputs(ov_model, topk, output_raw_scores);
     }
-    return cv::Size(input_shape[0], input_shape[1]);
+
+    ov_model->set_rt_info(input_shape[0], "model_info", "orig_width");
+    ov_model->set_rt_info(input_shape[1], "model_info", "orig_height");
 }
 
 Classification Classification::load(const std::string& model_path) {
@@ -185,16 +187,14 @@ Classification Classification::load(const std::string& model_path) {
         throw std::runtime_error("Incorrect or unsupported model_type");
     }
 
-    cv::Size origin_input_shape;
     if (utils::model_has_embedded_processing(model)) {
         std::cout << "model already was serialized" << std::endl;
-        origin_input_shape = utils::get_input_shape_from_model_info(model);
     } else {
-        origin_input_shape = Classification::serialize(model);
+        Classification::serialize(model);
     }
     auto adapter = std::make_shared<OpenVINOInferenceAdapter>();
     adapter->loadModel(model, core, "AUTO");
-    return Classification(adapter, origin_input_shape);
+    return Classification(adapter);
 }
 
 ClassificationResult Classification::infer(cv::Mat image) {
diff --git a/src/cpp/src/tasks/detection.cpp b/src/cpp/src/tasks/detection.cpp
@@ -22,16 +22,15 @@ DetectionModel DetectionModel::load(const std::string& model_path, const ov::Any
         throw std::runtime_error("Incorrect or unsupported model_type");
     }
 
-    cv::Size origin_input_shape;
     if (utils::model_has_embedded_processing(model)) {
         std::cout << "model already was serialized" << std::endl;
-        origin_input_shape = utils::get_input_shape_from_model_info(model);
+        //utils::get_input_shape_from_model_info(model);
     } else {
-        origin_input_shape = SSD::serialize(model);
+        SSD::serialize(model);
     }
     auto adapter = std::make_shared<OpenVINOInferenceAdapter>();
     adapter->loadModel(model, core, "AUTO");
-    return DetectionModel(std::make_unique<SSD>(adapter, origin_input_shape), configuration);
+    return DetectionModel(std::make_unique<SSD>(adapter), configuration);
 }
 
 InferenceInput DetectionModel::preprocess(cv::Mat image) {
diff --git a/src/cpp/src/tasks/detection/ssd.cpp b/src/cpp/src/tasks/detection/ssd.cpp
@@ -67,7 +67,7 @@ std::map<std::string, ov::Tensor> SSD::preprocess(cv::Mat image) {
     return input;
 }
 
-cv::Size SSD::serialize(std::shared_ptr<ov::Model> ov_model) {
+void SSD::serialize(std::shared_ptr<ov::Model> ov_model) {
     auto output_mode = ov_model->outputs().size() > 1 ? SSDOutputMode::multi : SSDOutputMode::single;
 
     auto input_tensor = ov_model->inputs()[0];
@@ -111,7 +111,9 @@ cv::Size SSD::serialize(std::shared_ptr<ov::Model> ov_model) {
         // prepareMultipleOutputs(ov_model); //This does nothing from what I can see.
     }
 
-    return cv::Size(input_shape[0], input_shape[1]);
+    
+    ov_model->set_rt_info(input_shape[0], "model_info", "orig_width");
+    ov_model->set_rt_info(input_shape[1], "model_info", "orig_height");
 }
 
 void SSD::prepareSingleOutput(std::shared_ptr<ov::Model> ov_model) {
diff --git a/src/cpp/src/tasks/instance_segmentation.cpp b/src/cpp/src/tasks/instance_segmentation.cpp
@@ -121,7 +121,7 @@ cv::Mat segm_postprocess(const SegmentedObject& box, const cv::Mat& unpadded, in
     return im_mask;
 }
 
-cv::Size InstanceSegmentation::serialize(std::shared_ptr<ov::Model>& ov_model) {
+void InstanceSegmentation::serialize(std::shared_ptr<ov::Model>& ov_model) {
     if (ov_model->inputs().size() != 1) {
         throw std::logic_error("MaskRCNNModel model wrapper supports topologies with only 1 input");
     }
@@ -145,6 +145,7 @@ cv::Size InstanceSegmentation::serialize(std::shared_ptr<ov::Model>& ov_model) {
     mean_values = utils::get_from_any_maps("mean_values", config, ov::AnyMap{}, mean_values);
     uint8_t pad_value = 0;
     bool reverse_input_channels = false;
+    reverse_input_channels = utils::get_from_any_maps("reverse_input_channels", config, ov::AnyMap{}, reverse_input_channels);
 
     ov_model = utils::embedProcessing(
         ov_model,
@@ -180,7 +181,8 @@ cv::Size InstanceSegmentation::serialize(std::shared_ptr<ov::Model>& ov_model) {
                                saliency_map_name + ", " + feature_vector_name + " and 3 or 4 other outputs");
     }
 
-    return input_shape;
+    ov_model->set_rt_info(input_shape.width, "model_info", "orig_width");
+    ov_model->set_rt_info(input_shape.height, "model_info", "orig_height");
 }
 
 InstanceSegmentation InstanceSegmentation::load(const std::string& model_path) {
@@ -194,16 +196,14 @@ InstanceSegmentation InstanceSegmentation::load(const std::string& model_path) {
         throw std::runtime_error("Incorrect or unsupported model_type");
     }
 
-    cv::Size origin_input_shape;
     if (utils::model_has_embedded_processing(model)) {
         std::cout << "model already was serialized" << std::endl;
-        origin_input_shape = utils::get_input_shape_from_model_info(model);
     } else {
-        origin_input_shape = serialize(model);
+        serialize(model);
     }
     auto adapter = std::make_shared<OpenVINOInferenceAdapter>();
     adapter->loadModel(model, core, "AUTO");
-    return InstanceSegmentation(adapter, origin_input_shape);
+    return InstanceSegmentation(adapter);
 }
 
 InstanceSegmentationResult InstanceSegmentation::infer(cv::Mat image) {
diff --git a/src/cpp/src/tasks/semantic_segmentation.cpp b/src/cpp/src/tasks/semantic_segmentation.cpp
@@ -31,19 +31,17 @@ SemanticSegmentation SemanticSegmentation::load(const std::string& model_path) {
         throw std::runtime_error("Incorrect or unsupported model_type");
     }
 
-    cv::Size origin_input_shape;
     if (utils::model_has_embedded_processing(model)) {
         std::cout << "model already was serialized" << std::endl;
-        origin_input_shape = utils::get_input_shape_from_model_info(model);
     } else {
-        origin_input_shape = SemanticSegmentation::serialize(model);
+        SemanticSegmentation::serialize(model);
     }
     auto adapter = std::make_shared<OpenVINOInferenceAdapter>();
     adapter->loadModel(model, core, "AUTO");
     return SemanticSegmentation(adapter);
 }
 
-cv::Size SemanticSegmentation::serialize(std::shared_ptr<ov::Model>& ov_model) {
+void SemanticSegmentation::serialize(std::shared_ptr<ov::Model>& ov_model) {
     if (ov_model->inputs().size() != 1) {
         throw std::logic_error("Segmentation model wrapper supports topologies with only 1 input");
     }
@@ -90,14 +88,13 @@ cv::Size SemanticSegmentation::serialize(std::shared_ptr<ov::Model>& ov_model) {
     scale_values = utils::get_from_any_maps("scale_values", config, ov::AnyMap{}, scale_values);
     mean_values = utils::get_from_any_maps("mean_values", config, ov::AnyMap{}, mean_values);
 
-    auto input_shape = ov::Shape{shape[ov::layout::width_idx(layout)], shape[ov::layout::height_idx(layout)]};
 
     ov_model = utils::embedProcessing(ov_model,
                                       input.get_any_name(),
                                       layout,
                                       resize_mode,
                                       interpolation_mode,
-                                      input_shape,
+                                      ov::Shape{shape[ov::layout::width_idx(layout)], shape[ov::layout::height_idx(layout)]},
                                       pad_value,
                                       reverse_input_channels,
                                       mean_values,
@@ -115,7 +112,10 @@ cv::Size SemanticSegmentation::serialize(std::shared_ptr<ov::Model>& ov_model) {
     }
     ov_model = ppp.build();
 
-    return cv::Size(input_shape[0], input_shape[1]);
+    cv::Size input_shape(shape[ov::layout::width_idx(layout)],
+                         shape[ov::layout::height_idx(layout)]);
+    ov_model->set_rt_info(input_shape.width, "model_info", "orig_width");
+    ov_model->set_rt_info(input_shape.height, "model_info", "orig_height");
 }
 
 std::map<std::string, ov::Tensor> SemanticSegmentation::preprocess(cv::Mat image) {
@@ -162,7 +162,6 @@ SemanticSegmentationResult SemanticSegmentation::postprocess(InferenceResult& in
     SemanticSegmentationResult result;
     result.resultImage = hard_prediction;
     if (return_soft_prediction) {
-        std::cout << " got a soft prediction..." << std::endl;
         cv::resize(soft_prediction, soft_prediction, infResult.inputImageSize, 0.0, 0.0, cv::INTER_NEAREST);
         result.soft_prediction = soft_prediction;
         auto iter = infResult.data.find(feature_vector_name);
@@ -227,7 +226,6 @@ cv::Mat SemanticSegmentation::create_hard_prediction_from_soft_prediction(cv::Ma
 
     bool applyBlurAndSoftThreshold = (blur_strength > -1 && soft_threshold < std::numeric_limits<float>::infinity());
     if (applyBlurAndSoftThreshold) {
-        std::cout << "applying blur and soft threshold:  " << blur_strength << std::endl;
         cv::blur(soft_prediction_blurred, soft_prediction_blurred, cv::Size{blur_strength, blur_strength});
     }
 

Original file line number	Diff line number	Diff line change
`@@ -33,7 +33,7 @@ class SemanticSegmentation {`
`33`	`33`	`blur_strength = utils::get_from_any_maps("blur_strength", config, {}, blur_strength);`
`34`	`34`	`}`
`35`	`35`
`36`		`- static cv::Size serialize(std::shared_ptr<ov::Model>& ov_model);`
	`36`	`+ static void serialize(std::shared_ptr<ov::Model>& ov_model);`
`37`	`37`	`static SemanticSegmentation load(const std::string& model_path);`
`38`	`38`
`39`	`39`	`std::map<std::string, ov::Tensor> preprocess(cv::Mat);`
Original file line number	Diff line number	Diff line change
`@@ -82,7 +82,7 @@ std::vector<size_t> get_non_xai_output_indices(const std::vector<ov::Output<ov::`
`82`	`82`	`}`
`83`	`83`	`} // namespace`
`84`	`84`
`85`		`-cv::Size Classification::serialize(std::shared_ptr<ov::Model>& ov_model) {`
	`85`	`+void Classification::serialize(std::shared_ptr<ov::Model>& ov_model) {`
`86`	`86`	`// --------------------------- Configure input & output -------------------------------------------------`
`87`	`87`	`// --------------------------- Prepare input ------------------------------------------------------`
`88`	`88`	`auto config = ov_model->has_rt_info("model_info") ? ov_model->get_rt_info<ov::AnyMap>("model_info") : ov::AnyMap{};`
`@@ -171,7 +171,9 @@ cv::Size Classification::serialize(std::shared_ptr<ov::Model>& ov_model) {`
`171`	`171`	`if (multiclass) {`
`172`	`172`	`addOrFindSoftmaxAndTopkOutputs(ov_model, topk, output_raw_scores);`
`173`	`173`	`}`
`174`		`- return cv::Size(input_shape[0], input_shape[1]);`
	`174`	`+`
	`175`	`+ ov_model->set_rt_info(input_shape[0], "model_info", "orig_width");`
	`176`	`+ ov_model->set_rt_info(input_shape[1], "model_info", "orig_height");`
`175`	`177`	`}`
`176`	`178`
`177`	`179`	`Classification Classification::load(const std::string& model_path) {`
`@@ -185,16 +187,14 @@ Classification Classification::load(const std::string& model_path) {`
`185`	`187`	`throw std::runtime_error("Incorrect or unsupported model_type");`
`186`	`188`	`}`
`187`	`189`
`188`		`- cv::Size origin_input_shape;`
`189`	`190`	`if (utils::model_has_embedded_processing(model)) {`
`190`	`191`	`std::cout << "model already was serialized" << std::endl;`
`191`		`- origin_input_shape = utils::get_input_shape_from_model_info(model);`
`192`	`192`	`} else {`
`193`		`- origin_input_shape = Classification::serialize(model);`
	`193`	`+ Classification::serialize(model);`
`194`	`194`	`}`
`195`	`195`	`auto adapter = std::make_shared<OpenVINOInferenceAdapter>();`
`196`	`196`	`adapter->loadModel(model, core, "AUTO");`
`197`		`- return Classification(adapter, origin_input_shape);`
	`197`	`+ return Classification(adapter);`
`198`	`198`	`}`
`199`	`199`
`200`	`200`	`ClassificationResult Classification::infer(cv::Mat image) {`
Original file line number	Diff line number	Diff line change
`@@ -22,16 +22,15 @@ DetectionModel DetectionModel::load(const std::string& model_path, const ov::Any`
`22`	`22`	`throw std::runtime_error("Incorrect or unsupported model_type");`
`23`	`23`	`}`
`24`	`24`
`25`		`- cv::Size origin_input_shape;`
`26`	`25`	`if (utils::model_has_embedded_processing(model)) {`
`27`	`26`	`std::cout << "model already was serialized" << std::endl;`
`28`		`- origin_input_shape = utils::get_input_shape_from_model_info(model);`
	`27`	`+ //utils::get_input_shape_from_model_info(model);`
`29`	`28`	`} else {`
`30`		`- origin_input_shape = SSD::serialize(model);`
	`29`	`+ SSD::serialize(model);`
`31`	`30`	`}`
`32`	`31`	`auto adapter = std::make_shared<OpenVINOInferenceAdapter>();`
`33`	`32`	`adapter->loadModel(model, core, "AUTO");`
`34`		`- return DetectionModel(std::make_unique<SSD>(adapter, origin_input_shape), configuration);`
	`33`	`+ return DetectionModel(std::make_unique<SSD>(adapter), configuration);`
`35`	`34`	`}`
`36`	`35`
`37`	`36`	`InferenceInput DetectionModel::preprocess(cv::Mat image) {`
Original file line number	Diff line number	Diff line change
`@@ -67,7 +67,7 @@ std::map<std::string, ov::Tensor> SSD::preprocess(cv::Mat image) {`
`67`	`67`	`return input;`
`68`	`68`	`}`
`69`	`69`
`70`		`-cv::Size SSD::serialize(std::shared_ptr<ov::Model> ov_model) {`
	`70`	`+void SSD::serialize(std::shared_ptr<ov::Model> ov_model) {`
`71`	`71`	`auto output_mode = ov_model->outputs().size() > 1 ? SSDOutputMode::multi : SSDOutputMode::single;`
`72`	`72`
`73`	`73`	`auto input_tensor = ov_model->inputs()[0];`
`@@ -111,7 +111,9 @@ cv::Size SSD::serialize(std::shared_ptr<ov::Model> ov_model) {`
`111`	`111`	`// prepareMultipleOutputs(ov_model); //This does nothing from what I can see.`
`112`	`112`	`}`
`113`	`113`
`114`		`- return cv::Size(input_shape[0], input_shape[1]);`
	`114`	`+`
	`115`	`+ ov_model->set_rt_info(input_shape[0], "model_info", "orig_width");`
	`116`	`+ ov_model->set_rt_info(input_shape[1], "model_info", "orig_height");`
`115`	`117`	`}`
`116`	`118`
`117`	`119`	`void SSD::prepareSingleOutput(std::shared_ptr<ov::Model> ov_model) {`
Original file line number	Diff line number	Diff line change
`@@ -121,7 +121,7 @@ cv::Mat segm_postprocess(const SegmentedObject& box, const cv::Mat& unpadded, in`
`121`	`121`	`return im_mask;`
`122`	`122`	`}`
`123`	`123`
`124`		`-cv::Size InstanceSegmentation::serialize(std::shared_ptr<ov::Model>& ov_model) {`
	`124`	`+void InstanceSegmentation::serialize(std::shared_ptr<ov::Model>& ov_model) {`
`125`	`125`	`if (ov_model->inputs().size() != 1) {`
`126`	`126`	`throw std::logic_error("MaskRCNNModel model wrapper supports topologies with only 1 input");`
`127`	`127`	`}`
`@@ -145,6 +145,7 @@ cv::Size InstanceSegmentation::serialize(std::shared_ptr<ov::Model>& ov_model) {`
`145`	`145`	`mean_values = utils::get_from_any_maps("mean_values", config, ov::AnyMap{}, mean_values);`
`146`	`146`	`uint8_t pad_value = 0;`
`147`	`147`	`bool reverse_input_channels = false;`
	`148`	`+ reverse_input_channels = utils::get_from_any_maps("reverse_input_channels", config, ov::AnyMap{}, reverse_input_channels);`
`148`	`149`
`149`	`150`	`ov_model = utils::embedProcessing(`
`150`	`151`	`ov_model,`
`@@ -180,7 +181,8 @@ cv::Size InstanceSegmentation::serialize(std::shared_ptr<ov::Model>& ov_model) {`
`180`	`181`	`saliency_map_name + ", " + feature_vector_name + " and 3 or 4 other outputs");`
`181`	`182`	`}`
`182`	`183`
`183`		`- return input_shape;`
	`184`	`+ ov_model->set_rt_info(input_shape.width, "model_info", "orig_width");`
	`185`	`+ ov_model->set_rt_info(input_shape.height, "model_info", "orig_height");`
`184`	`186`	`}`
`185`	`187`
`186`	`188`	`InstanceSegmentation InstanceSegmentation::load(const std::string& model_path) {`
`@@ -194,16 +196,14 @@ InstanceSegmentation InstanceSegmentation::load(const std::string& model_path) {`
`194`	`196`	`throw std::runtime_error("Incorrect or unsupported model_type");`
`195`	`197`	`}`
`196`	`198`
`197`		`- cv::Size origin_input_shape;`
`198`	`199`	`if (utils::model_has_embedded_processing(model)) {`
`199`	`200`	`std::cout << "model already was serialized" << std::endl;`
`200`		`- origin_input_shape = utils::get_input_shape_from_model_info(model);`
`201`	`201`	`} else {`
`202`		`- origin_input_shape = serialize(model);`
	`202`	`+ serialize(model);`
`203`	`203`	`}`
`204`	`204`	`auto adapter = std::make_shared<OpenVINOInferenceAdapter>();`
`205`	`205`	`adapter->loadModel(model, core, "AUTO");`
`206`		`- return InstanceSegmentation(adapter, origin_input_shape);`
	`206`	`+ return InstanceSegmentation(adapter);`
`207`	`207`	`}`
`208`	`208`
`209`	`209`	`InstanceSegmentationResult InstanceSegmentation::infer(cv::Mat image) {`