Skip to content
Merged
12 changes: 5 additions & 7 deletions app/Graph/build.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,8 +66,8 @@ void build_graph_linear(it_lab_ai::Graph& graph, it_lab_ai::Tensor& input,

it_lab_ai::Tensor tmp_values = tensor;
it_lab_ai::Tensor tmp_bias = it_lab_ai::make_tensor(tensor.get_bias());
auto conv_layer = std::make_shared<it_lab_ai::ConvolutionalLayer>(
1, pads, 1, tmp_values, tmp_bias, 1, true);
auto conv_layer = it_lab_ai::LayerFactory::createConvLayer(
options, 1, pads, 1, tmp_values, tmp_bias, 1, true);
layers.push_back(conv_layer);
layerpostop.push_back(false);
if (comments) std::cout << "ConvLayer added to layers." << '\n';
Expand Down Expand Up @@ -367,12 +367,10 @@ ParseResult parse_json_model(RuntimeOptions options,
}
}

it_lab_ai::Tensor tmp_tensor = tensor;

it_lab_ai::Tensor& tmp_tensor = tensor;
it_lab_ai::Tensor tmp_bias = it_lab_ai::make_tensor(tensor.get_bias());

auto conv_layer = std::make_shared<it_lab_ai::ConvolutionalLayer>(
stride, pads, dilations, tmp_tensor, tmp_bias, group);
auto conv_layer = it_lab_ai::LayerFactory::createConvLayer(
options, stride, pads, dilations, tmp_tensor, tmp_bias, group);
layer = conv_layer;
} else if (layer_type.find("Relu") != std::string::npos ||
layer_type.find("relu") != std::string::npos) {
Expand Down
13 changes: 13 additions & 0 deletions app/Graph/build.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
#include "layers/SplitLayer.hpp"
#include "layers/Tensor.hpp"
#include "layers/TransposeLayer.hpp"
#include "layers_oneDNN/ConvLayer.hpp"
#include "layers_oneDNN/EWLayer.hpp"

extern std::unordered_map<std::string, std::string> model_paths;
Expand Down Expand Up @@ -86,6 +87,18 @@ class LayerFactory {
}
return std::make_shared<EWLayer>(function, alpha, beta);
}

/// Builds a convolution layer for the backend selected in @p options.
/// Dispatches to the oneDNN-backed implementation when Backend::kOneDnn is
/// requested; otherwise falls back to the reference ConvolutionalLayer.
/// Both implementations accept the identical constructor argument list.
static std::shared_ptr<Layer> createConvLayer(
    const RuntimeOptions& options, size_t step, size_t pads, size_t dilations,
    const Tensor& kernel, const Tensor& bias = Tensor(), size_t group = 1,
    bool useLegacyImpl = false) {
  const bool wants_onednn = (options.backend == Backend::kOneDnn);
  std::shared_ptr<Layer> layer;
  if (wants_onednn) {
    layer = std::make_shared<ConvLayerOneDnn>(step, pads, dilations, kernel,
                                              bias, group, useLegacyImpl);
  } else {
    layer = std::make_shared<ConvolutionalLayer>(step, pads, dilations, kernel,
                                                 bias, group, useLegacyImpl);
  }
  return layer;
}
};

} // namespace it_lab_ai
113 changes: 113 additions & 0 deletions include/layers_oneDNN/ConvLayer.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
#pragma once

#include <dnnl.hpp>
#include <memory>
#include <string>
#include <vector>

#include "layers/Layer.hpp"

namespace it_lab_ai {

/// Convolutional layer backed by oneDNN (DNNL) primitives.
///
/// Mirrors the constructor signature of the reference ConvolutionalLayer so
/// the two are interchangeable behind LayerFactory::createConvLayer. The
/// cached-state members (initialized_, last_input_shape_, last_data_type_)
/// suggest primitives are (re)built when the input shape or dtype changes —
/// the actual policy lives in run() / initialize_convolution() in the .cpp,
/// which is not visible here.
class ConvLayerOneDnn : public Layer {
 public:
  /// Default layer: stride 1, no padding, dilation 1, single group.
  /// Scalar defaults come from the in-class member initializers below.
  ConvLayerOneDnn() : Layer(kConvolution) {}

  /// @param stride      spatial stride (same value for every spatial dim)
  /// @param pads        symmetric zero-padding amount
  /// @param dilations   kernel dilation factor
  /// @param kernel      convolution weight tensor
  /// @param bias        optional bias tensor (default-constructed => none)
  /// @param group       number of convolution groups
  /// @param use_legacy  kept for signature parity with ConvolutionalLayer
  ConvLayerOneDnn(size_t stride, size_t pads, size_t dilations,
                  const Tensor& kernel, const Tensor& bias = Tensor(),
                  size_t group = 1, bool use_legacy = false)
      : Layer(kConvolution),
        stride_(stride),
        pads_(pads),
        dilations_(dilations),
        kernel_(kernel),
        bias_(bias),
        group_(group),
        use_legacy_(use_legacy) {}

  /// Executes the convolution on `input`, writing results into `output`.
  void run(const std::vector<Tensor>& input,
           std::vector<Tensor>& output) override;

#ifdef ENABLE_STATISTIC_WEIGHTS
  Tensor get_weights() override { return kernel_; }
#endif

 private:
  // Primitive setup / validation helpers (defined in the .cpp).
  void initialize_convolution(const Shape& input_shape, Type data_type);
  void validate_input(const std::vector<Tensor>& input) const;
  void validate_depthwise_input(const std::vector<Tensor>& input) const;
  static void create_output_tensor(Tensor& output_tensor,
                                   const Shape& output_shape, Type data_type,
                                   dnnl::memory& dst_memory);
  static void fill_memory_with_tensor(dnnl::memory& memory,
                                      const Tensor& tensor, Type data_type);
  void initialize_special_conv(const Shape& input_shape, Type data_type);

  void run_special_conv(const std::vector<Tensor>& input,
                        std::vector<Tensor>& output);

  /// Converts a project Shape into oneDNN dims (element-wise cast).
  [[nodiscard]] static dnnl::memory::dims shape_to_dims(const Shape& shape) {
    dnnl::memory::dims dims;
    dims.reserve(shape.dims());  // one allocation instead of growth steps
    for (size_t i = 0; i < shape.dims(); ++i) {
      dims.push_back(static_cast<dnnl::memory::dim>(shape[i]));
    }
    return dims;
  }

  /// Converts oneDNN dims back into a project Shape (element-wise cast).
  [[nodiscard]] static Shape dims_to_shape(const dnnl::memory::dims& dims) {
    std::vector<size_t> shape_vec;
    shape_vec.reserve(dims.size());
    for (auto dim : dims) {
      shape_vec.push_back(static_cast<size_t>(dim));
    }
    return Shape(shape_vec);
  }

  // Presumably reorders kernel weights from HWIO to the OIHW layout oneDNN
  // expects — TODO confirm against the .cpp implementation.
  template <typename T>
  std::vector<T> reorder_hwio_to_oihw(const Tensor& kernel);

  [[nodiscard]] Shape get_output_shape(const Shape& input_shape) const;

  [[nodiscard]] dnnl::memory::dims get_output_dims(
      const Shape& input_shape) const {
    return shape_to_dims(get_output_shape(input_shape));
  }

  [[nodiscard]] dnnl::memory::dims get_kernel_dims() const;

  [[nodiscard]] bool is_depthwise_convolution() const;

  // Hyperparameters. In-class initializers carry the defaults previously
  // duplicated in the default constructor's init-list.
  size_t stride_ = 1;
  size_t pads_ = 0;
  size_t dilations_ = 1;
  Tensor kernel_;
  Tensor bias_;
  size_t group_ = 1;
  bool use_legacy_ = false;

  std::unique_ptr<dnnl::engine> engine_;
  std::unique_ptr<dnnl::stream> stream_;

  // Standard convolution path.
  std::unique_ptr<dnnl::convolution_forward> conv_prim_;
  dnnl::memory src_memory_;
  dnnl::memory weights_memory_;
  dnnl::memory bias_memory_;
  dnnl::memory dst_memory_;

  // Depthwise convolution path (separate primitive and memories).
  std::unique_ptr<dnnl::convolution_forward> depthwise_conv_prim_;
  dnnl::memory depthwise_src_memory_;
  dnnl::memory depthwise_weights_memory_;
  dnnl::memory depthwise_bias_memory_;
  dnnl::memory depthwise_dst_memory_;

  // Cache keys for lazy primitive (re)initialization.
  bool initialized_ = false;
  Shape last_input_shape_;
  Type last_data_type_{};  // value-init: was uninitialized (indeterminate
                           // enum value on a default-constructed layer)
};

} // namespace it_lab_ai
Loading
Loading