diff --git a/app/Graph/build.cpp b/app/Graph/build.cpp index ebf27e48..9a4d1cd0 100644 --- a/app/Graph/build.cpp +++ b/app/Graph/build.cpp @@ -66,8 +66,8 @@ void build_graph_linear(it_lab_ai::Graph& graph, it_lab_ai::Tensor& input, it_lab_ai::Tensor tmp_values = tensor; it_lab_ai::Tensor tmp_bias = it_lab_ai::make_tensor(tensor.get_bias()); - auto conv_layer = std::make_shared( - 1, pads, 1, tmp_values, tmp_bias, 1, true); + auto conv_layer = it_lab_ai::LayerFactory::createConvLayer( + options, 1, pads, 1, tmp_values, tmp_bias, 1, true); layers.push_back(conv_layer); layerpostop.push_back(false); if (comments) std::cout << "ConvLayer added to layers." << '\n'; @@ -367,12 +367,10 @@ ParseResult parse_json_model(RuntimeOptions options, } } - it_lab_ai::Tensor tmp_tensor = tensor; - + it_lab_ai::Tensor& tmp_tensor = tensor; it_lab_ai::Tensor tmp_bias = it_lab_ai::make_tensor(tensor.get_bias()); - - auto conv_layer = std::make_shared( - stride, pads, dilations, tmp_tensor, tmp_bias, group); + auto conv_layer = it_lab_ai::LayerFactory::createConvLayer( + options, stride, pads, dilations, tmp_tensor, tmp_bias, group); layer = conv_layer; } else if (layer_type.find("Relu") != std::string::npos || layer_type.find("relu") != std::string::npos) { diff --git a/app/Graph/build.hpp b/app/Graph/build.hpp index e9aae0e2..0ddb32e4 100644 --- a/app/Graph/build.hpp +++ b/app/Graph/build.hpp @@ -32,6 +32,7 @@ #include "layers/SplitLayer.hpp" #include "layers/Tensor.hpp" #include "layers/TransposeLayer.hpp" +#include "layers_oneDNN/ConvLayer.hpp" #include "layers_oneDNN/EWLayer.hpp" extern std::unordered_map model_paths; @@ -86,6 +87,18 @@ class LayerFactory { } return std::make_shared(function, alpha, beta); } + + static std::shared_ptr createConvLayer( + const RuntimeOptions& options, size_t step, size_t pads, size_t dilations, + const Tensor& kernel, const Tensor& bias = Tensor(), size_t group = 1, + bool useLegacyImpl = false) { + if (options.backend == Backend::kOneDnn) { + 
return std::make_shared(step, pads, dilations, kernel, + bias, group, useLegacyImpl); + } + return std::make_shared(step, pads, dilations, kernel, + bias, group, useLegacyImpl); + } }; } // namespace it_lab_ai diff --git a/include/layers_oneDNN/ConvLayer.hpp b/include/layers_oneDNN/ConvLayer.hpp new file mode 100644 index 00000000..97b9541c --- /dev/null +++ b/include/layers_oneDNN/ConvLayer.hpp @@ -0,0 +1,113 @@ +#pragma once + +#include +#include +#include +#include + +#include "layers/Layer.hpp" + +namespace it_lab_ai { + +class ConvLayerOneDnn : public Layer { + public: + ConvLayerOneDnn() + : Layer(kConvolution), + stride_(1), + pads_(0), + dilations_(1), + group_(1), + use_legacy_(false) {} + + ConvLayerOneDnn(size_t stride, size_t pads, size_t dilations, + const Tensor& kernel, const Tensor& bias = Tensor(), + size_t group = 1, bool use_legacy = false) + : Layer(kConvolution), + stride_(stride), + pads_(pads), + dilations_(dilations), + kernel_(kernel), + bias_(bias), + group_(group), + use_legacy_(use_legacy) {} + + void run(const std::vector& input, + std::vector& output) override; + +#ifdef ENABLE_STATISTIC_WEIGHTS + Tensor get_weights() override { return kernel_; } +#endif + + private: + void initialize_convolution(const Shape& input_shape, Type data_type); + void validate_input(const std::vector& input) const; + void validate_depthwise_input(const std::vector& input) const; + static void create_output_tensor(Tensor& output_tensor, + const Shape& output_shape, Type data_type, + dnnl::memory& dst_memory); + static void fill_memory_with_tensor(dnnl::memory& memory, + const Tensor& tensor, Type data_type); + void initialize_special_conv(const Shape& input_shape, Type data_type); + + void run_special_conv(const std::vector& input, + std::vector& output); + + [[nodiscard]] static dnnl::memory::dims shape_to_dims(const Shape& shape) { + dnnl::memory::dims dims; + for (size_t i = 0; i < shape.dims(); ++i) { + dims.push_back(static_cast(shape[i])); + } + return 
dims; + } + + [[nodiscard]] static Shape dims_to_shape(const dnnl::memory::dims& dims) { + std::vector shape_vec; + for (auto dim : dims) { + shape_vec.push_back(static_cast(dim)); + } + return Shape(shape_vec); + } + + template + std::vector reorder_hwio_to_oihw(const Tensor& kernel); + + [[nodiscard]] Shape get_output_shape(const Shape& input_shape) const; + + [[nodiscard]] dnnl::memory::dims get_output_dims( + const Shape& input_shape) const { + return shape_to_dims(get_output_shape(input_shape)); + } + + [[nodiscard]] dnnl::memory::dims get_kernel_dims() const; + + [[nodiscard]] bool is_depthwise_convolution() const; + + size_t stride_; + size_t pads_; + size_t dilations_; + Tensor kernel_; + Tensor bias_; + size_t group_; + bool use_legacy_; + + std::unique_ptr engine_; + std::unique_ptr stream_; + + std::unique_ptr conv_prim_; + dnnl::memory src_memory_; + dnnl::memory weights_memory_; + dnnl::memory bias_memory_; + dnnl::memory dst_memory_; + + std::unique_ptr depthwise_conv_prim_; + dnnl::memory depthwise_src_memory_; + dnnl::memory depthwise_weights_memory_; + dnnl::memory depthwise_bias_memory_; + dnnl::memory depthwise_dst_memory_; + + bool initialized_ = false; + Shape last_input_shape_; + Type last_data_type_; +}; + +} // namespace it_lab_ai \ No newline at end of file diff --git a/src/layers_oneDNN/ConvLayer.cpp b/src/layers_oneDNN/ConvLayer.cpp new file mode 100644 index 00000000..8e3bc113 --- /dev/null +++ b/src/layers_oneDNN/ConvLayer.cpp @@ -0,0 +1,535 @@ +#include "layers_oneDNN/ConvLayer.hpp" + +#include +#include +#include +#include +#include + +namespace it_lab_ai { + +void ConvLayerOneDnn::run(const std::vector& input, + std::vector& output) { + if (use_legacy_) { + run_special_conv(input, output); + return; + } + + validate_input(input); + + const Tensor& input_tensor = input[0]; + Type data_type = input_tensor.get_type(); + const Shape& input_shape = input_tensor.get_shape(); + + bool need_reinit = !initialized_ || input_shape != 
last_input_shape_ || + data_type != last_data_type_; + if (need_reinit) { + initialize_convolution(input_shape, data_type); + last_input_shape_ = input_shape; + last_data_type_ = data_type; + } + + try { + if (data_type == Type::kFloat) { + const std::vector& input_data = *input_tensor.as(); + std::copy(input_data.begin(), input_data.end(), + static_cast(src_memory_.get_data_handle())); + } else if (data_type == Type::kInt) { + const std::vector& input_data = *input_tensor.as(); + std::vector float_input(input_data.size()); + std::transform(input_data.begin(), input_data.end(), float_input.begin(), + [](int val) { return static_cast(val); }); + std::copy(float_input.begin(), float_input.end(), + static_cast(src_memory_.get_data_handle())); + } + + if (!bias_.empty()) { + conv_prim_->execute(*stream_, {{DNNL_ARG_SRC, src_memory_}, + {DNNL_ARG_WEIGHTS, weights_memory_}, + {DNNL_ARG_BIAS, bias_memory_}, + {DNNL_ARG_DST, dst_memory_}}); + } else { + conv_prim_->execute(*stream_, {{DNNL_ARG_SRC, src_memory_}, + {DNNL_ARG_WEIGHTS, weights_memory_}, + {DNNL_ARG_DST, dst_memory_}}); + } + + stream_->wait(); + + Shape output_shape = get_output_shape(input_shape); + create_output_tensor(output[0], output_shape, data_type, dst_memory_); + + } catch (const std::exception& e) { + std::cerr << "oneDNN convolution execution failed: " << e.what() << '\n'; + throw; + } +} + +void ConvLayerOneDnn::create_output_tensor(Tensor& output_tensor, + const Shape& output_shape, + Type data_type, + dnnl::memory& dst_memory) { + size_t output_size = output_shape.count(); + + if (data_type == Type::kFloat) { + std::vector output_data(output_size); + std::copy(static_cast(dst_memory.get_data_handle()), + static_cast(dst_memory.get_data_handle()) + output_size, + output_data.begin()); + output_tensor = make_tensor(output_data, output_shape); + } else if (data_type == Type::kInt) { + std::vector float_output(output_size); + std::copy(static_cast(dst_memory.get_data_handle()), + 
static_cast(dst_memory.get_data_handle()) + output_size, + float_output.begin()); + + std::vector int_output(output_size); + std::transform(float_output.begin(), float_output.end(), int_output.begin(), + [](float val) { return static_cast(std::round(val)); }); + + output_tensor = make_tensor(int_output, output_shape); + } +} + +void ConvLayerOneDnn::validate_input(const std::vector& input) const { + if (input.size() != 1) { + throw std::runtime_error( + "ConvLayerOneDnn: Expected exactly 1 input tensor"); + } + + const Shape& input_shape = input[0].get_shape(); + const Shape& kernel_shape = kernel_.get_shape(); + + if (input_shape.dims() != 4) { + throw std::runtime_error("ConvLayerOneDnn: Input must be 4D (NCHW format)"); + } + + if (kernel_shape.dims() != 4) { + throw std::runtime_error("ConvLayerOneDnn: Kernel must be 4D"); + } + + if (is_depthwise_convolution()) { + validate_depthwise_input(input); + return; + } + + size_t in_channels = input_shape[1]; + size_t kernel_in_channels = kernel_shape[1]; + + if (group_ > 1) { + if (in_channels % group_ != 0) { + throw std::runtime_error( + "ConvLayerOneDnn: Input channels must be divisible by group"); + } + if (kernel_in_channels != in_channels / group_) { + throw std::runtime_error( + "ConvLayerOneDnn: Kernel input channels don't match group " + "configuration"); + } + } else { + if (in_channels != kernel_in_channels) { + throw std::runtime_error( + "ConvLayerOneDnn: Input and kernel channels don't match"); + } + } + + Type data_type = input[0].get_type(); + if (data_type != Type::kFloat && data_type != Type::kInt) { + throw std::runtime_error( + "ConvLayerOneDnn supports only float and int tensors"); + } +} + +void ConvLayerOneDnn::validate_depthwise_input( + const std::vector& input) const { + const Shape& input_shape = input[0].get_shape(); + const Shape& kernel_shape = kernel_.get_shape(); + + size_t in_channels = input_shape[1]; + size_t kernel_out_channels = kernel_shape[0]; + size_t kernel_in_channels = 
kernel_shape[1]; + + if (kernel_out_channels != in_channels || kernel_in_channels != 1) { + throw std::runtime_error("Invalid kernel shape for depthwise convolution"); + } + + Type data_type = input[0].get_type(); + if (data_type != Type::kFloat && data_type != Type::kInt) { + throw std::runtime_error( + "ConvLayerOneDnn supports only float and int tensors"); + } +} + +bool ConvLayerOneDnn::is_depthwise_convolution() const { + const Shape& kernel_shape = kernel_.get_shape(); + return (group_ == kernel_shape[0]); +} + +void ConvLayerOneDnn::initialize_convolution(const Shape& input_shape, + Type data_type) { + try { + engine_ = std::make_unique(dnnl::engine::kind::cpu, 0); + stream_ = std::make_unique(*engine_); + + const size_t in_channels = input_shape[1]; + bool is_depthwise = (group_ > 1 && group_ == in_channels); + + dnnl::memory::dims src_dims = shape_to_dims(input_shape); + dnnl::memory::dims dst_dims = get_output_dims(input_shape); + dnnl::memory::dims strides = {static_cast(stride_), + static_cast(stride_)}; + dnnl::memory::dims padding = {static_cast(pads_), + static_cast(pads_)}; + dnnl::memory::dims dilation = { + static_cast(dilations_ - 1), + static_cast(dilations_ - 1)}; + + dnnl::memory::data_type dnnl_data_type = dnnl::memory::data_type::f32; + + auto src_md = dnnl::memory::desc(src_dims, dnnl_data_type, + dnnl::memory::format_tag::any); + auto dst_md = dnnl::memory::desc(dst_dims, dnnl_data_type, + dnnl::memory::format_tag::any); + + dnnl::memory::dims kernel_dims; + dnnl::memory::format_tag weights_format; + if (is_depthwise) { + kernel_dims = {static_cast(group_), 1, 1, + static_cast(kernel_.get_shape()[2]), + static_cast(kernel_.get_shape()[3])}; + weights_format = dnnl::memory::format_tag::goihw; + } else if (group_ > 1) { + kernel_dims = { + static_cast(group_), + static_cast(kernel_.get_shape()[0] / group_), + static_cast(kernel_.get_shape()[1]), + static_cast(kernel_.get_shape()[2]), + static_cast(kernel_.get_shape()[3])}; + weights_format = 
dnnl::memory::format_tag::goihw; + } else { + const auto& k_shape = kernel_.get_shape(); + kernel_dims = {static_cast(k_shape[0]), + static_cast(k_shape[1]), + static_cast(k_shape[2]), + static_cast(k_shape[3])}; + weights_format = dnnl::memory::format_tag::oihw; + } + + auto weights_md = + dnnl::memory::desc(kernel_dims, dnnl_data_type, weights_format); + + dnnl::memory::desc bias_md; + bool has_bias = !bias_.empty(); + if (!bias_.empty()) { + size_t bias_size; + if (is_depthwise || group_ == 1) { + bias_size = kernel_dims[0]; + } else { + bias_size = kernel_.get_shape()[0]; + } + + bias_md = + dnnl::memory::desc({static_cast(bias_size)}, + dnnl_data_type, dnnl::memory::format_tag::any); + } + + dnnl::convolution_forward::primitive_desc conv_pd = + has_bias ? dnnl::convolution_forward::primitive_desc( + *engine_, dnnl::prop_kind::forward_inference, + dnnl::algorithm::convolution_direct, src_md, weights_md, + bias_md, dst_md, strides, dilation, padding, padding) + : dnnl::convolution_forward::primitive_desc( + *engine_, dnnl::prop_kind::forward_inference, + dnnl::algorithm::convolution_direct, src_md, weights_md, + dst_md, strides, dilation, padding, padding); + + src_memory_ = dnnl::memory(conv_pd.src_desc(), *engine_); + weights_memory_ = dnnl::memory(conv_pd.weights_desc(), *engine_); + dst_memory_ = dnnl::memory(conv_pd.dst_desc(), *engine_); + if (!bias_.empty()) + bias_memory_ = dnnl::memory(conv_pd.bias_desc(), *engine_); + + fill_memory_with_tensor(weights_memory_, kernel_, data_type); + if (!bias_.empty()) fill_memory_with_tensor(bias_memory_, bias_, data_type); + + conv_prim_ = std::make_unique(conv_pd); + initialized_ = true; + + } catch (const dnnl::error& e) { + std::cerr << "oneDNN specific error: " << e.what() + << ", status: " << e.status << '\n'; + throw; + } catch (const std::exception& e) { + std::cerr << "oneDNN convolution initialization failed: " << e.what() + << '\n'; + throw; + } +} + +void 
ConvLayerOneDnn::fill_memory_with_tensor(dnnl::memory& memory, + const Tensor& tensor, + Type data_type) { + if (data_type == Type::kFloat) { + const std::vector& data = *tensor.as(); + std::copy(data.begin(), data.end(), + static_cast(memory.get_data_handle())); + } else if (data_type == Type::kInt) { + const std::vector& data = *tensor.as(); + std::vector float_data(data.size()); + std::transform(data.begin(), data.end(), float_data.begin(), + [](int val) { return static_cast(val); }); + std::copy(float_data.begin(), float_data.end(), + static_cast(memory.get_data_handle())); + } +} + +dnnl::memory::dims ConvLayerOneDnn::get_kernel_dims() const { + const Shape& kernel_shape = kernel_.get_shape(); + + dnnl::memory::dims dims; + for (size_t i = 0; i < kernel_shape.dims(); ++i) { + dims.push_back(static_cast(kernel_shape[i])); + } + + return dims; +} + +Shape ConvLayerOneDnn::get_output_shape(const Shape& input_shape) const { + const Shape& kernel_shape = kernel_.get_shape(); + + size_t kernel_out_channels; + size_t kernel_height; + size_t kernel_width; + + if (use_legacy_ || + (kernel_shape.dims() == 4 && kernel_shape[3] > kernel_shape[2])) { + kernel_height = kernel_shape[0]; + kernel_width = kernel_shape[1]; + kernel_out_channels = kernel_shape[3]; + } else { + kernel_out_channels = kernel_shape[0]; + kernel_height = kernel_shape[2]; + kernel_width = kernel_shape[3]; + } + + size_t batch_size = input_shape[0]; + size_t input_height = input_shape[2]; + size_t input_width = input_shape[3]; + + size_t effective_kernel_height = (kernel_height - 1) * dilations_ + 1; + size_t effective_kernel_width = (kernel_width - 1) * dilations_ + 1; + + size_t output_height = + (input_height + 2 * pads_ - effective_kernel_height) / stride_ + 1; + size_t output_width = + (input_width + 2 * pads_ - effective_kernel_width) / stride_ + 1; + + return Shape({batch_size, kernel_out_channels, output_height, output_width}); +} + +void ConvLayerOneDnn::initialize_special_conv(const Shape& 
input_shape, + Type data_type) { + try { + engine_ = std::make_unique(dnnl::engine::kind::cpu, 0); + stream_ = std::make_unique(*engine_); + + dnnl::memory::dims src_dims = shape_to_dims(input_shape); + + Shape output_shape = get_output_shape(input_shape); + dnnl::memory::dims dst_dims = shape_to_dims(output_shape); + + dnnl::memory::dims strides = {static_cast(stride_), + static_cast(stride_)}; + dnnl::memory::dims padding = {static_cast(pads_), + static_cast(pads_)}; + dnnl::memory::dims dilation = { + static_cast(dilations_ - 1), + static_cast(dilations_ - 1)}; + + dnnl::memory::data_type dt = dnnl::memory::data_type::f32; + + auto src_md = + dnnl::memory::desc(src_dims, dt, dnnl::memory::format_tag::nchw); + auto dst_md = + dnnl::memory::desc(dst_dims, dt, dnnl::memory::format_tag::nchw); + + const auto& k_shape = kernel_.get_shape(); + + dnnl::memory::dims weights_dims = { + static_cast(k_shape[3]), + static_cast(k_shape[2]), + static_cast(k_shape[0]), + static_cast(k_shape[1])}; + + auto weights_md = + dnnl::memory::desc(weights_dims, dt, dnnl::memory::format_tag::oihw); + + dnnl::memory::desc bias_md; + bool has_bias = !bias_.empty(); + if (has_bias) { + bias_md = dnnl::memory::desc( + {static_cast(bias_.get_shape()[0])}, dt, + dnnl::memory::format_tag::any); + } + dnnl::convolution_forward::primitive_desc conv_pd = + has_bias ? 
dnnl::convolution_forward::primitive_desc( + *engine_, dnnl::prop_kind::forward_inference, + dnnl::algorithm::convolution_direct, src_md, weights_md, + bias_md, dst_md, strides, dilation, padding, padding) + : dnnl::convolution_forward::primitive_desc( + *engine_, dnnl::prop_kind::forward_inference, + dnnl::algorithm::convolution_direct, src_md, weights_md, + dst_md, strides, dilation, padding, padding); + + src_memory_ = dnnl::memory(conv_pd.src_desc(), *engine_); + weights_memory_ = dnnl::memory(conv_pd.weights_desc(), *engine_); + dst_memory_ = dnnl::memory(conv_pd.dst_desc(), *engine_); + + if (has_bias) { + bias_memory_ = dnnl::memory(conv_pd.bias_desc(), *engine_); + } + + if (data_type == Type::kFloat) { + const std::vector& kernel_data = *kernel_.as(); + size_t kh = k_shape[0]; + size_t kw = k_shape[1]; + size_t kic = k_shape[2]; + size_t koc = k_shape[3]; + + std::vector reordered(koc * kic * kh * kw); + size_t idx = 0; + + for (size_t oc = 0; oc < koc; oc++) { + for (size_t ic = 0; ic < kic; ic++) { + for (size_t h = 0; h < kh; h++) { + for (size_t w = 0; w < kw; w++) { + size_t src_idx = ((h * kw + w) * kic + ic) * koc + oc; + reordered[idx++] = kernel_data[src_idx]; + } + } + } + } + + std::memcpy(weights_memory_.get_data_handle(), reordered.data(), + reordered.size() * sizeof(float)); + + } else if (data_type == Type::kInt) { + const std::vector& kernel_data_int = *kernel_.as(); + size_t kh = k_shape[0]; + size_t kw = k_shape[1]; + size_t kic = k_shape[2]; + size_t koc = k_shape[3]; + + std::vector reordered(koc * kic * kh * kw); + size_t idx = 0; + + for (size_t oc = 0; oc < koc; oc++) { + for (size_t ic = 0; ic < kic; ic++) { + for (size_t h = 0; h < kh; h++) { + for (size_t w = 0; w < kw; w++) { + size_t src_idx = ((h * kw + w) * kic + ic) * koc + oc; + reordered[idx++] = static_cast(kernel_data_int[src_idx]); + } + } + } + } + + std::memcpy(weights_memory_.get_data_handle(), reordered.data(), + reordered.size() * sizeof(float)); + } + + conv_prim_ 
= std::make_unique(conv_pd); + initialized_ = true; + + } catch (const dnnl::error& e) { + std::cerr << "oneDNN error: " << e.what() << ", status: " << e.status + << '\n'; + throw; + } catch (const std::exception& e) { + std::cerr << "Special conv initialization failed: " << e.what() << '\n'; + throw; + } +} + +void ConvLayerOneDnn::run_special_conv(const std::vector& input, + std::vector& output) { + const Tensor& input_tensor = input[0]; + Type data_type = input_tensor.get_type(); + const Shape& input_shape = input_tensor.get_shape(); + + if (!initialized_ || input_shape != last_input_shape_ || + data_type != last_data_type_) { + initialize_special_conv(input_shape, data_type); + last_input_shape_ = input_shape; + last_data_type_ = data_type; + } + + if (data_type == Type::kFloat) { + const std::vector& input_data = *input_tensor.as(); + std::memcpy(src_memory_.get_data_handle(), input_data.data(), + input_data.size() * sizeof(float)); + } else if (data_type == Type::kInt) { + const std::vector& input_data = *input_tensor.as(); + std::vector float_input(input_data.size()); + std::transform(input_data.begin(), input_data.end(), float_input.begin(), + [](int val) { return static_cast(val); }); + std::memcpy(src_memory_.get_data_handle(), float_input.data(), + float_input.size() * sizeof(float)); + } else { + throw std::runtime_error("Unsupported input type in run_special_conv"); + } + + std::unordered_map args = { + {DNNL_ARG_SRC, src_memory_}, + {DNNL_ARG_WEIGHTS, weights_memory_}, + {DNNL_ARG_DST, dst_memory_}}; + + if (!bias_.empty()) { + args[DNNL_ARG_BIAS] = bias_memory_; + } + + conv_prim_->execute(*stream_, args); + stream_->wait(); + + Shape output_shape = get_output_shape(input_shape); + + if (data_type == Type::kFloat) { + std::vector output_data(dst_memory_.get_desc().get_size() / + sizeof(float)); + std::memcpy(output_data.data(), dst_memory_.get_data_handle(), + output_data.size() * sizeof(float)); + output[0] = make_tensor(output_data, output_shape); 
+ } else if (data_type == Type::kInt) { + std::vector tmp(dst_memory_.get_desc().get_size() / sizeof(float)); + std::memcpy(tmp.data(), dst_memory_.get_data_handle(), + tmp.size() * sizeof(float)); + std::vector output_data(tmp.size()); + std::transform(tmp.begin(), tmp.end(), output_data.begin(), + [](float val) { return static_cast(val); }); + output[0] = make_tensor(output_data, output_shape); + } +} + +template +std::vector ConvLayerOneDnn::reorder_hwio_to_oihw(const Tensor& kernel) { + size_t kh = kernel.get_shape()[0]; + size_t kw = kernel.get_shape()[1]; + size_t kic = kernel.get_shape()[2]; + size_t koc = kernel.get_shape()[3]; + + std::vector result(koc * kic * kh * kw); + + size_t idx = 0; + for (size_t oc = 0; oc < koc; oc++) { + for (size_t ic = 0; ic < kic; ic++) { + for (size_t h = 0; h < kh; h++) { + for (size_t w = 0; w < kw; w++) { + result[idx++] = kernel.get({h, w, ic, oc}); + } + } + } + } + return result; +} + +} // namespace it_lab_ai \ No newline at end of file diff --git a/test/single_layer/test_convlayer_onednn.cpp b/test/single_layer/test_convlayer_onednn.cpp new file mode 100644 index 00000000..b54e9692 --- /dev/null +++ b/test/single_layer/test_convlayer_onednn.cpp @@ -0,0 +1,565 @@ +#include +#include +#include +#include + +#include "gtest/gtest.h" +#include "layers/ConvLayer.hpp" +#include "layers_oneDNN/ConvLayer.hpp" + +using namespace it_lab_ai; + +TEST(convlayer_onednn, basic_convolution_2d_float) { + std::vector input_data = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, + 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, + 13.0f, 14.0f, 15.0f, 16.0f}; + + std::vector kernel_data = {1.0f, 0.0f, -1.0f, 1.0f, 0.0f, + -1.0f, 1.0f, 0.0f, -1.0f}; + + Tensor input = make_tensor(input_data, Shape({1, 1, 4, 4})); + Tensor kernel = make_tensor(kernel_data, Shape({1, 1, 3, 3})); + + ConvLayerOneDnn layer(1, 0, 1, kernel); + + Tensor output; + std::vector in{input}; + std::vector out{output}; + + EXPECT_NO_THROW(layer.run(in, out)); + + Shape output_shape = 
out[0].get_shape(); + EXPECT_EQ(output_shape.dims(), 4); + EXPECT_EQ(output_shape[0], 1); + EXPECT_EQ(output_shape[1], 1); + EXPECT_EQ(output_shape[2], 2); + EXPECT_EQ(output_shape[3], 2); +} + +TEST(convlayer_onednn, conv_with_bias_float) { + std::vector input_data(4 * 4, 1.0f); + std::vector kernel_data(3 * 3, 1.0f); + std::vector bias_data = {2.0f}; + + Tensor input = make_tensor(input_data, Shape({1, 1, 4, 4})); + Tensor kernel = make_tensor(kernel_data, Shape({1, 1, 3, 3})); + Tensor bias = make_tensor(bias_data, Shape({1})); + + ConvLayerOneDnn layer(1, 0, 1, kernel, bias); + + Tensor output; + std::vector in{input}; + std::vector out{output}; + + EXPECT_NO_THROW(layer.run(in, out)); + auto output_data = *out[0].as(); + for (float val : output_data) { + EXPECT_NEAR(val, 11.0f, 1e-5); + } +} + +TEST(convlayer_onednn, multi_channel_conv_float) { + std::vector input_data(2 * 4 * 4, 1.0f); + std::vector kernel_data(3 * 2 * 3 * 3, 1.0f); + + Tensor input = make_tensor(input_data, Shape({1, 2, 4, 4})); + Tensor kernel = make_tensor(kernel_data, Shape({3, 2, 3, 3})); + + ConvLayerOneDnn layer(1, 0, 1, kernel); + + Tensor output; + std::vector in{input}; + std::vector out{output}; + + EXPECT_NO_THROW(layer.run(in, out)); + + Shape output_shape = out[0].get_shape(); + EXPECT_EQ(output_shape[1], 3); + EXPECT_EQ(output_shape[2], 2); + EXPECT_EQ(output_shape[3], 2); + + auto output_data = *out[0].as(); + for (float val : output_data) { + EXPECT_NEAR(val, 18.0f, 1e-5); + } +} + +TEST(convlayer_onednn, conv_int_type) { + std::vector input_data = {1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16}; + + std::vector kernel_data(3 * 3, 1); + + Tensor input = make_tensor(input_data, Shape({1, 1, 4, 4})); + Tensor kernel = make_tensor(kernel_data, Shape({1, 1, 3, 3})); + + ConvLayerOneDnn layer(1, 0, 1, kernel); + + Tensor output; + std::vector in{input}; + std::vector out{output}; + + EXPECT_NO_THROW(layer.run(in, out)); + + auto output_data = *out[0].as(); + 
EXPECT_EQ(output_data[0], 54); +} + +TEST(convlayer_onednn, grouped_convolution) { + std::vector input_data(4 * 6 * 6, 1.0f); + std::vector kernel_data(8 * 2 * 3 * 3, 1.0f); + + Tensor input = make_tensor(input_data, Shape({1, 4, 6, 6})); + Tensor kernel = make_tensor(kernel_data, Shape({8, 2, 3, 3})); + + ConvLayerOneDnn layer(1, 0, 1, kernel, Tensor(), 2); + + Tensor output; + std::vector in{input}; + std::vector out{output}; + + EXPECT_NO_THROW(layer.run(in, out)); + + Shape output_shape = out[0].get_shape(); + EXPECT_EQ(output_shape[1], 8); +} + +TEST(convlayer_onednn, depthwise_convolution) { + std::vector input_data(3 * 5 * 5, 1.0f); + std::vector kernel_data(3 * 1 * 3 * 3, 1.0f); + + Tensor input = make_tensor(input_data, Shape({1, 3, 5, 5})); + Tensor kernel = make_tensor(kernel_data, Shape({3, 1, 3, 3})); + + ConvLayerOneDnn layer(1, 0, 1, kernel, Tensor(), 3, false); + + Tensor output; + std::vector in{input}; + std::vector out{output}; + + EXPECT_NO_THROW(layer.run(in, out)); + + Shape output_shape = out[0].get_shape(); + EXPECT_EQ(output_shape[1], 3); + + auto output_data = *out[0].as(); + for (float val : output_data) { + EXPECT_NEAR(val, 9.0f, 1e-5); + } +} + +TEST(convlayer_onednn, invalid_input_tensors) { + std::vector kernel_data(3 * 3, 1.0f); + Tensor kernel = make_tensor(kernel_data, Shape({1, 1, 3, 3})); + + ConvLayerOneDnn layer(1, 0, 1, kernel); + Tensor input1 = + make_tensor(std::vector(16, 1.0f), Shape({1, 1, 4, 4})); + Tensor input2 = + make_tensor(std::vector(16, 1.0f), Shape({1, 1, 4, 4})); + Tensor output; + + std::vector in{input1, input2}; + std::vector out{output}; + + EXPECT_THROW(layer.run(in, out), std::runtime_error); +} + +TEST(convlayer_onednn, invalid_input_dimensions) { + std::vector kernel_data(3 * 3, 1.0f); + Tensor kernel = make_tensor(kernel_data, Shape({1, 1, 3, 3})); + + ConvLayerOneDnn layer(1, 0, 1, kernel); + + Tensor input = + make_tensor(std::vector(4, 1.0f), Shape({1, 2, 2})); + Tensor output; + + std::vector 
in{input}; + std::vector out{output}; + + EXPECT_THROW(layer.run(in, out), std::runtime_error); +} + +TEST(convlayer_onednn, invalid_kernel_dimensions) { + std::vector kernel_data(3 * 3, 1.0f); + Tensor kernel = make_tensor(kernel_data, Shape({1, 3, 3})); + + ConvLayerOneDnn layer(1, 0, 1, kernel); + + Tensor input = + make_tensor(std::vector(16, 1.0f), Shape({1, 1, 4, 4})); + Tensor output; + + std::vector in{input}; + std::vector out{output}; + + EXPECT_THROW(layer.run(in, out), std::runtime_error); +} + +TEST(convlayer_onednn, channel_mismatch_error) { + std::vector kernel_data(1 * 2 * 3 * 3, 1.0f); + Tensor kernel = make_tensor(kernel_data, Shape({1, 2, 3, 3})); + + ConvLayerOneDnn layer(1, 0, 1, kernel); + + Tensor input = + make_tensor(std::vector(16, 1.0f), Shape({1, 1, 4, 4})); + Tensor output; + + std::vector in{input}; + std::vector out{output}; + + EXPECT_THROW(layer.run(in, out), std::runtime_error); +} + +TEST(convlayer_onednn, special_conv_format) { + std::vector kernel_data = { + 1.0f, 0.0f, -1.0f, 1.0f, 0.0f, -1.0f, 1.0f, 0.0f, -1.0f, + + 0.0f, 1.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 1.0f, 0.0f}; + + std::vector input_data(1 * 4 * 4, 1.0f); + + Tensor input = make_tensor(input_data, Shape({1, 1, 4, 4})); + Tensor kernel = make_tensor(kernel_data, Shape({3, 3, 1, 2})); + ConvLayerOneDnn layer(1, 0, 1, kernel, Tensor(), 1, true); + + Tensor output; + std::vector in{input}; + std::vector out{output}; + + EXPECT_NO_THROW(layer.run(in, out)); + + Shape output_shape = out[0].get_shape(); + EXPECT_EQ(output_shape[1], 2); +} + +TEST(convlayer_onednn, large_input_convolution) { + const size_t batch = 2; + const size_t channels = 16; + const size_t height = 32; + const size_t width = 32; + const size_t kernel_size = 5; + const size_t out_channels = 32; + + std::vector input_data(batch * channels * height * width, 1.0f); + std::vector kernel_data( + out_channels * channels * kernel_size * kernel_size, 1.0f); + + Tensor input = + make_tensor(input_data, 
Shape({batch, channels, height, width})); + Tensor kernel = make_tensor( + kernel_data, Shape({out_channels, channels, kernel_size, kernel_size})); + + ConvLayerOneDnn layer(1, 2, 1, kernel); + + Tensor output; + std::vector in{input}; + std::vector out{output}; + + EXPECT_NO_THROW(layer.run(in, out)); + + Shape output_shape = out[0].get_shape(); + EXPECT_EQ(output_shape[0], batch); + EXPECT_EQ(output_shape[1], out_channels); + EXPECT_EQ(output_shape[2], height); + EXPECT_EQ(output_shape[3], width); +} +/* Dilated convolution: a 5x5 single-channel input and a 3x3 kernel are run through ConvLayerOneDnn(1, 0, 2, kernel), i.e. stride 1, pads 0, dilations 2 in the constructor's parameter order. The output is expected to be 1x1 spatially. Only the output shape is checked, not the computed values. */ +TEST(convlayer_onednn, dilation_convolution) { + std::vector input_data = { + 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, + 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f, + 19.0f, 20.0f, 21.0f, 22.0f, 23.0f, 24.0f, 25.0f}; + + std::vector kernel_data = {1.0f, 0.0f, -1.0f, 1.0f, 0.0f, + -1.0f, 1.0f, 0.0f, -1.0f}; + + Tensor input = make_tensor(input_data, Shape({1, 1, 5, 5})); + Tensor kernel = make_tensor(kernel_data, Shape({1, 1, 3, 3})); + + ConvLayerOneDnn layer(1, 0, 2, kernel); + + Tensor output; + std::vector in{input}; + std::vector out{output}; + + EXPECT_NO_THROW(layer.run(in, out)); + Shape output_shape = out[0].get_shape(); + EXPECT_EQ(output_shape[2], 1); + EXPECT_EQ(output_shape[3], 1); +} +/* Runs the same layer instance twice with differently sized inputs (4x4, then 6x6) using a 3x3 kernel, stride 1 and no padding. Neither run may throw, and the output heights must be 2 and 4 respectively, so the second run has to reflect the new input size. */ +TEST(convlayer_onednn, reinitialization_on_input_change) { + std::vector kernel_data(1 * 1 * 3 * 3, 1.0f); + Tensor kernel = make_tensor(kernel_data, Shape({1, 1, 3, 3})); + + ConvLayerOneDnn layer(1, 0, 1, kernel); + + std::vector input1_data(1 * 1 * 4 * 4, 1.0f); + Tensor input1 = make_tensor(input1_data, Shape({1, 1, 4, 4})); + Tensor output1; + + std::vector in1{input1}; + std::vector out1{output1}; + EXPECT_NO_THROW(layer.run(in1, out1)); + + std::vector input2_data(1 * 1 * 6 * 6, 1.0f); + Tensor input2 = make_tensor(input2_data, Shape({1, 1, 6, 6})); + Tensor output2; + + std::vector in2{input2}; + std::vector out2{output2}; + EXPECT_NO_THROW(layer.run(in2, out2)); + + Shape output_shape1 = out1[0].get_shape(); + Shape 
output_shape2 = out2[0].get_shape(); + + EXPECT_EQ(output_shape1[2], 2); + EXPECT_EQ(output_shape2[2], 4); +} +/* Runs a float kernel/input pair and an integer kernel/input pair and checks that the outputs are typed Type::kFloat and Type::kInt. NOTE(review): the two runs use separate layer objects (layer and layer_int), so no single instance actually sees a data-type change despite the test name; confirm whether that is intended. */ +TEST(convlayer_onednn, reinitialization_on_data_type_change) { + std::vector kernel_data(1 * 1 * 3 * 3, 1.0f); + Tensor kernel = make_tensor(kernel_data, Shape({1, 1, 3, 3})); + ConvLayerOneDnn layer(1, 0, 1, kernel); + + std::vector input1_data(1 * 1 * 4 * 4, 1.0f); + Tensor input1 = make_tensor(input1_data, Shape({1, 1, 4, 4})); + Tensor output1; + std::vector in1{input1}; + std::vector out1{output1}; + + EXPECT_NO_THROW(layer.run(in1, out1)); + + std::vector kernel_data_int(1 * 1 * 3 * 3, 1); + Tensor kernel_int = make_tensor(kernel_data_int, Shape({1, 1, 3, 3})); + ConvLayerOneDnn layer_int(1, 0, 1, kernel_int); + + std::vector input2_data(1 * 1 * 4 * 4, 1); + Tensor input2 = make_tensor(input2_data, Shape({1, 1, 4, 4})); + Tensor output2; + std::vector in2{input2}; + std::vector out2{output2}; + + EXPECT_NO_THROW(layer_int.run(in2, out2)); + EXPECT_EQ(out1[0].get_type(), Type::kFloat); + EXPECT_EQ(out2[0].get_type(), Type::kInt); +} +/* A kernel of Shape({2, 3, 5, 5}) is paired with an input of Shape({1, 1, 4, 4}); run() is expected to throw std::runtime_error. NOTE(review): the test name suggests the error originates inside oneDNN, but the test only pins the exception type; confirm the origin against the implementation. */ +TEST(convlayer_onednn, exception_propagation_from_dnnl) { + std::vector kernel_data(2 * 3 * 5 * 5, 1.0f); + Tensor kernel = make_tensor(kernel_data, Shape({2, 3, 5, 5})); + ConvLayerOneDnn layer(1, 0, 1, kernel); + + std::vector input_data(1 * 1 * 4 * 4, 1.0f); + Tensor input = make_tensor(input_data, Shape({1, 1, 4, 4})); + Tensor output; + std::vector in{input}; + std::vector out{output}; + + EXPECT_THROW(layer.run(in, out), std::runtime_error); +} +/* Two grouped-convolution configurations (group = 2) that run() is expected to reject with std::runtime_error. Each case lives in its own scope so the local names can be reused. */ +TEST(convlayer_onednn, group_validation_errors) { + {/* Case 1: kernel Shape({4, 3, 3, 3}), group = 2, input with 5 channels. */ + std::vector kernel_data(4 * 3 * 3 * 3, 1.0f); + Tensor kernel = make_tensor(kernel_data, Shape({4, 3, 3, 3})); + ConvLayerOneDnn layer(1, 0, 1, kernel, Tensor(), 2); + + std::vector input_data(1 * 5 * 6 * 6, 1.0f); + Tensor input = make_tensor(input_data, Shape({1, 5, 6, 6})); + Tensor output; + std::vector in{input}; + std::vector out{output}; + + EXPECT_THROW(layer.run(in, out), std::runtime_error); + } + {/* Case 2: kernel Shape({6, 3, 3, 3}), group = 2, input with 4 channels. */ + 
std::vector kernel_data(6 * 3 * 3 * 3, 1.0f); + Tensor kernel = make_tensor(kernel_data, Shape({6, 3, 3, 3})); + ConvLayerOneDnn layer(1, 0, 1, kernel, Tensor(), 2); + + std::vector input_data(1 * 4 * 6 * 6, 1.0f); + Tensor input = make_tensor(input_data, Shape({1, 4, 6, 6})); + Tensor output; + std::vector in{input}; + std::vector out{output}; + + EXPECT_THROW(layer.run(in, out), std::runtime_error); + } +} +/* group = 3 with a 3-channel input, but the kernel is Shape({3, 2, 3, 3}); run() is expected to throw std::runtime_error. use_legacy is passed explicitly as false. */ +TEST(convlayer_onednn, depthwise_kernel_shape_validation) { + std::vector kernel_data(3 * 2 * 3 * 3, 1.0f); + Tensor kernel = make_tensor(kernel_data, Shape({3, 2, 3, 3})); + + ConvLayerOneDnn layer(1, 0, 1, kernel, Tensor(), 3, false); + + std::vector input_data(1 * 3 * 5 * 5, 1.0f); + Tensor input = make_tensor(input_data, Shape({1, 3, 5, 5})); + Tensor output; + std::vector in{input}; + std::vector out{output}; + + EXPECT_THROW(layer.run(in, out), std::runtime_error); +} +/* Smoke-tests run() with a bias tensor in three configurations: default group, group = 2, and group = 3 on a 3-channel input. Only the first case inspects the result, and only that its first element is positive. */ +TEST(convlayer_onednn, bias_memory_handling) { + {/* Case 1: default group, kernel Shape({2, 3, 3, 3}), bias of 2 elements. */ + std::vector kernel_data(2 * 3 * 3 * 3, 1.0f); + std::vector bias_data(2, 2.0f); + Tensor kernel = make_tensor(kernel_data, Shape({2, 3, 3, 3})); + Tensor bias = make_tensor(bias_data, Shape({2})); + + ConvLayerOneDnn layer(1, 0, 1, kernel, bias); + + std::vector input_data(1 * 3 * 6 * 6, 1.0f); + Tensor input = make_tensor(input_data, Shape({1, 3, 6, 6})); + Tensor output; + std::vector in{input}; + std::vector out{output}; + + EXPECT_NO_THROW(layer.run(in, out)); + + auto output_vals = *out[0].as(); + EXPECT_GT(output_vals[0], 0.0f); + } + + {/* Case 2: group = 2, kernel Shape({4, 2, 3, 3}), 4-channel input; only checks that run() does not throw. */ + std::vector kernel_data(4 * 2 * 3 * 3, 1.0f); + std::vector bias_data(4, 1.0f); + Tensor kernel = make_tensor(kernel_data, Shape({4, 2, 3, 3})); + Tensor bias = make_tensor(bias_data, Shape({4})); + + ConvLayerOneDnn layer(1, 0, 1, kernel, bias, 2); + + std::vector input_data(1 * 4 * 6 * 6, 1.0f); + Tensor input = make_tensor(input_data, Shape({1, 4, 6, 6})); + Tensor output; + std::vector in{input}; + std::vector out{output}; + + EXPECT_NO_THROW(layer.run(in, out)); + } + {/* Case 3: group = 3, kernel Shape({3, 1, 3, 3}), 3-channel input; only checks that run() does not throw. */ + std::vector 
kernel_data(3 * 1 * 3 * 3, 1.0f); + std::vector bias_data(3, 0.5f); + Tensor kernel = make_tensor(kernel_data, Shape({3, 1, 3, 3})); + Tensor bias = make_tensor(bias_data, Shape({3})); + + ConvLayerOneDnn layer(1, 0, 1, kernel, bias, 3, false); + + std::vector input_data(1 * 3 * 5 * 5, 1.0f); + Tensor input = make_tensor(input_data, Shape({1, 3, 5, 5})); + Tensor output; + std::vector in{input}; + std::vector out{output}; + + EXPECT_NO_THROW(layer.run(in, out)); + } +} +/* Kernel Shape({2, 3, 4, 4}) applied to an input of Shape({1, 3, 8, 8}) with stride 1 and no padding: expects 2 output channels and a 5x5 spatial size. */ +TEST(convlayer_onednn, kernel_dims_conversion) { + std::vector kernel_data(2 * 3 * 4 * 4, 1.0f); + Tensor kernel = make_tensor(kernel_data, Shape({2, 3, 4, 4})); + ConvLayerOneDnn layer(1, 0, 1, kernel); + std::vector input_data(1 * 3 * 8 * 8, 1.0f); + Tensor input = make_tensor(input_data, Shape({1, 3, 8, 8})); + Tensor output; + std::vector in{input}; + std::vector out{output}; + + EXPECT_NO_THROW(layer.run(in, out)); + + Shape output_shape = out[0].get_shape(); + EXPECT_EQ(output_shape[1], 2); + EXPECT_EQ(output_shape[2], 5); + EXPECT_EQ(output_shape[3], 5); +} +/* Integer kernel and integer input with use_legacy = true. The kernel here is Shape({3, 3, 1, 2}), unlike the kernels in the non-legacy tests; presumably the legacy path expects a (kh, kw, in, out) layout - TODO confirm against the implementation. Expects an int-typed output with 2 channels. */ +TEST(convlayer_onednn, int_kernel_processing) { + std::vector kernel_data = {1, 0, -1, 1, 0, -1, 1, 0, -1, + 0, 1, 0, 0, 1, 0, 0, 1, 0}; + + Tensor kernel = make_tensor(kernel_data, Shape({3, 3, 1, 2})); + + ConvLayerOneDnn layer(1, 0, 1, kernel, Tensor(), 1, true); + + std::vector input_data(1 * 1 * 4 * 4, 1); + Tensor input = make_tensor(input_data, Shape({1, 1, 4, 4})); + Tensor output; + std::vector in{input}; + std::vector out{output}; + + EXPECT_NO_THROW(layer.run(in, out)); + + EXPECT_EQ(out[0].get_type(), Type::kInt); + + Shape output_shape = out[0].get_shape(); + EXPECT_EQ(output_shape[1], 2); +} +/* Legacy path (use_legacy = true) with stride 2, pads 1, dilations 2 and a kernel of Shape({3, 3, 64, 128}). NOTE(review): any std::exception thrown by run() is caught and only logged, so this test passes whether or not run() throws; the channel-count check executes only when run() succeeds. */ +TEST(convlayer_onednn, special_conv_diagnostics) { + std::vector kernel_data(3 * 3 * 64 * 128, 1.0f); + Tensor kernel = make_tensor(kernel_data, Shape({3, 3, 64, 128})); + + ConvLayerOneDnn layer(2, 1, 2, kernel, Tensor(), 1, true); + + std::vector input_data(1 * 64 * 8 * 8, 1.0f); + Tensor input = make_tensor(input_data, 
Shape({1, 64, 8, 8})); + Tensor output; + std::vector in{input}; + std::vector out{output}; + + try { + layer.run(in, out); + Shape output_shape = out[0].get_shape(); + EXPECT_EQ(output_shape[1], 128); + } catch (const std::exception& e) { + std::cerr << "Caught expected exception: " << e.what() << std::endl; + } +} +/* All-ones integer kernel of Shape({3, 3, 1, 2}) on the legacy path over an all-twos integer input: expects an int-typed output whose elements are all positive. */ +TEST(convlayer_onednn, int_input_processing_special_conv) { + std::vector kernel_data(3 * 3 * 1 * 2, 1); + Tensor kernel = make_tensor(kernel_data, Shape({3, 3, 1, 2})); + + ConvLayerOneDnn layer(1, 0, 1, kernel, Tensor(), 1, true); + + std::vector input_data(1 * 1 * 4 * 4, 2); + Tensor input = make_tensor(input_data, Shape({1, 1, 4, 4})); + Tensor output; + std::vector in{input}; + std::vector out{output}; + + EXPECT_NO_THROW(layer.run(in, out)); + + EXPECT_EQ(out[0].get_type(), Type::kInt); + + auto output_vals = *out[0].as(); + for (int val : output_vals) { + EXPECT_GT(val, 0); + } +} +/* 3x3 integer kernel of ones over a 4x4 integer input of ones, stride 1, no padding: every output element is expected to be 9, the sum over one 3x3 window of ones. */ +TEST(convlayer_onednn, int_output_processing) { + std::vector kernel_data(1 * 1 * 3 * 3, 1); + Tensor kernel = make_tensor(kernel_data, Shape({1, 1, 3, 3})); + + ConvLayerOneDnn layer(1, 0, 1, kernel); + + std::vector input_data(1 * 1 * 4 * 4, 1); + Tensor input = make_tensor(input_data, Shape({1, 1, 4, 4})); + Tensor output; + std::vector in{input}; + std::vector out{output}; + + EXPECT_NO_THROW(layer.run(in, out)); + + auto output_vals = *out[0].as(); + for (int val : output_vals) { + EXPECT_EQ(val, 9); + } +}