diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/layout.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/layout.hpp
index aeb0f2182b5fde..e19a8c6d9cd163 100644
--- a/src/plugins/intel_gpu/include/intel_gpu/runtime/layout.hpp
+++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/layout.hpp
@@ -320,6 +320,10 @@ struct layout {
     // element == { 0,0,0,0 } means first no-padding (i.e. data) element
     size_t get_linear_offset(tensor element = tensor(0)) const;
 
+    // Get variables needed for computing linear offset for a tensor with padding
+    void get_linear_offset_params(tensor& start_points, tensor& end_points, int64_t* padded_sizes,
+                                  int64_t* axes_map, int8_t& map_size) const;
+
     /// @brief Get aligned linear size calculated as multiplication of all elements.
     size_t get_linear_size() const;
 
@@ -462,6 +466,8 @@ struct layout {
     }
 
 private:
+    static void get_axes_map(const cldnn::format& fmt, int64_t* axes_map, int8_t& map_size);
+
     /// The size of the @ref memory (excluding padding)
     ov::PartialShape size;
 };
diff --git a/src/plugins/intel_gpu/src/plugin/common_utils.cpp b/src/plugins/intel_gpu/src/plugin/common_utils.cpp
index b1969aabebf0de..b5ad9aa2b3fb52 100644
--- a/src/plugins/intel_gpu/src/plugin/common_utils.cpp
+++ b/src/plugins/intel_gpu/src/plugin/common_utils.cpp
@@ -25,16 +25,33 @@ void convert_and_copy_no_pad(const src_t* src, dst_t* dst, size_t size) {
         dst[i] = static_cast<dst_t>(src[i]);
 }
 
+// Maximum number of axes (b, f, x, y, z, w) handled below.
+constexpr size_t MAX_NUM_AXES = 6;
+
 template <typename src_t, typename dst_t>
 void convert_and_copy_padded_source(const src_t* src, dst_t* dst, cldnn::layout layout) {
-    cldnn::tensor size = layout.get_tensor();
-    for (int64_t b = 0; b < size.batch[0]; b++) {
-        for (int64_t f = 0; f < size.feature[0]; f++) {
-            for (int64_t w = 0; w < size.spatial[3]; w++) {
-                for (int64_t z = 0; z < size.spatial[2]; z++) {
-                    for (int64_t y = 0; y < size.spatial[1]; y++) {
-                        for (int64_t x = 0; x < size.spatial[0]; x++) {
-                            *dst++ = static_cast<dst_t>(src[layout.get_linear_offset(cldnn::tensor(b, f, x, y, z, w))]);
+    cldnn::tensor axes_start_point, axes_end_point;
+    int64_t padded_sizes[MAX_NUM_AXES], axes_map[MAX_NUM_AXES];
+    int8_t map_len = MAX_NUM_AXES;
+
+    // Fetch the padding bounds and axis order once, instead of recomputing
+    // them inside layout.get_linear_offset() for every single element.
+    layout.get_linear_offset_params(axes_start_point, axes_end_point, padded_sizes, axes_map, map_len);
+
+    for (int64_t b = axes_start_point.batch[0]; b < axes_end_point.batch[0]; b++) {
+        for (int64_t f = axes_start_point.feature[0]; f < axes_end_point.feature[0]; f++) {
+            for (int64_t w = axes_start_point.spatial[3]; w < axes_end_point.spatial[3]; w++) {
+                for (int64_t z = axes_start_point.spatial[2]; z < axes_end_point.spatial[2]; z++) {
+                    for (int64_t y = axes_start_point.spatial[1]; y < axes_end_point.spatial[1]; y++) {
+                        for (int64_t x = axes_start_point.spatial[0]; x < axes_end_point.spatial[0]; x++) {
+                            int64_t element_sizes[MAX_NUM_AXES] = {b, f, x, y, z, w};
+                            size_t offset = element_sizes[axes_map[0]];
+
+                            // Horner-style linearization over the padded sizes in format order.
+                            for (size_t i = 1; i < static_cast<size_t>(map_len); i++)
+                                offset = offset * padded_sizes[i] + element_sizes[axes_map[i]];
+
+                            *dst++ = static_cast<dst_t>(src[offset]);
                         }
                     }
                 }
diff --git a/src/plugins/intel_gpu/src/runtime/layout.cpp b/src/plugins/intel_gpu/src/runtime/layout.cpp
index 05ca549151d9fb..7f6a3222b43614 100644
--- a/src/plugins/intel_gpu/src/runtime/layout.cpp
+++ b/src/plugins/intel_gpu/src/runtime/layout.cpp
@@ -335,6 +335,64 @@ std::vector<tensor::value_type> layout::get_pitches() const {
     return pitches;
 }
 
+// Maps position i in fmt's output order to that axis' position in the
+// internal (bfxyzw) order. On input, map_size is the capacity of axes_map;
+// on output it is the number of entries actually written.
+void layout::get_axes_map(const cldnn::format& fmt, int64_t* axes_map, int8_t& map_size) {
+    const auto& output_order = fmt.order();
+    const auto& internal_order = fmt.internal_order();
+
+    // output_order must not have more elements than allocated in axes_map
+    if (output_order.size() > static_cast<size_t>(map_size)) {
+        OPENVINO_THROW("Layout dimension higher than expected: ", output_order.size());
+    }
+
+    map_size = static_cast<int8_t>(output_order.size());
+
+    for (size_t i = 0; i < static_cast<size_t>(map_size); i++) {
+        auto c = output_order[i];
+        auto pos = internal_order.find(c);
+
+        if (pos == std::string::npos)
+            OPENVINO_THROW("Unknown coord type: ", c);
+
+        axes_map[i] = static_cast<int64_t>(pos);
+    }
+}
+
+// Computes everything needed to enumerate linear offsets of the data elements
+// of a padded tensor: the first (start_points) and one-past-last (end_points)
+// data element per axis, the padded extent of every axis (padded_sizes) and
+// the axis traversal order (axes_map). On input, map_size is the capacity of
+// axes_map/padded_sizes; on output it is the number of valid entries.
+void layout::get_linear_offset_params(tensor& start_points, tensor& end_points, int64_t* padded_sizes,
+                                      int64_t* axes_map, int8_t& map_size) const {
+    auto default_fmt = format::get_default_format(format.dimension(), format::is_weights_format(format), format::is_grouped(format));
+
+    std::vector<tensor::value_type> lower_sizes, upper_sizes;
+    lower_sizes.assign(data_padding._lower_size.begin(), data_padding._lower_size.begin() + format.dimension());
+    upper_sizes.assign(data_padding._upper_size.begin(), data_padding._upper_size.begin() + format.dimension());
+    start_points = tensor(default_fmt, lower_sizes, 0);
+    const auto& u_padd = tensor(default_fmt, upper_sizes, 0);
+
+    auto t = get_tensor();
+    end_points = t + start_points;
+
+    // Zero-sized axes still occupy one element in the allocated buffer.
+    std::replace(t.raw.begin(), t.raw.end(), 0, 1);
+
+    get_axes_map(format, axes_map, map_size);
+    const auto& p_sizes = (t + start_points + u_padd).sizes(format);
+
+    if (p_sizes.size() < static_cast<size_t>(map_size)) {
+        OPENVINO_THROW("Unsupported padded layout dimension: ", p_sizes.size());
+    }
+
+    for (size_t i = 0; i < p_sizes.size(); i++) {
+        padded_sizes[i] = static_cast<int64_t>(p_sizes[i]);
+    }
+}
+
 size_t layout::get_linear_offset(tensor element) const {
     auto default_fmt = format::get_default_format(format.dimension(), format::is_weights_format(format), format::is_grouped(format));