openvinotoolkit · susbhere · Oct 12, 2025 · Oct 14, 2025 · Oct 14, 2025 · Oct 12, 2025
@@ -320,6 +320,13 @@ struct layout {
     // element == { 0,0,0,0 } means first no-padding (i.e. data) element
     size_t get_linear_offset(tensor element = tensor(0)) const;
 
+    // Sets up the variables needed to compute linear offsets for a tensor with this layout, including padding.
+    // axes_start_point and axes_end_point receive the start and end coordinates of each axis.
+    void setup_fast_liner_offset(tensor& axes_start_point, tensor& axes_end_point);
+
+    // Computes the linear offset of the element at element_sizes; setup_fast_liner_offset() must have been called first
+    size_t get_linear_offset_fast(int64_t* element_sizes) const;
+
     /// @brief Get aligned linear size calculated as multiplication of all elements.
     size_t get_linear_size() const;
 
@@ -464,6 +471,8 @@ struct layout {
 private:
     /// The size of the @ref memory (excluding padding)
     ov::PartialShape size;
+    std::vector<int64_t> _padded_sizes;
+    std::vector<int64_t> _axes_size_map;
 };
 
 inline ::std::ostream& operator<<(::std::ostream& os, const layout& p) {

diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/tensor.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/tensor.hpp
@@ -433,6 +433,24 @@ struct tensor {
         return sizes;
     }
 
+    /// @brief Returns, for each axis of @p fmt taken in output order, the index of that axis in the format's internal order.
+    /// @throws std::domain_error if an output-order axis is missing from the internal order.
+    std::vector<value_type> get_axes_size_map(cldnn::format fmt) const {
+        const auto& external_axes = fmt.order();
+        const auto& internal_axes = fmt.internal_order();
+
+        std::vector<value_type> axis_map;
+        axis_map.reserve(external_axes.size());
+        for (const auto axis_name : external_axes) {
+            const auto internal_idx = internal_axes.find(axis_name);
+            if (internal_idx == std::string::npos)
+                throw std::domain_error(std::string("Unknown coord type: ") + axis_name);
+
+            axis_map.push_back(internal_idx);
+        }
+        return axis_map;
+    }
+
     /// @brief Returns a vector of tensors values, ordered batch, feature, spatial_x, spatial_y.
     std::vector<value_type> sizes() const {
         std::vector<value_type> sizes(sizeof(_sizes) / sizeof(_sizes[0]), 0);

@@ -27,14 +27,17 @@ void convert_and_copy_no_pad(const src_t* src, dst_t* dst, size_t size) {
 
 template <typename src_t, typename dst_t>
 void convert_and_copy_padded_source(const src_t* src, dst_t* dst, cldnn::layout layout) {
-    cldnn::tensor size = layout.get_tensor();
-    for (int64_t b = 0; b < size.batch[0]; b++) {
-        for (int64_t f = 0; f < size.feature[0]; f++) {
-            for (int64_t w = 0; w < size.spatial[3]; w++) {
-                for (int64_t z = 0; z < size.spatial[2]; z++) {
-                    for (int64_t y = 0; y < size.spatial[1]; y++) {
-                        for (int64_t x = 0; x < size.spatial[0]; x++) {
-                            *dst++ = static_cast<dst_t>(src[layout.get_linear_offset(cldnn::tensor(b, f, x, y, z, w))]);
+    cldnn::tensor axes_start_point, axes_end_point;
+    layout.setup_fast_liner_offset(axes_start_point, axes_end_point);
+
+    for (int64_t b = axes_start_point.batch[0]; b < axes_end_point.batch[0]; b++) {
+        for (int64_t f = axes_start_point.feature[0]; f < axes_end_point.feature[0]; f++) {
+            for (int64_t w = axes_start_point.spatial[3]; w < axes_end_point.spatial[3]; w++) {
+                for (int64_t z = axes_start_point.spatial[2]; z < axes_end_point.spatial[2]; z++) {
+                    for (int64_t y = axes_start_point.spatial[1]; y < axes_end_point.spatial[1]; y++) {
+                        for (int64_t x = axes_start_point.spatial[0]; x < axes_end_point.spatial[0]; x++) {
+                            int64_t element_sizes[6] = {b, f, x, y, z, w};
+                            *dst++ = static_cast<dst_t>(src[layout.get_linear_offset_fast(element_sizes)]);
                         }
                     }
                 }

@@ -335,6 +335,34 @@ std::vector<tensor::value_type> layout::get_pitches() const {
     return pitches;
 }
 
+void layout::setup_fast_liner_offset(tensor& axes_start_point, tensor& axes_end_point) {
+    // Caches _padded_sizes and _axes_size_map for get_linear_offset_fast() and reports the loop bounds through both out-params.
+    const auto rank = format.dimension();
+    const auto plain_fmt = format::get_default_format(rank, format::is_weights_format(format), format::is_grouped(format));
+
+    const std::vector<tensor::value_type> pad_before(data_padding._lower_size.begin(), data_padding._lower_size.begin() + rank);
+    const std::vector<tensor::value_type> pad_after(data_padding._upper_size.begin(), data_padding._upper_size.begin() + rank);
+    axes_start_point = tensor(plain_fmt, pad_before, 0);
+    const tensor upper_pad(plain_fmt, pad_after, 0);
+
+    auto extents = get_tensor();
+    axes_end_point = extents + axes_start_point;  // end bounds use the real extents, before zeros are patched below
+
+    std::replace(extents.raw.begin(), extents.raw.end(), 0, 1);  // zero-sized axes count as 1 when computing padded sizes
+
+    _padded_sizes = (extents + axes_start_point + upper_pad).sizes(format);
+    _axes_size_map = axes_start_point.get_axes_size_map(format);
+}
+
+size_t layout::get_linear_offset_fast(int64_t* element_sizes) const {
+    // Folds the coordinates in element_sizes into one offset, walking the axes in the order cached by setup_fast_liner_offset().
+    // Seeding with 0 and starting at axis 0 yields the same value as before, but no longer reads _axes_size_map[0] when it is empty.
+    size_t offset = 0;
+    for (size_t i = 0; i < _axes_size_map.size(); i++) {
+        offset = offset * _padded_sizes[i] + element_sizes[_axes_size_map[i]];
+    }
+    return offset;
+}
 
 size_t layout::get_linear_offset(tensor element) const {
     auto default_fmt = format::get_default_format(format.dimension(), format::is_weights_format(format), format::is_grouped(format));