From e8faa23fa814458210666794a3644c4ea4269bd9 Mon Sep 17 00:00:00 2001 From: Vivek Trivedi <5340687+trivedivivek@users.noreply.github.com> Date: Tue, 11 Mar 2025 12:07:33 -0700 Subject: [PATCH 1/3] [ET-VK] Shortening code for slice op when packed dim is not the same as slice dim. Pull Request resolved: https://github.com/pytorch/executorch/pull/9136 This diff shortens the Slice op code path that runs when the packed dimension is not the same as the slice dimension. ghstack-source-id: 271070837 @exported-using-ghexport Differential Revision: [D70737264](https://our.internmc.facebook.com/intern/diff/D70737264/) --- .../vulkan/runtime/graph/ops/impl/Slice.cpp | 30 ++++++------------- 1 file changed, 9 insertions(+), 21 deletions(-) diff --git a/backends/vulkan/runtime/graph/ops/impl/Slice.cpp b/backends/vulkan/runtime/graph/ops/impl/Slice.cpp index 48584880583..b0b2a740d6b 100644 --- a/backends/vulkan/runtime/graph/ops/impl/Slice.cpp +++ b/backends/vulkan/runtime/graph/ops/impl/Slice.cpp @@ -108,27 +108,15 @@ void add_slice_tensor_copy_node( spec_vars)); } else { - // GPU's coordinate is in x, y, z - int64_t gpu_dim = -1; - int64_t in_channel_stride = 1; - if (dim_index == kWidth4D) { - gpu_dim = 0; // width: x dimension in gpu - VK_CHECK_COND(out_sizes[dim] == (1 + (end - start - 1) / step)); - } else if (dim_index == kHeight4D) { - gpu_dim = 1; // height: y dimension - VK_CHECK_COND(out_sizes[dim] == (1 + (end - start - 1) / step)); - } else if (dim_index == kChannel4D) { - gpu_dim = 2; // channel: z dimension - VK_CHECK_COND(out_sizes[dim] == (1 + (end - start - 1) / step)); - in_channel_stride = dim_at(in_sizes, kChannel4D); - } else { - gpu_dim = 3; // batch: w dimension - - in_channel_stride = dim_at(in_sizes, kChannel4D); - if (packed_dim_idx == kChannel4D) { - // Due to channel packing, each batch value is span over stride planes - in_channel_stride = utils::div_up_4(in_channel_stride); - } + // GPU's coordinate is in x = 0, y = 1, z = 2, w = 3 + const 
int64_t gpu_dim = -(dim_index + 1); + // stride of input tensor's channel dimension + int64_t in_channel_stride = dim_at(in_sizes, kChannel4D); + VK_CHECK_COND(out_sizes[dim] == (1 + (end - start - 1) / step)); + + // Due to channel packing, each batch value is span over stride planes + if (dim_index == kBatch4D && packed_dim_idx == kChannel4D) { + in_channel_stride = utils::div_up_4(in_channel_stride); } std::string kernel_name = "slice_batch_height_width"; From 41bf83a1b69d8df97b6716fd5a46c8cee04a164f Mon Sep 17 00:00:00 2001 From: Vivek Trivedi <5340687+trivedivivek@users.noreply.github.com> Date: Tue, 11 Mar 2025 12:07:34 -0700 Subject: [PATCH 2/3] [ET-VK] Renaming slice_channel and slice_batch_height_width files to reflect change in its utility. Pull Request resolved: https://github.com/pytorch/executorch/pull/9137 This diff renames files `slice_batch_height_width.*` to `slice_unpacked_dim.*` and `slice_channel.*` to `slice_packed_dim.*` to reflect the change in their utility after enabling all dim packing for slice op. 
ghstack-source-id: 271070838 @exported-using-ghexport Differential Revision: [D70919994](https://our.internmc.facebook.com/intern/diff/D70919994/) --- .../ops/glsl/{slice_channel.glsl => slice_packed_dim.glsl} | 0 .../ops/glsl/{slice_channel.yaml => slice_packed_dim.yaml} | 4 ++-- ...{slice_batch_height_width.glsl => slice_unpacked_dim.glsl} | 0 ...{slice_batch_height_width.yaml => slice_unpacked_dim.yaml} | 4 ++-- backends/vulkan/runtime/graph/ops/impl/Slice.cpp | 4 ++-- 5 files changed, 6 insertions(+), 6 deletions(-) rename backends/vulkan/runtime/graph/ops/glsl/{slice_channel.glsl => slice_packed_dim.glsl} (100%) rename backends/vulkan/runtime/graph/ops/glsl/{slice_channel.yaml => slice_packed_dim.yaml} (80%) rename backends/vulkan/runtime/graph/ops/glsl/{slice_batch_height_width.glsl => slice_unpacked_dim.glsl} (100%) rename backends/vulkan/runtime/graph/ops/glsl/{slice_batch_height_width.yaml => slice_unpacked_dim.yaml} (72%) diff --git a/backends/vulkan/runtime/graph/ops/glsl/slice_channel.glsl b/backends/vulkan/runtime/graph/ops/glsl/slice_packed_dim.glsl similarity index 100% rename from backends/vulkan/runtime/graph/ops/glsl/slice_channel.glsl rename to backends/vulkan/runtime/graph/ops/glsl/slice_packed_dim.glsl diff --git a/backends/vulkan/runtime/graph/ops/glsl/slice_channel.yaml b/backends/vulkan/runtime/graph/ops/glsl/slice_packed_dim.yaml similarity index 80% rename from backends/vulkan/runtime/graph/ops/glsl/slice_channel.yaml rename to backends/vulkan/runtime/graph/ops/glsl/slice_packed_dim.yaml index 56317260170..718e7316824 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/slice_channel.yaml +++ b/backends/vulkan/runtime/graph/ops/glsl/slice_packed_dim.yaml @@ -1,4 +1,4 @@ -slice_channel: +slice_packed_dim: parameter_names_with_default_values: DTYPE: float NDIM: 3 @@ -8,4 +8,4 @@ slice_channel: - VALUE: half - VALUE: float shader_variants: - - NAME: slice_channel + - NAME: slice_packed_dim diff --git 
a/backends/vulkan/runtime/graph/ops/glsl/slice_batch_height_width.glsl b/backends/vulkan/runtime/graph/ops/glsl/slice_unpacked_dim.glsl similarity index 100% rename from backends/vulkan/runtime/graph/ops/glsl/slice_batch_height_width.glsl rename to backends/vulkan/runtime/graph/ops/glsl/slice_unpacked_dim.glsl diff --git a/backends/vulkan/runtime/graph/ops/glsl/slice_batch_height_width.yaml b/backends/vulkan/runtime/graph/ops/glsl/slice_unpacked_dim.yaml similarity index 72% rename from backends/vulkan/runtime/graph/ops/glsl/slice_batch_height_width.yaml rename to backends/vulkan/runtime/graph/ops/glsl/slice_unpacked_dim.yaml index 9e69b09a304..0453bb707b1 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/slice_batch_height_width.yaml +++ b/backends/vulkan/runtime/graph/ops/glsl/slice_unpacked_dim.yaml @@ -1,4 +1,4 @@ -slice_batch_height_width: +slice_unpacked_dim: parameter_names_with_default_values: DTYPE: float NDIM: 3 @@ -7,4 +7,4 @@ slice_batch_height_width: - VALUE: half - VALUE: float shader_variants: - - NAME: slice_batch_height_width + - NAME: slice_unpacked_dim diff --git a/backends/vulkan/runtime/graph/ops/impl/Slice.cpp b/backends/vulkan/runtime/graph/ops/impl/Slice.cpp index b0b2a740d6b..efda6e04992 100644 --- a/backends/vulkan/runtime/graph/ops/impl/Slice.cpp +++ b/backends/vulkan/runtime/graph/ops/impl/Slice.cpp @@ -83,7 +83,7 @@ void add_slice_tensor_copy_node( // if slice dim is the same as the packed dim, we can use the channel slice if (dim_index == packed_dim_idx) { // slice by channel - std::string kernel_name = "slice_channel"; + std::string kernel_name = "slice_packed_dim"; kernel_name.reserve(kShaderNameReserve); add_dtype_suffix(kernel_name, *t_out); @@ -119,7 +119,7 @@ void add_slice_tensor_copy_node( in_channel_stride = utils::div_up_4(in_channel_stride); } - std::string kernel_name = "slice_batch_height_width"; + std::string kernel_name = "slice_unpacked_dim"; kernel_name.reserve(kShaderNameReserve); add_dtype_suffix(kernel_name, 
*t_out); From 4ee30fd251b686ed1a95e5efdb20c716d6b23aec Mon Sep 17 00:00:00 2001 From: Vivek Trivedi <5340687+trivedivivek@users.noreply.github.com> Date: Tue, 11 Mar 2025 12:07:36 -0700 Subject: [PATCH 3/3] [ET-VK] Performance improvement to few indexing functions. Pull Request resolved: https://github.com/pytorch/executorch/pull/9138 This diff improves the performance of indexing functions in the Vulkan backend of ExecuTorch by reusing intermediate quotients and products in `nchwi_to_tidx` and `tidx_to_nchwi`, and by replacing the divide/modulo by 4 in `to_texture_elem_pos` with a shift and a mask. ghstack-source-id: 271070839 Differential Revision: [D70944577](https://our.internmc.facebook.com/intern/diff/D70944577/) --- .../runtime/graph/ops/glsl/indexing_utils.h | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/backends/vulkan/runtime/graph/ops/glsl/indexing_utils.h b/backends/vulkan/runtime/graph/ops/glsl/indexing_utils.h index 0b372ab70a4..2126104430f 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/indexing_utils.h +++ b/backends/vulkan/runtime/graph/ops/glsl/indexing_utils.h @@ -104,16 +104,19 @@ ivec4 tidx_to_4bufi( } ivec4 nchwi_to_tidx(const int nchwi, const ivec4 sizes) { + const int nchwi_div_x = nchwi / sizes.x; + const int nchwi_div_y = nchwi_div_x / sizes.y; return ivec4( nchwi % sizes.x, - (nchwi / (sizes.x)) % sizes.y, - (nchwi / (sizes.x * sizes.y)) % sizes.z, - (nchwi / (sizes.x * sizes.y * sizes.z))); + nchwi_div_x % sizes.y, + nchwi_div_y % sizes.z, + nchwi_div_y / sizes.z); } int tidx_to_nchwi(const ivec4 tidx, const ivec4 sizes) { - return tidx.w * sizes.x * sizes.y * sizes.z + tidx.z * sizes.x * sizes.y + - tidx.y * sizes.x + tidx.x; + const int sizes_xy = sizes.x * sizes.y; + return tidx.w * sizes_xy * sizes.z + tidx.z * sizes_xy + tidx.y * sizes.x + + tidx.x; } // TODO(ssjia): make this function use dim order so that it can work with any @@ -360,8 +363,8 @@ ivec4 to_texture_elem_pos(ivec4 idx, ivec4 sizes, int packed_dim) { // pos[4] is set to a placeholder value ivec4 pos = idx.xyzx; pos[BATCH_AXIS] += idx.w * sizes[BATCH_AXIS]; - pos[packed_dim] /= 4; - pos.w = 
idx[packed_dim] % 4; + pos[packed_dim] >>= 2; + pos.w = idx[packed_dim] & 0x3; return pos; }