From 969ebaf8e59ded1637723341e20b15d4c6b6ad28 Mon Sep 17 00:00:00 2001 From: Vivek Trivedi <5340687+trivedivivek@users.noreply.github.com> Date: Thu, 2 Jan 2025 12:43:38 -0800 Subject: [PATCH] [ET-VK] Changing texture access pattern for conv2d dw ops to improve performance. This diff changes the texture access pattern for depthwise (DW) convolution operations in ExecuTorch's Vulkan backend to iterate first along the x axis, then y, and then z to improve performance. The global work group size is flattened to one dimension, and each shader invocation derives its (x, y, z) position from gl_GlobalInvocationID.x. Differential Revision: [D67770160](https://our.internmc.facebook.com/intern/diff/D67770160/) [ghstack-poisoned] --- backends/vulkan/runtime/graph/ops/glsl/conv2d_dw.glsl | 5 ++++- .../vulkan/runtime/graph/ops/glsl/conv2d_dw_output_tile.glsl | 5 ++++- backends/vulkan/runtime/graph/ops/impl/Convolution.cpp | 2 +- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/backends/vulkan/runtime/graph/ops/glsl/conv2d_dw.glsl b/backends/vulkan/runtime/graph/ops/glsl/conv2d_dw.glsl index 8cb487e5807..43a4f7c8dc7 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/conv2d_dw.glsl +++ b/backends/vulkan/runtime/graph/ops/glsl/conv2d_dw.glsl @@ -35,7 +35,10 @@ layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in; * output at a single output location. 
*/ void main() { - const ivec3 pos = ivec3(gl_GlobalInvocationID); + const ivec3 pos = ivec3( + gl_GlobalInvocationID.x % out_limits.x, + (gl_GlobalInvocationID.x / out_limits.x) % out_limits.y, + gl_GlobalInvocationID.x / (out_limits.x * out_limits.y)); if (any(greaterThanEqual(pos, out_limits))) { return; diff --git a/backends/vulkan/runtime/graph/ops/glsl/conv2d_dw_output_tile.glsl b/backends/vulkan/runtime/graph/ops/glsl/conv2d_dw_output_tile.glsl index 57ae98eb85b..29e57c4fecf 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/conv2d_dw_output_tile.glsl +++ b/backends/vulkan/runtime/graph/ops/glsl/conv2d_dw_output_tile.glsl @@ -39,7 +39,10 @@ layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in; * output at a single output location. */ void main() { - const u16vec3 pos = u16vec3(gl_GlobalInvocationID); + const u16vec3 pos = u16vec3( + gl_GlobalInvocationID.x % out_limits.x, + (gl_GlobalInvocationID.x / out_limits.x) % out_limits.y, + gl_GlobalInvocationID.x / (out_limits.x * out_limits.y)); if (any(greaterThanEqual(pos, out_limits))) { return; diff --git a/backends/vulkan/runtime/graph/ops/impl/Convolution.cpp b/backends/vulkan/runtime/graph/ops/impl/Convolution.cpp index 4f123cb8337..9ad600d27a7 100644 --- a/backends/vulkan/runtime/graph/ops/impl/Convolution.cpp +++ b/backends/vulkan/runtime/graph/ops/impl/Convolution.cpp @@ -372,7 +372,7 @@ void add_conv2d_node( utils::uvec3 wg_size = create_conv2d_global_wg_size(graph, method, out); - if (method == Conv2dMethod::Pointwise) { + if (method == Conv2dMethod::Pointwise || method == Conv2dMethod::Depthwise) { wg_size = {wg_size[0] * wg_size[1] * wg_size[2], 1, 1}; }