From 7fe007ca537878bca18b7de97f8de6001ea341a9 Mon Sep 17 00:00:00 2001 From: Vivek Trivedi <5340687+trivedivivek@users.noreply.github.com> Date: Mon, 26 May 2025 21:40:49 -0700 Subject: [PATCH] [ET-VK] Minor unroll tuning to improve conv2d pw perf. This diff adds an explicit `[[unroll]]` hint (enabled by requiring the GL_EXT_control_flow_attributes extension) to the loop that loads kernel values from texels in the conv2d pointwise (pw) shader, to improve the performance of that operation in the ExecuTorch Vulkan backend. Differential Revision: [D75420510](https://our.internmc.facebook.com/intern/diff/D75420510/) [ghstack-poisoned] --- backends/vulkan/runtime/graph/ops/glsl/conv2d_pw.glsl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/backends/vulkan/runtime/graph/ops/glsl/conv2d_pw.glsl b/backends/vulkan/runtime/graph/ops/glsl/conv2d_pw.glsl index ed07979afc0..c090c5d344f 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/conv2d_pw.glsl +++ b/backends/vulkan/runtime/graph/ops/glsl/conv2d_pw.glsl @@ -38,6 +38,8 @@ layout(push_constant) uniform restrict Block { layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in; +#extension GL_EXT_control_flow_attributes : require + /* * Computes a 2D pointwise convolution of an NxN output tile. Calculating an * output tile for pointwise convolution is more efficient because the kernel @@ -105,7 +107,7 @@ void main() { float kernel_values[4 * 4]; // 4 channels, 4 elements per channel // Load kernel values from texels to array - for (int i = 0; i < 4; ++i) { + [[unroll]] for (int i = 0; i < 4; ++i) { const vec4 k_tex = texelFetch(t_kernel, ivec2(z + i, gpos.z), 0); kernel_values[i * 4 + 0] = k_tex.x; kernel_values[i * 4 + 1] = k_tex.y;