From 0a79df8321c625381b481c1f6f253ffb446e6f01 Mon Sep 17 00:00:00 2001 From: Vivek Trivedi <5340687+trivedivivek@users.noreply.github.com> Date: Thu, 10 Jul 2025 22:17:36 -0700 Subject: [PATCH] [ET-VK] Optimizing buffer to int8 quantized packing op to improve width packed performance. This diff simplifies the loop in the int8 quantized packing operation for width-packed tensors to improve performance. Differential Revision: [D78143041](https://our.internmc.facebook.com/intern/diff/D78143041/) [ghstack-poisoned] --- .../nchw_to_bitw8_image_nobitw8buffer.glsl | 24 ++++++++++++++----- 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/backends/vulkan/runtime/graph/ops/glsl/nchw_to_bitw8_image_nobitw8buffer.glsl b/backends/vulkan/runtime/graph/ops/glsl/nchw_to_bitw8_image_nobitw8buffer.glsl index 1a2c257baec..a7d06a4458c 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/nchw_to_bitw8_image_nobitw8buffer.glsl +++ b/backends/vulkan/runtime/graph/ops/glsl/nchw_to_bitw8_image_nobitw8buffer.glsl @@ -57,12 +57,24 @@ ivec4 read_texel(ivec4 tidx) { ivec4 out_tex = ivec4(0); - [[unroll]] for (int i = 0; i < 4; ++i) { - if (tidx[packed_dim] + i < sizes[packed_dim]) { - const int in_texel = nchw_in[buf_indices[i] >> 2]; - int extracted_val = (in_texel >> (8 * (buf_indices[i] & 3))) & mask; - extracted_val = extend_sign(extracted_val); - out_tex[i] = extracted_val; + if (packed_dim == 0) { + int buf_index = buf_indices[0]; + [[unroll]] for (int i = 0; i < 4; ++i, ++buf_index) { + if (tidx[packed_dim] + i < sizes[packed_dim]) { + const int in_texel = nchw_in[buf_index >> 2]; + int extracted_val = (in_texel >> (8 * (buf_index & 3))) & mask; + extracted_val = extend_sign(extracted_val); + out_tex[i] = extracted_val; + } + } + } else { + [[unroll]] for (int i = 0; i < 4; ++i) { + if (tidx[packed_dim] + i < sizes[packed_dim]) { + const int in_texel = nchw_in[buf_indices[i] >> 2]; + int extracted_val = (in_texel >> (8 * (buf_indices[i] & 3))) & mask; + extracted_val = 
extend_sign(extracted_val); + out_tex[i] = extracted_val; + } } }