diff --git a/backends/vulkan/runtime/graph/ops/glsl/nchw_to_bitw8_image_nobitw8buffer.glsl b/backends/vulkan/runtime/graph/ops/glsl/nchw_to_bitw8_image_nobitw8buffer.glsl index 1a2c257baec..a7d06a4458c 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/nchw_to_bitw8_image_nobitw8buffer.glsl +++ b/backends/vulkan/runtime/graph/ops/glsl/nchw_to_bitw8_image_nobitw8buffer.glsl @@ -57,12 +57,24 @@ ivec4 read_texel(ivec4 tidx) { ivec4 out_tex = ivec4(0); - [[unroll]] for (int i = 0; i < 4; ++i) { - if (tidx[packed_dim] + i < sizes[packed_dim]) { - const int in_texel = nchw_in[buf_indices[i] >> 2]; - int extracted_val = (in_texel >> (8 * (buf_indices[i] & 3))) & mask; - extracted_val = extend_sign(extracted_val); - out_tex[i] = extracted_val; + if (packed_dim == 0) { + int buf_index = buf_indices[0]; + [[unroll]] for (int i = 0; i < 4; ++i, ++buf_index) { + if (tidx[packed_dim] + i < sizes[packed_dim]) { + const int in_texel = nchw_in[buf_index >> 2]; + int extracted_val = (in_texel >> (8 * (buf_index & 3))) & mask; + extracted_val = extend_sign(extracted_val); + out_tex[i] = extracted_val; + } + } + } else { + [[unroll]] for (int i = 0; i < 4; ++i) { + if (tidx[packed_dim] + i < sizes[packed_dim]) { + const int in_texel = nchw_in[buf_indices[i] >> 2]; + int extracted_val = (in_texel >> (8 * (buf_indices[i] & 3))) & mask; + extracted_val = extend_sign(extracted_val); + out_tex[i] = extracted_val; + } } }