[ET-VK] Conv2d quantize/dequantize ops for conv2d activations (#14611)

pytorchbot · ssjia · web-flow · commit 17853460342b · 2025-09-25T17:01:55.000-04:00
This PR was created by the merge bot to help merge the original PR into the main branch. ghstack PR number: #14330 by @SS-JIA ^ Please use this as the source of truth for the PR details, comments, and reviews ghstack PR base: https://github.com/pytorch/executorch/tree/gh/SS-JIA/330/base ghstack PR head: https://github.com/pytorch/executorch/tree/gh/SS-JIA/330/head Merge bot PR base: https://github.com/pytorch/executorch/tree/gh/SS-JIA/331/orig Merge bot PR head: https://github.com/pytorch/executorch/tree/gh/SS-JIA/330/orig Differential Revision: [D82542335](https://our.internmc.facebook.com/intern/diff/D82542335/) @diff-train-skip-merge Co-authored-by: ssjia <ssjia@devvm1479.ncg0.facebook.com>
diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml
@@ -1009,6 +1009,7 @@ jobs:
         ./cmake-out/backends/vulkan/test/custom_ops/q8csw_conv2d
         ./cmake-out/backends/vulkan/test/custom_ops/q4gsw_linear
         ./cmake-out/backends/vulkan/test/custom_ops/choose_qparams_per_row
+        ./cmake-out/backends/vulkan/test/custom_ops/qdq8ta_conv2d_activations
 
         # "Classic" Operator tests
         PYTHON_EXECUTABLE=python bash backends/vulkan/test/scripts/test_op.sh --build
diff --git a/backends/vulkan/runtime/graph/ops/glsl/common.glslh b/backends/vulkan/runtime/graph/ops/glsl/common.glslh
@@ -33,6 +33,30 @@ struct TensorIndex4D {
   ivec4 data;
 };
 
+// Sign-extends the low 8 bits of val to a full int. Bit 7 is treated as the
+// sign bit; the bits above the low byte are only modified when bit 7 is set.
+int sign_extend_8bit(const int val) {
+  const bool is_negative = (val & 0x80) != 0;
+  return is_negative ? (val | (~0xFF)) : val;
+}
+
+// Returns byte i of packed as a sign-extended int, where byte 0 is the least
+// significant byte (little-endian byte order).
+int extract_8bit_from_packed_int_le(const int packed, const int i) {
+  const int shift = 8 * i;
+  const int raw_byte = (packed >> shift) & 0xFF;
+  return sign_extend_8bit(raw_byte);
+}
+
+// Packs the low 8 bits of four values into a single int. val0 occupies the
+// least significant byte and val3 the most significant byte.
+int pack_4xqint_into_int32(
+    const int val0,
+    const int val1,
+    const int val2,
+    const int val3) {
+  const int byte0 = val0 & 0xFF;
+  const int byte1 = (val1 & 0xFF) << 8;
+  const int byte2 = (val2 & 0xFF) << 16;
+  const int byte3 = (val3 & 0xFF) << 24;
+  return byte0 | byte1 | byte2 | byte3;
+}
+
 #ifdef DEBUG_MODE
 
 #extension GL_EXT_debug_printf : require
diff --git a/backends/vulkan/runtime/graph/ops/glsl/conv2d_common.glslh b/backends/vulkan/runtime/graph/ops/glsl/conv2d_common.glslh
@@ -27,6 +27,48 @@ struct Conv2DParams {
   int K4;
 };
 
+struct Conv2dTensorIndex { // index of a texel; produced by block_idx_to_tensor_idx
+  ivec3 data; // texel coordinate; x is 4 * the block x index when built from a block index
+  int texel_i; // set to 0 by block_idx_to_tensor_idx; NOTE(review): presumably a component index within the texel -- confirm
+};
+
+struct Conv2dBlockIndex { // index of a block; compared against Conv2dBlockExtents for bounds checks
+  ivec3 data; // block_idx_to_tensor_idx scales x by 4 (mul_4) and passes y and z through unchanged
+};
+
+// Converts a block index into the tensor index of the block's first texel.
+// The block x coordinate is scaled via mul_4; y and z carry over unchanged,
+// and texel_i is reset to 0.
+Conv2dTensorIndex block_idx_to_tensor_idx(const Conv2dBlockIndex block_idx) {
+  Conv2dTensorIndex first_texel_idx;
+  first_texel_idx.data =
+      ivec3(mul_4(block_idx.data.x), block_idx.data.y, block_idx.data.z);
+  first_texel_idx.texel_i = 0;
+  return first_texel_idx;
+}
+
+struct Conv2dBlockExtents { // number of blocks along each dim; filled in by make_block_extents
+  ivec3 data; // (div_up_4(sizes.x), sizes.y, div_up_4(sizes.z))
+  int data_xz; // data.x * data.z; used as the y stride when computing buffer indices
+};
+
+// Derives the block extents for a tensor with the given sizes. The x and z
+// sizes go through div_up_4, the y size is used as-is, and data_xz caches the
+// product of the x and z block counts.
+Conv2dBlockExtents make_block_extents(const ivec4 tensor_sizes) {
+  const int blocks_x = div_up_4(tensor_sizes.x);
+  const int blocks_z = div_up_4(tensor_sizes.z);
+  Conv2dBlockExtents extents;
+  extents.data = ivec3(blocks_x, tensor_sizes.y, blocks_z);
+  extents.data_xz = blocks_x * blocks_z;
+  return extents;
+}
+
+// Returns true when any component of block_idx is greater than or equal to
+// the matching component of block_extents.data.
+bool block_idx_out_of_bounds(
+    const Conv2dBlockIndex block_idx,
+    const Conv2dBlockExtents block_extents) {
+  return any(greaterThanEqual(block_idx.data, block_extents.data));
+}
+
 #ifdef DEBUG_MODE
 
 void printConv2DParams(const Conv2DParams params) {
diff --git a/backends/vulkan/runtime/graph/ops/glsl/conv2d_fp_input_tile_load.glslh b/backends/vulkan/runtime/graph/ops/glsl/conv2d_fp_input_tile_load.glslh
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#ifndef CONV2D_FP_INPUT_TILE_LOAD
+#define CONV2D_FP_INPUT_TILE_LOAD
+
+#extension GL_EXT_control_flow_attributes : require
+
+#include "linear_fp_input_tile.glslh"
+
+// Fetches one texel from t_fp_input at the coordinate held in tidx.data,
+// using LOD 0.
+VEC4_T load_fp_input_texel(const Conv2dTensorIndex tidx) {
+  const ivec3 texel_pos = tidx.data;
+  return texelFetch(t_fp_input, texel_pos, 0);
+}
+
+// Fills tile with TILE_M texels that are consecutive along x, starting at the
+// first texel of block_idx. Only TILE_M == 4 with TILE_K4 == 1 is implemented;
+// any other configuration expands to the bare identifier not_implemented
+// (presumably to make shader compilation fail).
+void load_fp_input_tile(
+    out FPInputTile tile,
+    const Conv2dBlockIndex block_idx) {
+#if TILE_M == 4 && TILE_K4 == 1
+  const Conv2dTensorIndex base_tidx = block_idx_to_tensor_idx(block_idx);
+  [[unroll]] for (int w = 0; w < TILE_M; ++w) {
+    Conv2dTensorIndex texel_tidx = base_tidx;
+    texel_tidx.data.x += w;
+    tile.data[w][0] = load_fp_input_texel(texel_tidx);
+  }
+#else
+  not_implemented;
+#endif
+}
+
+#endif // CONV2D_FP_INPUT_TILE_LOAD
diff --git a/backends/vulkan/runtime/graph/ops/glsl/linear_common.glslh b/backends/vulkan/runtime/graph/ops/glsl/linear_common.glslh
@@ -16,19 +16,6 @@
 
 #include "common.glslh"
 
-int sign_extend_8bit(const int val) {
-  if ((val & 0x80) != 0) {
-    return val | (~0xFF);
-  }
-  return val;
-}
-
-int extract_8bit_from_packed_int_le(const int packed, const int i) {
-  // account for little endian
-  int byte = sign_extend_8bit(packed >> (8 * i) & 0xFF);
-  return byte;
-}
-
 // Extract a 4-bit value from a packed int (little endian)
 // It is assumed that the 4-bit value is in the range [0, 15]
 int extract_4bit_from_packed_int_le(const int packed, const int col) {
diff --git a/backends/vulkan/runtime/graph/ops/glsl/quantize_and_pack_q8ta_conv2d_input.glsl b/backends/vulkan/runtime/graph/ops/glsl/quantize_and_pack_q8ta_conv2d_input.glsl
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#version 450 core
+
+#define PRECISION ${PRECISION}
+#define VEC4_T ${texel_load_type(DTYPE, INPUT_STORAGE)}
+#define T ${texel_load_component_type(DTYPE, INPUT_STORAGE)}
+
+// corresponds to the input width dim
+#define TILE_M4 1
+// corresponds to the input channels dim
+#define TILE_K4 1
+
+#define TILE_M 4
+
+$if OUTPUT_STORAGE == "buffer":
+  #define OUTPUT_BUFFER
+$if INPUT_STORAGE == "buffer":
+  #define INPUT_BUFFER
+
+${define_required_extensions(DTYPE)}
+
+layout(std430) buffer;
+
+#include "conv2d_common.glslh"
+
+${layout_declare_tensor(B, "w", "t_packed_int8_input", "int", OUTPUT_STORAGE, is_scalar_array=False)}
+${layout_declare_tensor(B, "r", "t_fp_input", DTYPE, INPUT_STORAGE, is_scalar_array=False)}
+
+${layout_declare_ubo(B, "ivec4", "input_sizes")}
+
+layout(push_constant) uniform restrict Block {
+  float inv_scale;
+  int zp;
+};
+
+layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;
+
+#include "conv2d_fp_input_tile_load.glslh"
+#include "linear_int8_input_block.glslh"
+
+// Writes one packed int8 block to t_packed_int8_input. With buffer storage
+// the block lands at y * data_xz + x * extents.z + z; with texture storage
+// the block index is used directly as the image coordinate.
+void store_packed_int8_block(
+    const Conv2dBlockIndex block_idx,
+    const Conv2dBlockExtents block_extents,
+    const Int8InputBlock packed_int8_block) {
+#ifdef OUTPUT_BUFFER
+  const int y_offset = block_idx.data.y * block_extents.data_xz;
+  const int x_offset = block_idx.data.x * block_extents.data.z;
+  t_packed_int8_input[y_offset + x_offset + block_idx.data.z] =
+      packed_int8_block.data;
+#else
+  imageStore(t_packed_int8_input, block_idx.data, packed_int8_block.data);
+#endif
+}
+
+// Each invocation handles one block: it loads the floating point tile,
+// quantizes and packs it, then stores the packed block. Invocations whose
+// block index falls outside the block extents exit immediately.
+void main() {
+  const Conv2dBlockIndex blk = Conv2dBlockIndex(ivec3(gl_GlobalInvocationID));
+  const Conv2dBlockExtents extents = make_block_extents(input_sizes);
+  if (block_idx_out_of_bounds(blk, extents)) {
+    return;
+  }
+
+  FPInputTile fp_tile;
+  load_fp_input_tile(fp_tile, blk);
+
+  Int8InputBlock packed_block;
+  quantize_and_pack(packed_block, fp_tile, inv_scale, zp);
+
+  store_packed_int8_block(blk, extents, packed_block);
+}
diff --git a/backends/vulkan/runtime/graph/ops/glsl/quantize_and_pack_q8ta_conv2d_input.yaml b/backends/vulkan/runtime/graph/ops/glsl/quantize_and_pack_q8ta_conv2d_input.yaml
@@ -0,0 +1,21 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+quantize_and_pack_q8ta_conv2d_input:
+  parameter_names_with_default_values:
+    DTYPE: float
+    OUTPUT_STORAGE: texture3d
+    INPUT_STORAGE: texture3d
+  generate_variant_forall:
+    combination:
+      parameter_names: [OUTPUT_STORAGE, INPUT_STORAGE]
+      combos:  # each entry is an (OUTPUT_STORAGE, INPUT_STORAGE) pair
+        - parameter_values: [texture3d, texture3d]
+        - parameter_values: [buffer, texture3d]  # input is texture3d in every combo; the tile loader only uses texelFetch
+    DTYPE:
+      - VALUE: float
+  shader_variants:
+    - NAME: quantize_and_pack_q8ta_conv2d_input
diff --git a/backends/vulkan/runtime/graph/ops/glsl/unpack_and_dequantize_q8ta_conv2d_output.glsl b/backends/vulkan/runtime/graph/ops/glsl/unpack_and_dequantize_q8ta_conv2d_output.glsl
@@ -0,0 +1,118 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#version 450 core
+
+#define PRECISION ${PRECISION}
+#define VEC4_T ${texel_load_type(DTYPE, INPUT_STORAGE)}
+#define T ${texel_load_component_type(DTYPE, INPUT_STORAGE)}
+
+// corresponds to the output width dim
+#define TILE_M4 1
+// corresponds to the output channels dim
+#define TILE_K4 1
+
+#define TILE_M 4
+
+$if OUTPUT_STORAGE == "buffer":
+  #define OUTPUT_BUFFER
+$if INPUT_STORAGE == "buffer":
+  #define INPUT_BUFFER
+
+${define_required_extensions(DTYPE)}
+
+layout(std430) buffer;
+
+// DEBUG_MODE must stay undefined in the checked-in shader: it enables
+// debug-only code (e.g. printConv2DParams) and makes common.glslh require the
+// GL_EXT_debug_printf extension.
+#include "conv2d_common.glslh"
+
+${layout_declare_tensor(B, "w", "t_fp_output", DTYPE, OUTPUT_STORAGE, is_scalar_array=False)}
+${layout_declare_tensor(B, "r", "t_packed_int8_output", "int", INPUT_STORAGE, is_scalar_array=False)}
+
+${layout_declare_ubo(B, "ivec4", "output_sizes")}
+
+layout(push_constant) uniform restrict Block {
+  float scale;
+  int zp;
+};
+
+layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;
+
+#include "linear_fp_input_tile.glslh"
+#include "linear_int8_input_tile.glslh"
+
+// Reads one packed int8 block from t_packed_int8_output into
+// int8_tile.data[0][0]. With buffer storage the block is read from
+// y * data_xz + x * extents.z + z; with texture storage the block index is
+// used directly as the texel coordinate.
+void load_packed_int8_tile(
+    out Int8InputTile int8_tile,
+    const Conv2dBlockIndex block_idx,
+    const Conv2dBlockExtents block_extents) {
+#ifdef INPUT_BUFFER
+  const int y_offset = block_idx.data.y * block_extents.data_xz;
+  const int x_offset = block_idx.data.x * block_extents.data.z;
+  int8_tile.data[0][0] =
+      t_packed_int8_output[y_offset + x_offset + block_idx.data.z];
+#else
+  int8_tile.data[0][0] = texelFetch(t_packed_int8_output, block_idx.data, 0);
+#endif
+}
+
+// Dequantizes four integer values at once: (val - q_zero_point) * q_scale.
+VEC4_T
+dequantize_8bit(const ivec4 val, const float q_scale, const int q_zero_point) {
+  const ivec4 centered = val - q_zero_point;
+  return VEC4_T(centered) * q_scale;
+}
+
+// Expands the packed block in int8_tile.data[0][0] into four floating point
+// texels. Component w of the packed block holds four 8-bit values (byte 0 is
+// the least significant); they are sign-extended, dequantized with q_scale
+// and q_zero_point, and written to fp_tile.data[w][0].
+void unpack_and_dequantize(
+    out FPInputTile fp_tile,
+    const Int8InputTile int8_tile,
+    const float q_scale,
+    const int q_zero_point) {
+  const ivec4 packed_block = int8_tile.data[0][0];
+  [[unroll]] for (int w = 0; w < 4; ++w) {
+    const int packed = packed_block[w];
+    const ivec4 unpacked = ivec4(
+        extract_8bit_from_packed_int_le(packed, 0),
+        extract_8bit_from_packed_int_le(packed, 1),
+        extract_8bit_from_packed_int_le(packed, 2),
+        extract_8bit_from_packed_int_le(packed, 3));
+    fp_tile.data[w][0] = dequantize_8bit(unpacked, q_scale, q_zero_point);
+  }
+}
+
+// Stores out_texel to t_fp_output at the coordinate held in tidx.data.
+void store_fp_output_texel(
+    const Conv2dTensorIndex tidx,
+    const VEC4_T out_texel) {
+  const ivec3 texel_pos = tidx.data;
+  imageStore(t_fp_output, texel_pos, out_texel);
+}
+
+// Writes the four texels block.data[0..3][0] to positions that are
+// consecutive along x, starting at the first texel of block_idx.
+void store_fp_tile(
+    const FPInputTile block,
+    const Conv2dBlockIndex block_idx) {
+  const Conv2dTensorIndex base_tidx = block_idx_to_tensor_idx(block_idx);
+  [[unroll]] for (int w = 0; w < 4; ++w) {
+    Conv2dTensorIndex texel_tidx = base_tidx;
+    texel_tidx.data.x += w;
+    store_fp_output_texel(texel_tidx, block.data[w][0]);
+  }
+}
+
+// Each invocation handles one block: it loads the packed int8 block, unpacks
+// and dequantizes it, then stores the resulting floating point tile.
+// Invocations whose block index falls outside the block extents exit
+// immediately.
+void main() {
+  const Conv2dBlockIndex blk = Conv2dBlockIndex(ivec3(gl_GlobalInvocationID));
+  const Conv2dBlockExtents extents = make_block_extents(output_sizes);
+  if (block_idx_out_of_bounds(blk, extents)) {
+    return;
+  }
+
+  Int8InputTile packed_tile;
+  load_packed_int8_tile(packed_tile, blk, extents);
+
+  FPInputTile fp_tile;
+  unpack_and_dequantize(fp_tile, packed_tile, scale, zp);
+
+  store_fp_tile(fp_tile, blk);
+}
diff --git a/backends/vulkan/runtime/graph/ops/glsl/unpack_and_dequantize_q8ta_conv2d_output.yaml b/backends/vulkan/runtime/graph/ops/glsl/unpack_and_dequantize_q8ta_conv2d_output.yaml
@@ -0,0 +1,21 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+unpack_and_dequantize_q8ta_conv2d_output:
+  parameter_names_with_default_values:
+    DTYPE: float
+    OUTPUT_STORAGE: texture3d
+    INPUT_STORAGE: texture3d
+  generate_variant_forall:
+    combination:
+      parameter_names: [OUTPUT_STORAGE, INPUT_STORAGE]
+      combos:  # each entry is an (OUTPUT_STORAGE, INPUT_STORAGE) pair
+        - parameter_values: [texture3d, texture3d]
+        - parameter_values: [texture3d, buffer]  # output is texture3d in every combo; the shader only stores via imageStore
+    DTYPE:
+      - VALUE: float
+  shader_variants:
+    - NAME: unpack_and_dequantize_q8ta_conv2d_output
diff --git a/backends/vulkan/runtime/graph/ops/impl/QuantizedConvolution.cpp b/backends/vulkan/runtime/graph/ops/impl/QuantizedConvolution.cpp
diff --git a/backends/vulkan/test/custom_ops/CMakeLists.txt b/backends/vulkan/test/custom_ops/CMakeLists.txt
diff --git a/backends/vulkan/test/custom_ops/qdq8ta_conv2d_activations.cpp b/backends/vulkan/test/custom_ops/qdq8ta_conv2d_activations.cpp
diff --git a/backends/vulkan/test/custom_ops/targets.bzl b/backends/vulkan/test/custom_ops/targets.bzl