[ET-VK][ez][Ops] registering Q/DQ/CQP ops and specifying optimal storage

morelos · morelos · commit 734e1f8eaf73 · 2025-07-03T11:17:05.000-07:00
# Context Certain quantization operators need their scales and zero points to be stored as buffers. Since the existing op_registry does not allow specifying the memory or storage layout of individual input parameters, we need to specify that the optimal storage type is buffer, so that a conversion pass is added to ensure that the inputs are also buffers. # Changes This moves the quantized_decomposed operators into their own registration, while also specifying that buffer storage is preferred. Differential Revision: [D77746131](https://our.internmc.facebook.com/intern/diff/D77746131/) [ghstack-poisoned]
diff --git a/backends/vulkan/op_registry.py b/backends/vulkan/op_registry.py
@@ -221,13 +221,6 @@ def update_features_impl(op: OpKey):
 @update_features(
     [
         operator.getitem,
-        # Quantization related ops will be fused via graph passes
-        exir_ops.edge.quantized_decomposed.quantize_per_channel.default,
-        exir_ops.edge.quantized_decomposed.quantize_per_tensor.default,
-        exir_ops.edge.quantized_decomposed.quantize_per_tensor.tensor,
-        exir_ops.edge.quantized_decomposed.dequantize_per_tensor.default,
-        exir_ops.edge.quantized_decomposed.dequantize_per_tensor.tensor,
-        exir_ops.edge.quantized_decomposed.dequantize_per_channel.default,
         # Symbolic integer ops
         torch.ops.aten.sym_size.int,
         operator.add,
@@ -250,6 +243,35 @@ def register_ephemeral_op(features: OpFeatures):
     return features
 
 
+@update_features(
+    [
+        exir_ops.edge.quantized_decomposed.quantize_per_channel.default,
+        exir_ops.edge.quantized_decomposed.quantize_per_tensor.default,
+        exir_ops.edge.quantized_decomposed.quantize_per_tensor.tensor,
+        exir_ops.edge.quantized_decomposed.dequantize_per_tensor.default,
+        exir_ops.edge.quantized_decomposed.dequantize_per_tensor.tensor,
+        exir_ops.edge.quantized_decomposed.dequantize_per_channel.default,
+        exir_ops.edge.quantized_decomposed.quantize_per_token.default,
+        exir_ops.edge.quantized_decomposed.dequantize_per_token.default,
+        exir_ops.edge.quantized_decomposed.choose_qparams.tensor,
+        exir_ops.edge.quantized_decomposed.choose_qparams_per_token_asymmetric.default,
+    ]
+)
+def register_quantization_op(features: OpFeatures):
+    # Quantization requires buffer storage and width packing for scales/zero_points
+    # but we need to provide texture impl features for the partitioner to work properly
+    features.texture_impl = TextureImplFeatures(
+        uses_axis_map=True,
+        valid_packed_dims={
+            PackedDim.WIDTH,
+        },
+    )
+    features.buffer_impl = True
+    features.resize_fn = True
+    features.optimal_storage = VkStorageType.BUFFER
+    return features
+
+
 @update_features(
     [
         exir_ops.edge.aten.add.Tensor,