Skip to content

Commit 734e1f8

Browse files
author
morelos
committed
[ET-VK][ez][Ops] registering Q/DQ/CQP ops and specifying optimal storage
# Context Certain quantization operators need their scales and zero points to be stored as buffers. Since the existing op_registry does not allow specifying the memory or storage layout of individual input parameters, we need to specify that the optimal storage type is buffer so that a conversion pass is added to ensure that the inputs are also buffers. # Changes This moves the quantized_decomposed operators into their own registration, while also specifying that buffer storage is preferred. Differential Revision: [D77746131](https://our.internmc.facebook.com/intern/diff/D77746131/) [ghstack-poisoned]
1 parent e5b95dc commit 734e1f8

File tree

1 file changed

+29
-7
lines changed

1 file changed

+29
-7
lines changed

backends/vulkan/op_registry.py

Lines changed: 29 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -221,13 +221,6 @@ def update_features_impl(op: OpKey):
221221
@update_features(
222222
[
223223
operator.getitem,
224-
# Quantization related ops will be fused via graph passes
225-
exir_ops.edge.quantized_decomposed.quantize_per_channel.default,
226-
exir_ops.edge.quantized_decomposed.quantize_per_tensor.default,
227-
exir_ops.edge.quantized_decomposed.quantize_per_tensor.tensor,
228-
exir_ops.edge.quantized_decomposed.dequantize_per_tensor.default,
229-
exir_ops.edge.quantized_decomposed.dequantize_per_tensor.tensor,
230-
exir_ops.edge.quantized_decomposed.dequantize_per_channel.default,
231224
# Symbolic integer ops
232225
torch.ops.aten.sym_size.int,
233226
operator.add,
@@ -250,6 +243,35 @@ def register_ephemeral_op(features: OpFeatures):
250243
return features
251244

252245

246+
@update_features(
    [
        exir_ops.edge.quantized_decomposed.quantize_per_channel.default,
        exir_ops.edge.quantized_decomposed.quantize_per_tensor.default,
        exir_ops.edge.quantized_decomposed.quantize_per_tensor.tensor,
        exir_ops.edge.quantized_decomposed.dequantize_per_tensor.default,
        exir_ops.edge.quantized_decomposed.dequantize_per_tensor.tensor,
        exir_ops.edge.quantized_decomposed.dequantize_per_channel.default,
        exir_ops.edge.quantized_decomposed.quantize_per_token.default,
        exir_ops.edge.quantized_decomposed.dequantize_per_token.default,
        exir_ops.edge.quantized_decomposed.choose_qparams.tensor,
        exir_ops.edge.quantized_decomposed.choose_qparams_per_token_asymmetric.default,
    ]
)
def register_quantization_op(features: OpFeatures):
    """Configure the feature set shared by the quantized_decomposed ops.

    The scales / zero_points consumed by these operators need buffer storage
    with width packing, so buffer is declared as the optimal storage type.
    A width-packed texture implementation is still advertised because the
    partitioner needs texture impl features in order to work properly.

    Args:
        features: The OpFeatures instance to populate; it is modified in place.

    Returns:
        The same ``features`` object, after the fields below have been set.
    """
    # Texture description exposed for the partitioner's benefit.
    width_packed_texture = TextureImplFeatures(
        uses_axis_map=True,
        valid_packed_dims={PackedDim.WIDTH},
    )
    features.texture_impl = width_packed_texture

    features.buffer_impl = True
    features.resize_fn = True
    # Buffer is the preferred storage for these ops.
    features.optimal_storage = VkStorageType.BUFFER
    return features
273+
274+
253275
@update_features(
254276
[
255277
exir_ops.edge.aten.add.Tensor,

0 commit comments

Comments
 (0)