Commit 399b3a2

Update on "[ExecuTorch][XNNPACK] Don't partition per_tensor weights with qd8"

This is not supported, so we shouldn't partition it. Add an expectedFailure test to indicate that this is not supported.

Differential Revision: [D70343584](https://our.internmc.facebook.com/intern/diff/D70343584/)

[ghstack-poisoned]
1 parent 4a29500 commit 399b3a2

File tree

1 file changed: +5 -4 lines changed

backends/xnnpack/partition/config/gemm_configs.py

Lines changed: 5 additions & 4 deletions
@@ -161,17 +161,18 @@ def _get_weight_deps(
             return False, []
         gemm_deps.append(weight)
 
+        if is_per_tensor(dequant_node) and precision == ConfigPrecisionType.DYNAMIC_QUANT:
+            why(node, "XNNPACK does not support per tensor quantized weights for dynamic quantization of activations")
+            return False, []
+
         if is_per_channel(dequant_node) or is_per_channel_group(dequant_node):
             if len(dequant_node.all_input_nodes) < 2:
                 # Expected channel quantized to have scale/zp nodes
                 why(node, "Expected channel quantized to have scale/zp nodes")
                 return False, []
 
-            if is_per_tensor(dequant_node) and precision == ConfigPrecisionType.DYNAMIC_QUANT:
-                why(node, "XNNPACK does not support per tensor quantized weights for dynamic quantization of activations")
-                return False, []
-
             gemm_deps.extend(dequant_node.all_input_nodes[1:3])
+
         return (True, gemm_deps)
 
     def _get_output_deps(
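
The commit message mentions adding an expectedFailure test for this unsupported combination; that test is not part of this file's diff. Below is a minimal, hypothetical sketch of the pattern using unittest.expectedFailure, where weight_deps_supported is an illustrative stand-in for the partitioner check hoisted in _get_weight_deps above, not ExecuTorch's real test harness or API:

import unittest


# Illustrative stand-in for the partitioner logic in
# backends/xnnpack/partition/config/gemm_configs.py (hypothetical helper,
# not part of the ExecuTorch API).
def weight_deps_supported(is_per_tensor_weight: bool, is_dynamic_quant: bool) -> bool:
    # Mirrors the hoisted check: per-tensor quantized weights combined with
    # dynamically quantized (qd8) activations are rejected by the partitioner.
    if is_per_tensor_weight and is_dynamic_quant:
        return False
    return True


class QD8PerTensorWeightTest(unittest.TestCase):
    @unittest.expectedFailure
    def test_per_tensor_weights_with_qd8(self):
        # Expected to fail: this combination is intentionally unsupported.
        self.assertTrue(weight_deps_supported(True, True))


if __name__ == "__main__":
    unittest.main()

The decorator inverts the outcome in the test report: the failing assertion is recorded as an expected failure, documenting that per-tensor weights with qd8 activations are unsupported by design rather than by accident.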
