
Commit 149a846

Update on "Remove Per-Op mode from DQPartitioner"
Differential Revision: [D71427234](https://our.internmc.facebook.com/intern/diff/D71427234/) [ghstack-poisoned]
2 parents 77a593f + 231e657 commit 149a846

3 files changed: +21 -18 lines

backends/xnnpack/test/ops/test_linear.py

Lines changed: 15 additions & 18 deletions
@@ -331,7 +331,6 @@ def _test_dqlinear(
         uses_bias=False,
         qconfig: Optional[QuantizationConfig] = None,
         atol=5e-02,  # TODO(T212995726): Investigate right atol for rand[n] inputs
-        no_per_op_mode=False,
     ):
         """
         Helper function to test dynamic quantized linear op with different configurations.
@@ -340,9 +339,8 @@ def _test_dqlinear(
             is_per_channel=is_per_channel,
             is_dynamic=True,
         )
-        per_op_mode_choices = [False] if no_per_op_mode else [True, False]
         for legacy_partitioner in (True, False):
-            for per_op_mode in per_op_mode_choices:
+            for per_op_mode in (True, False):
                 DynamicallyQuantizedPartitioner = XnnpackPartitioner(
                     config_precisions=ConfigPrecisionType.DYNAMIC_QUANT,
                     per_op_mode=per_op_mode,
@@ -538,22 +536,21 @@ def get_qnode_checks(quant_node_checks, dialect):
     #     )

     def test_qd8_f32_per_channel_shared_dq_chain(self):
-        for use_bias in (False, True):
-            module = SharedDQChain(
-                input_size=13,
-                output_size=17,
-            )
-            inputs = (torch.randn(1, 2, 13),)
+        # for use_bias in (False, True):
+        module = SharedDQChain(
+            input_size=13,
+            output_size=17,
+        )
+        inputs = (torch.randn(1, 2, 13),)

-            self._test_dqlinear(
-                module,
-                inputs,
-                dynamic_shapes=None,
-                is_per_channel=True,
-                linear_count=2,
-                uses_bias=use_bias,
-                no_per_op_mode=True,
-            )
+        self._test_dqlinear(
+            module,
+            inputs,
+            dynamic_shapes=None,
+            is_per_channel=True,
+            linear_count=2,
+            uses_bias=False,
+        )

     def _test_qd8_per_channel_linear(self, dtype: torch.dtype = torch.float):
         for uses_bias in (False, True):
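
With the `no_per_op_mode` escape hatch removed, `_test_dqlinear` always sweeps both partitioner modes. Below is a minimal sketch of the loop this leaves behind; the import paths are assumed from the existing XNNPACK test setup, and the helper's lowering and node-check steps are elided.

```python
# Minimal sketch of the per-op sweep in _test_dqlinear after this change.
# Import paths are assumptions based on the XNNPACK backend layout; the
# actual lowering/verification done by the helper is omitted.
from executorch.backends.xnnpack.partition.config.xnnpack_config import (
    ConfigPrecisionType,
)
from executorch.backends.xnnpack.partition.xnnpack_partitioner import (
    XnnpackPartitioner,
)

for legacy_partitioner in (True, False):
    for per_op_mode in (True, False):  # per-op mode can no longer be skipped
        DynamicallyQuantizedPartitioner = XnnpackPartitioner(
            config_precisions=ConfigPrecisionType.DYNAMIC_QUANT,
            per_op_mode=per_op_mode,
        )
        # ... lower the test module with this partitioner and run the checks ...
```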

backends/xnnpack/test/tester/TARGETS

Lines changed: 1 addition & 0 deletions
@@ -26,5 +26,6 @@ runtime.python_library(
         "//executorch/exir/backend:partitioner",
         "//executorch/exir/passes:spec_prop_pass",
         "//executorch/extension/pybindings:portable_lib",  # @manual
+        "//executorch/backends/transforms:duplicate_dynamic_quant_chain"
     ],
 )

backends/xnnpack/test/tester/tester.py

Lines changed: 5 additions & 0 deletions
@@ -26,6 +26,9 @@
     to_edge,
     to_edge_transform_and_lower,
 )
+from executorch.backends.transforms.duplicate_dynamic_quant_chain import (
+    DuplicateDynamicQuantChainPass,
+)
 from executorch.exir.backend.backend_api import validation_disabled
 from executorch.exir.backend.partitioner import Partitioner
 from executorch.exir.passes.sym_shape_eval_pass import ConstraintBasedSymShapeEvalPass
@@ -177,6 +180,8 @@ def run(
         prepared(*inputs)

         converted = convert_pt2e(prepared)
+        DuplicateDynamicQuantChainPass()(converted)
+
         self.converted_graph = converted

     @property
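
For context, the tester's quantize stage now runs the duplicate-dynamic-quant-chain pass directly on the converted graph, so every dynamically quantized test exercises it. A rough sketch of the resulting flow is below; `module`, `quantizer`, and `inputs` are placeholders for state the Tester already holds, and only the prepare/convert/pass calls mirror the diff.

```python
# Rough sketch of the updated quantize flow, assuming PT2E quantization
# utilities; `module`, `quantizer`, and `inputs` are illustrative placeholders.
from executorch.backends.transforms.duplicate_dynamic_quant_chain import (
    DuplicateDynamicQuantChainPass,
)
from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e

prepared = prepare_pt2e(module, quantizer)
prepared(*inputs)  # calibration forward pass
converted = convert_pt2e(prepared)
# New step: duplicate shared dynamic quantize/dequantize chains so each
# consumer op gets its own chain before the graph reaches the partitioner.
DuplicateDynamicQuantChainPass()(converted)
```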
