Remove Per-Op mode from DQPartitioner

mcr229 · mcr229 · commit 1d8a94d5f2e4 · 2025-03-20T13:40:33.000-07:00
XNNPACK can handle dynamically quantized partitions in which the dq chain is shared between two dynamic linear ops. This means per-op mode is not necessarily needed, and consequently duplicating the dynamic dequant chain is not needed either. Pull Request resolved: #9378 ghstack-source-id: 273077941 Differential Revision: [D71427234](https://our.internmc.facebook.com/intern/diff/D71427234/)
diff --git a/backends/xnnpack/partition/xnnpack_partitioner.py b/backends/xnnpack/partition/xnnpack_partitioner.py
@@ -115,7 +115,7 @@ def generate_per_op_partitions(self, ep: ExportedProgram) -> List[Partition]:
 class XnnpackDynamicallyQuantizedPartitioner(XnnpackPartitioner):
     def __init__(self):
         super().__init__(
-            config_precisions=ConfigPrecisionType.DYNAMIC_QUANT, per_op_mode=True
+            config_precisions=ConfigPrecisionType.DYNAMIC_QUANT,
         )
 
 
diff --git a/backends/xnnpack/test/ops/test_linear.py b/backends/xnnpack/test/ops/test_linear.py
@@ -191,6 +191,21 @@ def forward(self, x, y):
         return a + b
 
 
+class SharedDQChain(torch.nn.Module):
+    def __init__(self, input_size, output_size):
+        super().__init__()
+        self.linear1_weight = torch.nn.Parameter(torch.rand(output_size, input_size))
+        self.linear1_bias = torch.nn.Parameter(torch.rand(output_size))
+
+        self.linear2_weight = torch.nn.Parameter(torch.rand(output_size, input_size))
+        self.linear2_bias = torch.nn.Parameter(torch.rand(output_size))
+
+    def forward(self, x):
+        a = torch.nn.functional.linear(x, self.linear1_weight, self.linear1_bias)
+        b = torch.nn.functional.linear(x, self.linear2_weight, self.linear2_bias)
+        return a + b
+
+
 class TestLinear(unittest.TestCase):
     """
     Test Class for XNNPACK Linear Operators.
@@ -520,6 +535,23 @@ def get_qnode_checks(quant_node_checks, dialect):
                 #     qtol=bool(quant_config), atol=atol
                 # )
 
+    def test_qd8_f32_per_channel_shared_dq_chain(self):
+        # for use_bias in (False, True):
+        module = SharedDQChain(
+            input_size=13,
+            output_size=17,
+        )
+        inputs = (torch.randn(1, 2, 13),)
+
+        self._test_dqlinear(
+            module,
+            inputs,
+            dynamic_shapes=None,
+            is_per_channel=True,
+            linear_count=2,
+            uses_bias=False,
+        )
+
     def _test_qd8_per_channel_linear(self, dtype: torch.dtype = torch.float):
         for uses_bias in (False, True):
             module = BaseLinear(
diff --git a/backends/xnnpack/test/tester/TARGETS b/backends/xnnpack/test/tester/TARGETS
@@ -26,5 +26,6 @@ runtime.python_library(
         "//executorch/exir/backend:partitioner",
         "//executorch/exir/passes:spec_prop_pass",
         "//executorch/extension/pybindings:portable_lib",  # @manual
+        "//executorch/backends/transforms:duplicate_dynamic_quant_chain"
     ],
 )
diff --git a/backends/xnnpack/test/tester/tester.py b/backends/xnnpack/test/tester/tester.py
@@ -26,6 +26,9 @@
     to_edge,
     to_edge_transform_and_lower,
 )
+from executorch.backends.transforms.duplicate_dynamic_quant_chain import (
+    DuplicateDynamicQuantChainPass,
+)
 from executorch.exir.backend.backend_api import validation_disabled
 from executorch.exir.backend.partitioner import Partitioner
 from executorch.exir.passes.sym_shape_eval_pass import ConstraintBasedSymShapeEvalPass
@@ -177,6 +180,8 @@ def run(
                 prepared(*inputs)
 
         converted = convert_pt2e(prepared)
+        DuplicateDynamicQuantChainPass()(converted)
+
         self.converted_graph = converted
 
     @property

Original file line number	Diff line number	Diff line change
`@@ -115,7 +115,7 @@ def generate_per_op_partitions(self, ep: ExportedProgram) -> List[Partition]:`
`115`	`115`	`class XnnpackDynamicallyQuantizedPartitioner(XnnpackPartitioner):`
`116`	`116`	`def __init__(self):`
`117`	`117`	`super().__init__(`
`118`		`- config_precisions=ConfigPrecisionType.DYNAMIC_QUANT, per_op_mode=True`
	`118`	`+ config_precisions=ConfigPrecisionType.DYNAMIC_QUANT,`
`119`	`119`	`)`
`120`	`120`
`121`	`121`
Original file line number	Diff line number	Diff line change
`@@ -26,5 +26,6 @@ runtime.python_library(`
`26`	`26`	`"//executorch/exir/backend:partitioner",`
`27`	`27`	`"//executorch/exir/passes:spec_prop_pass",`
`28`	`28`	`"//executorch/extension/pybindings:portable_lib", # @manual`
	`29`	`+ "//executorch/backends/transforms:duplicate_dynamic_quant_chain"`
`29`	`30`	`],`
`30`	`31`	`)`