Enable quant fusion and const propagation by default

metascroy · facebook-github-bot · commit cb7acb408636 · 2025-04-23T08:38:18.000-07:00
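Summary: This diff enables quant fusion and constant propagation by default in ExecuTorch. The passes run in to_executorch, after all to_edge passes but before memory planning, and are controlled by the new ExecutorchBackendConfig.do_quant_fusion_and_const_prop flag (default True).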

Differential Revision: D73513516
diff --git a/exir/capture/_config.py b/exir/capture/_config.py
@@ -102,3 +102,6 @@ class ExecutorchBackendConfig:
     # serialized in the PTE file. Its value is ignored if mutable buffers are not
     # memory planned as the names must be serialized in that case.
     emit_mutable_buffer_names: bool = False
+
+    # If set to true, to_executorch runs the quant fusion and constant propagation passes
+    do_quant_fusion_and_const_prop: bool = True
diff --git a/exir/passes/TARGETS b/exir/passes/TARGETS
@@ -154,6 +154,8 @@ python_library(
         "//caffe2:torch",
         "//executorch/exir:pass_base",
         "//executorch/exir/dialects:lib",
+        "//pytorch/ao:torchao",
+        "//executorch/exir/passes:constant_prop_pass",
     ],
 )
 
diff --git a/exir/passes/quant_fusion_pass.py b/exir/passes/quant_fusion_pass.py
@@ -10,6 +10,8 @@
 from torch.fx import GraphModule, subgraph_rewriter
 from torch.fx.passes.infra.pass_base import PassResult
 from torch.utils import _pytree as pytree
+from executorch.exir.passes.constant_prop_pass import constant_prop_pass
+from torch.export import ExportedProgram
 
 from ._quant_patterns_and_replacements import get_quant_patterns_and_replacements
 
@@ -139,3 +141,35 @@ def call(self, graph_module: GraphModule) -> PassResult:
         graph_module.graph.lint()
         graph_module.graph.eliminate_dead_code()
         return PassResult(graph_module, True)
+
+
+import torchao # noqa: F401
+# Edge-dialect quantize / dequantize / choose_qparams op targets. After
+# QuantFusionPass has run, quant_fusion_and_const_prop_pass raises if any graph
+# node still has one of these as its target.
+_QDQ_OPS = [
+    exir_ops.edge.quantized_decomposed.dequantize_per_channel.default,
+    exir_ops.edge.quantized_decomposed.dequantize_per_tensor.default,
+    exir_ops.edge.quantized_decomposed.dequantize_per_tensor.tensor,
+    exir_ops.edge.quantized_decomposed.convert_element_type.no_fuse,
+    # NOTE(review): unlike every other entry, the next line has no overload
+    # suffix; the `.default` overload is listed right after it. Confirm this
+    # entry is intentional and not a leftover.
+    exir_ops.edge.torchao.dequantize_affine,
+    exir_ops.edge.torchao.dequantize_affine.default,
+    exir_ops.edge.quantized_decomposed.quantize_per_tensor.default,
+    exir_ops.edge.quantized_decomposed.quantize_per_tensor.tensor,
+    exir_ops.edge.quantized_decomposed.quantize_per_channel.default,
+    exir_ops.edge.torchao.quantize_affine.default,
+    exir_ops.edge.quantized_decomposed.choose_qparams.tensor,
+    exir_ops.edge.torchao.choose_qparams_affine.default,
+]
+
+def quant_fusion_and_const_prop_pass(program: ExportedProgram) -> ExportedProgram:
+    """Run QuantFusionPass on ``program``, then constant propagation.
+
+    Args:
+        program: Exported program whose ``graph_module`` is passed through
+            ``QuantFusionPass``.
+
+    Returns:
+        The program returned by ``constant_prop_pass``.
+
+    Raises:
+        AssertionError: If any node whose target is in ``_QDQ_OPS`` is still
+            present in the graph after ``QuantFusionPass``.
+    """
+    gm = program.graph_module
+    gm_res = QuantFusionPass(_fix_node_meta_val=True)(gm)
+    gm = gm_res.graph_module
+    # NOTE(review): validation runs on the original `program` object, so this
+    # presumably relies on QuantFusionPass rewriting the graph module in place
+    # -- confirm.
+    program.validate()
+
+    # Assert no Q/DQ ops remain in graph after quant fusion pass
+    for node in gm.graph.nodes:
+        if node.target in _QDQ_OPS:
+            raise AssertionError(f"Q/DQ op {node.target} remains in graph after quant fusion pass")
+    
+    # Do const prop pass to remove packing ops
+    program = constant_prop_pass(program)
+    return program
diff --git a/exir/program/_program.py b/exir/program/_program.py
@@ -52,6 +52,7 @@
 from executorch.exir.passes.normalize_view_copy_base_pass import (
     NormalizeViewCopyBasePass,
 )
+from executorch.exir.passes.quant_fusion_pass import quant_fusion_and_const_prop_pass
 from executorch.exir.passes.remove_graph_asserts_pass import (
     RemoveGraphAssertsPass,
     RemoveNonCoreAtenOpGraphAssertsPass,
@@ -1526,9 +1527,12 @@ def to_executorch(
             after it has been transformed to the ExecuTorch backend.
         """
         config = config if config else ExecutorchBackendConfig()
-
         execution_programs: Dict[str, ExportedProgram] = {}
         for name, program in self._edge_programs.items():
+            # Run quant fusion, then constant propagation. Constant propagation
+            # removes the packing ops that remain after quant fusion.
+            if config.do_quant_fusion_and_const_prop:
+                program = quant_fusion_and_const_prop_pass(program)
             program = weights_to_outputs_pass(program)
             program = unsafe_remove_auto_functionalized_pass(program)
             gm, new_signature = insert_write_back_for_buffers_pass(program)
diff --git a/exir/tests/test_quant_fusion_pass.py b/exir/tests/test_quant_fusion_pass.py
@@ -12,7 +12,7 @@
 from executorch import exir
 from executorch.exir import EdgeCompileConfig, to_edge
 from executorch.exir.passes.constant_prop_pass import constant_prop_pass
-from executorch.exir.passes.quant_fusion_pass import QuantFusionPass
+from executorch.exir.passes.quant_fusion_pass import QuantFusionPass, quant_fusion_and_const_prop_pass
 from executorch.exir.tests.common import register_additional_test_aten_ops
 from torch.ao.quantization import (  # @manual
     float_qparams_weight_only_qconfig,
@@ -33,7 +33,7 @@
 from torch.testing import FileCheck
 from torchao.quantization.granularity import PerAxis, PerGroup
 from torchao.quantization.quant_api import IntxWeightOnlyConfig, quantize_
-
+import copy
 
 class TestQuantFusionPass(unittest.TestCase):
     @classmethod
@@ -419,6 +419,7 @@ def _test_embedding_torchao(
         m = to_edge(
             export(model, example_inputs, strict=True), compile_config=compile_config
         )
+        m_copy = copy.deepcopy(m)
 
         # Before pass, we see torchao dequantize and embedding ops
         FileCheck().check_count(
@@ -437,13 +438,9 @@ def _test_embedding_torchao(
 
         # After pass, we see packing op and quantized embedding op, but no torchao dequantize op
         FileCheck().check_count(
-            "executorch_exir_dialects_edge__ops_quant_fusion__pack_embedding_weight_default",
-            1 if bit_width < 8 else 0,
-            exactly=True,
+            "executorch_exir_dialects_edge__ops_quant_fusion__pack_embedding_weight_default", 1 if bit_width < 8 else 0, exactly=True
         ).check_count(
-            f"executorch_exir_dialects_edge__ops_quantized_decomposed_embedding_{embedding_suffix}",
-            1,
-            exactly=True,
+            f"executorch_exir_dialects_edge__ops_quantized_decomposed_embedding_{embedding_suffix}", 1, exactly=True,
         ).check_not(
             "executorch_exir_dialects_edge__ops_torchao_dequantize_affine_default"
         ).run(
@@ -454,9 +451,7 @@ def _test_embedding_torchao(
 
         # After constant prop, we see quantized embedding op, but no packing op
         FileCheck().check_count(
-            f"executorch_exir_dialects_edge__ops_quantized_decomposed_embedding_{embedding_suffix}",
-            1,
-            exactly=True,
+             f"executorch_exir_dialects_edge__ops_quantized_decomposed_embedding_{embedding_suffix}", 1, exactly=True,
         ).check_not(
             "executorch_exir_dialects_edge__ops_quant_fusion__pack_embedding_weight_default",
         ).run(
@@ -468,4 +463,21 @@ def _test_embedding_torchao(
         self.assertTrue(torch.allclose(expected_outputs, actual_outputs))
 
         # Can lower to executorch
-        exec_prog = m.to_executorch()  # noqa: F841
+        exec_prog = m.to_executorch() # noqa
+
+
+        # Alternative flow: apply quant_fusion_and_const_prop_pass directly to the exported program
+        quant_fusion_and_const_prop_pass(m_copy.exported_program())
+        FileCheck().check_count(
+             f"executorch_exir_dialects_edge__ops_quantized_decomposed_embedding_{embedding_suffix}", 1, exactly=True,
+        ).check_not(
+            "executorch_exir_dialects_edge__ops_quant_fusion__pack_embedding_weight_default",
+        ).run(
+            m_copy.exported_program().graph_module.code
+        )
+
+        actual_outputs2 = m_copy.exported_program().module()(*example_inputs)
+        self.assertTrue(torch.allclose(expected_outputs, actual_outputs2))
+
+        # Can lower to executorch
+        exec_prog2 = m_copy.to_executorch() # noqa

Original file line number	Diff line number	Diff line change
`@@ -154,6 +154,8 @@ python_library(`
`154`	`154`	`"//caffe2:torch",`
`155`	`155`	`"//executorch/exir:pass_base",`
`156`	`156`	`"//executorch/exir/dialects:lib",`
	`157`	`+ "//pytorch/ao:torchao",`
	`158`	`+ "//executorch/exir/passes:constant_prop_pass",`
`157`	`159`	`],`
`158`	`160`	`)`
`159`	`161`