
Commit bdbd699

metascroy authored and facebook-github-bot committed
Enable do_quant_fusion_and_const_prop by default
Summary: This diff enables const_prop + quant fusion during to_executorch by default (do_quant_fusion_and_const_prop: bool = True in ExecutorchBackendConfig). This requires updating various tests in CI.

Differential Revision: D73749914
1 parent e4386c5 commit bdbd699
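
What the flipped default means for callers, as a minimal sketch; the AddOne module and example inputs below are placeholders for illustration, not code from this commit:

import torch
from executorch.exir import to_edge, ExecutorchBackendConfig

class AddOne(torch.nn.Module):  # placeholder module, not from this commit
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return x + 1

ep = torch.export.export(AddOne(), (torch.ones(3, 2),), strict=True)

# As of this commit, to_executorch() runs quant fusion + constant propagation
# by default; no config is needed to get the new behavior.
et = to_edge(ep).to_executorch()

# Callers that depend on the old graph layout opt out explicitly, as the
# updated tests below do:
config = ExecutorchBackendConfig(do_quant_fusion_and_const_prop=False)
et_opt_out = to_edge(ep).to_executorch(config)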

8 files changed: +61 -31 lines changed


exir/capture/_config.py

Lines changed: 1 addition & 1 deletion
@@ -104,4 +104,4 @@ class ExecutorchBackendConfig:
     emit_mutable_buffer_names: bool = False
 
     # If set to true, we run quant fusion and constant propagation passes
-    do_quant_fusion_and_const_prop: bool = False
+    do_quant_fusion_and_const_prop: bool = True

exir/emit/test/test_emit.py

Lines changed: 36 additions & 22 deletions
@@ -431,8 +431,8 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
             .executorch_program
         )
         # The value for beta should appear before alpha
-        self.assertEqual(program.execution_plan[0].values[12].val, Int(3))
-        self.assertEqual(program.execution_plan[0].values[13].val, Int(2))
+        self.assertEqual(program.execution_plan[0].values[4].val, Int(3))
+        self.assertEqual(program.execution_plan[0].values[5].val, Int(2))
 
     def test_kwargs2(self) -> None:
         """Tests that the kwargs are placed in the order specified by
@@ -451,10 +451,10 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
             to_edge(export(f, (x,), strict=True)).to_executorch().executorch_program
         )
         # The value for right should appear before side
-        self.assertEqual(program.execution_plan[0].values[6].val, Bool(False))
-        self.assertEqual(program.execution_plan[0].values[7].val, Bool(True))
-        self.assertEqual(program.execution_plan[0].values[8].val, String("right"))
-        self.assertEqual(program.execution_plan[0].values[9].val, Null())
+        self.assertEqual(program.execution_plan[0].values[3].val, Bool(False))
+        self.assertEqual(program.execution_plan[0].values[4].val, Bool(True))
+        self.assertEqual(program.execution_plan[0].values[5].val, String("right"))
+        self.assertEqual(program.execution_plan[0].values[6].val, Null())
 
     def _assertCallLength(self, program: Program, idx: int, expected_len: int) -> None:
         instr_args = program.execution_plan[0].chains[0].instructions[idx].instr_args
@@ -532,24 +532,24 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
         # Check the mul operator's stack trace contains f -> g -> h
         self.assertTrue(
             "return torch.mul(x, torch.randn(3, 2))"
-            in program.execution_plan[0].chains[0].stacktrace[1].items[-1].context
+            in program.execution_plan[0].chains[0].stacktrace[0].items[-1].context
         )
         self.assertEqual(
-            program.execution_plan[0].chains[0].stacktrace[1].items[-1].name, "f"
+            program.execution_plan[0].chains[0].stacktrace[0].items[-1].name, "f"
         )
         self.assertEqual(
-            program.execution_plan[0].chains[0].stacktrace[1].items[-2].name, "g"
+            program.execution_plan[0].chains[0].stacktrace[0].items[-2].name, "g"
         )
         self.assertEqual(
-            program.execution_plan[0].chains[0].stacktrace[1].items[-3].name, "forward"
+            program.execution_plan[0].chains[0].stacktrace[0].items[-3].name, "forward"
         )
 
         # Check the sin operator's stack trace contains g -> h
         self.assertEqual(
-            program.execution_plan[0].chains[0].stacktrace[2].items[-1].name, "g"
+            program.execution_plan[0].chains[0].stacktrace[1].items[-1].name, "g"
         )
         self.assertEqual(
-            program.execution_plan[0].chains[0].stacktrace[2].items[-2].name, "forward"
+            program.execution_plan[0].chains[0].stacktrace[1].items[-2].name, "forward"
         )
 
     def test_stacktrace_off(self) -> None:
@@ -878,10 +878,13 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
             .executorch_program.execution_plan[0]
             .non_const_buffer_sizes
         )
-
+
+        config = ExecutorchBackendConfig(
+            do_quant_fusion_and_const_prop=False,
+        )
         edge_program_manager = to_edge(export(f, (torch.ones(3, 2),), strict=True))
         non_const_buffer_size_without_const_prop_pass = (
-            edge_program_manager.to_executorch()
+            edge_program_manager.to_executorch(config)
             .executorch_program.execution_plan[0]
             .non_const_buffer_sizes
         )
@@ -1510,7 +1513,12 @@ def forward(self, x):
         self.assertEqual(model.W1.untyped_storage().nbytes(), 8)
         self.assertEqual(model.W2.nbytes, 4)
         self.assertEqual(model.W2.untyped_storage().nbytes(), 8)
-        program = to_edge(export(model, (torch.ones(1),), strict=True)).to_executorch()
+
+        # Without this, the views get const propagated
+        config = exir.ExecutorchBackendConfig(
+            do_quant_fusion_and_const_prop=False,
+        )
+        program = to_edge(export(model, (torch.ones(1),), strict=True)).to_executorch(config)
 
         program = program._emitter_output.program
         # each emitted weight is not a view
@@ -1531,7 +1539,10 @@ def forward(self, x):
         program = program._emitter_output.program
         # confirm that the buffer was emitted
         self.assertEqual(len(program.constant_buffer), 2)
-        self.assertEqual(len(program.constant_buffer[1].storage), 8)
+
+        # executorch_exir_dialects_edge__ops_dim_order_ops__to_dim_order_copy_default
+        # converts the buffer from i64 to fp32 (4 bytes), which gets const propagated
+        self.assertEqual(len(program.constant_buffer[1].storage), 4)
 
     def test_emit_lifted_tensor_constant(self) -> None:
         class LiftedTensorConstants(nn.Module):
@@ -1544,7 +1555,7 @@ def forward(self, x):
 
         model = LiftedTensorConstants()
         # Specify that we want to move non-lifted constants to external file
-        et_cfg = ExecutorchBackendConfig(external_constants=True)
+        et_cfg = ExecutorchBackendConfig(external_constants=True, do_quant_fusion_and_const_prop=False)
         program = to_edge(
             export(model, (torch.ones(3, 2),), strict=True)
         ).to_executorch(et_cfg)
@@ -1566,7 +1577,7 @@ def forward(self, x):
 
         model = LiftedConstants()
         # Specify that we want to move non-lifted constants to external file
-        et_cfg = ExecutorchBackendConfig(external_constants=True)
+        et_cfg = ExecutorchBackendConfig(external_constants=True, do_quant_fusion_and_const_prop=False)
         program = to_edge(
             export(model, (torch.ones(3, 2),), strict=True)
         ).to_executorch(et_cfg)
@@ -1658,7 +1669,10 @@ def forward(self, x):
         model = to_edge(export(InfinityMaskModel(), (torch.randn(2, 2),), strict=True))
 
         # Confirm that we can serialize the model with infinity in it.
-        model = model.to_executorch()
+        config = ExecutorchBackendConfig(
+            do_quant_fusion_and_const_prop=False,
+        )
+        model = model.to_executorch(config)
 
         # Assert that the infinity is stored as a string "-inf".
         values = model.executorch_program.execution_plan[0].values
@@ -1716,8 +1730,8 @@ def forward(self, x):
         external_map = emitter_output.external_constant_map[
             "_default_external_constant"
         ]
-        self.assertEqual(external_map["linear.weight"], 0)
-        self.assertEqual(external_map["linear.bias"], 1)
+        self.assertEqual(external_map["_prop_tensor_constant0"], 1)
+        self.assertEqual(external_map["linear.bias"], 0)
 
     def test_delegate_deduplicate(self) -> None:
         class SharedModule(torch.nn.Module):
@@ -1804,7 +1818,7 @@ def forward(self, input, label):
         ep = to_edge(ep)
         # Lower the graph to executorch.
         ep = ep.to_executorch(
-            config=ExecutorchBackendConfig(external_mutable_weights=True)
+            config=ExecutorchBackendConfig(external_mutable_weights=True, do_quant_fusion_and_const_prop=False)
         )
 
         emitter_output = ep._emitter_output

exir/tests/test_joint_graph.py

Lines changed: 6 additions & 3 deletions
@@ -10,7 +10,7 @@
 import torch
 import torch._dynamo
 
-from executorch.exir import to_edge
+from executorch.exir import to_edge, ExecutorchBackendConfig
 
 from executorch.extension.pybindings.portable_lib import (
     _load_for_executorch_from_buffer,
@@ -49,8 +49,11 @@ def forward(self, x, y):
                 break
 
         orig_outputs = len(output_node.args[0])
-
-        et = edge.to_executorch()
+
+        config = ExecutorchBackendConfig(
+            do_quant_fusion_and_const_prop=False,
+        )
+        et = edge.to_executorch(config)
 
         weight_output_specs = [
             spec

exir/tests/test_memory_planning.py

Lines changed: 2 additions & 1 deletion
@@ -769,7 +769,8 @@ def forward(self, input, label):
         ep = export(net, inputs, strict=True)
         ep = _export_forward_backward(ep)
         ep = to_edge(ep)
-        ep = ep.to_executorch()
+        config = ExecutorchBackendConfig(do_quant_fusion_and_const_prop=False)
+        ep = ep.to_executorch(config)
 
         ep.dump_executorch_program(True)

exir/tests/test_passes.py

Lines changed: 10 additions & 1 deletion
@@ -1085,7 +1085,16 @@ def forward(self) -> torch.Tensor:
         self.assertEqual(ep.graph_signature.input_specs[1].arg.name, "b_a")
 
         # Validate that the program successfully passes validation to executorch:
-        edge.to_executorch()
+
+        # The test fails when do_quant_fusion_and_const_prop=True, but the failure is not
+        # related to the pass itself; rather, memory planning fails (AssertionError:
+        # graph_output_allocated not set) when a graph has no user inputs and no operations.
+        # We can construct a failure case even with do_quant_fusion_and_const_prop=False by
+        # changing the forward method in NoUserInputs to just return self.a.
+        config = exir.ExecutorchBackendConfig(
+            do_quant_fusion_and_const_prop=False,
+        )
+        edge.to_executorch(config)
 
     def test_constant_prop_pass_for_parameter(self) -> None:
         def count_additions(gm: torch.fx.GraphModule) -> int:

exir/tests/test_remove_view_copy.py

Lines changed: 1 addition & 0 deletions
@@ -102,6 +102,7 @@ def test_spec(self) -> None:
             config=ExecutorchBackendConfig(
                 remove_view_copy=True,
                 memory_planning_pass=MemoryPlanningPass(alloc_graph_input=False),
+                do_quant_fusion_and_const_prop=False,
             ),
         )

extension/training/examples/XOR/export_model.py

Lines changed: 2 additions & 1 deletion
@@ -32,7 +32,8 @@ def _export_model(external_mutable_weights: bool = False):
     # Lower the graph to executorch.
     ep = ep.to_executorch(
         config=ExecutorchBackendConfig(
-            external_mutable_weights=external_mutable_weights
+            external_mutable_weights=external_mutable_weights,
+            do_quant_fusion_and_const_prop=False,
         )
     )
     return ep

extension/training/pybindings/test/test.py

Lines changed: 3 additions & 2 deletions
@@ -9,7 +9,7 @@
 import unittest
 
 import torch
-from executorch.exir import to_edge
+from executorch.exir import to_edge, ExecutorchBackendConfig
 
 from executorch.extension.training import (
     _load_for_executorch_for_training_from_buffer,
@@ -36,7 +36,8 @@ def test(self):
         ep = torch.export.export(m, m.get_inputs(), strict=True)
         ep = _export_forward_backward(ep)
         ep = to_edge(ep)
-        ep = ep.to_executorch()
+        config = ExecutorchBackendConfig(do_quant_fusion_and_const_prop=False)
+        ep = ep.to_executorch(config)
         buffer = ep.buffer
         tm = _load_for_executorch_for_training_from_buffer(buffer)
