
Commit 0765b24 ("up")

1 parent c0a2cd0

File tree: 6 files changed (+54, -25 lines)

exir/passes/constant_prop_pass.py

Lines changed: 11 additions & 3 deletions
@@ -6,9 +6,9 @@

 # pyre-unsafe

+import logging
 from collections import OrderedDict
 from typing import cast, Mapping, Optional
-import logging

 import torch
 from executorch.exir.dialects._ops import ops as exir_ops
@@ -30,6 +30,10 @@
 # Propagating aten.full can significantly increase compiled model size.
 _DEFAULT_SKIP_TARGETS = {exir_ops.edge.aten.full.default}

+# Skipping transpose/permute for now because https://github.com/pytorch/executorch/issues/10499
+_DEFAULT_SKIP_TARGETS.add(exir_ops.edge.transpose.int)
+_DEFAULT_SKIP_TARGETS.add(exir_ops.edge.permute.default)
+
 # Do not const prop quantization primitives
 _QDQ_OPS = [
     exir_ops.edge.quantized_decomposed.dequantize_per_channel.default,
@@ -42,7 +46,8 @@
     exir_ops.edge.quantized_decomposed.choose_qparams.tensor,
 ]
 try:
-    import torchao # noqa: F401
+    import torchao  # noqa: F401
+
     _QDQ_OPS.extend(
         [
             exir_ops.edge.torchao.dequantize_affine.default,
@@ -66,6 +71,7 @@
     torch.layout,
 )

+
 def is_const(
     arg,
     exported_program: ExportedProgram,
@@ -333,7 +339,9 @@ def constant_prop_pass(
         if node.target == torch.ops.higher_order.cond
     ]
     if len(has_control_flow) > 0:
-        logging.warning("constant_prop_pass does not constant propagate in control flow modules")
+        logging.warning(
+            "constant_prop_pass does not constant propagate in control flow modules"
+        )

     const_node_to_tensor = get_propagated_const_tensor_dict(
         exported_program, custom_skip_targets
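For context, constant_prop_pass folds operations whose inputs are all constant into precomputed tensors, and _DEFAULT_SKIP_TARGETS lists ops it deliberately leaves alone. A minimal usage sketch follows; it assumes an EdgeProgramManager named `edge` produced by exir.to_edge() (not part of this commit) and the custom_skip_targets parameter implied by the call to get_propagated_const_tensor_dict above.

    # Minimal sketch (not from this commit): run constant propagation on an
    # exported edge program while skipping additional targets.
    # `edge` is a hypothetical EdgeProgramManager returned by exir.to_edge().
    from executorch.exir.dialects._ops import ops as exir_ops
    from executorch.exir.passes.constant_prop_pass import constant_prop_pass

    extra_skips = {exir_ops.edge.aten.full.default}  # example skip set; adjust as needed
    program = constant_prop_pass(edge.exported_program(), custom_skip_targets=extra_skips)
    # Propagated constants show up as _prop_tensor_constant* attributes in the graph,
    # which is what the FileCheck assertions in the tests below look for.
    print(program.graph_module.code)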

exir/passes/quant_fusion_pass.py

Lines changed: 3 additions & 3 deletions
@@ -7,11 +7,11 @@
 import torch
 from executorch.exir.dialects._ops import ops as exir_ops
 from executorch.exir.pass_base import ExportPass
+from executorch.exir.passes.constant_prop_pass import constant_prop_pass
+from torch.export import ExportedProgram
 from torch.fx import GraphModule, subgraph_rewriter
 from torch.fx.passes.infra.pass_base import PassResult
 from torch.utils import _pytree as pytree
-from executorch.exir.passes.constant_prop_pass import constant_prop_pass
-from torch.export import ExportedProgram

 from ._quant_patterns_and_replacements import get_quant_patterns_and_replacements

@@ -147,7 +147,7 @@ def quant_fusion_and_const_prop_pass(program: ExportedProgram) -> ExportedProgram:
     gm = program.graph_module
     gm_res = QuantFusionPass(_fix_node_meta_val=True)(gm)
     gm = gm_res.graph_module
-
+
     # Do const prop pass to remove packing/dtype conversion ops
     program = constant_prop_pass(program)
     return program
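The helper above chains QuantFusionPass with constant propagation, which is how the tests later in this commit exercise it. A rough usage sketch, assuming an EdgeProgramManager `edge` (not defined in this commit) that already contains quantize/dequantize patterns:

    # Rough sketch; `edge` is an assumed EdgeProgramManager holding a quantized model.
    from executorch.exir.passes.quant_fusion_pass import quant_fusion_and_const_prop_pass

    program = quant_fusion_and_const_prop_pass(edge.exported_program())
    # After the pass, fused quantized ops remain while packing/dtype-conversion ops
    # have been const-propagated away, as the FileCheck assertions below verify.
    print(program.graph_module.code)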

exir/program/_program.py

Lines changed: 1 addition & 1 deletion
@@ -1532,7 +1532,7 @@ def to_executorch(
            raise Exception(
                "Cannot run do_quant_fusion_and_const_prop on a graph with a backward signature intended for on-device training."
                " Please set do_quant_fusion_and_const_prop to False in the ExecutorchBackendConfig."
-                )
+            )
        program = quant_fusion_and_const_prop_pass(program)
        program = weights_to_outputs_pass(program)
        program = unsafe_remove_auto_functionalized_pass(program)
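The guard above fires only when the config flag is enabled on a training graph. A hedged sketch of toggling that flag, assuming an inference-only EdgeProgramManager `edge` (not part of this commit):

    # Sketch only; `edge` is an assumed inference-only EdgeProgramManager.
    from executorch.exir import ExecutorchBackendConfig

    et_program = edge.to_executorch(
        ExecutorchBackendConfig(do_quant_fusion_and_const_prop=True)
    )
    # For graphs with a backward signature (on-device training), leave the flag at
    # False, as the exception message above instructs.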

exir/tests/test_passes.py

Lines changed: 16 additions & 7 deletions
@@ -1294,7 +1294,9 @@ def false_fn(self, val):
                 return self.linear(val) - self.f(val)

             def forward(self, pred, x):
-                out = torch.nn.functional.linear(x, self.w.to(torch.float16).to(torch.float32))
+                out = torch.nn.functional.linear(
+                    x, self.w.to(torch.float16).to(torch.float32)
+                )
                 return torch.ops.higher_order.cond(
                     pred, self.true_fn, self.false_fn, [out]
                 )
@@ -1308,7 +1310,9 @@ def forward(self, pred, x):
         )
         expected_out = edge.exported_program().module()(pred, x)

-        warn_log = "constant_prop_pass does not constant propagate in control flow modules"
+        warn_log = (
+            "constant_prop_pass does not constant propagate in control flow modules"
+        )
         with self.assertLogs(level="WARNING") as log:
             program = constant_prop_pass(edge.exported_program())
             self.assertIn(warn_log, log.output[0])
@@ -1317,8 +1321,10 @@ def forward(self, pred, x):
         self.assertTrue(torch.allclose(expected_out, out))

         # dtype casts in parent module are const propagated
-        FileCheck().check("executorch_exir_dialects_edge__ops_aten_mm_default(x, _prop_tensor_constant").run(program.graph_module.code)
-
+        FileCheck().check(
+            "executorch_exir_dialects_edge__ops_aten_mm_default(x, _prop_tensor_constant"
+        ).run(program.graph_module.code)
+
     def test_constant_prop_pass_quant_primitives(self) -> None:
         class M(torch.nn.Module):
             def __init__(self):
@@ -1329,9 +1335,10 @@ def __init__(self):

             def forward(self, x):
                 w_dq = torch.ops.quantized_decomposed.dequantize_per_tensor.default(
-                    self.w_int, self.w_scale, self.w_zero_point, -127, 128, torch.int8)
+                    self.w_int, self.w_scale, self.w_zero_point, -127, 128, torch.int8
+                )
                 return torch.nn.functional.linear(x, w_dq)
-
+
         mod = M()
         x = torch.randn([3])
         mod(x)
@@ -1340,7 +1347,9 @@ def forward(self, x):
             compile_config=exir.EdgeCompileConfig(_check_ir_validity=False),
         )
         constant_prop_pass(edge.exported_program())
-        FileCheck().check("executorch_exir_dialects_edge__ops_quantized_decomposed_dequantize_per_tensor_default").run(edge.exported_program().graph_module.code)
+        FileCheck().check(
+            "executorch_exir_dialects_edge__ops_quantized_decomposed_dequantize_per_tensor_default"
+        ).run(edge.exported_program().graph_module.code)

     def test_mutable_buffers(self) -> None:
         def count_copies(gm: torch.fx.GraphModule) -> int:

exir/tests/test_quant_fusion_pass.py

Lines changed: 20 additions & 9 deletions
@@ -6,13 +6,17 @@

 # pyre-strict

+import copy
 import unittest

 import torch
 from executorch import exir
 from executorch.exir import EdgeCompileConfig, to_edge
 from executorch.exir.passes.constant_prop_pass import constant_prop_pass
-from executorch.exir.passes.quant_fusion_pass import QuantFusionPass, quant_fusion_and_const_prop_pass
+from executorch.exir.passes.quant_fusion_pass import (
+    quant_fusion_and_const_prop_pass,
+    QuantFusionPass,
+)
 from executorch.exir.tests.common import register_additional_test_aten_ops
 from torch.ao.quantization import (  # @manual
     float_qparams_weight_only_qconfig,
@@ -33,7 +37,7 @@
 from torch.testing import FileCheck
 from torchao.quantization.granularity import PerAxis, PerGroup
 from torchao.quantization.quant_api import IntxWeightOnlyConfig, quantize_
-import copy
+

 class TestQuantFusionPass(unittest.TestCase):
     @classmethod
@@ -438,9 +442,13 @@ def _test_embedding_torchao(

         # After pass, we see packing op and quantized embedding op, but no torchao dequantize op
         FileCheck().check_count(
-            "executorch_exir_dialects_edge__ops_quant_fusion__pack_embedding_weight_default", 1 if bit_width < 8 else 0, exactly=True
+            "executorch_exir_dialects_edge__ops_quant_fusion__pack_embedding_weight_default",
+            1 if bit_width < 8 else 0,
+            exactly=True,
         ).check_count(
-            f"executorch_exir_dialects_edge__ops_quantized_decomposed_embedding_{embedding_suffix}", 1, exactly=True,
+            f"executorch_exir_dialects_edge__ops_quantized_decomposed_embedding_{embedding_suffix}",
+            1,
+            exactly=True,
         ).check_not(
             "executorch_exir_dialects_edge__ops_torchao_dequantize_affine_default"
         ).run(
@@ -451,7 +459,9 @@ def _test_embedding_torchao(

         # After constant prop, we see quantized embedding op, but no packing op
         FileCheck().check_count(
-            f"executorch_exir_dialects_edge__ops_quantized_decomposed_embedding_{embedding_suffix}", 1, exactly=True,
+            f"executorch_exir_dialects_edge__ops_quantized_decomposed_embedding_{embedding_suffix}",
+            1,
+            exactly=True,
         ).check_not(
             "executorch_exir_dialects_edge__ops_quant_fusion__pack_embedding_weight_default",
         ).run(
@@ -463,13 +473,14 @@ def _test_embedding_torchao(
         self.assertTrue(torch.allclose(expected_outputs, actual_outputs))

         # Can lower to executorch
-        exec_prog = m.to_executorch() # noqa
-
+        exec_prog = m.to_executorch()  # noqa

         # Alternative flow 2 using quant_fusion_pass on exported program
         quant_fusion_and_const_prop_pass(m_copy.exported_program())
         FileCheck().check_count(
-            f"executorch_exir_dialects_edge__ops_quantized_decomposed_embedding_{embedding_suffix}", 1, exactly=True,
+            f"executorch_exir_dialects_edge__ops_quantized_decomposed_embedding_{embedding_suffix}",
+            1,
+            exactly=True,
         ).check_not(
             "executorch_exir_dialects_edge__ops_quant_fusion__pack_embedding_weight_default",
         ).run(
@@ -480,4 +491,4 @@ def _test_embedding_torchao(
         self.assertTrue(torch.allclose(expected_outputs, actual_outputs2))

         # Can lower to executorch
-        exec_prog2 = m_copy.to_executorch() # noqa
+        exec_prog2 = m_copy.to_executorch()  # noqa

extension/llm/export/builder.py

Lines changed: 3 additions & 2 deletions
@@ -20,6 +20,7 @@
 from executorch.backends.transforms.duplicate_dynamic_quant_chain import (
     DuplicateDynamicQuantChainPass,
 )
+from executorch.backends.xnnpack._passes.convert_to_linear import ConvertToLinearPass
 from executorch.exir import EdgeProgramManager, to_edge_transform_and_lower
 from executorch.exir.backend.partitioner import Partitioner

@@ -508,9 +509,9 @@ def to_executorch(
             # If there are Linear operations left in the graph, let's execute
             # them with the optimized op_linear rather than materializing a
             # transpose followed by a regular op_mm.
-            # Disabling because ConvertToLinearPass is not a sound pass:
+            # TODO: ConvertToLinearPass is not a sound pass and we should fix it
             # https://github.com/pytorch/executorch/issues/10499
-            # ConvertToLinearPass(),
+            ConvertToLinearPass(),
         ]
         if passes:
             # pyre-fixme[6]: In call `list.extend`, for 1st positional argument,
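For context, the re-enabled ConvertToLinearPass recovers single linear ops so the optimized op_linear kernel runs instead of a materialized transpose followed by op_mm, per the comments above. A hedged sketch of invoking it directly, assuming it follows the standard torch.fx pass calling convention (an instance called on a GraphModule returns a PassResult) and an EdgeProgramManager `edge` that is not part of this commit:

    # Sketch only; `edge` is a hypothetical EdgeProgramManager, and the call
    # convention is assumed to match torch.fx pass infrastructure.
    from executorch.backends.xnnpack._passes.convert_to_linear import ConvertToLinearPass

    gm = edge.exported_program().graph_module
    result = ConvertToLinearPass()(gm)  # rewrite transpose + mm patterns into linear
    print(result.graph_module.code)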
