
Commit c703417

mcremon-meta authored and facebook-github-bot committed
Add quantize_and_export_to_edge and quantize_and_export_to_executorch
Summary: These APIs let most users run the entire compilation flow with a single call (instead of quantizing first and then exporting separately). Reviewed By: zonglinpeng. Differential Revision: D73397438
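As a rough illustration (not part of the diff; the Linear model and inputs are hypothetical placeholders), the change collapses the previous two-step flow into one call:

    import torch
    from executorch.backends.cadence.aot.compiler import (
        export_to_edge,
        quantize_and_export_to_edge,
        quantize_pt2,
    )

    model = torch.nn.Linear(16, 8)  # hypothetical example model
    inputs = (torch.randn(1, 16),)

    # Previous flow: quantize first, then export to edge in a second call.
    quantized_model = quantize_pt2(model, inputs)
    edge_prog_manager = export_to_edge(quantized_model, inputs)

    # New flow: a single call covers the whole quantize-and-export pipeline.
    edge_prog_manager = quantize_and_export_to_edge(model, inputs)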
1 parent 59870c5 commit c703417

3 files changed: 36 additions, 13 deletions

backends/cadence/aot/compiler.py

Lines changed: 25 additions & 5 deletions

@@ -47,6 +47,8 @@
 from .utils import print_ops_info
 
 
+default_quantizer = CadenceDefaultQuantizer()
+
 # Note: this is not meant as a primary API since it can create inconsistencies
 # if the quantizer here is different from the quantizer used to convert. It is
 # however useful for unit tests to separate the converted model from the fused
@@ -145,7 +147,7 @@ def fuse_pt2(
 def quantize_pt2(
     model: torch.nn.Module,
     inputs: tuple[object, ...],
-    quantizer: Optional[CadenceQuantizer] = None,
+    quantizer: CadenceQuantizer = default_quantizer,
     calibration_data: Optional[list[tuple[object, ...]]] = None,
     dump_graphs: bool = False,
 ) -> torch.fx.GraphModule:
@@ -159,10 +161,6 @@ def quantize_pt2(
     # Make the model inference mode by calling model.eval()
     model.eval()
 
-    # Instantiate the quantizer to CadenceQuantizer if not supplied
-    if not quantizer:
-        quantizer = CadenceDefaultQuantizer()
-
     # Get converted graph module
     converted_gm = convert_pt2(
         model, inputs, quantizer, calibration_data, dump_graphs=dump_graphs
@@ -250,6 +248,28 @@ def export_to_edge(
     return edge_prog_manager
 
 
+def quantize_and_export_to_edge(
+    model: torch.nn.Module,
+    inputs: tuple[object, ...],
+    quantizer: CadenceQuantizer = default_quantizer,
+    dump_graphs: bool = False,
+    constant_methods: Optional[dict[str, object]] = None,
+) -> EdgeProgramManager:
+    quantized_model = quantize_pt2(
+        model,
+        inputs,
+        quantizer=quantizer,
+        dump_graphs=dump_graphs,
+    )
+
+    return export_to_edge(
+        quantized_model,
+        inputs,
+        dump_graphs=dump_graphs,
+        constant_methods=constant_methods,
+    )
+
+
 def export_to_cadence(
     model: torch.nn.Module,
     inputs: tuple[object, ...],
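A custom quantizer can still be supplied through the new helper's quantizer keyword. A minimal sketch, assuming CadenceDefaultQuantizer is importable from the aot quantizer module (the model and inputs are hypothetical placeholders):

    import torch
    from executorch.backends.cadence.aot.compiler import quantize_and_export_to_edge
    from executorch.backends.cadence.aot.quantizer.quantizer import CadenceDefaultQuantizer

    model = torch.nn.Linear(16, 8)  # hypothetical example model
    inputs = (torch.randn(1, 16),)

    # Omitting the quantizer argument falls back to the module-level default_quantizer.
    edge_prog_manager = quantize_and_export_to_edge(
        model,
        inputs,
        quantizer=CadenceDefaultQuantizer(),
    )
    graph_module = edge_prog_manager.exported_program().graph_module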

backends/cadence/aot/tests/test_fusion_ops_passes.py

Lines changed: 5 additions & 3 deletions

@@ -12,7 +12,10 @@
 import executorch.backends.cadence.aot.ops_registrations  # noqa
 import torch
 from executorch.backends.cadence.aot import compiler
-from executorch.backends.cadence.aot.compiler import export_to_edge, quantize_pt2
+from executorch.backends.cadence.aot.compiler import (
+    export_to_edge,
+    quantize_and_export_to_edge,
+)
 from executorch.backends.cadence.aot.fuse_ops import (
     FuseFullThenReshapePass,
     FuseMulIntoDequantPass,
@@ -394,9 +397,8 @@ def forward(self, x):
 
         inputs = torch.randn(2, 12, 1, 6)
         model = M()
-        quantized_model = quantize_pt2(model, (inputs,))
         graph_module = (
-            export_to_edge(quantized_model, (inputs,)).exported_program().graph_module
+            quantize_and_export_to_edge(model, (inputs,)).exported_program().graph_module
         )
         graph_module = FuseQuantDequantToRequantizePass()(graph_module).graph_module
         self.check_op_counts(

backends/cadence/aot/tests/test_replace_ops_passes.py

Lines changed: 6 additions & 5 deletions

@@ -13,7 +13,10 @@
 import torch
 import torch.nn.functional as F
 from executorch.backends.cadence.aot import compiler
-from executorch.backends.cadence.aot.compiler import export_to_edge, quantize_pt2
+from executorch.backends.cadence.aot.compiler import (
+    export_to_edge,
+    quantize_and_export_to_edge,
+)
 from executorch.backends.cadence.aot.graph_builder import (
     GraphBuilder,
     single_op_builder,
@@ -850,9 +853,8 @@ def test_replace_single_element_tensor_arguments_from_full_op_with_scalar(
 
         inputs = (x,)
         model = torch.nn.Linear(in_features=in_features, out_features=out_features)
-        quantized_model = quantize_pt2(model, inputs)
 
-        exported_program = export_to_edge(quantized_model, inputs).exported_program()
+        exported_program = quantize_and_export_to_edge(model, inputs).exported_program()
 
         # By default, the quantized linear op should have constant scalar attributes.
         self.assertTargetCountsEqual(
@@ -897,9 +899,8 @@ def test_replace_single_element_tensor_arguments_from_full_op_with_scalar_tuple_
 
         inputs = (x,)
         model = torch.nn.Linear(in_features=in_features, out_features=out_features)
-        quantized_model = quantize_pt2(model, inputs)
 
-        exported_program = export_to_edge(quantized_model, inputs).exported_program()
+        exported_program = quantize_and_export_to_edge(model, inputs).exported_program()
 
         # By default, the quantized linear op should have constant scalar attributes.
         self.assertTargetCountsEqual(
