Commit 5fde193

revert change
1 parent 19c5aa1 commit 5fde193

File tree: 3 files changed (+57, -23 lines)


examples/models/llama/export_llama_lib.py

Lines changed: 1 addition & 0 deletions

@@ -1189,6 +1189,7 @@ def _load_llama_model(llm_config: LlmConfig) -> "LLMEdgeManager":
         calibration_seq_length=llm_config.quantization.calibration_seq_length,
         calibration_data=llm_config.quantization.calibration_data,
         tokenizer_path=llm_config.base.tokenizer_path,
+        use_legacy_export=llm_config.backend.qnn.enabled,
         save_exported_program=llm_config.export.export_only,
         verbose=llm_config.debug.verbose,
         metadata=_load_llama_model_metadata(
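
For illustration, a minimal runnable sketch of what this one-line change wires up: the builder's legacy-export flag is simply the "QNN backend enabled" bit of the config. The dataclasses below are hypothetical stand-ins for LlmConfig; only the attribute path llm_config.backend.qnn.enabled and the use_legacy_export keyword come from the diff.

# Hypothetical stand-ins for the relevant slice of LlmConfig.
from dataclasses import dataclass, field

@dataclass
class QnnConfig:
    enabled: bool = False

@dataclass
class BackendConfig:
    qnn: QnnConfig = field(default_factory=QnnConfig)

@dataclass
class LlmConfigSketch:
    backend: BackendConfig = field(default_factory=BackendConfig)

llm_config = LlmConfigSketch(backend=BackendConfig(qnn=QnnConfig(enabled=True)))

# Mirrors the added keyword argument: enabling the QNN backend opts the
# LLMEdgeManager builder into the legacy export path.
use_legacy_export = llm_config.backend.qnn.enabled
print(use_legacy_export)  # True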

extension/llm/export/builder.py

Lines changed: 54 additions & 22 deletions

@@ -10,9 +10,11 @@

 # pyre-unsafe

+import contextlib
 import logging
 from enum import Enum
 from typing import Any, Callable, Dict, List, Optional, Tuple
+from unittest.mock import patch

 import torch
 from executorch.backends.transforms.duplicate_dynamic_quant_chain import (
@@ -94,6 +96,7 @@ def __init__(
         verbose: bool = False,
         metadata: Optional[dict] = None,
         dynamic_shapes: Optional[Any] = None,
+        use_legacy_export: bool = False,
         save_exported_program: bool = False,
     ):
         # Store necessary constructor arguments.

@@ -114,6 +117,7 @@ def __init__(
         self.verbose = verbose
         self.metadata = metadata
         self.dynamic_shapes = dynamic_shapes
+        self.use_legacy_export = use_legacy_export
         self.save_exported_program = save_exported_program

         # Note: treat this as the source of truth for the result of
@@ -225,20 +229,39 @@ def _export(self, module: Optional[torch.nn.Module] = None) -> ExportedProgram:
         # 1. torch.nn.attention.sdpa_kernel([SDPBackend.MATH]) is for bypassing the dynamo error when tracing
         # 2. torch.no_grad() is for getting rid of the dropout (not sure why training ops will show up)
         with torch.nn.attention.sdpa_kernel([SDPBackend.MATH]), torch.no_grad():
-            if module:
-                logging.info("Re-exporting with:")
+            if self.use_legacy_export:
+                # TODO: for use cases such as qnn, which does not work with new, non-functional export IR.
+                # See issue: https://github.com/pytorch/executorch/issues/7373
+
+                with patch.object(
+                    torch._utils_internal,
+                    "export_training_ir_rollout_check",
+                    return_value=False,
+                ):
+                    # TODO: this is temporary and export_for_training doesn't work with qnn either. We need a
+                    # functional graph. See issue https://github.com/pytorch/executorch/pull/4627 for more details
+                    exported_module = torch.export.export(
+                        self.model if not module else module,
+                        self.example_inputs,
+                        self.example_kwarg_inputs,
+                        dynamic_shapes=dynamic_shape,
+                        strict=True,
+                    )
             else:
-                logging.info("Exporting with:")
-            logging.info(f"inputs: {self.example_inputs}")
-            logging.info(f"kwargs: {self.example_kwarg_inputs}")
-            logging.info(f"dynamic shapes: {dynamic_shape}")
-            exported_module = export_for_training(
-                self.model if not module else module,
-                self.example_inputs,
-                kwargs=self.example_kwarg_inputs,
-                dynamic_shapes=dynamic_shape,
-                strict=True,
-            )
+                if module:
+                    logging.info("Re-exporting with:")
+                else:
+                    logging.info("Exporting with:")
+                logging.info(f"inputs: {self.example_inputs}")
+                logging.info(f"kwargs: {self.example_kwarg_inputs}")
+                logging.info(f"dynamic shapes: {dynamic_shape}")
+                exported_module = export_for_training(
+                    self.model if not module else module,
+                    self.example_inputs,
+                    kwargs=self.example_kwarg_inputs,
+                    dynamic_shapes=dynamic_shape,
+                    strict=True,
+                )
         return exported_module

     def export(self) -> "LLMEdgeManager":
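
To make the branch above concrete, here is a self-contained sketch of the two export paths, runnable outside the builder. The toy module and the flag value are illustrative; the patch target torch._utils_internal.export_training_ir_rollout_check and the strict torch.export.export call come from the diff, and the patch is only meaningful on PyTorch builds where that rollout check still exists.

# Minimal sketch: with the legacy path, the training-IR rollout check is patched
# off so torch.export.export produces the pre-training-IR (functional) graph that
# QNN expects; otherwise export_for_training is used as in the non-legacy branch.
from unittest.mock import patch

import torch

class ToyModel(torch.nn.Module):
    def forward(self, x):
        return torch.nn.functional.relu(x) + 1

model = ToyModel()
example_inputs = (torch.randn(2, 4),)
use_legacy_export = True  # e.g. when the QNN backend is enabled

if use_legacy_export:
    with patch.object(
        torch._utils_internal,
        "export_training_ir_rollout_check",
        return_value=False,
    ):
        exported = torch.export.export(model, example_inputs, strict=True)
else:
    exported = torch.export.export_for_training(model, example_inputs, strict=True)

print(exported.graph_module.graph)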
@@ -423,15 +446,24 @@ def export_to_edge(self) -> "LLMEdgeManager":
         # Run export() if it didn't run
         self.export()

-        self.edge_manager = export_to_edge(
-            self.pre_autograd_graph_module,  # pyre-fixme[6]
-            self.example_inputs,
-            example_kwarg_inputs=self.example_kwarg_inputs,
-            dynamic_shapes=dynamic_shape,
-            edge_constant_methods=self.metadata,
-            edge_compile_config=edge_config,
-            verbose=self.verbose,
-        )
+        override_export_behaviour = contextlib.nullcontext()
+        if self.use_legacy_export:
+            override_export_behaviour = patch.object(
+                torch._utils_internal,
+                "export_training_ir_rollout_check",
+                return_value=False,
+            )
+
+        with override_export_behaviour:
+            self.edge_manager = export_to_edge(
+                self.pre_autograd_graph_module,  # pyre-fixme[6]
+                self.example_inputs,
+                example_kwarg_inputs=self.example_kwarg_inputs,
+                dynamic_shapes=dynamic_shape,
+                edge_constant_methods=self.metadata,
+                edge_compile_config=edge_config,
+                verbose=self.verbose,
+            )
         return self

     def to_backend(self, partitioners: Optional[List[Partitioner]]) -> "LLMEdgeManager":
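
The export_to_edge change uses a small, reusable pattern: build either a no-op context or a patch up front, then run the same code under whichever was chosen. A stand-alone sketch of just that pattern follows; the patched method here is a local stand-in, not the real torch internal.

# Stand-alone sketch of the "optional override" pattern: contextlib.nullcontext()
# when no behaviour change is needed, unittest.mock.patch.object() when it is.
import contextlib
from unittest.mock import patch

class Rollout:
    def check(self) -> bool:
        return True  # stand-in for the real rollout check

rollout = Rollout()

def run_export(use_legacy: bool) -> bool:
    override = contextlib.nullcontext()
    if use_legacy:
        # The patch is only active inside the with-block below.
        override = patch.object(Rollout, "check", return_value=False)
    with override:
        return rollout.check()

print(run_export(False))  # True  - nothing patched, normal behaviour
print(run_export(True))   # False - check() temporarily patched off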

extension/llm/export/partitioner_lib.py

Lines changed: 2 additions & 1 deletion

@@ -216,5 +216,6 @@ def get_qnn_partitioner(
         ),
         skip_node_id_set={},
         skip_node_op_set=skip_node_op_set,
-        skip_mutable_buffer=False,
+        # TODO: if deprecated legacy export, skip_mutable_buffer can be set False
+        skip_mutable_buffer=True,
     )
