From 04b1dda2ff8a53779431fabe4b050194dd202284 Mon Sep 17 00:00:00 2001
From: Mengwei Liu
Date: Tue, 8 Oct 2024 20:58:01 -0700
Subject: [PATCH] Unbreak test models llama CI (#6026)

Summary:
Did a bunch of debugging on OSS CI: https://github.com/pytorch/executorch/actions/runs/11241297226/job/31252590975

Was able to confirm that although the failure shows up in `ConvertToLinear`, the root cause is that the graph is partitioned differently between the two pytorch nightlies dev20240916 and dev20240917. The exported graph looks the same, but the partitioner behaves differently and causes the `ConvertToLinear` pass to error out.

We can't simply revert to the dev20240916 nightly because it breaks other CI jobs, see https://github.com/pytorch/executorch/pull/5987.

The current approach avoids decomposing linear by using the `to_edge_transform_and_lower` API. This avoids jumping into the rabbit hole of debugging the partitioning & tagging logic.

Reviewed By: digantdesai, Jack-Khuu, tugsbayasgalan

Differential Revision: D64074891
---
 examples/xnnpack/aot_compiler.py | 24 +++++++++++++-----------
 1 file changed, 13 insertions(+), 11 deletions(-)

diff --git a/examples/xnnpack/aot_compiler.py b/examples/xnnpack/aot_compiler.py
index 520aa82d7cf..f65f9b73a58 100644
--- a/examples/xnnpack/aot_compiler.py
+++ b/examples/xnnpack/aot_compiler.py
@@ -15,8 +15,12 @@
 import torch
 from executorch.backends.xnnpack.partition.xnnpack_partitioner import XnnpackPartitioner
 from executorch.devtools import generate_etrecord
-from executorch.exir import EdgeCompileConfig, ExecutorchBackendConfig
-from executorch.extension.export_util.utils import export_to_edge, save_pte_program
+from executorch.exir import (
+    EdgeCompileConfig,
+    ExecutorchBackendConfig,
+    to_edge_transform_and_lower,
+)
+from executorch.extension.export_util.utils import save_pte_program
 
 from ..models import MODEL_NAME_TO_MODEL
 from ..models.model_factory import EagerModelFactory
@@ -81,29 +85,27 @@
 
     model = model.eval()
     # pre-autograd export. eventually this will become torch.export
-    model = torch.export.export_for_training(model, example_inputs).module()
+    ep = torch.export.export_for_training(model, example_inputs)
+    model = ep.module()
 
     if args.quantize:
         logging.info("Quantizing Model...")
         # TODO(T165162973): This pass shall eventually be folded into quantizer
         model = quantize(model, example_inputs)
 
-    edge = export_to_edge(
-        model,
-        example_inputs,
-        edge_compile_config=EdgeCompileConfig(
+    edge = to_edge_transform_and_lower(
+        ep,
+        partitioner=[XnnpackPartitioner()],
+        compile_config=EdgeCompileConfig(
             _check_ir_validity=False if args.quantize else True,
             _skip_dim_order=True,  # TODO(T182187531): enable dim order in xnnpack
         ),
     )
-    logging.info(f"Exported graph:\n{edge.exported_program().graph}")
+    logging.info(f"Exported and lowered graph:\n{edge.exported_program().graph}")
 
     # this is needed for the ETRecord as lowering modifies the graph in-place
     edge_copy = copy.deepcopy(edge)
 
-    edge = edge.to_backend(XnnpackPartitioner())
-    logging.info(f"Lowered graph:\n{edge.exported_program().graph}")
-
     exec_prog = edge.to_executorch(
         config=ExecutorchBackendConfig(extract_delegate_segments=False)
     )
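
For context, below is a minimal sketch of the export-and-lower flow this patch switches to, outside of the `aot_compiler.py` argument handling. The `TinyLinear` module, its input shape, and the output file name are illustrative assumptions, not part of the patch; the API calls mirror the ones added in the diff above.

```python
# Sketch only: exercises to_edge_transform_and_lower on a toy model so the
# aten.linear op reaches the XNNPACK partitioner without being decomposed.
import torch
from executorch.backends.xnnpack.partition.xnnpack_partitioner import XnnpackPartitioner
from executorch.exir import (
    EdgeCompileConfig,
    ExecutorchBackendConfig,
    to_edge_transform_and_lower,
)
from executorch.extension.export_util.utils import save_pte_program


class TinyLinear(torch.nn.Module):  # hypothetical toy model for illustration
    def __init__(self):
        super().__init__()
        self.linear = torch.nn.Linear(8, 4)

    def forward(self, x):
        return self.linear(x)


model = TinyLinear().eval()
example_inputs = (torch.randn(1, 8),)

# Keep the ExportedProgram around; to_edge_transform_and_lower consumes it
# directly and partitions/lowers in one step, as in the patched script.
ep = torch.export.export_for_training(model, example_inputs)

edge = to_edge_transform_and_lower(
    ep,
    partitioner=[XnnpackPartitioner()],
    compile_config=EdgeCompileConfig(_check_ir_validity=True),
)

exec_prog = edge.to_executorch(
    config=ExecutorchBackendConfig(extract_delegate_segments=False)
)
# Writes tiny_linear_xnnpack.pte to the current directory.
save_pte_program(exec_prog, "tiny_linear_xnnpack")
```

Compared with the old `export_to_edge(...)` followed by `edge.to_backend(XnnpackPartitioner())`, this single call lets the partitioner see the un-decomposed linear ops, which is what sidesteps the `ConvertToLinear` failure described in the summary.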