Skip to content

Commit 1ac7fb5

Browse files
committed
Update base for Update on "add more export modules after etrecord created"
We need to support ETRecord recording custom export modules for further usage. This diff makes that happen by creating a new function inside ETRecord. Differential Revision: [D79279401](https://our.internmc.facebook.com/intern/diff/D79279401/) [ghstack-poisoned]
2 parents 7c0660a + bedce91 commit 1ac7fb5

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

46 files changed

+4912
-1443
lines changed

.github/workflows/build-presets.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ jobs:
2020
strategy:
2121
fail-fast: false
2222
matrix:
23-
preset: [macos, ios, ios-simulator, pybind, llm]
23+
preset: [macos, ios, ios-simulator, pybind, profiling, llm]
2424
with:
2525
job-name: build
2626
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}

CMakeLists.txt

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -278,6 +278,30 @@ if(EXECUTORCH_BUILD_PTHREADPOOL)
278278
)
279279
endif()
280280

281+
if(EXECUTORCH_BUILD_KERNELS_TORCHAO)
282+
set(TORCHAO_BUILD_ATEN_OPS OFF)
283+
set(TORCHAO_BUILD_EXECUTORCH_OPS ON)
284+
set(TORCHAO_BUILD_CPU_AARCH64 ON)
285+
set(TORCHAO_ENABLE_ARM_NEON_DOT ON)
286+
287+
list(APPEND TORCHAO_INCLUDE_DIRS
288+
${EXECUTORCH_ROOT}/backends/xnnpack/third-party/pthreadpool/include
289+
${EXECUTORCH_ROOT}/backends/xnnpack/third-party/cpuinfo/include
290+
${EXECUTORCH_ROOT}/third-party/ao
291+
)
292+
293+
set(EXECUTORCH_INCLUDE_DIRS ${TORCHAO_INCLUDE_DIRS})
294+
295+
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/third-party/ao/torchao/experimental)
296+
executorch_target_link_options_shared_lib(torchao_ops_executorch)
297+
list(APPEND _executorch_kernels torchao_ops_executorch)
298+
endif()
299+
300+
if(EXECUTORCH_BUILD_TESTS)
301+
set(EXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR ON)
302+
include(CTest)
303+
endif()
304+
281305
# TODO(dbort): Fix these warnings and remove this flag.
282306
set(_common_compile_options -Wno-deprecated-declarations -fPIC)
283307

CMakePresets.json

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,26 @@
100100
"list": ["Darwin", "Linux", "Windows"]
101101
}
102102
},
103+
{
104+
"name": "profiling",
105+
"displayName": "Build ExecuTorch with Profiling Enabled",
106+
"inherits": [
107+
"common"
108+
],
109+
"cacheVariables": {
110+
"EXECUTORCH_BUILD_PRESET_FILE": "${sourceDir}/tools/cmake/preset/profiling.cmake",
111+
"CMAKE_OSX_DEPLOYMENT_TARGET": "12.0"
112+
},
113+
"condition": {
114+
"type": "inList",
115+
"string": "${hostSystemName}",
116+
"list": [
117+
"Darwin",
118+
"Linux",
119+
"Windows"
120+
]
121+
}
122+
},
103123
{
104124
"name": "zephyr",
105125
"displayName": "Build ExecuTorch for Zephyr RTOS",

backends/cadence/aot/compiler.py

Lines changed: 27 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@
5454
# if the quantizer here is different from the quantizer used to convert. It is
5555
# however useful for unit tests to separate the converted model from the fused
5656
# model, to be able to get reference numerics.
57-
# If this does not apply, please use quantize_and_fuse_pt2 instead.
57+
# If this does not apply, please use quantize_pt2 instead.
5858
def trace(
5959
model: torch.nn.Module,
6060
inputs: tuple[object, ...],
@@ -85,6 +85,29 @@ def trace(
8585

8686

8787
def prepare_pt2(
88+
model: torch.nn.Module,
89+
inputs: tuple[object, ...],
90+
quantizer: CadenceQuantizer,
91+
dump_graphs: bool = False,
92+
) -> torch.fx.GraphModule:
93+
"""
94+
Trace and Prepare a model using the given quantizer.
95+
The quantizer must be supplied and be the same as the one used to
96+
fuse the model later, if applicable. If you do not expect that behavior,
97+
please use quantize_pt2 instead, which will instantiate a
98+
default quantizer for you if needed.
99+
Returns a GraphModule with the prepared model.
100+
"""
101+
102+
traced_program = trace(model, inputs, dump_graphs=dump_graphs)
103+
prepared_program = prepare_traced_pt2(
104+
traced_program, quantizer, dump_graphs=dump_graphs
105+
)
106+
107+
return prepared_program
108+
109+
110+
def prepare_traced_pt2(
88111
program: ExportedProgram,
89112
quantizer: CadenceQuantizer,
90113
dump_graphs: bool = False,
@@ -93,7 +116,7 @@ def prepare_pt2(
93116
Prepare a model using the given quantizer.
94117
The quantizer must be supplied and be the same as the one used to
95118
fuse the model later, if applicable. If you do not expect that behavior,
96-
please use quantize_and_fuse_pt2 instead, which will instantiate a
119+
please use quantize_pt2 instead, which will instantiate a
97120
default quantizer for you if needed.
98121
Returns a GraphModule with the prepared model.
99122
"""
@@ -137,7 +160,7 @@ def fuse_pt2(
137160
"""
138161
Fuse a converted graph module using the given quantizer.
139162
The quantizer must be the same as the one used to convert the model.
140-
If you do not expect that behavior, please use quantize_and_fuse_pt2 instead,
163+
If you do not expect that behavior, please use quantize_pt2 instead,
141164
which will instantiate a default quantizer for you if needed.
142165
Returns a GraphModule with the fused model.
143166
"""
@@ -179,7 +202,7 @@ def quantize_pt2(
179202
logging.info(program.graph.print_tabular())
180203

181204
# Get prepared graph module
182-
prepared_gm = prepare_pt2(program, quantizer, dump_graphs=dump_graphs)
205+
prepared_gm = prepare_pt2(model, inputs, quantizer, dump_graphs=dump_graphs)
183206

184207
# Calibrate
185208
# If no calibration data is provided, use the inputs

backends/cadence/aot/export_example.py

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@
1919
export_to_executorch_gen_etrecord,
2020
fuse_pt2,
2121
prepare_pt2,
22-
trace,
2322
)
2423

2524
from executorch.backends.cadence.aot.quantizer.quantizer import CadenceDefaultQuantizer
@@ -50,11 +49,8 @@ def export_model(
5049
# Instantiate the quantizer
5150
quantizer = CadenceDefaultQuantizer()
5251

53-
# Trace the model
54-
ep = trace(model, example_inputs)
55-
5652
# Prepare the model
57-
prepared_gm = prepare_pt2(ep, quantizer)
53+
prepared_gm = prepare_pt2(model, example_inputs, quantizer)
5854

5955
# Calibrate the model
6056
for samples in [example_inputs]:

0 commit comments

Comments
 (0)