Commit d576208

Author: morelos
Committed: Update on "[ET] correcting cpu ref quantize_per_channel logic to align with ATen"
# Context

The quantize_per_channel implementation was not perfectly aligned with the ATen implementation and produced incorrect results when a non-default axis was specified. The bug went unnoticed because the test suite had only one test case for the whole operator.

# Changes

We change the core logic of quantize_per_channel to align properly with ATen's implementation, replacing the old `apply_over_dim_list` approach with a single-loop implementation that computes the channel index directly. This change also adds more comprehensive testing for quantize_per_channel so that a bug like this isn't missed again.

Differential Revision: [D77746130](https://our.internmc.facebook.com/intern/diff/D77746130/)

[ghstack-poisoned]
2 parents 96fcf40 + c37ce6c commit d576208
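
For readers unfamiliar with the fix, the sketch below illustrates the new indexing scheme in Python. The commit itself changes a C++ reference kernel, so this is only a minimal model of the idea, assuming a contiguous input tensor; `quantize_per_channel_ref` and its signature are hypothetical names used for illustration.

```python
import torch

def quantize_per_channel_ref(
    x: torch.Tensor,
    scales: torch.Tensor,
    zero_points: torch.Tensor,
    axis: int,
    quant_min: int,
    quant_max: int,
) -> torch.Tensor:
    # Product of the dimensions after `axis`; for a contiguous tensor this
    # is the stride of the channel dimension.
    inner = 1
    for d in range(axis + 1, x.dim()):
        inner *= x.size(d)
    num_channels = x.size(axis)

    flat = x.contiguous().flatten()
    out = torch.empty_like(flat, dtype=torch.int64)
    # Single loop with direct channel-index calculation: the channel of the
    # element at flat index i is (i // inner) % num_channels.
    for i in range(flat.numel()):
        c = (i // inner) % num_channels
        q = round(flat[i].item() / scales[c].item()) + int(zero_points[c].item())
        out[i] = min(max(q, quant_min), quant_max)
    return out.reshape(x.shape)
```

Because the channel index is computed per element, the same loop handles any `axis`, which is exactly the case the old per-dimension iteration got wrong.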

File tree: 80 files changed (+2181, -1049 lines)

Lines changed: 1 addition & 1 deletion
@@ -1 +1 @@
-9b498d3bb28b8e3411ce464dd2755c5b96d92c8f
+7cda4017ddda554752e89069ae205be5e8388f59

.ci/scripts/check_c10_sync.sh

Lines changed: 1 addition & 1 deletion
@@ -12,4 +12,4 @@ pushd pytorch
 git checkout "$pytorch_pin"
 popd
 "$(dirname "${BASH_SOURCE[0]}")"/compare_dirs.sh runtime/core/portable_type/c10/c10 pytorch/c10
-"$(dirname "${BASH_SOURCE[0]}")"/compare_dirs.sh runtime/core/portable_type/c10/torch/standalone pytorch/torch/standalone
+"$(dirname "${BASH_SOURCE[0]}")"/compare_dirs.sh runtime/core/portable_type/c10/torch/headeronly pytorch/torch/headeronly

.github/workflows/trunk.yml

Lines changed: 3 additions & 3 deletions
@@ -240,11 +240,11 @@ jobs:

 cxx_flags="-fno-exceptions -fno-rtti -Wall -Werror -Wno-int-in-bool-context -DET_HAVE_PREAD=0"
 setup_script_args=""
-if [[ ${{ matrix.os}} == "bare_metal" ]]; then
+if [[ ${{ matrix.os}} == "bare_metal" ]]; then
   toolchain_prefix=arm-none-eabi-
-  threshold="103268" # ~100KiB
+  threshold="104000" # should be ~103.7KB, set threshold to 104KB.
   toolchain_cmake=examples/arm/ethos-u-setup/arm-none-eabi-gcc.cmake
-elif [[ ${{ matrix.os}} == "zephyr-preset" ]]; then
+elif [[ ${{ matrix.os}} == "zephyr-preset" ]]; then
   setup_script_args="--target-toolchain zephyr"
   toolchain_prefix=arm-zephyr-eabi-
   threshold="133120" # should be ~125KB, set threshold to 130KB

.lintrunner.toml

Lines changed: 16 additions & 16 deletions
@@ -10,7 +10,7 @@ exclude_patterns = [
     'exir/serde/**',
 ]
 command = [
-    'python3',
+    'python',
     '-m',
     'lintrunner_adapters',
     'run',
@@ -19,7 +19,7 @@ command = [
     '@{{PATHSFILE}}'
 ]
 init_command = [
-    'python3',
+    'python',
     '-m',
     'lintrunner_adapters',
     'run',
@@ -41,7 +41,7 @@ exclude_patterns = [
     'exir/serde/**',
 ]
 command = [
-    'python3',
+    'python',
     '-m',
     'lintrunner_adapters',
     'run',
@@ -50,7 +50,7 @@ command = [
     '@{{PATHSFILE}}'
 ]
 init_command = [
-    'python3',
+    'python',
     '-m',
     'lintrunner_adapters',
     'run',
@@ -84,7 +84,7 @@ exclude_patterns = [
     'runtime/core/portable_type/c10/**',
 ]
 command = [
-    'python3',
+    'python',
     '-m',
     'lintrunner_adapters',
     'run',
@@ -95,7 +95,7 @@ command = [
     '@{{PATHSFILE}}'
 ]
 init_command = [
-    'python3',
+    'python',
     '-m',
     'lintrunner_adapters',
     'run',
@@ -117,7 +117,7 @@ exclude_patterns = [
     '**/third-party/**',
 ]
 command = [
-    'python3',
+    'python',
     '-m',
     'lintrunner_adapters',
     'run',
@@ -127,7 +127,7 @@ command = [
     '@{{PATHSFILE}}',
 ]
 init_command = [
-    'python3',
+    'python',
     '-m',
     'lintrunner_adapters',
     'run',
@@ -151,7 +151,7 @@ exclude_patterns = [
     '**/third-party/**',
 ]
 command = [
-    'python3',
+    'python',
     '-m',
     'lintrunner_adapters',
     'run',
@@ -192,7 +192,7 @@ exclude_patterns = [
     'extension/llm/custom_ops/spinquant/test/fast_hadamard_transform_special_unstrided_cpu.h',
 ]
 command = [
-    'python3',
+    'python',
     '-m',
     'lintrunner_adapters',
     'run',
@@ -234,7 +234,7 @@ exclude_patterns = [
     'util/**',
 ]
 command = [
-    'python3',
+    'python',
     '-m',
     'lintrunner_adapters',
     'run',
@@ -287,7 +287,7 @@ exclude_patterns = [
     'util/**',
 ]
 command = [
-    'python3',
+    'python',
     '-m',
     'lintrunner_adapters',
     'run',
@@ -337,7 +337,7 @@ exclude_patterns = [
     'backends/arm/test/**',
 ]
 command = [
-    'python3',
+    'python',
     '-m',
     'lintrunner_adapters',
     'run',
@@ -349,7 +349,7 @@ command = [
     '@{{PATHSFILE}}'
 ]
 init_command = [
-    'python3',
+    'python',
     '-m',
     'lintrunner_adapters',
     'run',
@@ -368,7 +368,7 @@ exclude_patterns = [
     '.lintrunner.toml',
 ]
 command = [
-    'python3',
+    'python',
     '-m',
     'lintrunner_adapters',
     'run',
@@ -397,7 +397,7 @@ exclude_patterns = [
 ]

 command = [
-    "python3",
+    "python",
     "-m",
     "lintrunner_adapters",
     "run",

CMakeLists.txt

Lines changed: 5 additions & 1 deletion
@@ -490,7 +490,7 @@ install(
   INCLUDES
   DESTINATION ${_common_include_directories}
 )
-install(FILES tools/cmake/executorch-config.cmake
+install(FILES tools/cmake/Utils.cmake tools/cmake/executorch-config.cmake
         DESTINATION lib/cmake/ExecuTorch
 )

@@ -732,4 +732,8 @@ if(EXECUTORCH_BUILD_VULKAN)
   add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/backends/vulkan)
 endif()

+if(EXECUTORCH_BUILD_ANDROID_JNI)
+  add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/android)
+endif()
+
 include(Test.cmake)

backends/cadence/aot/compiler.py

Lines changed: 29 additions & 39 deletions
@@ -8,7 +8,7 @@

 import logging
 from pathlib import Path
-from typing import Callable, cast, Optional
+from typing import Optional

 import executorch.backends.cadence.aot.ops_registrations # noqa
 import torch
@@ -32,7 +32,6 @@
     ExecutorchBackendConfig,
     ExecutorchProgramManager,
 )
-from executorch.exir.pass_base import PassResult
 from executorch.exir.passes import ToOutVarPass
 from executorch.exir.passes.sym_shape_eval_pass import HintBasedSymShapeEvalPass
 from executorch.exir.program._program import to_edge_with_preserved_ops
@@ -41,7 +40,7 @@
 from torch.export.exported_program import ExportedProgram
 from torchao.quantization.pt2e.quantize_pt2e import convert_pt2e, prepare_pt2e

-from .passes import get_cadence_passes
+from .passes import apply_exir_ops_passes, apply_torch_ops_passes

 from .utils import print_ops_info

@@ -210,6 +209,21 @@ def quantize_pt2(
     return program


+TO_EDGE_OP_EXCEPTION_LIST: list[torch._ops.OpOverload] = [
+    torch.ops.aten._linalg_det.default,
+    torch.ops.aten._linalg_svd.default,
+    torch.ops.aten._native_batch_norm_legit_functional.default,
+    torch.ops.aten.linear.default,
+    torch.ops.aten.linalg_vector_norm.default,
+    torch.ops.aten.unfold.default,
+    torch.ops.aten.angle.default,
+    torch.ops.aten.rms_norm.default,
+]
+TO_EDGE_PRESERVE_OPS: tuple[torch._ops.OpOverload, ...] = (
+    torch.ops.aten.rms_norm.default,
+)
+
+
 def _lower_ep_to_edge(
     expo_program: ExportedProgram,
     dump_graphs: bool = False,
@@ -226,20 +240,11 @@ def _lower_ep_to_edge(
         compile_config=EdgeCompileConfig(
             _skip_dim_order=True,
             # Allow specific non-core aten ops in the IR.
-            _core_aten_ops_exception_list=[
-                torch.ops.aten._linalg_det.default,
-                torch.ops.aten._linalg_svd.default,
-                torch.ops.aten._native_batch_norm_legit_functional.default,
-                torch.ops.aten.linear.default,
-                torch.ops.aten.linalg_vector_norm.default,
-                torch.ops.aten.unfold.default,
-                torch.ops.aten.angle.default,
-                torch.ops.aten.rms_norm.default,
-            ]
+            _core_aten_ops_exception_list=TO_EDGE_OP_EXCEPTION_LIST
             + (core_aten_exceptions or []),
         ),
         constant_methods=constant_methods,
-        preserve_ops=(torch.ops.aten.rms_norm.default,),
+        preserve_ops=TO_EDGE_PRESERVE_OPS,
     )

     if dump_graphs:
@@ -256,14 +261,20 @@ def export_to_edge(
     inputs: tuple[object, ...],
     dump_graphs: bool = False,
     constant_methods: Optional[dict[str, object]] = None,
+    core_aten_exceptions: Optional[list[torch._ops.OpOverload]] = None,
 ) -> EdgeProgramManager:
     assert isinstance(model, torch.nn.Module), "model should be an nn.Module"

     # Export the model into an ExportedProgram.
     expo_program = trace(model, inputs)

+    # Apply passes which transform the ExportedProgram before it gets lowered to edge.
+    expo_program = apply_torch_ops_passes(expo_program)
+
     # Lower the model to edge IR.
-    edge_prog_manager = _lower_ep_to_edge(expo_program, dump_graphs, constant_methods)
+    edge_prog_manager = _lower_ep_to_edge(
+        expo_program, dump_graphs, constant_methods, core_aten_exceptions
+    )

     return edge_prog_manager

@@ -305,14 +316,7 @@ def _lower_ep_to_cadence(
     Lower an existing ExportedProgram to edge IR and apply frontend optimization passes.
     """
     edge_prog_manager = _lower_ep_to_edge(program, dump_graphs=dump_graphs)
-    cadence_passes = get_cadence_passes(opt_level)
-
-    # Run a couple required passes for quant/dequant ops
-    cadence_prog_manager = edge_prog_manager.transform(
-        cast(
-            list[Callable[[torch.fx.GraphModule], Optional[PassResult]]], cadence_passes
-        )
-    )
+    cadence_prog_manager = apply_exir_ops_passes(opt_level, edge_prog_manager)
     return cadence_prog_manager


@@ -323,14 +327,7 @@
     opt_level: int = 1,
 ) -> EdgeProgramManager:
     edge_prog_manager = export_to_edge(model, inputs, dump_graphs=dump_graphs)
-    cadence_passes = get_cadence_passes(opt_level)
-
-    # Run a couple required passes for quant/dequant ops
-    cadence_prog_manager = edge_prog_manager.transform(
-        cast(
-            list[Callable[[torch.fx.GraphModule], Optional[PassResult]]], cadence_passes
-        )
-    )
+    cadence_prog_manager = apply_exir_ops_passes(opt_level, edge_prog_manager)
     return cadence_prog_manager


@@ -367,15 +364,8 @@ def export_to_executorch_gen_etrecord(
     memory_config: Optional[MemoryConfig] = None,
     dump_graphs: bool = False,
 ) -> ExecutorchProgramManager:
-    cadence_passes = get_cadence_passes(opt_level)
     edge_prog_manager = export_to_edge(model, inputs, dump_graphs)
-
-    # Run a couple required passes for quant/dequant ops
-    cadence_prog_manager = edge_prog_manager.transform(
-        cast(
-            list[Callable[[torch.fx.GraphModule], Optional[PassResult]]], cadence_passes
-        )
-    )
+    cadence_prog_manager = apply_exir_ops_passes(opt_level, edge_prog_manager)

     # Print some information to terminal
     print_ops_info(
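
The repeated `transform(cast(...))` block deleted from three call sites above is now hidden behind `apply_exir_ops_passes`. The diff does not show that helper's body, but its shape can be inferred from the deleted code; below is a plausible sketch, assuming it lives next to `get_cadence_passes` in `backends/cadence/aot/passes.py` (the real implementation may differ):

```python
from typing import Callable, Optional, cast

import torch

from executorch.exir import EdgeProgramManager
from executorch.exir.pass_base import PassResult


def apply_exir_ops_passes(
    opt_level: int, edge_prog_manager: EdgeProgramManager
) -> EdgeProgramManager:
    # Same steps the old call sites performed inline: build the
    # opt-level-dependent pass list, then run it through transform().
    cadence_passes = get_cadence_passes(opt_level)  # assumed in scope in passes.py
    return edge_prog_manager.transform(
        cast(
            list[Callable[[torch.fx.GraphModule], Optional[PassResult]]],
            cadence_passes,
        )
    )
```

Factoring the cast into one place also lets the `Callable`, `cast`, and `PassResult` imports drop out of compiler.py, as the import hunks above show.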

backends/cadence/aot/fuse_ops.py

Lines changed: 1 addition & 0 deletions
@@ -1127,6 +1127,7 @@ class CadenceFuseOpsInGraph:
     FuseCascadedTransposeOrPermuteOps,
     FuseCascadedViewOps,
     FuseQuantDequantToRequantizePass,
+    FuseMulTensorIntoQuantPass,
     FuseMulTensorIntoDequantPass,
     FuseMulScalarIntoDequantPass,
     FuseFullThenReshapePass,
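
The newly registered `FuseMulTensorIntoQuantPass` is only listed here; its implementation is not part of this diff. As a hedged illustration of the arithmetic that makes this kind of fusion sound at all (behavior inferred from the pass name, not from the diff): quantizing `k * x` with scale `s` is equivalent to quantizing `x` with scale `s / k`, so a constant multiply feeding a quantize can be folded into the quantization parameters.

```python
import torch

x = torch.randn(16)
# Power-of-two constants keep both computations bit-exact for this demo.
k, s, zp = 2.0, 0.125, 3  # multiplier, quant scale, zero point (illustrative)

q_mul_then_quant = torch.clamp(torch.round(x * k / s) + zp, -128, 127)
q_folded_scale = torch.clamp(torch.round(x / (s / k)) + zp, -128, 127)
assert torch.equal(q_mul_then_quant, q_folded_scale)
```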
