pytorch
diff --git a/‎.ci/scripts/build_llama_android.sh‎
Lines changed: 1 addition & 1 deletion b/‎.ci/scripts/build_llama_android.sh‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎.github/workflows/pull.yml‎
Lines changed: 27 additions & 26 deletions b/‎.github/workflows/pull.yml‎
Lines changed: 27 additions & 26 deletions
diff --git a/‎CMakeLists.txt‎
Lines changed: 8 additions & 1 deletion b/‎CMakeLists.txt‎
Lines changed: 8 additions & 1 deletion
diff --git a/‎backends/apple/coreml/partition/coreml_partitioner.py‎
Lines changed: 14 additions & 8 deletions b/‎backends/apple/coreml/partition/coreml_partitioner.py‎
Lines changed: 14 additions & 8 deletions
diff --git a/‎backends/apple/coreml/test/test_coreml_partitioner.py‎
Lines changed: 7 additions & 2 deletions b/‎backends/apple/coreml/test/test_coreml_partitioner.py‎
Lines changed: 7 additions & 2 deletions
diff --git a/‎backends/apple/coreml/test/test_torch_ops.py‎
Lines changed: 12 additions & 4 deletions b/‎backends/apple/coreml/test/test_torch_ops.py‎
Lines changed: 12 additions & 4 deletions
diff --git a/‎backends/arm/_passes/__init__.py‎
Lines changed: 1 addition & 0 deletions b/‎backends/arm/_passes/__init__.py‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎backends/arm/_passes/arm_pass_manager.py‎
Lines changed: 8 additions & 0 deletions b/‎backends/arm/_passes/arm_pass_manager.py‎
Lines changed: 8 additions & 0 deletions
diff --git a/‎backends/arm/_passes/decompose_masked_fill.py‎
Lines changed: 52 additions & 0 deletions b/‎backends/arm/_passes/decompose_masked_fill.py‎
Lines changed: 52 additions & 0 deletions
diff --git a/‎backends/arm/operator_support/tosa_supported_operators.py‎
Lines changed: 1 addition & 0 deletions b/‎backends/arm/operator_support/tosa_supported_operators.py‎
Lines changed: 1 addition & 0 deletions
@@ -19,7 +19,7 @@ install_executorch_and_backend_lib() {
   echo "Installing executorch and xnnpack backend"
   clean_executorch_install_folders
   mkdir cmake-android-out
-  ANDROID_NDK=/opt/ndk
+  ANDROID_NDK=${ANDROID_NDK:-/opt/ndk}
   BUCK2=buck2
   ANDROID_ABI=arm64-v8a
   cmake --preset llm \
 
@@ -632,32 +632,33 @@ jobs:
         # run eval_llama wikitext task
         PYTHON_EXECUTABLE=python bash .ci/scripts/test_eval_llama_wikitext.sh
 
-  test-eval_llama-mmlu-linux:
-    name: test-eval_llama-mmlu-linux
-    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
-    permissions:
-      id-token: write
-      contents: read
-    strategy:
-      fail-fast: false
-    with:
-      runner: linux.24xlarge
-      docker-image: ci-image:executorch-ubuntu-22.04-clang12
-      submodules: 'recursive'
-      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
-      timeout: 90
-      script: |
-        # The generic Linux job chooses to use base env, not the one setup by the image
-        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
-        conda activate "${CONDA_ENV}"
-
-        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "cmake"
-
-        # install llama requirements
-        bash examples/models/llama/install_requirements.sh
-
-        # run eval_llama mmlu task
-        PYTHON_EXECUTABLE=python bash .ci/scripts/test_eval_llama_mmlu.sh
+  # TODO(larryliu0820): Fix this issue before reenabling it: https://gist.github.com/larryliu0820/7377ecd0d79dbc06076cec8d9f2b85d2
+  # test-eval_llama-mmlu-linux:
+  #   name: test-eval_llama-mmlu-linux
+  #   uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+  #   permissions:
+  #     id-token: write
+  #     contents: read
+  #   strategy:
+  #     fail-fast: false
+  #   with:
+  #     runner: linux.24xlarge
+  #     docker-image: ci-image:executorch-ubuntu-22.04-clang12
+  #     submodules: 'recursive'
+  #     ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+  #     timeout: 90
+  #     script: |
+  #       # The generic Linux job chooses to use base env, not the one setup by the image
+  #       CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+  #       conda activate "${CONDA_ENV}"
+
+  #       PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "cmake"
+
+  #       # install llama requirements
+  #       bash examples/models/llama/install_requirements.sh
+
+  #       # run eval_llama mmlu task
+  #       PYTHON_EXECUTABLE=python bash .ci/scripts/test_eval_llama_mmlu.sh
 
   test-llama_runner_eager-linux:
     name: test-llama_runner_eager-linux
 
@@ -161,6 +161,10 @@ endif()
 
 if(EXECUTORCH_BUILD_TESTS)
   include(CTest)
+else()
+  # It looks like some of our third-party deps will turn BUILD_TESTING on if
+  # it is not explicitly set, leading to confusing behavior, so set it to OFF.
+  set(BUILD_TESTING OFF)
 endif()
 
 add_subdirectory(third-party)
@@ -737,7 +741,10 @@ if(EXECUTORCH_BUILD_EXECUTOR_RUNNER)
     endif()
 
     set(CMAKE_EXECUTABLE_SUFFIX ".html")
-    target_link_options(executor_runner PUBLIC -sALLOW_MEMORY_GROWTH --embed-file "${WASM_MODEL_DIR}@/")
+    target_link_options(
+      executor_runner PUBLIC -sALLOW_MEMORY_GROWTH --embed-file
+      "${WASM_MODEL_DIR}@/"
+    )
   endif()
 endif()
 
 
@@ -23,25 +23,27 @@
 from torch.fx.passes.operator_support import OperatorSupportBase
 
 logger = logging.getLogger(__name__)
-logger.setLevel(logging.WARNING)
+logger.setLevel(logging.INFO)
 
 
-class OperatorsSupportedForCoreMLBackend(OperatorSupportBase):
+class _OperatorsSupportedForCoreMLBackend(OperatorSupportBase):
     def __init__(
         self,
         skip_ops_for_coreml_delegation: Optional[List[str]] = None,
         lower_full_graph: bool = False,
+        log: bool = False,
     ) -> None:
         if skip_ops_for_coreml_delegation is None:
             skip_ops_for_coreml_delegation = []
         super().__init__()
         self.skip_ops_for_coreml_delegation = skip_ops_for_coreml_delegation
         self.lower_full_graph = lower_full_graph
         self._logged_msgs = set()
+        self._log = log
 
     def log_once(self, msg: str) -> None:
-        if msg not in self._logged_msgs:
-            logging.info(msg)
+        if self._log and msg not in self._logged_msgs:
+            logger.info(msg)
             self._logged_msgs.add(msg)
 
     def is_node_supported(self, submodules, node: torch.fx.Node) -> bool:
@@ -154,8 +156,10 @@ def partition(self, exported_program: ExportedProgram) -> PartitionResult:
 
         capability_partitioner = CapabilityBasedPartitioner(
             exported_program.graph_module,
-            OperatorsSupportedForCoreMLBackend(
-                self.skip_ops_for_coreml_delegation, self.lower_full_graph
+            _OperatorsSupportedForCoreMLBackend(
+                self.skip_ops_for_coreml_delegation,
+                self.lower_full_graph,
+                log=True,
             ),
             allows_single_node_partition=True,
         )
@@ -191,8 +195,10 @@ def ops_to_not_decompose(
         self, ep: ExportedProgram
     ) -> Tuple[List[torch._ops.OpOverload], Optional[Callable[[torch.fx.Node], bool]]]:
         do_not_decompose = []
-        op_support = OperatorsSupportedForCoreMLBackend(
-            self.skip_ops_for_coreml_delegation, self.lower_full_graph
+        op_support = _OperatorsSupportedForCoreMLBackend(
+            self.skip_ops_for_coreml_delegation,
+            self.lower_full_graph,
+            log=False,
         )
 
         # CoreML prevents certain ops (like triu) from lowering to CoreML when put in the ExecuTorch op namespace
 
@@ -16,7 +16,6 @@
 from executorch.backends.apple.coreml.compiler import CoreMLBackend
 from executorch.backends.apple.coreml.partition import CoreMLPartitioner
 from executorch.exir.backend.utils import format_delegated_graph
-from executorch.runtime import Runtime
 
 
 @torch.library.custom_op("unsupported::linear", mutates_args=())
@@ -37,7 +36,13 @@ def _(
     return torch.ops.aten.linear.default(x, w, b)
 
 
-_TEST_RUNTIME = sys.platform == "darwin"
+def is_fbcode():
+    return not hasattr(torch.version, "git_version")
+
+
+_TEST_RUNTIME = (sys.platform == "darwin") and not is_fbcode()
+if _TEST_RUNTIME:
+    from executorch.runtime import Runtime
 
 
 class TestCoreMLPartitioner(unittest.TestCase):
 
@@ -14,12 +14,20 @@
 
 from executorch.backends.apple.coreml.compiler import CoreMLBackend
 from executorch.backends.apple.coreml.partition import CoreMLPartitioner
-from executorch.runtime import Runtime
 from torchao.quantization import IntxWeightOnlyConfig, PerAxis, PerGroup, quantize_
 
-_TEST_RUNTIME = sys.platform == "darwin" and tuple(
-    map(int, platform.mac_ver()[0].split("."))
-) >= (15, 0)
+
+def is_fbcode():
+    return not hasattr(torch.version, "git_version")
+
+
+_TEST_RUNTIME = (
+    (sys.platform == "darwin")
+    and not is_fbcode()
+    and tuple(map(int, platform.mac_ver()[0].split("."))) >= (15, 0)
+)
+if _TEST_RUNTIME:
+    from executorch.runtime import Runtime
 
 
 class TestTorchOps(unittest.TestCase):
 
@@ -40,6 +40,7 @@
 from .decompose_leaky_relu_pass import DecomposeLeakyReLUPass  # noqa
 from .decompose_linalg_vector_norm_pass import DecomposeLinearVectorNormPass  # noqa
 from .decompose_linear_pass import DecomposeLinearPass  # noqa
+from .decompose_masked_fill import DecomposeMaskedFill  # noqa
 from .decompose_maxpool2d_with_dilation import DecomposeMaxPool2DPass  # noqa
 from .decompose_meandim_pass import DecomposeMeanDimPass  # noqa
 from .decompose_ne_pass import DecomposeNotEqualPass  # noqa
 
@@ -45,6 +45,7 @@
     DecomposeLeakyReLUPass,
     DecomposeLinearPass,
     DecomposeLinearVectorNormPass,
+    DecomposeMaskedFill,
     DecomposeMaxPool2DPass,
     DecomposeMeanDimPass,
     DecomposeNotEqualPass,
@@ -113,6 +114,7 @@ def _tosa_080_BI_pipeline(self, exported_program: ExportedProgram) -> GraphModul
         self.add_pass(
             DecomposeMeanDimPass(exported_program.graph_module, self.tosa_spec)
         )
+
         self.add_pass(ConvertFullLikeToFullPass())
         self.add_pass(ConvertToClampPass())
         self.add_pass(ConvertMinMaxPass())
@@ -146,6 +148,7 @@ def _tosa_080_BI_pipeline(self, exported_program: ExportedProgram) -> GraphModul
         self.add_pass(DecomposeMaxPool2DPass())
         self.add_pass(SizeAdjustInputPass())
         self.add_pass(DecomposeSelectPass())
+
         self.add_pass(ConvertSqueezesToViewPass())
 
         self.add_pass(FuseViewCopyTransform())
@@ -160,6 +163,7 @@ def _tosa_080_BI_pipeline(self, exported_program: ExportedProgram) -> GraphModul
         return self._transform(exported_program.graph_module)
 
     def _tosa_080_MI_pipeline(self, exported_program: ExportedProgram) -> GraphModule:
+        self.add_pass(DecomposeMaskedFill())
         self.add_pass(DecomposeRoundPass())
         self.add_pass(DecomposeAcoshPass())
         self.add_pass(DecomposeAsinPass())
@@ -285,4 +289,8 @@ def transform_for_annotation_pipeline(self, graph_module: GraphModule):
         self.add_pass(ReplaceInfValues())
         self.add_pass(DecomposeSumPass())
 
+        if not self.tosa_spec.is_U55_subset:
+            # DecomposeMaskedFill emits a where op, which is not supported on Ethos-U55
+            self.add_pass(DecomposeMaskedFill())
+
         return self._transform(graph_module)
@@ -0,0 +1,52 @@
+# Copyright 2025 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# pyre-unsafe
+
+
+import torch
+
+from executorch.backends.arm._passes import ArmPass
+from executorch.exir.dialects._ops import ops as exir_ops
+
+
+edge_ops = (exir_ops.edge.aten.masked_fill.Scalar,)
+aten_ops = (torch.ops.aten.masked_fill.Scalar,)
+
+
+def _get_decomposition(op) -> tuple:
+    if op in edge_ops:
+        return (
+            exir_ops.edge.aten.where.self,
+            exir_ops.edge.aten.full_like.default,
+        )
+    if op in aten_ops:
+        return (
+            torch.ops.aten.where.self,
+            torch.ops.aten.full_like.default,
+        )
+    raise RuntimeError(f"Unable to get decomposition for op {op}")
+
+
+class DecomposeMaskedFill(ArmPass):
+    """
+    masked_fill takes a tensor, a boolean mask and a scalar value, and returns
+    the tensor with the scalar written wherever the mask is True.
+    It is decomposed into a full_like operator followed by a where operator.
+    """
+
+    def call_operator(self, op, args, kwargs, meta, updated=False):
+        if op not in (edge_ops + aten_ops):
+            return super().call_operator(op, args, kwargs, meta, updated)
+
+        x, mask, scalar = args
+
+        where_op, full_like_op = _get_decomposition(op)
+
+        scalar_tensor = super().call_operator(full_like_op, (x, scalar), {}, meta, True)
+
+        return super().call_operator(
+            where_op, (mask, scalar_tensor, x), kwargs, meta, True
+        )
@@ -254,6 +254,7 @@ def is_node_supported(
             exir_ops.edge.aten.asin.default,
             exir_ops.edge.aten.atanh.default,
             exir_ops.edge.aten.addmm.default,
+            exir_ops.edge.aten.masked_fill.Scalar,
         ]
 
         return supported
Original file line number	Diff line number	Diff line change
`@@ -254,6 +254,7 @@ def is_node_supported(`
`254`	`254`	`exir_ops.edge.aten.asin.default,`
`255`	`255`	`exir_ops.edge.aten.atanh.default,`
`256`	`256`	`exir_ops.edge.aten.addmm.default,`
	`257`	`+ exir_ops.edge.aten.masked_fill.Scalar,`
`257`	`258`	`]`
`258`	`259`
`259`	`260`	`return supported`