Skip to content

Commit 42e348e

Browse files
Merge branch 'main' into enable-per-channel-quantization-for-VgfPipeline
2 parents b674933 + 3ab7063 commit 42e348e

File tree

79 files changed

+3064
-435
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

79 files changed

+3064
-435
lines changed

.ci/scripts/build_llama_android.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ install_executorch_and_backend_lib() {
1919
echo "Installing executorch and xnnpack backend"
2020
clean_executorch_install_folders
2121
mkdir cmake-android-out
22-
ANDROID_NDK=/opt/ndk
22+
ANDROID_NDK=${ANDROID_NDK:-/opt/ndk}
2323
BUCK2=buck2
2424
ANDROID_ABI=arm64-v8a
2525
cmake --preset llm \

.github/workflows/pull.yml

Lines changed: 27 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -632,32 +632,33 @@ jobs:
632632
# run eval_llama wikitext task
633633
PYTHON_EXECUTABLE=python bash .ci/scripts/test_eval_llama_wikitext.sh
634634
635-
test-eval_llama-mmlu-linux:
636-
name: test-eval_llama-mmlu-linux
637-
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
638-
permissions:
639-
id-token: write
640-
contents: read
641-
strategy:
642-
fail-fast: false
643-
with:
644-
runner: linux.24xlarge
645-
docker-image: ci-image:executorch-ubuntu-22.04-clang12
646-
submodules: 'recursive'
647-
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
648-
timeout: 90
649-
script: |
650-
# The generic Linux job chooses to use base env, not the one setup by the image
651-
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
652-
conda activate "${CONDA_ENV}"
653-
654-
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "cmake"
655-
656-
# install llama requirements
657-
bash examples/models/llama/install_requirements.sh
658-
659-
# run eval_llama mmlu task
660-
PYTHON_EXECUTABLE=python bash .ci/scripts/test_eval_llama_mmlu.sh
635+
# TODO(larryliu0820): Fix this issue before reenabling it: https://gist.github.com/larryliu0820/7377ecd0d79dbc06076cec8d9f2b85d2
636+
# test-eval_llama-mmlu-linux:
637+
# name: test-eval_llama-mmlu-linux
638+
# uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
639+
# permissions:
640+
# id-token: write
641+
# contents: read
642+
# strategy:
643+
# fail-fast: false
644+
# with:
645+
# runner: linux.24xlarge
646+
# docker-image: ci-image:executorch-ubuntu-22.04-clang12
647+
# submodules: 'recursive'
648+
# ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
649+
# timeout: 90
650+
# script: |
651+
# # The generic Linux job chooses to use base env, not the one setup by the image
652+
# CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
653+
# conda activate "${CONDA_ENV}"
654+
655+
# PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "cmake"
656+
657+
# # install llama requirements
658+
# bash examples/models/llama/install_requirements.sh
659+
660+
# # run eval_llama mmlu task
661+
# PYTHON_EXECUTABLE=python bash .ci/scripts/test_eval_llama_mmlu.sh
661662

662663
test-llama_runner_eager-linux:
663664
name: test-llama_runner_eager-linux

CMakeLists.txt

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,10 @@ endif()
161161

162162
if(EXECUTORCH_BUILD_TESTS)
163163
include(CTest)
164+
else()
165+
# It looks like some of our third-party deps will try to turn BUILD_TESTING on
166+
# if it's not explicitly set, leading to confusing behavior.
167+
set(BUILD_TESTING OFF)
164168
endif()
165169

166170
add_subdirectory(third-party)
@@ -737,7 +741,10 @@ if(EXECUTORCH_BUILD_EXECUTOR_RUNNER)
737741
endif()
738742

739743
set(CMAKE_EXECUTABLE_SUFFIX ".html")
740-
target_link_options(executor_runner PUBLIC -sALLOW_MEMORY_GROWTH --embed-file "${WASM_MODEL_DIR}@/")
744+
target_link_options(
745+
executor_runner PUBLIC -sALLOW_MEMORY_GROWTH --embed-file
746+
"${WASM_MODEL_DIR}@/"
747+
)
741748
endif()
742749
endif()
743750

backends/apple/coreml/partition/coreml_partitioner.py

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -23,25 +23,27 @@
2323
from torch.fx.passes.operator_support import OperatorSupportBase
2424

2525
logger = logging.getLogger(__name__)
26-
logger.setLevel(logging.WARNING)
26+
logger.setLevel(logging.INFO)
2727

2828

29-
class OperatorsSupportedForCoreMLBackend(OperatorSupportBase):
29+
class _OperatorsSupportedForCoreMLBackend(OperatorSupportBase):
3030
def __init__(
3131
self,
3232
skip_ops_for_coreml_delegation: Optional[List[str]] = None,
3333
lower_full_graph: bool = False,
34+
log: bool = False,
3435
) -> None:
3536
if skip_ops_for_coreml_delegation is None:
3637
skip_ops_for_coreml_delegation = []
3738
super().__init__()
3839
self.skip_ops_for_coreml_delegation = skip_ops_for_coreml_delegation
3940
self.lower_full_graph = lower_full_graph
4041
self._logged_msgs = set()
42+
self._log = log
4143

4244
def log_once(self, msg: str) -> None:
43-
if msg not in self._logged_msgs:
44-
logging.info(msg)
45+
if self._log and msg not in self._logged_msgs:
46+
logger.info(msg)
4547
self._logged_msgs.add(msg)
4648

4749
def is_node_supported(self, submodules, node: torch.fx.Node) -> bool:
@@ -154,8 +156,10 @@ def partition(self, exported_program: ExportedProgram) -> PartitionResult:
154156

155157
capability_partitioner = CapabilityBasedPartitioner(
156158
exported_program.graph_module,
157-
OperatorsSupportedForCoreMLBackend(
158-
self.skip_ops_for_coreml_delegation, self.lower_full_graph
159+
_OperatorsSupportedForCoreMLBackend(
160+
self.skip_ops_for_coreml_delegation,
161+
self.lower_full_graph,
162+
log=True,
159163
),
160164
allows_single_node_partition=True,
161165
)
@@ -191,8 +195,10 @@ def ops_to_not_decompose(
191195
self, ep: ExportedProgram
192196
) -> Tuple[List[torch._ops.OpOverload], Optional[Callable[[torch.fx.Node], bool]]]:
193197
do_not_decompose = []
194-
op_support = OperatorsSupportedForCoreMLBackend(
195-
self.skip_ops_for_coreml_delegation, self.lower_full_graph
198+
op_support = _OperatorsSupportedForCoreMLBackend(
199+
self.skip_ops_for_coreml_delegation,
200+
self.lower_full_graph,
201+
log=False,
196202
)
197203

198204
# CoreML prevents certain ops (like triu) from lowering to CoreML when put in the ExecuTorch op namespace

backends/apple/coreml/test/test_coreml_partitioner.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616
from executorch.backends.apple.coreml.compiler import CoreMLBackend
1717
from executorch.backends.apple.coreml.partition import CoreMLPartitioner
1818
from executorch.exir.backend.utils import format_delegated_graph
19-
from executorch.runtime import Runtime
2019

2120

2221
@torch.library.custom_op("unsupported::linear", mutates_args=())
@@ -37,7 +36,13 @@ def _(
3736
return torch.ops.aten.linear.default(x, w, b)
3837

3938

40-
_TEST_RUNTIME = sys.platform == "darwin"
39+
def is_fbcode():
40+
return not hasattr(torch.version, "git_version")
41+
42+
43+
_TEST_RUNTIME = (sys.platform == "darwin") and not is_fbcode()
44+
if _TEST_RUNTIME:
45+
from executorch.runtime import Runtime
4146

4247

4348
class TestCoreMLPartitioner(unittest.TestCase):

backends/apple/coreml/test/test_torch_ops.py

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,20 @@
1414

1515
from executorch.backends.apple.coreml.compiler import CoreMLBackend
1616
from executorch.backends.apple.coreml.partition import CoreMLPartitioner
17-
from executorch.runtime import Runtime
1817
from torchao.quantization import IntxWeightOnlyConfig, PerAxis, PerGroup, quantize_
1918

20-
_TEST_RUNTIME = sys.platform == "darwin" and tuple(
21-
map(int, platform.mac_ver()[0].split("."))
22-
) >= (15, 0)
19+
20+
def is_fbcode():
21+
return not hasattr(torch.version, "git_version")
22+
23+
24+
_TEST_RUNTIME = (
25+
(sys.platform == "darwin")
26+
and not is_fbcode()
27+
and tuple(map(int, platform.mac_ver()[0].split("."))) >= (15, 0)
28+
)
29+
if _TEST_RUNTIME:
30+
from executorch.runtime import Runtime
2331

2432

2533
class TestTorchOps(unittest.TestCase):

backends/arm/_passes/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
from .decompose_leaky_relu_pass import DecomposeLeakyReLUPass # noqa
4141
from .decompose_linalg_vector_norm_pass import DecomposeLinearVectorNormPass # noqa
4242
from .decompose_linear_pass import DecomposeLinearPass # noqa
43+
from .decompose_masked_fill import DecomposeMaskedFill # noqa
4344
from .decompose_maxpool2d_with_dilation import DecomposeMaxPool2DPass # noqa
4445
from .decompose_meandim_pass import DecomposeMeanDimPass # noqa
4546
from .decompose_ne_pass import DecomposeNotEqualPass # noqa

backends/arm/_passes/arm_pass_manager.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@
4545
DecomposeLeakyReLUPass,
4646
DecomposeLinearPass,
4747
DecomposeLinearVectorNormPass,
48+
DecomposeMaskedFill,
4849
DecomposeMaxPool2DPass,
4950
DecomposeMeanDimPass,
5051
DecomposeNotEqualPass,
@@ -113,6 +114,7 @@ def _tosa_080_BI_pipeline(self, exported_program: ExportedProgram) -> GraphModul
113114
self.add_pass(
114115
DecomposeMeanDimPass(exported_program.graph_module, self.tosa_spec)
115116
)
117+
116118
self.add_pass(ConvertFullLikeToFullPass())
117119
self.add_pass(ConvertToClampPass())
118120
self.add_pass(ConvertMinMaxPass())
@@ -146,6 +148,7 @@ def _tosa_080_BI_pipeline(self, exported_program: ExportedProgram) -> GraphModul
146148
self.add_pass(DecomposeMaxPool2DPass())
147149
self.add_pass(SizeAdjustInputPass())
148150
self.add_pass(DecomposeSelectPass())
151+
149152
self.add_pass(ConvertSqueezesToViewPass())
150153

151154
self.add_pass(FuseViewCopyTransform())
@@ -160,6 +163,7 @@ def _tosa_080_BI_pipeline(self, exported_program: ExportedProgram) -> GraphModul
160163
return self._transform(exported_program.graph_module)
161164

162165
def _tosa_080_MI_pipeline(self, exported_program: ExportedProgram) -> GraphModule:
166+
self.add_pass(DecomposeMaskedFill())
163167
self.add_pass(DecomposeRoundPass())
164168
self.add_pass(DecomposeAcoshPass())
165169
self.add_pass(DecomposeAsinPass())
@@ -285,4 +289,8 @@ def transform_for_annotation_pipeline(self, graph_module: GraphModule):
285289
self.add_pass(ReplaceInfValues())
286290
self.add_pass(DecomposeSumPass())
287291

292+
if not self.tosa_spec.is_U55_subset:
293+
# The decomposition emits a where op, which is not supported on Ethos-U55
294+
self.add_pass(DecomposeMaskedFill())
295+
288296
return self._transform(graph_module)
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
# Copyright 2025 Arm Limited and/or its affiliates.
2+
#
3+
# This source code is licensed under the BSD-style license found in the
4+
# LICENSE file in the root directory of this source tree.
5+
6+
# pyre-unsafe
7+
8+
9+
import torch
10+
11+
from executorch.backends.arm._passes import ArmPass
12+
from executorch.exir.dialects._ops import ops as exir_ops
13+
14+
15+
edge_ops = (exir_ops.edge.aten.masked_fill.Scalar,)
16+
aten_ops = (torch.ops.aten.masked_fill.Scalar,)
17+
18+
19+
def _get_decomposition(op) -> tuple:
20+
if op in edge_ops:
21+
return (
22+
exir_ops.edge.aten.where.self,
23+
exir_ops.edge.aten.full_like.default,
24+
)
25+
if op in aten_ops:
26+
return (
27+
torch.ops.aten.where.self,
28+
torch.ops.aten.full_like.default,
29+
)
30+
raise RuntimeError(f"Unable to get decomposition for op {op}")
31+
32+
33+
class DecomposeMaskedFill(ArmPass):
34+
"""
35+
Masked fill takes in a tensor, a boolean mask and a scalar value.
36+
Fills the tensor with the scalar value wherever the boolean mask is True.
37+
Decomposed to a where and a full_like operator.
38+
"""
39+
40+
def call_operator(self, op, args, kwargs, meta, updated=False):
41+
if op not in (edge_ops + aten_ops):
42+
return super().call_operator(op, args, kwargs, meta, updated)
43+
44+
x, mask, scalar = args
45+
46+
where_op, full_like_op = _get_decomposition(op)
47+
48+
scalar_tensor = super().call_operator(full_like_op, (x, scalar), {}, meta, True)
49+
50+
return super().call_operator(
51+
where_op, (mask, scalar_tensor, x), kwargs, meta, True
52+
)

backends/arm/operator_support/tosa_supported_operators.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -254,6 +254,7 @@ def is_node_supported(
254254
exir_ops.edge.aten.asin.default,
255255
exir_ops.edge.aten.atanh.default,
256256
exir_ops.edge.aten.addmm.default,
257+
exir_ops.edge.aten.masked_fill.Scalar,
257258
]
258259

259260
return supported

0 commit comments

Comments
 (0)