Skip to content

Commit 3f6b5bf

Browse files
committed
Merge remote-tracking branch 'origin/main' into toupstream/tanh_op
2 parents 22d4c68 + 8673567 commit 3f6b5bf

File tree

104 files changed

+2641
-709
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

104 files changed

+2641
-709
lines changed

.ci/docker/build.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ case "${IMAGE_NAME}" in
4141
LINTRUNNER=""
4242
CLANG_VERSION=12
4343
# From https://developer.android.com/ndk/downloads
44-
ANDROID_NDK_VERSION=r26c
44+
ANDROID_NDK_VERSION=r27b
4545
;;
4646
*)
4747
echo "Invalid image name ${IMAGE_NAME}"

.github/workflows/android-perf.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -205,6 +205,7 @@ jobs:
205205
206206
# Let's see how expensive this job is, we might want to tone it down by running it periodically
207207
benchmark-on-device:
208+
if: always()
208209
permissions:
209210
id-token: write
210211
contents: read

.github/workflows/android-release-artifacts.yml

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,24 @@ concurrency:
1313
cancel-in-progress: true
1414

1515
jobs:
16+
check-if-aar-exists:
17+
name: check-if-aar-exists
18+
runs-on: ubuntu-22.04
19+
timeout-minutes: 10
20+
steps:
21+
- name: Check if this RC version is already in S3
22+
shell: bash
23+
run: |
24+
VERSION="${{ inputs.version }}"
25+
if curl -I "https://ossci-android.s3.amazonaws.com/executorch/release/${VERSION}/executorch.aar" | grep "200 OK"; then
26+
echo "AAR already exists at https://ossci-android.s3.amazonaws.com/executorch/release/${VERSION}/executorch.aar"
27+
echo "Will skip build/upload"
28+
exit 1
29+
fi
30+
1631
build-aar:
1732
name: build-aar
33+
needs: check-if-aar-exists
1834
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
1935
with:
2036
runner: linux.2xlarge

.github/workflows/android.yml

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ jobs:
5353
# NB: Use metal install for KVM support to run the emulator faster
5454
runs-on: linux.24xl.spr-metal
5555
env:
56-
ANDROID_NDK_VERSION: r26c
56+
ANDROID_NDK_VERSION: r27b
5757
API_LEVEL: 34
5858
steps:
5959
- name: Setup SSH (Click me for login details)
@@ -80,6 +80,11 @@ jobs:
8080
# Reuse the script that install Android on ET Docker image
8181
sudo -E bash .ci/docker/common/install_android.sh
8282
83+
# After https://github.com/ReactiveCircus/android-emulator-runner/releases/tag/v2.33.0 release,
84+
# it seems that we need to chown the Android setup to the current user instead of root to
85+
# avoid permission issue
86+
sudo chown -R "${USER}" /opt/android
87+
8388
- name: Gradle cache
8489
uses: gradle/actions/setup-gradle@v3
8590

.github/workflows/apple-perf.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -279,6 +279,7 @@ jobs:
279279
path: ${{ runner.temp }}/artifacts/
280280

281281
benchmark-on-device:
282+
if: always()
282283
needs:
283284
- set-parameters
284285
- upload-benchmark-app

.lintrunner.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,8 @@ command = [
151151
'lintrunner_adapters',
152152
'run',
153153
'grep_linter',
154-
'--pattern= Executorch\W+',
154+
# Exclude "ExecuTorch" pattern within URLs
155+
'--pattern= Executorch(?!\\W*(://|\\.[a-z]{2,}))\\W+',
155156
'--linter-name=ExecuTorchCapitalization',
156157
'--error-name=Incorrect capitalization for ExecuTorch',
157158
"""--error-description=

backends/arm/_passes/arm_pass_manager.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,9 @@
1919
ConvertSplitToSlicePass,
2020
)
2121
from executorch.backends.arm._passes.decompose_div_pass import DecomposeDivPass
22+
from executorch.backends.arm._passes.insert_squeeze_after_sum_pass import (
23+
InsertSqueezeAfterSumPass,
24+
)
2225
from executorch.backends.arm._passes.meandim_to_averagepool_pass import (
2326
ConvertMeanDimToAveragePool,
2427
)
@@ -47,6 +50,7 @@ def transform_to_backend_pipeline(
4750
self.add_pass(ConvertExpandCopyToRepeatPass())
4851
self.add_pass(ConvertMeanDimToAveragePool())
4952
self.add_pass(DecomposeDivPass())
53+
self.add_pass(InsertSqueezeAfterSumPass())
5054
self.add_pass(ConvertSplitToSlicePass())
5155
for spec in compile_spec:
5256
if spec.key == "permute_memory_format":
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
# Copyright 2024 Arm Limited and/or its affiliates.
2+
# All rights reserved.
3+
#
4+
# This source code is licensed under the BSD-style license found in the
5+
# LICENSE file in the root directory of this source tree.
6+
7+
from typing import cast
8+
9+
import torch
10+
import torch.fx
11+
from executorch.backends.arm._passes.arm_pass_utils import create_node, insert_q_dq_pair
12+
13+
from executorch.backends.arm.tosa_quant_utils import get_quant_node_args, is_quant_node
14+
from executorch.exir.dialects._ops import ops as exir_ops
15+
from executorch.exir.pass_base import ExportPass, PassResult
16+
17+
18+
class InsertSqueezeAfterSumPass(ExportPass):
    """
    In PyTorch, the default behaviour of Tensor.sum is to squeeze
    the dimension that is summed (keep_dim = False).
    However, in TOSA, REDUCE_SUM always preserves the
    rank of the input (keep_dim = True).
    To get a 1-1 mapping in the sum lowering, normalize the
    keep_dim = False case to keep_dim = True and add squeeze ops.

    Original:
        sum(dims, keep_dim = False)
    After pass:
        sum(dims, keep_dim = True)
        (q)
        (dq)
        squeeze(dim = dims)
    """

    def call(self, graph_module: torch.fx.GraphModule) -> PassResult:
        """Rewrite every keep_dim=False sum in the graph as a
        keep_dim=True sum followed by a squeeze over the same dims.

        For quantized sums, a quantize/dequantize pair is inserted
        between the sum and the squeeze so the squeeze consumes
        dequantized values (see the class docstring diagram).
        """
        for node in graph_module.graph.nodes:
            # Only rewrite edge-dialect aten.sum.dim_IntList call nodes.
            if node.op != "call_function":
                continue
            if node.target != exir_ops.edge.aten.sum.dim_IntList:
                continue
            sum_node = cast(torch.fx.Node, node)
            # args layout is (input, dim_list, keep_dim); keep_dim is
            # optional and defaults to False when absent.
            keep_dim = cast(bool, sum_node.args[2] if len(sum_node.args) > 2 else False)
            if keep_dim:
                # Already rank-preserving; nothing to normalize.
                continue

            dim_list = cast(list[int], sum_node.args[1])
            quantized = is_quant_node(sum_node)
            if quantized:
                # Reuse the input's quantization parameters for the new
                # q/dq pair; output dtype is fixed to int8.
                # NOTE(review): assumes get_quant_node_args returns a
                # tuple that insert_q_dq_pair accepts with a dtype
                # appended — confirm against arm_pass_utils.
                qparams = get_quant_node_args(sum_node.all_input_nodes[0])
                qparams = qparams + (torch.int8,)
            else:
                qparams = None

            # Add keep_dim = True arg to sum node.
            sum_node.args = sum_node.args[0:2] + (True,)

            with graph_module.graph.inserting_after(sum_node):
                squeeze_node = create_node(
                    graph_module.graph, exir_ops.edge.aten.squeeze_copy.dims, ()
                )
                # Order matters: redirect all users of sum to the new
                # squeeze FIRST, then wire squeeze's inputs — otherwise
                # the (sum_node, dim_list) reference below would itself
                # have been rewritten to point at squeeze_node.
                sum_node.replace_all_uses_with(squeeze_node)
                squeeze_node.args = (sum_node, dim_list)
                if quantized:
                    # Insert q/dq between sum and squeeze; the returned
                    # node replaces sum_node only for this iteration's
                    # local bookkeeping.
                    sum_node = insert_q_dq_pair(graph_module.graph, sum_node, qparams)
        graph_module.graph.eliminate_dead_code()
        graph_module.recompile()
        # Re-run the parent pass machinery so graph metadata (e.g. node
        # meta/faketensor info) is retraced after the mutation.
        graph_module = super().call(graph_module).graph_module
        return PassResult(graph_module, True)

backends/arm/arm_partitioner.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@ def is_node_supported(self, submodules, node: torch.fx.Node) -> bool:
6363
exir_ops.edge.aten._softmax.default,
6464
exir_ops.edge.aten.slice_copy.Tensor,
6565
exir_ops.edge.aten.sub.Tensor,
66+
exir_ops.edge.aten.sum.dim_IntList,
6667
exir_ops.edge.aten.tanh.default,
6768
exir_ops.edge.aten.view_copy.default,
6869
exir_ops.edge.aten.clone.default,

backends/arm/operators/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
op_softmax,
3535
op_squeeze,
3636
op_sub,
37+
op_sum,
3738
op_tanh,
3839
op_unsqueeze,
3940
op_view,

0 commit comments

Comments
 (0)