Skip to content

Commit 592ba3d

Browse files
authored
Merge branch 'main' into shoumikhin-patch-6
2 parents 20f5d6c + 072403b commit 592ba3d

27 files changed

+698
-212
lines changed

.ci/scripts/gather_benchmark_configs.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
"apple_iphone_15": "arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/3b5acd2e-92e2-4778-b651-7726bafe129d",
2222
"apple_iphone_15+ios_18": "arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/12c8b15c-8d03-4e07-950d-0a627e7595b4",
2323
"samsung_galaxy_s22": "arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/e59f866a-30aa-4aa1-87b7-4510e5820dfa",
24+
"samsung_galaxy_s22_private": "arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/ea6b049d-1508-4233-9a56-5d9eacbe1078",
2425
"samsung_galaxy_s24": "arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/98f8788c-2e25-4a3c-8bb2-0d1e8897c0db",
2526
"google_pixel_8_pro": "arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/d65096ab-900b-4521-be8b-a3619b69236a",
2627
"google_pixel_3_private_rooted": "arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/98d23ca8-ea9e-4fb7-b725-d402017b198d",

.github/workflows/android-perf-private-device-experiment.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ on:
2323
description: Target devices to run benchmark
2424
required: false
2525
type: string
26-
default: google_pixel_3_private_rooted
26+
default: samsung_galaxy_s22_private
2727
benchmark_configs:
2828
description: The list of configs used the benchmark
2929
required: false
@@ -39,7 +39,7 @@ on:
3939
description: Target devices to run benchmark
4040
required: false
4141
type: string
42-
default: google_pixel_3_private_rooted
42+
default: samsung_galaxy_s22_private
4343
benchmark_configs:
4444
description: The list of configs used the benchmark
4545
required: false
@@ -58,5 +58,5 @@ jobs:
5858
contents: read
5959
with:
6060
models: ${{ inputs.models || 'mv3,meta-llama/Llama-3.2-1B-Instruct-SpinQuant_INT4_EO8,meta-llama/Llama-3.2-1B-Instruct-QLORA_INT4_EO8' }}
61-
devices: google_pixel_3_private_rooted
61+
devices: samsung_galaxy_s22_private
6262
benchmark_configs: ${{ inputs.benchmark_configs }}

.github/workflows/pull.yml

Lines changed: 14 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -481,37 +481,38 @@ jobs:
481481
build-tool: buck2
482482
docker-image: executorch-ubuntu-22.04-clang12
483483

484-
unittest-arm:
484+
unittest-arm-backend-with-no-fvp:
485+
name: unittest-arm-backend-with-no-fvp
485486
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
486487
permissions:
487488
id-token: write
488489
contents: read
490+
strategy:
491+
matrix:
492+
include:
493+
- test_arm_baremetal: test_pytest_ops
494+
- test_arm_baremetal: test_pytest_models
495+
fail-fast: false
489496
with:
490497
runner: linux.2xlarge
491498
docker-image: executorch-ubuntu-22.04-arm-sdk
492499
submodules: 'recursive'
493500
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
494501
timeout: 90
495502
script: |
496-
set -eux
497-
498503
# The generic Linux job chooses to use base env, not the one setup by the image
499504
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
500505
conda activate "${CONDA_ENV}"
501506
502-
BUILD_TOOL="cmake"
503-
504-
# Setup MacOS dependencies as there is no Docker support on MacOS atm
505-
PYTHON_EXECUTABLE=python \
506-
CMAKE_ARGS="-DEXECUTORCH_BUILD_PYBIND=ON" \
507-
EXECUTORCH_BUILD_ARM_BAREMETAL=ON \
508-
.ci/scripts/setup-linux.sh --build-tool "${BUILD_TOOL}"
507+
source .ci/scripts/utils.sh
508+
install_executorch "--use-pt-pinned-commit"
509509
510-
# Install Arm dependencies
511510
.ci/scripts/setup-arm-baremetal-tools.sh
512511
513-
# Run pytest without simulator
514-
backends/arm/test/test_arm_baremetal.sh test_pytest
512+
ARM_TEST=${{ matrix.test_arm_baremetal }}
513+
514+
# Test test_arm_baremetal.sh with test
515+
backends/arm/test/test_arm_baremetal.sh "${ARM_TEST}"
515516
516517
test-llama-runner-qnn-linux:
517518
name: test-llama-runner-qnn-linux

.github/workflows/trunk.yml

Lines changed: 15 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -176,12 +176,22 @@ jobs:
176176
# Test selective build
177177
PYTHON_EXECUTABLE=python bash examples/portable/scripts/test_demo_backend_delegation.sh "${BUILD_TOOL}"
178178
179-
test-arm-backend-delegation:
180-
name: test-arm-backend-delegation
179+
test-arm-backend:
180+
name: test-arm-backend
181181
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
182182
permissions:
183183
id-token: write
184184
contents: read
185+
strategy:
186+
matrix:
187+
include:
188+
- test_arm_baremetal: test_pytest_ops_ethosu_fvp
189+
- test_arm_baremetal: test_pytest_models_ethosu_fvp
190+
- test_arm_baremetal: test_run_ethosu_fvp
191+
- test_arm_baremetal: test_models_tosa
192+
- test_arm_baremetal: test_models_ethos-u55
193+
- test_arm_baremetal: test_models_ethos-u85
194+
fail-fast: false
185195
with:
186196
runner: linux.2xlarge.memory
187197
docker-image: executorch-ubuntu-22.04-arm-sdk
@@ -202,34 +212,10 @@ jobs:
202212
# Hopefully this is high enough for this setup.
203213
sudo sysctl fs.inotify.max_user_watches=1048576 # 1024 * 1024
204214
205-
# Test ethos-u delegate examples with run.sh
206-
backends/arm/test/test_arm_baremetal.sh test_full_ethosu_fvp
207-
208-
209-
test-arm-reference-delegation:
210-
name: test-arm-reference-delegation
211-
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
212-
permissions:
213-
id-token: write
214-
contents: read
215-
with:
216-
runner: linux.2xlarge.memory
217-
docker-image: executorch-ubuntu-22.04-arm-sdk
218-
submodules: 'recursive'
219-
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
220-
timeout: 90
221-
script: |
222-
# The generic Linux job chooses to use base env, not the one setup by the image
223-
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
224-
conda activate "${CONDA_ENV}"
225-
226-
source .ci/scripts/utils.sh
227-
install_executorch "--use-pt-pinned-commit"
228-
229-
.ci/scripts/setup-arm-baremetal-tools.sh
215+
ARM_TEST=${{ matrix.test_arm_baremetal }}
230216
231-
# Run arm unit tests using the simulator
232-
backends/arm/test/test_arm_baremetal.sh test_pytest_ethosu_fvp
217+
# Test test_arm_baremetal.sh with test
218+
backends/arm/test/test_arm_baremetal.sh "${ARM_TEST}"
233219
234220
test-arm-cortex-m-size-test:
235221
name: test-arm-cortex-m-size-test

backends/arm/TARGETS

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@ python_library(
77
"ethosu_partitioner.py",
88
"tosa_backend.py",
99
"tosa_partitioner.py",
10+
"vgf_backend.py",
11+
"vgf_partitioner.py",
1012
],
1113
deps = [
1214
":arm_backend",

backends/arm/_passes/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
)
4040
from .fuse_batchnorm2d_pass import FuseBatchnorm2DPass # noqa
4141
from .fuse_constant_ops_pass import ComputeConstantOpsAOT, FuseConstantArgsPass # noqa
42+
from .fuse_equal_placeholders_pass import FuseEqualPlaceholdersPass # noqa
4243
from .fuse_quantized_activation_pass import FuseQuantizedActivationPass # noqa
4344
from .insert_rescales_pass import InsertRescalePass # noqa
4445
from .insert_table_ops import InsertTableOpsPass # noqa

backends/arm/_passes/arm_pass_manager.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
FoldAndAnnotateQParamsPass,
4141
FuseBatchnorm2DPass,
4242
FuseConstantArgsPass,
43+
FuseEqualPlaceholdersPass,
4344
FuseQuantizedActivationPass,
4445
InsertRescalePass,
4546
InsertTableOpsPass,
@@ -113,6 +114,7 @@ def _tosa_080_BI_pipeline(self, exported_program: ExportedProgram) -> GraphModul
113114
self.add_pass(FuseConstantArgsPass(exported_program))
114115

115116
self.add_pass(InsertTableOpsPass(exported_program))
117+
self.add_pass(FuseEqualPlaceholdersPass(exported_program))
116118
self.add_pass(AnnotateChannelsLastDimOrder())
117119
self.add_pass(InsertRescalePass())
118120

@@ -164,6 +166,7 @@ def _tosa_080_MI_pipeline(self, exported_program: ExportedProgram) -> GraphModul
164166
self.add_pass(FuseViewCopyTransform())
165167
self.add_pass(FuseConstantArgsPass(exported_program))
166168
self.add_pass(InsertTableOpsPass(exported_program))
169+
self.add_pass(FuseEqualPlaceholdersPass(exported_program))
167170
self.add_pass(AnnotateChannelsLastDimOrder())
168171
self.add_pass(InsertRescalePass())
169172

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
# Copyright 2025 Arm Limited and/or its affiliates.
2+
#
3+
# This source code is licensed under the BSD-style license found in the
4+
# LICENSE file in the root directory of this source tree.
5+
6+
import torch
7+
from executorch.backends.arm._passes.arm_pass_utils import (
8+
get_constant_placeholder_kind,
9+
get_param_tensor,
10+
is_param_node,
11+
)
12+
from executorch.backends.transforms.utils import (
13+
create_constant_placeholder,
14+
delete_constant_placeholder,
15+
)
16+
from executorch.exir import ExportedProgram
17+
from executorch.exir.pass_base import ExportPass, PassResult
18+
19+
20+
class FuseEqualPlaceholdersPass(ExportPass):
    """
    This pass optimizes memory usage by finding constant placeholders
    pointing to identical tensors and fusing them to one single placeholder
    with multiple users.

    Two placeholders are treated as identical only when their tensors have
    the same dtype and ``torch.equal`` reports them as equal.
    """

    def __init__(self, exported_program: ExportedProgram):
        self.exported_program = exported_program
        super().__init__()

    def call(self, graph_module: torch.fx.GraphModule) -> PassResult:
        """
        Fuse constant placeholders of ``graph_module`` that hold equal tensors.

        For every group of two or more equal constant placeholders a new
        placeholder named ``<node>_common`` is created, all users of the
        group members are redirected to it, and the original placeholders
        are deleted from the exported program.

        Args:
            graph_module: The graph module whose placeholders are inspected
                and rewritten in place.

        Returns:
            A ``PassResult`` holding the (possibly rewritten) graph module and
            a flag telling whether any placeholders were fused.
        """
        modified = False

        # Resolve each constant placeholder's tensor once up front instead of
        # looking it up again for every pairwise comparison. Placeholders
        # without a tensor can never be fused, so they are dropped here.
        pending = []
        for node in graph_module.graph.nodes:
            if not is_param_node(self.exported_program, node):
                continue
            tensor = get_param_tensor(self.exported_program, node)
            if tensor is not None:
                pending.append((node, tensor))

        while pending:
            node1, tensor1 = pending.pop()

            # The fused placeholder is built from tensor1 only, so a match
            # must agree on dtype as well as on size and element values;
            # otherwise users of a differently typed constant would silently
            # be rewired to tensor1's dtype.
            duplicates = [
                node2
                for node2, tensor2 in pending
                if tensor1.dtype == tensor2.dtype and torch.equal(tensor1, tensor2)
            ]
            if not duplicates:
                continue

            common_name = node1.name + "_common"
            common_kind = get_constant_placeholder_kind(self.exported_program, node1)
            # Passed positionally as the last argument of
            # create_constant_placeholder.
            # NOTE(review): presumably the "persistent buffer" flag - confirm
            # against the helper's signature.
            common_persistent_buffer = True

            with graph_module.graph.inserting_before(node1):
                common_node = create_constant_placeholder(
                    self.exported_program,
                    graph_module.graph,
                    common_name,
                    common_kind,
                    tensor1,
                    common_persistent_buffer,
                )

            # node1 is handled first, followed by its duplicates in graph order.
            for eq_node in [node1, *duplicates]:
                eq_node.replace_all_uses_with(common_node)
                delete_constant_placeholder(self.exported_program, eq_node)

            # Fused duplicates must not be considered again in later rounds
            # (node1 itself was already popped above).
            pending = [
                entry
                for entry in pending
                if not any(entry[0] is dup for dup in duplicates)
            ]
            modified = True

        if modified:
            graph_module.recompile()
            graph_module = super().call(graph_module).graph_module
        return PassResult(graph_module=graph_module, modified=modified)

0 commit comments

Comments
 (0)