Skip to content

Commit ee588d7

Browse files
committed
Merge branch 'main' of https://github.com/pytorch/executorch into change-1024712
2 parents ded3172 + 4450532 commit ee588d7

File tree

127 files changed

+3023
-690
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

127 files changed

+3023
-690
lines changed

.ci/scripts/gather_benchmark_configs.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
"samsung_galaxy_s24": "arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/98f8788c-2e25-4a3c-8bb2-0d1e8897c0db",
2525
"google_pixel_8_pro": "arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/d65096ab-900b-4521-be8b-a3619b69236a",
2626
"google_pixel_3_private_rooted": "arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/98d23ca8-ea9e-4fb7-b725-d402017b198d",
27+
"apple_iphone_15_private": "arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/55929353-2f28-4ee5-bdff-d1a95f58cb28",
2728
}
2829

2930
# Predefined benchmark configurations
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
name: apple-perf (private devices)
2+
3+
on:
4+
# TODO (huydhn): Disable the schedule run until we land the change to add device pool and device name
5+
# to separate between public and private iOS devices
6+
# schedule:
7+
# - cron: 0 0,4,8,12,16,20 * * *
8+
pull_request:
9+
paths:
10+
- .github/workflows/apple-perf-private-device-experiment.yml
11+
# push:
12+
# branches:
13+
# - main
14+
# paths:
15+
# - .github/workflows/apple-perf-private-device-experiment.yml
16+
# Note: GitHub has an upper limit of 10 inputs
17+
workflow_dispatch:
18+
inputs:
19+
models:
20+
description: Models to be benchmarked
21+
required: false
22+
type: string
23+
default: mv3,meta-llama/Llama-3.2-1B-Instruct-SpinQuant_INT4_EO8,meta-llama/Llama-3.2-1B-Instruct-QLORA_INT4_EO8
24+
devices:
25+
description: Target devices to run benchmark
26+
required: false
27+
type: string
28+
default: apple_iphone_15_private
29+
benchmark_configs:
30+
description: The list of configs used by the benchmark
31+
required: false
32+
type: string
33+
workflow_call:
34+
inputs:
35+
models:
36+
description: Models to be benchmarked
37+
required: false
38+
type: string
39+
default: mv3,meta-llama/Llama-3.2-1B-Instruct-SpinQuant_INT4_EO8,meta-llama/Llama-3.2-1B-Instruct-QLORA_INT4_EO8
40+
devices:
41+
description: Target devices to run benchmark
42+
required: false
43+
type: string
44+
default: apple_iphone_15_private
45+
benchmark_configs:
46+
description: The list of configs used by the benchmark
47+
required: false
48+
type: string
49+
50+
concurrency:
51+
group: apple-perf-private-devices-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
52+
cancel-in-progress: true
53+
54+
jobs:
  # Delegates the actual benchmarking to the shared apple-perf workflow.
  apple:
    uses: ./.github/workflows/apple-perf.yml
    secrets: inherit
    permissions:
      id-token: write
      contents: read
    with:
      models: ${{ inputs.models || 'mv3,meta-llama/Llama-3.2-1B-Instruct-SpinQuant_INT4_EO8,meta-llama/Llama-3.2-1B-Instruct-QLORA_INT4_EO8' }}
      # Honor the declared `devices` input instead of silently ignoring it.
      # Uses the same fallback pattern as `models`, so runs that supply no
      # input still target the private iPhone 15 pool.
      devices: ${{ inputs.devices || 'apple_iphone_15_private' }}
      benchmark_configs: ${{ inputs.benchmark_configs }}

.github/workflows/pull.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -399,7 +399,7 @@ jobs:
399399
size=${arr[4]}
400400
# threshold=48120 on devserver with gcc11.4
401401
# todo(lfq): update once binary size is below 50kb.
402-
threshold="51504"
402+
threshold="51408"
403403
if [[ "$size" -le "$threshold" ]]; then
404404
echo "Success $size <= $threshold"
405405
else
@@ -436,7 +436,7 @@ jobs:
436436
size=${arr[4]}
437437
# threshold=48120 on devserver with gcc11.4
438438
# todo(lfq): update once binary size is below 50kb.
439-
threshold="51784"
439+
threshold="47552"
440440
if [[ "$size" -le "$threshold" ]]; then
441441
echo "Success $size <= $threshold"
442442
else

backends/arm/__init__.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
# Copyright 2025 Arm Limited and/or its affiliates.
2+
#
3+
# This source code is licensed under the BSD-style license found in the
4+
# LICENSE file in the root directory of this source tree.
5+
6+
from .arm_backend import ArmCompileSpecBuilder # noqa # usort: skip
7+
from .tosa_backend import TOSABackend # noqa # usort: skip
8+
from .tosa_partitioner import TOSAPartitioner # noqa # usort: skip
9+
from .ethosu_backend import EthosUBackend # noqa # usort: skip
10+
from .ethosu_partitioner import EthosUPartitioner # noqa # usort: skip

backends/arm/_passes/convert_expand_copy_to_repeat.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,18 @@
1-
# Copyright 2024 Arm Limited and/or its affiliates.
2-
# All rights reserved.
1+
# Copyright 2024-2025 Arm Limited and/or its affiliates.
32
#
43
# This source code is licensed under the BSD-style license found in the
54
# LICENSE file in the root directory of this source tree.
65

76
# pyre-unsafe
87

8+
import logging
99
from typing import cast
1010

1111
from executorch.exir.dialects._ops import ops as exir_ops
1212
from executorch.exir.pass_base import ExportPass
1313

14+
logger = logging.getLogger(__name__)
15+
1416

1517
class ConvertExpandCopyToRepeatPass(ExportPass):
1618
"""
@@ -41,6 +43,14 @@ def call_operator(self, op, args, kwargs, meta):
4143
multiples[i] if multiples[i] != -1 and extended_shape[i] == 1 else 1
4244
for i in range(expanded_rank)
4345
]
46+
47+
if all((x == 1 for x in multiples)):
48+
# All dimensions/repetitions occur only once. Remove node
49+
# altogether since it's in practice just a copy.
50+
logger.warning("Found redundant expand node (no-op). Removing it.")
51+
52+
return args[0]
53+
4454
return super().call_operator(
4555
op=self.repeat, args=(args[0], multiples), kwargs=kwargs, meta=meta
4656
)

backends/arm/ethosu_backend.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,9 @@
1414
import logging
1515
from typing import final, List
1616

17-
from executorch.backends.arm.arm_vela import vela_compile
17+
from executorch.backends.arm import TOSABackend
1818

19-
from executorch.backends.arm.tosa_backend import TOSABackend
19+
from executorch.backends.arm.arm_vela import vela_compile
2020
from executorch.exir.backend.backend_details import BackendDetails, PreprocessResult
2121
from executorch.exir.backend.compile_spec_schema import CompileSpec
2222
from torch.export.exported_program import ExportedProgram

backends/arm/ethosu_partitioner.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,7 @@
1010
from executorch.backends.arm.arm_backend import (
1111
is_ethosu,
1212
) # usort: skip
13-
from executorch.backends.arm.ethosu_backend import EthosUBackend
14-
from executorch.backends.arm.tosa_partitioner import TOSAPartitioner
13+
from executorch.backends.arm import EthosUBackend, TOSAPartitioner
1514
from executorch.exir.backend.compile_spec_schema import CompileSpec
1615
from executorch.exir.backend.partitioner import DelegationSpec
1716
from torch.fx.passes.operator_support import OperatorSupportBase

backends/arm/operator_support/tosa_supported_operators.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -207,6 +207,7 @@ def is_node_supported(
207207
exir_ops.edge.aten._log_softmax.default,
208208
exir_ops.edge.aten.sub.Tensor,
209209
exir_ops.edge.aten.tanh.default,
210+
exir_ops.edge.aten.upsample_bilinear2d.vec,
210211
exir_ops.edge.aten.upsample_nearest2d.vec,
211212
exir_ops.edge.aten.var.correction,
212213
exir_ops.edge.aten.var.dim,
@@ -365,6 +366,7 @@ def is_node_supported(
365366
exir_ops.edge.aten.sigmoid.default,
366367
exir_ops.edge.aten.sub.Tensor,
367368
exir_ops.edge.aten.tanh.default,
369+
exir_ops.edge.aten.upsample_bilinear2d.vec,
368370
exir_ops.edge.aten.upsample_nearest2d.vec,
369371
exir_ops.edge.aten.gelu.default,
370372
):

backends/arm/operators/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@
4646
op_to_copy,
4747
op_to_dim_order_copy,
4848
op_transpose,
49+
op_upsample_bilinear2d,
4950
op_upsample_nearest2d,
5051
op_view,
5152
op_where,
Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
# Copyright 2025 Arm Limited and/or its affiliates.
2+
#
3+
# This source code is licensed under the BSD-style license found in the
4+
# LICENSE file in the root directory of this source tree.
5+
6+
# pyre-unsafe
7+
from typing import List
8+
9+
import torch
10+
11+
import tosa_tools.v0_80.serializer.tosa_serializer as ts # type: ignore
12+
13+
from executorch.backends.arm.operators.node_visitor import (
14+
NodeVisitor,
15+
register_node_visitor,
16+
)
17+
from executorch.backends.arm.tosa_mapping import TosaArg
18+
from executorch.backends.arm.tosa_quant_utils import build_rescale
19+
from executorch.backends.arm.tosa_utils import get_resize_parameters, tosa_shape
20+
from tosa_tools.v0_80.tosa.ResizeMode import ResizeMode # type: ignore
21+
22+
23+
@register_node_visitor
class UpsampleBilinear2dVisitor_0_80(NodeVisitor):
    """Serialize ``aten.upsample_bilinear2d.vec`` as a TOSA RESIZE operator.

    FP32 inputs are emitted as a single RESIZE. INT8 inputs are emitted as a
    RESIZE into an INT32 intermediate followed by a rescale back to INT8.
    """

    target = "aten.upsample_bilinear2d.vec"

    def __init__(self, *args):
        super().__init__(*args)

    def define_node(
        self,
        node: torch.fx.Node,
        tosa_graph: ts.TosaSerializer,
        inputs: List[TosaArg],
        output: TosaArg,
    ) -> None:
        """Add the RESIZE (and, for INT8, the rescale) for ``node`` to ``tosa_graph``.

        Args:
            node: The FX node being lowered (not read by this method).
            tosa_graph: Serializer that receives the new operators.
            inputs: Node arguments; only ``inputs[0]`` (the tensor to resize)
                is used. The size/scale arguments are ignored.
            output: Description of the node's output tensor.

        Raises:
            AssertionError: If the input or output shape is ``None``, or if a
                computed scale/border value does not fit in int16.
            ValueError: If input and output dtypes are not both FP32 or both
                INT8.
        """
        assert (
            inputs[0].shape is not None and output.shape is not None
        ), "Only static shapes are supported"

        input_dtype = inputs[0].dtype

        # tosa_shape output is NHWC, take HW
        input_size_yx = torch.tensor(
            tosa_shape(inputs[0].shape, inputs[0].dim_order)[1:3]
        )
        # Ignore scale and size parameters, directly use the output size as
        # we only support static shapes currently
        output_size_yx = torch.tensor(tosa_shape(output.shape, output.dim_order)[1:3])

        # NOTE(review): the parameters are computed with ResizeMode.NEAREST
        # although the operator below is emitted with ResizeMode.BILINEAR --
        # confirm this is intentional.
        scale_n_yx, scale_d_yx, offset_yx, border_yx = get_resize_parameters(
            input_size_yx, output_size_yx, ResizeMode.NEAREST, align_corners=True
        )

        def in_int16_range(x):
            return torch.all(x >= -(2**15)) and torch.all(x <= 2**15 - 1)

        # Presumably required because the RESIZE attribute stores these values
        # as int16 -- TODO confirm against the TOSA specification.
        assert in_int16_range(scale_n_yx), "Scale numerator out of int16 range"
        assert in_int16_range(scale_d_yx), "Scale denominator out of int16 range"
        assert in_int16_range(border_yx), "Border out of int16 range"

        attr = ts.TosaSerializerAttribute()
        attr.ResizeAttribute(
            scale=[scale_n_yx[0], scale_d_yx[0], scale_n_yx[1], scale_d_yx[1]],
            offset=offset_yx.tolist(),
            border=border_yx.tolist(),
            mode=ResizeMode.BILINEAR,
        )

        if input_dtype == output.dtype == ts.DType.FP32:
            tosa_graph.addOperator(
                ts.TosaOp.Op().RESIZE, [inputs[0].name], [output.name], attr
            )
            return
        elif input_dtype == output.dtype == ts.DType.INT8:
            # The INT8 resize result is written to an INT32 intermediate ...
            intermediate = tosa_graph.addIntermediate(
                tosa_shape(output.shape, output.dim_order), ts.DType.INT32
            )

            tosa_graph.addOperator(
                ts.TosaOp.Op().RESIZE, [inputs[0].name], [intermediate.name], attr
            )

            # ... and then scaled by 1 / (scale_n_y * scale_n_x) back to INT8.
            final_output_scale = float(1 / (scale_n_yx[0] * scale_n_yx[1]))

            build_rescale(
                tosa_fb=tosa_graph,
                scale=[final_output_scale],
                input_node=intermediate,
                output_name=output.name,
                output_type=ts.DType.INT8,
                output_shape=output.shape,
                input_zp=0,
                output_zp=0,
                is_double_round=False,
            )
        else:
            # Must be an f-string for the {name=} placeholders to be
            # interpolated; the literal set braces are escaped by doubling.
            raise ValueError(
                f"Input/output dtype not in {{float32, int8}}: {input_dtype=} {output.dtype=}"
            )

0 commit comments

Comments
 (0)