Skip to content

Commit a29bcc5

Browse files
author
pytorchbot
committed
2025-11-01 nightly release (bbc0967)
1 parent 13009d4 commit a29bcc5

File tree

108 files changed

+2837
-489
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

108 files changed

+2837
-489
lines changed

.github/workflows/apple.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ jobs:
3838
id: set_version
3939
shell: bash
4040
run: |
41-
VERSION="0.8.0.$(TZ='PST8PDT' date +%Y%m%d)"
41+
VERSION="1.1.0.$(TZ='PST8PDT' date +%Y%m%d)"
4242
echo "version=$VERSION" >> "$GITHUB_OUTPUT"
4343
4444
build-demo-ios:

backends/aoti/aoti_partitioner.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
PartitionResult,
1616
)
1717
from executorch.exir.backend.utils import tag_constant_data, tag_mutated_buffer
18+
from torch._export.utils import is_buffer, is_lifted_tensor_constant, is_param
1819
from torch.export.exported_program import ExportedProgram
1920

2021

@@ -61,6 +62,18 @@ def partition(self, exported_program: ExportedProgram) -> PartitionResult:
6162
tag_constant_data(exported_program)
6263
tag_mutated_buffer(exported_program)
6364

65+
# Tag constant placeholders that have no users
66+
# tag_constant_data only tags constants that have users with delegation_tag
67+
# but we need to tag all constants for this partition
68+
for node in exported_program.graph.nodes:
69+
if node.op == "placeholder" and (
70+
is_param(exported_program, node)
71+
or is_buffer(exported_program, node)
72+
or is_lifted_tensor_constant(exported_program, node)
73+
):
74+
if "delegation_tag" not in node.meta:
75+
node.meta["delegation_tag"] = tag
76+
6477
return PartitionResult(
6578
tagged_exported_program=exported_program, partition_tags=partition_tags
6679
)

backends/arm/TARGETS

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ runtime.python_library(
2020
srcs = [
2121
"common/__init__.py",
2222
"common/debug.py",
23+
"common/type.py",
2324
],
2425
deps = [
2526
"fbsource//third-party/tosa_tools:serializer",

backends/arm/_passes/__init__.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242
from .decompose_elu_pass import DecomposeEluPass # noqa
4343
from .decompose_embedding_pass import DecomposeEmbeddingPass # noqa # noqa
4444
from .decompose_expm1_pass import DecomposeExpm1Pass # noqa
45+
from .decompose_floor_divide_pass import DecomposeFloorDividePass # noqa
4546
from .decompose_gelu_pass import DecomposeGeluPass # noqa
4647
from .decompose_glu_pass import DecomposeGluPass # noqa
4748
from .decompose_grouped_conv import DecomposeGroupedConv # noqa
@@ -58,6 +59,7 @@
5859
from .decompose_maxpool2d_with_dilation import DecomposeMaxPool2DPass # noqa
5960
from .decompose_meandim_pass import DecomposeMeanDimPass # noqa
6061
from .decompose_ne_pass import DecomposeNotEqualPass # noqa
62+
from .decompose_remainder_pass import DecomposeRemainderPass # noqa
6163
from .decompose_round_pass import DecomposeRoundPass # noqa
6264
from .decompose_select import DecomposeSelectPass # noqa
6365
from .decompose_sign_pass import DecomposeSignPass # noqa
@@ -75,6 +77,7 @@
7577
)
7678
from .fuse_batchnorm2d_pass import FuseBatchnorm2DPass # noqa
7779
from .fuse_constant_ops_pass import ComputeConstantOpsAOT, FuseConstantArgsPass # noqa
80+
from .fuse_duplicate_users_pass import FuseDuplicateUsersPass # noqa
7881
from .fuse_equal_placeholders_pass import FuseEqualPlaceholdersPass # noqa
7982
from .fuse_quantized_activation_pass import FuseQuantizedActivationPass # noqa
8083
from .insert_int32_casts_after_int64_placeholders import ( # noqa

backends/arm/_passes/arm_pass_manager.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@
4949
DecomposeEluPass,
5050
DecomposeEmbeddingPass,
5151
DecomposeExpm1Pass,
52+
DecomposeFloorDividePass,
5253
DecomposeGeluPass,
5354
DecomposeGluPass,
5455
DecomposeGroupedConv,
@@ -62,6 +63,7 @@
6263
DecomposeMaxPool2DPass,
6364
DecomposeMeanDimPass,
6465
DecomposeNotEqualPass,
66+
DecomposeRemainderPass,
6567
DecomposeRoundPass,
6668
DecomposeSelectPass,
6769
DecomposeSignPass,
@@ -76,6 +78,7 @@
7678
FoldAndAnnotateQParamsPass,
7779
FuseBatchnorm2DPass,
7880
FuseConstantArgsPass,
81+
FuseDuplicateUsersPass,
7982
FuseEqualPlaceholdersPass,
8083
FuseQuantizedActivationPass,
8184
InsertInt32CastsAfterInt64PlaceholdersPass,
@@ -175,6 +178,7 @@ def _tosa_INT_pipeline(
175178
self.add_pass(QuantizeOperatorArguments())
176179
self.add_pass(ConvertELUParamsPass())
177180
self.add_pass(FoldAndAnnotateQParamsPass(exported_program)) # type: ignore[call-arg]
181+
self.add_pass(FuseDuplicateUsersPass())
178182
self.add_pass(UnsqueezeScalarPlaceholdersPass(exported_program))
179183
self.add_pass(MatchArgRanksPass(exported_program))
180184
if self.tosa_spec.is_U55_subset:
@@ -209,6 +213,7 @@ def _tosa_INT_pipeline(
209213
self.add_pass(RewriteMatmulPass())
210214
self.add_pass(RewriteUpsamplePass())
211215
self.add_pass(FuseEqualPlaceholdersPass(exported_program))
216+
212217
self.add_pass(InsertRescaleInt32Pass())
213218
self.add_pass(DecomposeSumPass())
214219
self.add_pass(ToTosaMemoryFormatPass(exported_program))
@@ -222,6 +227,7 @@ def _tosa_FP_pipeline(
222227
self, exported_program: ExportedProgram, graph_module: GraphModule
223228
) -> GraphModule:
224229
self.add_pass(AnnotateOutputDimOrderPass())
230+
self.add_pass(FuseDuplicateUsersPass())
225231
self.add_pass(DecomposeExpm1Pass())
226232
self.add_pass(DecomposeLogitPass())
227233
self.add_pass(DecomposeMaskedFill())
@@ -240,8 +246,11 @@ def _tosa_FP_pipeline(
240246
self.add_pass(CastBoolToInt8Pass())
241247
self.add_pass(DecomposeSinhPass())
242248
self.add_pass(DecomposeSignPass())
249+
self.add_pass(DecomposeFloorDividePass())
243250
self.add_pass(DecomposeDivTensorModePass())
244251
self.add_pass(ReplaceScalarWithTensorByProfilePass())
252+
self.add_pass(DecomposeRemainderPass())
253+
self.add_pass(DecomposeDivTensorModePass())
245254
self.add_pass(DecomposeEmbeddingPass())
246255
self.add_pass(FuseQuantizedActivationPass())
247256
self.add_pass(RemoveGetItemPass())
@@ -331,9 +340,11 @@ def transform_for_annotation_pipeline(self, graph_module: GraphModule):
331340
self.add_pass(CastBoolToInt8Pass())
332341
self.add_pass(DecomposeSignPass())
333342
self.add_pass(DecomposeAddmmPass())
343+
self.add_pass(ReplaceScalarWithTensorByProfilePass())
344+
self.add_pass(DecomposeRemainderPass())
345+
self.add_pass(DecomposeFloorDividePass())
334346
self.add_pass(DecomposeDivTensorModePass())
335347
self.add_pass(DecomposeAddSubAlphaPass())
336-
self.add_pass(ReplaceScalarWithTensorByProfilePass())
337348
self.add_pass(ScalarsToAttributePass())
338349
self.add_pass(DecomposeGroupNormPass())
339350
self.add_pass(DecomposeLayerNormPass())
Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
# Copyright 2025 Arm Limited and/or its affiliates.
2+
#
3+
# This source code is licensed under the BSD-style license found in the
4+
# LICENSE file in the root directory of this source tree.
5+
6+
from typing import Set, Type
7+
8+
import torch
9+
from executorch.backends.arm._passes import ArmPass
10+
from executorch.backends.arm._passes.decompose_div_tensor_mode import (
11+
DecomposeDivTensorModePass,
12+
)
13+
from executorch.exir.dialects._ops import ops as exir_ops
14+
from executorch.exir.pass_base import ExportPass
15+
16+
# Edge-dialect floor_divide overloads recognized by DecomposeFloorDividePass.
edge_floor_divide_ops = (exir_ops.edge.aten.floor_divide.default,)
17+
# Core ATen floor_divide overloads recognized by DecomposeFloorDividePass.
aten_floor_divide_ops = (torch.ops.aten.floor_divide.default,)
18+
19+
20+
def get_floor_divide_decomposition(op) -> tuple:
    """
    Return the operators needed to decompose an aten.floor_divide operation.

    Both the edge dialect overload and the core ATen overload are handled; the
    returned operators always come from the same dialect as ``op``. The
    decomposition strategy is:
        floor_divide(x, y) → div_tensor_mode(x, y, rounding_mode="floor")

    Args:
        op: The floor_divide operator overload that is being decomposed.

    Returns:
        A tuple (div_op, full_like_op): the div.Tensor_mode overload that
        performs the floored division, and the full_like overload that
        DecomposeFloorDividePass uses to turn an integer divisor into a tensor.

    Raises:
        RuntimeError: If the provided operator is not a supported floor_divide variant.
    """
    # Pick the dialect namespace once so both returned ops stay in step.
    if op in edge_floor_divide_ops:
        aten = exir_ops.edge.aten
    elif op in aten_floor_divide_ops:
        aten = torch.ops.aten
    else:
        raise RuntimeError(f"Can't get floor_div decomposition for op {op}")

    return (
        aten.div.Tensor_mode,
        aten.full_like.default,
    )
48+
49+
50+
class DecomposeFloorDividePass(ArmPass):
    """
    Decomposes aten.floor_divide into aten.div.Tensor_mode with rounding_mode="floor".

    When the divisor is a Python int it is first replaced by a full_like node
    built from the dividend, so the emitted div.Tensor_mode call is given the
    result of that node rather than the raw scalar. The emitted div.Tensor_mode
    op is expected to be handled by DecomposeDivTensorModePass, which is listed
    in ``_passes_required_after``.
    """

    _passes_required_after: Set[Type[ExportPass]] = {DecomposeDivTensorModePass}

    def call_operator(self, op, args, kwargs, meta):
        """
        Rewrite a floor_divide call; forward every other operator unchanged.
        """
        if op not in (edge_floor_divide_ops + aten_floor_divide_ops):
            return super().call_operator(op, args, kwargs, meta, updated=False)

        (div_op, full_op) = get_floor_divide_decomposition(op)

        # Named dividend/divisor instead of `input`, which shadows the builtin.
        dividend = args[0]
        divisor = args[1]

        if isinstance(divisor, int):
            # Materialize the scalar as full_like(dividend, divisor).
            divisor = super().call_operator(
                full_op, (dividend, divisor), {}, meta, updated=False
            )

        return super().call_operator(
            div_op,
            (dividend, divisor),
            {"rounding_mode": "floor"},
            meta,
            updated=True,
        )

backends/arm/_passes/decompose_int16_activation_conv2d_pass.py

Lines changed: 12 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -49,12 +49,8 @@ def call_operator(self, op, args, kwargs, meta):
4949
)
5050

5151
# convolution with bias and activation is int16
52-
# The bias is assumed to be quantized with the same quantization parameters as
53-
# as the output of the convolution
5452
bias = args[2]
55-
assert (
56-
meta.data["output_qparams"][0].dtype == bias.data.dtype
57-
), "Bias needs to have same type as quantized output type"
53+
5854
no_bias_args = list(args)
5955
no_bias_args[2] = None
6056
# split up to convolution + bias
@@ -79,46 +75,30 @@ def call_operator(self, op, args, kwargs, meta):
7975
# The conv will get the output int48 scaled to int32 in serialization step.
8076
# To be able to add the bias we need to first scale (cast?) the output to int32.
8177
# The resulting i32 sum will then need to be scaled back to the output dtype.
82-
83-
# calculate common rescale factor from convolution output and bias quantization
8478
output_qparams = cast(QuantArgs, meta.data["output_qparams"][0])
8579
conv_output_scale = output_qparams.scale
86-
bias_qparams = cast(QuantArgs, meta.data["input_qparams"][2])
87-
bias_scale = bias_qparams.scale
8880

89-
common_scale = max(bias_scale, conv_output_scale)
90-
91-
# calculate how we can rescale bias and conv to a common scale and maximize the output range
92-
bias_rescale_factor = bias_scale / common_scale
93-
conv_rescale_factor = conv_output_scale / common_scale
81+
bias_qparams = cast(QuantArgs, meta.data["input_qparams"][2])
82+
per_channel_quant = bias_qparams.per_channel
9483

95-
# Either of conv output or bias now covers the full int16 range and the other one a smaller range.
96-
# Since we are upscaling to int32 we have 16 additional bits to work with to maximize the output range.
97-
# Worst case here is that both bias and conv output covers the full int16 range so we leave one bit
98-
# and then one for the sign bit.
99-
bits_left_to_shift = 14
84+
if per_channel_quant:
85+
bias_scale = bias_qparams.get_scale_per_channel()
86+
else:
87+
bias_scale = [bias_qparams.get_scale_per_tensor()]
10088

101-
# update rescale factors
102-
bias_rescale_factor *= 1 << bits_left_to_shift
103-
conv_rescale_factor *= 1 << bits_left_to_shift
89+
conv_rescale_factors = [1.0] * len(bias_scale)
90+
final_output_scale = [b / conv_output_scale for b in bias_scale]
10491

10592
conv_output = super().call_operator(
10693
exir_ops.backend.tosa.RESCALE.default,
107-
(convolution, torch.int32, [conv_rescale_factor], 0, 0),
108-
{},
109-
new_meta,
110-
)
111-
112-
bias_rescaled = super().call_operator(
113-
exir_ops.backend.tosa.RESCALE.default,
114-
(channel_bias, torch.int32, [bias_rescale_factor], 0, 0),
94+
(convolution, torch.int32, conv_rescale_factors, 0, 0),
11595
{},
11696
new_meta,
11797
)
11898

11999
add = super().call_operator(
120100
exir_ops.edge.aten.add.Tensor,
121-
(conv_output, bias_rescaled),
101+
(conv_output, channel_bias),
122102
{},
123103
new_meta,
124104
)
@@ -128,7 +108,7 @@ def call_operator(self, op, args, kwargs, meta):
128108
(
129109
add,
130110
output_dtype,
131-
[(common_scale / (conv_output_scale * (1 << bits_left_to_shift)))],
111+
final_output_scale,
132112
0,
133113
0,
134114
),
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
# Copyright 2025 Arm Limited and/or its affiliates.
2+
#
3+
# This source code is licensed under the BSD-style license found in the
4+
# LICENSE file in the root directory of this source tree.
5+
6+
from typing import Set, Type
7+
8+
import torch
9+
from executorch.backends.arm._passes import ArmPass
10+
from executorch.backends.arm._passes.decompose_div_tensor_mode import (
11+
DecomposeDivTensorModePass,
12+
)
13+
from executorch.exir.dialects._ops import ops as exir_ops
14+
from executorch.exir.dialects.edge._ops import EdgeOpOverload
15+
from executorch.exir.pass_base import ExportPass
16+
from torch._ops import OpOverload
17+
18+
# Operator overload type used in this module's annotations: core ATen or edge dialect.
Op = OpOverload | EdgeOpOverload
19+
20+
21+
def _get_remainder_decomposition_ops(op: Op) -> tuple[Op, Op, Op]:
    """
    Look up the operators used to lower a remainder op.

    The result is the triple (div_mode_op, mul_op, sub_op), taken from the edge
    dialect when ``op`` is the edge remainder overload and from core ATen when
    it is the ATen remainder overload. Any other operator raises RuntimeError.
    """
    if op == exir_ops.edge.aten.remainder.Tensor:
        aten = exir_ops.edge.aten
    elif op == torch.ops.aten.remainder.Tensor:
        aten = torch.ops.aten
    else:
        raise RuntimeError(f"Can't get remainder decomposition ops for op {op}")

    return (
        aten.div.Tensor_mode,
        aten.mul.Tensor,
        aten.sub.Tensor,
    )
40+
41+
42+
class DecomposeRemainderPass(ArmPass):
    """
    Lower remainder to division, multiplication and subtraction:
        remainder(x, y) -> x - floor_div(x, y) * y
    with floor_div(x, y) expressed as div(x, y, rounding_mode="floor").
    """

    _passes_required_after: Set[Type[ExportPass]] = {DecomposeDivTensorModePass}

    def call_operator(self, op, args, kwargs, meta, updated=False):
        """
        Replace a remainder call with its three-op expansion; pass every other
        operator through to the parent class untouched.
        """
        remainder_ops = (
            exir_ops.edge.aten.remainder.Tensor,
            torch.ops.aten.remainder.Tensor,
        )
        if op not in remainder_ops:
            return super().call_operator(op, args, kwargs, meta, updated)

        div_op, mul_op, sub_op = _get_remainder_decomposition_ops(op)
        dividend = args[0]
        divisor = args[1]

        # quotient = floor(dividend / divisor)
        quotient = super().call_operator(
            div_op,
            (dividend, divisor),
            {"rounding_mode": "floor"},
            meta,
            updated=True,
        )
        # scaled = quotient * divisor
        scaled = super().call_operator(
            mul_op, (quotient, divisor), {}, meta, updated=True
        )
        # remainder = dividend - scaled
        return super().call_operator(
            sub_op, (dividend, scaled), {}, meta, updated=True
        )

backends/arm/_passes/decompose_sum_pass.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -68,8 +68,8 @@ def call_operator(self, op, args, kwargs, meta):
6868
case _:
6969
raise ValueError(f"Invalid number of arguments ({len(args)}) provided.")
7070

71-
# If dims is None, sum over all dimensions
72-
if dims is None:
71+
# If dims evaluates to False (None or []), sum over all dimensions
72+
if not dims:
7373
shape = input_node.data.size()
7474
dims = list(range(len(shape)))
7575

0 commit comments

Comments
 (0)