Skip to content

Commit 56fb60e

Browse files
authored
Merge branch 'main' into sync-pt-commit
2 parents 8bcdd54 + 57a7903 commit 56fb60e

File tree

29 files changed

+600
-134
lines changed

29 files changed

+600
-134
lines changed

.github/workflows/cuda.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ jobs:
7171
strategy:
7272
fail-fast: false
7373
matrix:
74-
model: [linear, add, add_mul, resnet18]
74+
model: [linear, add, add_mul, resnet18, conv1d]
7575
with:
7676
timeout: 90
7777
runner: linux.g5.4xlarge.nvidia.gpu

backends/arm/_passes/convert_split_to_slice.py

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -46,13 +46,24 @@ def call(self, graph_module: torch.fx.GraphModule):
4646
dim = (dim + rank) % rank
4747

4848
# Validate that split lengths cover the entire dimension
49-
length_sum = sum(split_lengths)
49+
5050
dim_size = shape[dim]
51-
if length_sum != dim_size:
52-
raise ValueError(
53-
f"Split sizes {split_lengths} sum to {length_sum}, "
54-
f"but dimension {dim} has size {dim_size}"
55-
)
51+
if isinstance(split_lengths, int):
52+
if split_lengths <= 0:
53+
raise ValueError(
54+
f"Split size must be positive, got {split_lengths}"
55+
)
56+
full_chunks, remainder = divmod(dim_size, split_lengths)
57+
split_lengths = [split_lengths] * full_chunks
58+
if remainder:
59+
split_lengths.append(remainder)
60+
else:
61+
length_sum = sum(split_lengths)
62+
if length_sum != dim_size:
63+
raise ValueError(
64+
f"Split sizes {split_lengths} sum to {length_sum}, "
65+
f"but dimension {dim} has size {dim_size}"
66+
)
5667

5768
# Convert split argument 'split_lengths' to slice arguments start and end.
5869
starts = [0] * len(split_lengths)

backends/arm/operator_support/tosa_profile_supported_op_lists.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@
5555
exir_ops.edge.aten.log.default,
5656
exir_ops.edge.aten.linear.default,
5757
exir_ops.edge.aten.split_with_sizes_copy.default,
58+
exir_ops.edge.aten.split_copy.Tensor,
5859
exir_ops.edge.aten.floor.default,
5960
exir_ops.edge.aten.full.default,
6061
exir_ops.edge.aten.full_like.default,
@@ -152,6 +153,7 @@
152153
exir_ops.edge.aten.log.default,
153154
exir_ops.edge.aten.linear.default,
154155
exir_ops.edge.aten.split_with_sizes_copy.default,
156+
exir_ops.edge.aten.split_copy.Tensor,
155157
exir_ops.edge.aten.floor.default,
156158
exir_ops.edge.aten.full.default,
157159
exir_ops.edge.aten.full_like.default,

backends/arm/quantizer/quantization_annotator.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -330,6 +330,7 @@ def _match_pattern(
330330
torch.ops.aten.slice_copy.Tensor,
331331
torch.ops.aten.split.Tensor,
332332
torch.ops.aten.split_with_sizes.default,
333+
torch.ops.aten.split_copy.Tensor,
333334
torch.ops.aten.transpose.Dimname,
334335
torch.ops.aten.transpose.int,
335336
torch.ops.aten.transpose_copy.int,

backends/arm/test/ops/test_slice.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77

88
from typing import Tuple
99

10+
import pytest
1011
import torch
1112
from executorch.backends.arm.quantizer.arm_quantizer import (
1213
get_symmetric_a16w8_quantization_config,
@@ -43,7 +44,6 @@
4344

4445

4546
class Slice(torch.nn.Module):
46-
4747
def forward(self, x: torch.Tensor, s: list[tuple[int, int]]):
4848
slices = [slice(*i) for i in s]
4949
return x[slices]
@@ -153,6 +153,9 @@ def get_symmetric_a16w8_slice_quantizer(per_channel_quantization=False):
153153

154154

155155
@common.parametrize("test_data", test_data_suite)
156+
@pytest.mark.xfail(
157+
reason="missing int16 slice ops support; fails at TOSA reference model with Unsupported operation type or rank. See: https://github.com/pytorch/executorch/issues/13976"
158+
)
156159
def test_slice_tensor_16a8w_tosa_INT(test_data: torch.Tensor):
157160
"""Test slice operation with 16A8W quantization (16-bit activations, 8-bit weights)"""
158161
per_channel_quantization = False
@@ -178,6 +181,9 @@ def test_slice_tensor_16a8w_tosa_INT(test_data: torch.Tensor):
178181

179182
@common.parametrize("test_data", test_data_suite)
180183
@common.XfailIfNoCorstone300
184+
@pytest.mark.xfail(
185+
reason="Vela compilation fails with 'Invalid arguments' for int16 slice operations"
186+
)
181187
def test_slice_tensor_16a8w_u55_INT16(test_data: torch.Tensor):
182188
"""Test slice operation with 16A8W quantization on U55 (16-bit activations, 8-bit weights)"""
183189
per_channel_quantization = False
@@ -202,6 +208,9 @@ def test_slice_tensor_16a8w_u55_INT16(test_data: torch.Tensor):
202208

203209
@common.parametrize("test_data", test_data_suite)
204210
@common.XfailIfNoCorstone320
211+
@pytest.mark.xfail(
212+
reason="Vela compilation fails with 'Invalid arguments' for int16 slice operations"
213+
)
205214
def test_slice_tensor_16a8w_u85_INT16(test_data: torch.Tensor):
206215
"""Test slice operation with 16A8W quantization on U85 (16-bit activations, 8-bit weights)"""
207216
per_channel_quantization = False

backends/arm/test/ops/test_split.py

Lines changed: 85 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@
2222

2323

2424
class Split(torch.nn.Module):
25-
2625
test_data = {
2726
"split_1d_2_size_0_dim": lambda: (torch.rand(10), 2, 0),
2827
"split_2d_3_size_1_dim": lambda: (torch.rand(10, 10), 3, 1),
@@ -60,12 +59,24 @@ def forward(
6059
return x.split(split_size=split_size_or_sections, dim=dim)[1:3]
6160

6261

62+
class SplitCopy(torch.nn.Module):
63+
aten_op = "torch.ops.aten.split_copy.Tensor"
64+
exir_op = "executorch_exir_dialects_edge__ops_aten_split_copy_Tensor"
65+
66+
def forward(
67+
self,
68+
x: torch.Tensor,
69+
split_size: int,
70+
dim: int,
71+
):
72+
return torch.split_copy(x, split_size=split_size, dim=dim)
73+
74+
6375
@common.parametrize(
6476
"test_data",
6577
(Split.test_data | Split.test_data_list),
6678
)
6779
def test_split_with_sizes_tosa_FP(test_data: input_t1):
68-
6980
pipeline = TosaPipelineFP[input_t1](
7081
Split(),
7182
test_data(),
@@ -77,7 +88,6 @@ def test_split_with_sizes_tosa_FP(test_data: input_t1):
7788

7889
@common.parametrize("test_data", Split.test_data_list)
7990
def test_split_with_sizes_tosa_FP_2(test_data: input_t1):
80-
8191
pipeline = TosaPipelineFP[input_t1](
8292
SplitWithSizes(),
8393
test_data(),
@@ -92,7 +102,6 @@ def test_split_with_sizes_tosa_FP_2(test_data: input_t1):
92102
(Split.test_data | Split.test_data_list),
93103
)
94104
def test_split_with_sizes_tosa_FP_one_out(test_data: input_t1):
95-
96105
pipeline = TosaPipelineFP[input_t1](
97106
SplitSingleOut(),
98107
test_data(),
@@ -107,7 +116,6 @@ def test_split_with_sizes_tosa_FP_one_out(test_data: input_t1):
107116
(Split.test_data | Split.test_data_list),
108117
)
109118
def test_split_with_sizes_tosa_FP_two_out(test_data: input_t1):
110-
111119
pipeline = TosaPipelineFP[input_t1](
112120
SplitTwoOut(),
113121
test_data(),
@@ -122,7 +130,6 @@ def test_split_with_sizes_tosa_FP_two_out(test_data: input_t1):
122130
(Split.test_data | Split.test_data_list),
123131
)
124132
def test_split_with_sizes_tosa_INT(test_data: input_t1):
125-
126133
pipeline = TosaPipelineINT[input_t1](
127134
Split(),
128135
test_data(),
@@ -161,7 +168,6 @@ def test_split_with_sizes_u55_INT(test_data: input_t1):
161168
)
162169
@common.XfailIfNoCorstone320
163170
def test_split_with_sizes_u85_INT(test_data: input_t1):
164-
165171
pipeline = EthosU85PipelineINT[input_t1](
166172
Split(),
167173
test_data(),
@@ -190,7 +196,6 @@ def test_split_with_sizes_vgf_FP(test_data: input_t1):
190196
@common.parametrize("test_data", Split.test_data_list)
191197
@common.SkipIfNoModelConverter
192198
def test_split_with_sizes_vgf_FP_2(test_data: input_t1):
193-
194199
pipeline = VgfPipeline[input_t1](
195200
SplitWithSizes(),
196201
test_data(),
@@ -207,7 +212,6 @@ def test_split_with_sizes_vgf_FP_2(test_data: input_t1):
207212
)
208213
@common.SkipIfNoModelConverter
209214
def test_split_with_sizes_vgf_FP_one_out(test_data: input_t1):
210-
211215
pipeline = VgfPipeline[input_t1](
212216
SplitSingleOut(),
213217
test_data(),
@@ -224,7 +228,6 @@ def test_split_with_sizes_vgf_FP_one_out(test_data: input_t1):
224228
)
225229
@common.SkipIfNoModelConverter
226230
def test_split_with_sizes_vgf_FP_two_out(test_data: input_t1):
227-
228231
pipeline = VgfPipeline[input_t1](
229232
SplitTwoOut(),
230233
test_data(),
@@ -241,7 +244,6 @@ def test_split_with_sizes_vgf_FP_two_out(test_data: input_t1):
241244
)
242245
@common.SkipIfNoModelConverter
243246
def test_split_with_sizes_vgf_INT(test_data: input_t1):
244-
245247
pipeline = VgfPipeline[input_t1](
246248
Split(),
247249
test_data(),
@@ -250,3 +252,75 @@ def test_split_with_sizes_vgf_INT(test_data: input_t1):
250252
tosa_version="TOSA-1.0+INT",
251253
)
252254
pipeline.run()
255+
256+
257+
@common.parametrize("test_data", Split.test_data)
258+
def test_split_tensor_tosa_FP(test_data: Tuple):
259+
pipeline = TosaPipelineFP[input_t1](
260+
SplitCopy(),
261+
test_data(),
262+
aten_op=SplitCopy.aten_op,
263+
exir_op=SplitCopy.exir_op,
264+
)
265+
pipeline.run()
266+
267+
268+
@common.parametrize("test_data", Split.test_data)
269+
def test_split_tensor_tosa_INT(test_data: Tuple):
270+
pipeline = TosaPipelineINT[input_t1](
271+
SplitCopy(),
272+
test_data(),
273+
aten_op=SplitCopy.aten_op,
274+
exir_op=SplitCopy.exir_op,
275+
)
276+
pipeline.run()
277+
278+
279+
@common.XfailIfNoCorstone300
280+
@common.parametrize("test_data", Split.test_data)
281+
def test_split_tensor_u55_INT(test_data: Tuple):
282+
pipeline = EthosU55PipelineINT[input_t1](
283+
SplitCopy(),
284+
test_data(),
285+
aten_ops=SplitCopy.aten_op,
286+
exir_ops=SplitCopy.exir_op,
287+
)
288+
pipeline.run()
289+
290+
291+
@common.XfailIfNoCorstone320
292+
@common.parametrize("test_data", Split.test_data)
293+
def test_split_tensor_u85_INT(test_data: Tuple):
294+
pipeline = EthosU85PipelineINT[input_t1](
295+
SplitCopy(),
296+
test_data(),
297+
aten_ops=SplitCopy.aten_op,
298+
exir_ops=SplitCopy.exir_op,
299+
)
300+
pipeline.run()
301+
302+
303+
@common.parametrize("test_data", Split.test_data)
304+
@common.SkipIfNoModelConverter
305+
def test_split_tensor_vgf_FP(test_data: Tuple):
306+
pipeline = VgfPipeline[input_t1](
307+
SplitCopy(),
308+
test_data(),
309+
aten_op=SplitCopy.aten_op,
310+
exir_op=SplitCopy.exir_op,
311+
tosa_version="TOSA-1.0+FP",
312+
)
313+
pipeline.run()
314+
315+
316+
@common.parametrize("test_data", Split.test_data)
317+
@common.SkipIfNoModelConverter
318+
def test_split_tensor_vgf_INT(test_data: Tuple):
319+
pipeline = VgfPipeline[input_t1](
320+
SplitCopy(),
321+
test_data(),
322+
aten_op=SplitCopy.aten_op,
323+
exir_op=SplitCopy.exir_op,
324+
tosa_version="TOSA-1.0+INT",
325+
)
326+
pipeline.run()
backends/cortex_m/passes/__init__.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
# Copyright 2025 Arm Limited and/or its affiliates.
2+
#
3+
# This source code is licensed under the BSD-style license found in the
4+
# LICENSE file in the root directory of this source tree.
5+
6+
from .quantized_linear_fusion_pass import QuantizedLinearFusionPass # noqa
7+
from .quantized_op_fusion_pass import QuantizedOpFusionPass # noqa
8+
from .replace_quant_nodes_pass import ReplaceQuantNodesPass # noqa
9+
from .cortex_m_pass_manager import CortexMPassManager # noqa # usort: skip
backends/cortex_m/passes/cortex_m_pass_manager.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
# Copyright 2025 Arm Limited and/or its affiliates.
2+
#
3+
# This source code is licensed under the BSD-style license found in the
4+
# LICENSE file in the root directory of this source tree.
5+
6+
7+
from executorch.backends.cortex_m.passes import (
8+
QuantizedLinearFusionPass,
9+
QuantizedOpFusionPass,
10+
ReplaceQuantNodesPass,
11+
)
12+
from executorch.backends.xnnpack._passes import XNNPACKPassManager
13+
from executorch.exir.pass_base import ExportPass
14+
15+
16+
class CortexMPassManager(XNNPACKPassManager):
17+
18+
pass_list: list[ExportPass] = [
19+
ReplaceQuantNodesPass,
20+
QuantizedOpFusionPass,
21+
QuantizedLinearFusionPass,
22+
]
23+
24+
def __init__(self, exported_program, passes=None):
25+
super().__init__(exported_program, passes or self.pass_list)

backends/cortex_m/test/tester.py

Lines changed: 3 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -10,16 +10,7 @@
1010
import torch
1111
from executorch.backends.arm.test.common import get_u55_compile_spec
1212
from executorch.backends.arm.test.tester.arm_tester import Serialize
13-
from executorch.backends.cortex_m.passes.quantized_linear_fusion_pass import (
14-
QuantizedLinearFusionPass,
15-
)
16-
from executorch.backends.cortex_m.passes.quantized_op_fusion_pass import (
17-
QuantizedOpFusionPass,
18-
)
19-
20-
from executorch.backends.cortex_m.passes.replace_quant_nodes_pass import (
21-
ReplaceQuantNodesPass,
22-
)
13+
from executorch.backends.cortex_m.passes.cortex_m_pass_manager import CortexMPassManager
2314
from executorch.backends.test.harness import Tester as TesterBase
2415
from executorch.backends.test.harness.stages import (
2516
Export,
@@ -29,7 +20,6 @@
2920
ToEdgeTransformAndLower,
3021
ToExecutorch,
3122
)
32-
from executorch.backends.xnnpack._passes import XNNPACKPassManager
3323

3424
from executorch.backends.xnnpack.quantizer.xnnpack_quantizer import (
3525
get_symmetric_quantization_config,
@@ -47,12 +37,8 @@ def __init__(self):
4737
class CortexMRunPasses(RunPasses):
4838
def __init__(self):
4939
super().__init__(
50-
XNNPACKPassManager,
51-
pass_list=[
52-
ReplaceQuantNodesPass,
53-
QuantizedLinearFusionPass,
54-
QuantizedOpFusionPass,
55-
],
40+
CortexMPassManager,
41+
CortexMPassManager.pass_list,
5642
)
5743

5844

backends/cuda/cuda_backend.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,14 @@
2424
)
2525
from executorch.exir.backend.compile_spec_schema import CompileSpec
2626
from torch._inductor.codegen.cpp_wrapper_cpu import CppWrapperCpu
27+
from torch._inductor.decomposition import conv1d_to_conv2d
2728
from torch.export.passes import move_to_device_pass
2829
from torch.nn.attention import SDPBackend
2930

31+
cuda_decomposition_table = {
32+
torch.ops.aten.conv1d.default: conv1d_to_conv2d,
33+
}
34+
3035
# Fallback operators that exist in the et namespace.
3136
supported_fallback_kernels: Dict[str, Any] = {}
3237

@@ -119,6 +124,10 @@ def preprocess(
119124
# replace slice_copy with slice
120125
ReplaceSliceCopyWithSlicePass()(cuda_edge_program.graph_module)
121126

127+
cuda_edge_program = cuda_edge_program.run_decompositions(
128+
cuda_decomposition_table
129+
)
130+
122131
edge_program_module = cuda_edge_program.module()
123132

124133
# Grab all input placeholders from the graph

0 commit comments

Comments
 (0)