
Commit 496268d

Merge branch 'main' into change-1119290

2 parents: edc4dc1 + 05799c9

25 files changed: +422 -32 lines

backends/arm/operators/op_bmm.py (+23 -0)

@@ -79,6 +79,12 @@ def define_node(
             input1_zp = input_qparams[1].get_zp_per_tensor()
             bmm_result = tosa_graph.addIntermediate(output.shape, ts.DType.INT32)
             bmm_output_name = bmm_result.name
+        elif inputs[0].dtype == ts.DType.INT16:
+            input_qparams = get_input_qparams(node)
+            input0_zp = input_qparams[0].get_zp_per_tensor()
+            input1_zp = input_qparams[1].get_zp_per_tensor()
+            bmm_result = tosa_graph.addIntermediate(output.shape, ts.DType.INT48)
+            bmm_output_name = bmm_result.name
         else:
             bmm_output_name = output.name
             input0_zp, input1_zp = 0, 0
@@ -118,3 +124,20 @@ def define_node(
                 output_zp=[output_qparams.get_zp_per_tensor()],
                 rounding_mode=RoundingMode.SINGLE_ROUND,
             )
+        elif output.dtype == ts.DType.INT16:
+            output_qparams = get_output_qparams(node)[0]
+            final_output_scale = (
+                input_qparams[0].get_scale_per_tensor() * input_qparams[1].get_scale_per_tensor()  # type: ignore[possibly-undefined]  # pyre-ignore[61]
+            ) / output_qparams.get_scale_per_tensor()
+
+            build_rescale(
+                tosa_fb=tosa_graph,
+                scale=[final_output_scale],
+                # pyre-ignore[61]: Local variable `bmm_result` is undefined, or not always defined.
+                input_node=bmm_result,  # type: ignore[possibly-undefined]
+                output_name=output.name,
+                output_type=ts.DType.INT16,
+                input_zp=[0],
+                output_zp=[output_qparams.get_zp_per_tensor()],
+                rounding_mode=RoundingMode.SINGLE_ROUND,
+            )
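
The new INT16 path mirrors the INT8 one: the int16 x int16 MATMUL accumulates into an INT48 intermediate, and the trailing RESCALE folds both input scales and the output scale into a single multiplier. A minimal scalar sketch of that requantization, assuming per-tensor quantization (the helper is illustrative; the TOSA RESCALE realizes the scale as an integer multiplier and shift rather than a float):

def requantize_int48_to_int16(
    acc: int, s_in0: float, s_in1: float, s_out: float, zp_out: int
) -> int:
    # The INT48 accumulator holds sums of int16 * int16 products, so its
    # effective scale is s_in0 * s_in1; dividing by s_out maps the value
    # onto the output quantization grid.
    effective_scale = (s_in0 * s_in1) / s_out
    q = round(acc * effective_scale) + zp_out
    return max(-32768, min(32767, q))  # saturate to int16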

backends/arm/test/ops/test_addmm.py (+0 -6)

@@ -213,9 +213,6 @@ def get_symmetric_a16w8_addmm_quantizer(per_channel_quantization=False):
 
 
 @common.parametrize("test_data", test_data_suite)
-@pytest.mark.xfail(
-    reason="missing int16 addmm ops support; fails at TOSA reference model with Unsupported operation type or rank. See: https://github.com/pytorch/executorch/issues/13979"
-)
 def test_addmm_16a8w_tosa_INT(test_data: input_t1):
     """Test addmm (FC layer) operation with 16A8W quantization (16-bit activations, 8-bit weights)"""
     per_channel_quantization = False
@@ -268,9 +265,6 @@ def test_addmm_16a8w_u55_INT16(test_data: input_t1):
 
 @common.parametrize("test_data", test_data_suite)
 @common.XfailIfNoCorstone320
-@pytest.mark.xfail(
-    reason="Vela compilation fails with 'Invalid arguments' for int16 addmm operations"
-)
 def test_addmm_16a8w_u85_INT16(test_data: input_t1):
     """Test addmm (FC layer) operation with 16A8W quantization on U85 (16-bit activations, 8-bit weights)"""
     per_channel_quantization = False

backends/cadence/aot/replace_ops.py (+2 -2)

@@ -89,10 +89,10 @@ def replace_logical_nop_where_with_where(
 
             # Get the third arg node and its input
             logical_not_node = node.args[0]
-            logical_not_input_tensor = logical_not_node.args[0].to_tensor()
+            logical_not_input_node = logical_not_node.args[0]
 
             # If the logical_not input is not a boolean tensor, bail.
-            if logical_not_input_tensor.meta["spec"].dtype != torch.bool:
+            if logical_not_input_node.meta["val"].dtype != torch.bool:
                 continue
 
             # Replace the where op with another one, flipping the inputs and using the boolean
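
The fix reads the dtype from the node's meta["val"] (the FakeTensor recorded at export time) instead of materializing a tensor with to_tensor() and consulting meta["spec"]. A small sketch of the same check on an exported graph, assuming a toy module (module and shapes are illustrative):

import torch
from torch.export import export


class M(torch.nn.Module):
    def forward(self, cond, x, y):
        return torch.where(torch.logical_not(cond), x, y)


ep = export(M(), (torch.zeros(4, dtype=torch.bool), torch.ones(4), torch.zeros(4)))
for node in ep.graph.nodes:
    if node.op == "call_function" and node.target == torch.ops.aten.logical_not.default:
        # meta["val"] carries the shape/dtype of the value the input node produces.
        assert node.args[0].meta["val"].dtype == torch.bool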

backends/cuda/TARGETS (+1 -0)

@@ -6,6 +6,7 @@ runtime.python_library(
     name = "cuda_backend",
     srcs = [
         "cuda_backend.py",
+        "replace_slice_copy_with_slice.py",
     ],
     visibility = [
         "//executorch/...",

backends/cuda/cuda_backend.py (+3 -1)

@@ -144,7 +144,9 @@ def preprocess(
         }
 
         with collect_unsupported_fallback_kernels(), torch.nn.attention.sdpa_kernel(
-            [SDPBackend.MATH]
+            [
+                SDPBackend.MATH  # pyre-ignore[16]: Module `torch.nn.attention` has no attribute `SDPBackend`.
+            ]
         ), torch.no_grad():
             # torch._logging.set_logs(post_grad_graphs=True)
             so_path = torch._inductor.aot_compile(edge_program_module, tuple(user_input_placeholders), options=options)  # type: ignore[arg-type]

backends/cuda/replace_slice_copy_with_slice.py (+8 -5)

@@ -6,20 +6,23 @@
 
 # pyre-strict
 
-from typing import Iterable
+from typing import Dict, Iterable, Tuple
 
 import torch
 from executorch.exir.dialects._ops import ops
+from executorch.exir.dialects.edge._ops import EdgeOpOverload
 from executorch.exir.pass_base import ExportPass, PassResult
 from torch import fx
 
 
-_SLICE_COPY_TARGETS = (
+_SLICE_COPY_TARGETS: Tuple[torch._ops.OpOverload | EdgeOpOverload] = (
     torch.ops.aten.slice_copy.Tensor,
     ops.edge.aten.slice_copy.Tensor,
 )
 
-_SLICE_TARGETS = {
+_SLICE_TARGETS: Dict[
+    torch._ops.OpOverload | EdgeOpOverload, torch._ops.OpOverload | EdgeOpOverload
+] = {
     torch.ops.aten.slice_copy.Tensor: torch.ops.aten.slice.Tensor,
     ops.edge.aten.slice_copy.Tensor: ops.edge.aten.slice.Tensor,
 }
@@ -99,8 +102,8 @@ def _is_view_user(self, node: fx.Node, user: fx.Node) -> bool:
             return False
 
     def _argument_mutates(
-        self, schema: torch._C.FunctionSchema, key
-    ) -> bool:  # pyre-ignore[11]
+        self, schema: torch._C.FunctionSchema, key: int | str
+    ) -> bool:
         arguments = schema.arguments
         if isinstance(key, int):
             if key >= len(arguments):
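
A side note on the new annotation, not part of this commit: Tuple[X] describes a tuple of exactly one element, while _SLICE_COPY_TARGETS holds two. The conventional spelling for a variable-length homogeneous tuple would be:

_SLICE_COPY_TARGETS: Tuple[torch._ops.OpOverload | EdgeOpOverload, ...] = (
    torch.ops.aten.slice_copy.Tensor,
    ops.edge.aten.slice_copy.Tensor,
)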

backends/cuda/tests/test_cuda_export.py (+4 -1)

@@ -8,6 +8,7 @@
 from typing import Tuple
 
 import torch
+from executorch.backends.cuda.cuda_backend import CudaBackend
 from executorch.backends.cuda.cuda_partitioner import CudaPartitioner
 from executorch.exir import EdgeCompileConfig, to_edge_transform_and_lower
 from torch.export import export
@@ -30,7 +31,9 @@ def _export_to_cuda_with_lower(
         exported_program = export(module, inputs, strict=True)
 
         # Create partitioner and compile specs
-        partitioner = CudaPartitioner([])
+        partitioner = CudaPartitioner(
+            [CudaBackend.generate_method_name_compile_spec("forward")]
+        )
 
         # Use to_edge_transform_and_lower for complete pipeline
         edge_program_manager = to_edge_transform_and_lower(
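
The test now passes a method-name compile spec because the CUDA backend keys its AOTInductor artifact by method. A minimal sketch of the lowering pattern the test exercises, assuming a trivial module (the module is illustrative, and a CUDA-capable environment is required):

import torch
from executorch.backends.cuda.cuda_backend import CudaBackend
from executorch.backends.cuda.cuda_partitioner import CudaPartitioner
from executorch.exir import to_edge_transform_and_lower
from torch.export import export


class Add(torch.nn.Module):
    def forward(self, x, y):
        return x + y


ep = export(Add(), (torch.randn(4), torch.randn(4)), strict=True)
partitioner = CudaPartitioner(
    [CudaBackend.generate_method_name_compile_spec("forward")]
)
edge_program_manager = to_edge_transform_and_lower(ep, partitioner=[partitioner])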

backends/nxp/backend/edge_program_converter.py (+1 -0)

@@ -43,6 +43,7 @@
     exir_ops.edge.aten.permute_copy.default: PermuteCopyConverter,  # noqa F405
     exir_ops.edge.aten.relu.default: ReLUConverter,  # noqa F405
     exir_ops.edge.aten._softmax.default: SoftmaxConverter,  # noqa F405
+    exir_ops.edge.aten.sub.Tensor: SubTensorConverter,  # noqa F405
     exir_ops.edge.aten.tanh.default: TanhConverter,  # noqa F405
     exir_ops.edge.aten.view_copy.default: ViewCopyConverter,  # noqa F405
     exir_ops.edge.aten.sigmoid.default: SigmoidConverter,  # noqa F405

backends/nxp/backend/ir/converter/node_converters/ops_converters/__init__.py (+4 -0)

@@ -56,6 +56,9 @@
 from executorch.backends.nxp.backend.ir.converter.node_converters.ops_converters.softmax_converter import (
     SoftmaxConverter,
 )
+from executorch.backends.nxp.backend.ir.converter.node_converters.ops_converters.sub_tensor_converter import (
+    SubTensorConverter,
+)
 from executorch.backends.nxp.backend.ir.converter.node_converters.ops_converters.tanh_converter import (
     TanhConverter,
 )
@@ -80,6 +83,7 @@
     "MaxPool2dConverter",
     "AvgPool2dConverter",
     "AddTensorConverter",
+    "SubTensorConverter",
     "CloneConverter",
     "AbsConverter",
     "AdaptiveAvgPool2dConverter",
backends/nxp/backend/ir/converter/node_converters/ops_converters/sub_tensor_converter.py (+59 -0, new file)

@@ -0,0 +1,59 @@
+# Copyright 2025 NXP
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from executorch.backends.nxp.backend.ir.converter.conversion.common import (
+    node_uses_shape_broadcasting,
+)
+from executorch.backends.nxp.backend.ir.converter.node_converter import (
+    CustomDelegationOptions,
+    NodeConverter,
+)
+from executorch.backends.nxp.backend.ir.tflite_generator.builtin_options import (
+    sub_options,
+)
+from executorch.backends.nxp.backend.neutron_target_spec import NeutronTargetSpec
+from torch.fx import Node
+from torch.nn import Parameter
+
+
+class SubTensorConverter(NodeConverter):
+    @staticmethod
+    def _is_supported_on_target(
+        node: Node,
+        neutron_target_spec: NeutronTargetSpec,
+        parameters_mapping: dict[str, Parameter],
+        custom_delegation_options: CustomDelegationOptions,
+    ) -> bool:
+        if node_uses_shape_broadcasting(node):
+            # Shape broadcasting may require the addition of `Transpose` ops during conversion.
+            return False
+
+        return True
+
+    @staticmethod
+    def _is_supported_in_IR(
+        node: Node,
+        parameters_mapping: dict[str, Parameter],
+        custom_delegation_options: CustomDelegationOptions,
+    ) -> bool:
+        if len(node.args) != 2:
+            return False
+
+        # The `alpha` attribute can be represented by adding an extra `Mul` operator.
+        # However, this is not implemented as `alpha` is rarely used.
+        if "alpha" in node.kwargs:
+            return False
+
+        return True
+
+    # sub.Tensor Node format: (Tensor self, Tensor other, *, Scalar alpha=1)
+    def convert(self, node: Node):
+        """Convert 'sub_tensor' operator to NeutronIR 'Sub'."""
+        self.assert_convertible(node)
+
+        t_op = self._create_tflite_op_with_io_tensors(node)
+
+        t_op.builtin_options = sub_options.Sub()
+        self.builder.append_operators([t_op])
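
For reference, _is_supported_in_IR rejects a non-default alpha because aten.sub computes self - alpha * other; representing alpha != 1 in the target IR would need the extra Mul the comment mentions. The semantics in plain PyTorch:

import torch

a, b = torch.tensor([3.0]), torch.tensor([1.0])
torch.sub(a, b, alpha=2.0)  # a - alpha * b == tensor([1.])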
