9 changes: 5 additions & 4 deletions backends/cortex_m/passes/quantized_linear_fusion_pass.py
@@ -1,5 +1,6 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
 # All rights reserved.
+# Copyright 2025 Arm Limited and/or its affiliates.
 #
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
@@ -19,17 +20,18 @@
 )

 from executorch.backends.transforms.utils import create_mutable_buffer, get_param_tensor

+from executorch.backends.xnnpack._passes.xnnpack_pass import XNNPACKPass
 from executorch.exir import ExportedProgram
 from executorch.exir.dialects._ops import ops as exir_ops
 from executorch.exir.pass_base import ExportPass
 from torch.fx import Node
 from torch.fx.passes.infra.pass_manager import PassResult

 logger = logging.getLogger("quantized_linear_fusion_pass")
 logger.setLevel(logging.INFO)


-class QuantizedLinearFusionPass(ExportPass):
+class QuantizedLinearFusionPass(XNNPACKPass):
     """
     Cortex-M backend pass that fuses quantized linear-like patterns.
     Fuses: dequantize -> [linear/addmm/fc_ops] -> quantize
@@ -44,8 +46,7 @@ class QuantizedLinearFusionPass(ExportPass):
     requires_exported_program = True

     def __init__(self, exported_program: ExportedProgram):
-        super().__init__()
-        self._exported_program = exported_program
+        super().__init__(exported_program)
         self.nodes_to_erase = []

     def call(self, graph_module: torch.fx.GraphModule) -> PassResult:
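The switch from ExportPass to XNNPACKPass drops the hand-rolled bookkeeping: the base class now receives the ExportedProgram in its constructor. A rough sketch of the assumed base-class contract follows (see executorch/backends/xnnpack/_passes/xnnpack_pass.py for the real definition, which may differ):

# Sketch of the assumed XNNPACKPass contract, not the verbatim executorch source.
from executorch.exir import ExportedProgram
from executorch.exir.pass_base import ExportPass


class XNNPACKPass(ExportPass):
    # Keeps a handle to the ExportedProgram so subclasses such as
    # QuantizedLinearFusionPass can look up params/buffers during fusion.
    def __init__(self, exported_program: ExportedProgram) -> None:
        super().__init__()
        self._exported_program = exported_program

    @property
    def exported_program(self) -> ExportedProgram:
        return self._exported_program

This mirrors what the pass previously did inline, which is why super().__init__(exported_program) is a drop-in replacement for the two deleted lines.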
22 changes: 13 additions & 9 deletions backends/cortex_m/test/ops/test_add.py
@@ -6,7 +6,11 @@

 import torch
 from executorch.backends.arm.test.common import parametrize
-from executorch.backends.cortex_m.test.tester import CortexMTester, McuTestCase
+from executorch.backends.cortex_m.test.tester import (
+    CortexMTester,
+    McuTestCase,
+    ramp_tensor,
+)
 from executorch.backends.test.suite.operators.test_add import Model, ModelAlpha


@@ -80,19 +84,19 @@ class CortexMAlphaAdd(ModelAlpha):
     ),
     "self_rank_2_pos": McuTestCase(
         CortexMSelfAdd(),
-        (torch.linspace(0, 1000, 10).reshape((10, 1)),),
+        (ramp_tensor(0, 1000, (10, 1)),),
     ),
     "self_rank_3_neg": McuTestCase(
         CortexMSelfAdd(),
-        (torch.linspace(-100, 0, 8).reshape((2, 2, 2)),),
+        (ramp_tensor(-100, 0, (2, 2, 2)),),
     ),
     "self_rank_4_small": McuTestCase(
         CortexMSelfAdd(),
-        (torch.linspace(-0.1, 0.1, 16).reshape(2, 2, 2, 2),),
+        (ramp_tensor(-0.1, 0.1, (2, 2, 2, 2)),),
     ),
     "self_rank_5": McuTestCase(
         CortexMSelfAdd(),
-        (torch.linspace(-5, 5, 32).reshape(2, 2, 2, 2, 2),),
+        (ramp_tensor(-5, 5, (2, 2, 2, 2, 2)),),
     ),
     "scalar_scalar": McuTestCase(
         CortexMScalarAdd(),
@@ -117,15 +121,15 @@ class CortexMAlphaAdd(ModelAlpha):
"broadcast_3": McuTestCase(
CortexMTensorAdd(),
(
torch.linspace(-2, 2, 4).reshape(2, 1, 2, 1),
torch.linspace(-5, 5, 4).reshape(1, 2, 1, 2),
ramp_tensor(-2, 2, (2, 1, 2, 1)),
ramp_tensor(-5, 5, (1, 2, 1, 2)),
),
),
"alpha": McuTestCase(
CortexMAlphaAdd(0.5),
(
torch.linspace(-10, 10, 20).reshape(4, 5),
torch.linspace(-20, 20, 20).reshape(4, 5),
ramp_tensor(-10, 10, (4, 5)),
ramp_tensor(-20, 20, (4, 5)),
),
),
}
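The hunks above swap torch.linspace(...).reshape(...) for a new ramp_tensor helper imported from backends/cortex_m/test/tester.py. The helper itself is not part of this diff; a plausible sketch of it and of the McuTestCase container, both inferred from their call sites (the real definitions may differ):

# Assumed helpers, inferred from call sites in this diff; not the verbatim
# contents of backends/cortex_m/test/tester.py.
from dataclasses import dataclass
from math import prod
from typing import Any

import torch


def ramp_tensor(start: float, end: float, shape: tuple[int, ...]) -> torch.Tensor:
    # Evenly spaced values from start to end, reshaped to the requested shape.
    return torch.linspace(start, end, prod(shape)).reshape(shape)


@dataclass
class McuTestCase:
    # Pairs a module under test with the example inputs used to trace and run it.
    model: torch.nn.Module
    example_inputs: tuple[Any, ...]

Folding the element count into the shape argument removes the redundant third linspace argument, which previously had to be kept in sync with the reshape by hand.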
211 changes: 211 additions & 0 deletions backends/cortex_m/test/ops/test_linear.py
@@ -0,0 +1,211 @@
# Copyright 2025 Arm Limited and/or its affiliates.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.


import torch
from executorch.backends.arm.test.common import parametrize
from executorch.backends.cortex_m.test.tester import (
    CortexMTester,
    McuTestCase,
    ramp_tensor,
)


class CortexMMm(torch.nn.Module):
    def forward(self, x, y):
        return torch.mm(x, y)

    ops_before_transforms = {
        "executorch_exir_dialects_edge__ops_aten_mm_default": 1,
        "executorch_exir_dialects_edge__ops_quantized_decomposed_quantize_per_tensor_default": 2,
        "executorch_exir_dialects_edge__ops_quantized_decomposed_dequantize_per_tensor_default": 3,
    }

    ops_after_transforms = {
        "executorch_exir_dialects_edge__ops_cortex_m_quantized_linear_default": 1,
        "executorch_exir_dialects_edge__ops_cortex_m_quantize_per_tensor_default": 1,
        "executorch_exir_dialects_edge__ops_cortex_m_dequantize_per_tensor_default": 1,
    }


class CortexMBmm(torch.nn.Module):
    def forward(self, x, y):
        return torch.bmm(x, y)

    ops_before_transforms = {
        "executorch_exir_dialects_edge__ops_aten_bmm_default": 1,
        "executorch_exir_dialects_edge__ops_quantized_decomposed_quantize_per_tensor_default": 2,
        "executorch_exir_dialects_edge__ops_quantized_decomposed_dequantize_per_tensor_default": 3,
    }

    ops_after_transforms = {
        "executorch_exir_dialects_edge__ops_cortex_m_quantized_linear_default": 1,
        "executorch_exir_dialects_edge__ops_cortex_m_quantize_per_tensor_default": 1,
        "executorch_exir_dialects_edge__ops_cortex_m_dequantize_per_tensor_default": 1,
    }


class CortexMAddmm(torch.nn.Module):
    def forward(self, x, y, z, alpha=1, beta=1):
        # beta * x + alpha * (y @ z), using the modern keyword-only form of
        # torch.addmm rather than the long-deprecated positional overload.
        return torch.addmm(x, y, z, alpha=alpha, beta=beta)

    ops_before_transforms = {
        "executorch_exir_dialects_edge__ops_aten_addmm_default": 1,
        "executorch_exir_dialects_edge__ops_quantized_decomposed_quantize_per_tensor_default": 2,
        "executorch_exir_dialects_edge__ops_quantized_decomposed_dequantize_per_tensor_default": 3,
    }

    ops_after_transforms = {
        "executorch_exir_dialects_edge__ops_cortex_m_quantized_linear_default": 1,
        "executorch_exir_dialects_edge__ops_cortex_m_quantize_per_tensor_default": 1,
        "executorch_exir_dialects_edge__ops_cortex_m_dequantize_per_tensor_default": 1,
    }


class CortexMAt(CortexMMm):
    def forward(self, x, y):
        return x @ y


class CortexMMatmul(CortexMMm):
    def forward(self, x, y):
        return torch.matmul(x, y)


class CortexMLinear(CortexMMatmul):
    def __init__(self, *args, **kwargs):
        super().__init__()
        self.linear = torch.nn.Linear(*args, bias=False)

    def forward(self, x):
        return self.linear(x)


class CortexMLinearBias(CortexMAddmm):
    def __init__(self, *args, **kwargs):
        super().__init__()
        self.linear = torch.nn.Linear(*args, bias=True)

    def forward(self, x):
        return self.linear(x)


test_cases = {
    "mm": McuTestCase(
        model=CortexMMm(),
        example_inputs=(
            ramp_tensor(0, 10, (1, 16)),
            ramp_tensor(0, 10, (16, 16)),
        ),
    ),
    "bmm": McuTestCase(
        model=CortexMBmm(),
        example_inputs=(
            ramp_tensor(0, 10, (1, 16, 16)),
            ramp_tensor(0, 10, (1, 16, 16)),
        ),
    ),
    "addmm": McuTestCase(
        model=CortexMAddmm(),
        example_inputs=(
            ramp_tensor(0, 10, (1, 16)),
            ramp_tensor(0, 10, (16, 16)),
            ramp_tensor(0, 10, (16, 16)),
        ),
    ),
    "addmm_scalars": McuTestCase(
        model=CortexMAddmm(),
        example_inputs=(
            ramp_tensor(0, 10, (1, 16)),
            ramp_tensor(0, 10, (16, 16)),
            ramp_tensor(0, 10, (16, 16)),
            2,
            4,
        ),
    ),
    "@-operator": McuTestCase(
        model=CortexMAt(),
        example_inputs=(
            ramp_tensor(0, 10, (1, 16)),
            ramp_tensor(0, 10, (16, 16)),
        ),
    ),
    "matmul": McuTestCase(
        model=CortexMMatmul(),
        example_inputs=(
            ramp_tensor(0, 10, (1, 16)),
            ramp_tensor(0, 10, (16, 16)),
        ),
    ),
    "linear_rank1": McuTestCase(
        model=CortexMLinear(2, 3),
        example_inputs=(ramp_tensor(-1, 1, (2,)),),
    ),
    "linear_rank2_pos": McuTestCase(
        model=CortexMLinear(8, 3),
        example_inputs=(ramp_tensor(0, 10, (2, 8)),),
    ),
    "linear_rank3_neg": McuTestCase(
        model=CortexMLinear(5, 3),
        example_inputs=(ramp_tensor(-40, 0, (4, 2, 5)),),
    ),
    "linear_rank4": McuTestCase(
        model=CortexMLinear(16, 32),
        example_inputs=(ramp_tensor(-100, 100, (2, 1, 2, 16)),),
    ),
    "linear_rank5": McuTestCase(
        model=CortexMLinear(4, 3),
        example_inputs=(ramp_tensor(-2, 2, (5, 2, 1, 2, 4)),),
    ),
    "linear_bias": McuTestCase(
        model=CortexMLinearBias(61, 37),
        example_inputs=(ramp_tensor(0, 10, (8, 61)),),
    ),
}

dialect_xfails = {
    "mm": ("torch.mm ops are currently not quantized", RuntimeError),
    "bmm": ("torch.bmm ops are currently not quantized", RuntimeError),
    "addmm": ("torch.addmm ops are currently not quantized", RuntimeError),
    "addmm_scalars": ("torch.addmm ops are currently not quantized", RuntimeError),
    "matmul": ("torch.matmul ops are currently not quantized", RuntimeError),
    "@-operator": ("@ ops are currently not quantized", RuntimeError),
    "linear_rank1": ("Only rank 2 linear ops are fused currently", RuntimeError),
    "linear_rank2_pos": ("name 'int32' is not defined", NameError),
    "linear_rank3_neg": ("Only rank 2 linear ops are fused currently", RuntimeError),
    "linear_rank4": ("Only rank 2 linear ops are fused currently", RuntimeError),
    "linear_rank5": ("Only rank 2 linear ops are fused currently", RuntimeError),
    "linear_bias": ("name 'int32' is not defined", NameError),
}


@parametrize("test_case", test_cases, dialect_xfails)
def test_dialect_linear(test_case):
    tester = CortexMTester(test_case.model, test_case.example_inputs)
    tester.test_dialect(
        test_case.model.ops_before_transforms, test_case.model.ops_after_transforms
    )


implementation_xfails = {
    "mm": ("torch.mm ops are currently not quantized", RuntimeError),
    "bmm": ("torch.bmm ops are currently not quantized", RuntimeError),
    "addmm": ("torch.addmm ops are currently not quantized", RuntimeError),
    "addmm_scalars": ("torch.addmm ops are currently not quantized", RuntimeError),
    "matmul": ("torch.matmul ops are currently not quantized", RuntimeError),
    "@-operator": ("@ ops are currently not quantized", RuntimeError),
    "linear_rank1": ("Only rank 2 linear ops are fused currently", RuntimeError),
    "linear_rank2_pos": ("Output 0 does not match reference output.", AssertionError),
    "linear_rank3_neg": ("Only rank 2 linear ops are fused currently", RuntimeError),
    "linear_rank4": ("Only rank 2 linear ops are fused currently", RuntimeError),
    "linear_rank5": ("Only rank 2 linear ops are fused currently", RuntimeError),
    "linear_bias": ("Output 0 does not match reference output.", AssertionError),
}


@parametrize("test_case", test_cases, implementation_xfails)
def test_implementation_linear(test_case):
    tester = CortexMTester(test_case.model, test_case.example_inputs)
    tester.test_implementation()
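Both entry points delegate to CortexMTester: test_dialect receives the expected operator histograms before and after the Cortex-M transforms, while test_implementation compares outputs against the eager reference. A hypothetical sketch of the op-count check that the ops_*_transforms dictionaries suggest (the real tester in backends/cortex_m/test/tester.py may differ):

# Hypothetical op-count assertion; names and normalization are assumptions.
from collections import Counter

import torch


def count_ops(graph_module: torch.fx.GraphModule) -> Counter:
    # Histogram of call_function nodes keyed by a string form of their target.
    return Counter(
        str(node.target)
        for node in graph_module.graph.nodes
        if node.op == "call_function"
    )


def assert_op_counts(graph_module: torch.fx.GraphModule, expected: dict) -> None:
    actual = count_ops(graph_module)
    for op_name, count in expected.items():
        assert actual[op_name] == count, (
            f"expected {count} x {op_name}, found {actual[op_name]}"
        )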