
Commit 06cfdc9

Merge branch 'main' into add_hardsigmoid_op
2 parents 67853c4 + 9d4e1ee commit 06cfdc9

5 files changed, +87 -5 lines changed


backends/arm/README.md

Lines changed: 12 additions & 0 deletions
@@ -122,6 +122,18 @@ Then you can run the tests with
 pytest -c /dev/null -v -n auto backends/arm/test --arm_run_corstoneFVP
 ```

+## Passes
+
+With the default passes in the Arm Ethos-U backend, assuming the model lowers fully to the
+Ethos-U, the exported program is composed of a Quantize node, the Ethos-U custom delegate
+and a Dequantize node. In some circumstances you may want to feed quantized input to the Neural
+Network straight away, e.g. if you have a camera sensor outputting (u)int8 data and want to keep all the
+arithmetic of the application in the int8 domain. For these cases, you can apply the passes in
+`exir/passes/quantize_io_pass.py`. See the unit test in `executorch/backends/arm/
+test/passes/test_ioquantization_pass.py` for an example of how to feed quantized inputs and
+obtain quantized outputs.
+
+
 ### Code coverage

 To get code coverage:
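For reference, the sketch below condenses the unit test added in this commit into a standalone example of applying the I/O quantization passes after lowering for Ethos-U55. The tiny `Add` module and the input shapes are illustrative only; the actual test lives in `backends/arm/test/passes/test_ioquantization_pass.py` further down in this diff.

```python
import torch

from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.arm_tester import ArmTester
from executorch.exir.passes.quantize_io_pass import QuantizeInputs, QuantizeOutputs


# Illustrative model; any fully delegated int8 model would do.
class Add(torch.nn.Module):
    def forward(self, x, y):
        return x + y


example_inputs = (torch.rand(1, 2, 2, 1), torch.rand(1, 2, 2, 1))

# Quantize, export and partition for Ethos-U55 using the Arm test tooling.
tester = (
    ArmTester(
        Add(),
        example_inputs=example_inputs,
        compile_spec=common.get_u55_compile_spec(),
    )
    .quantize()
    .export()
    .to_edge()
    .partition()
)

# Fold away the remaining Q/DQ nodes at the graph boundary so the program
# accepts int8 data for inputs 0 and 1 and returns int8 data for output 0.
edge = tester.get_artifact()
edge.transform(passes=[QuantizeInputs(edge, [0, 1]), QuantizeOutputs(edge, [0])])
```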

backends/arm/arm_vela.py

Lines changed: 2 additions & 2 deletions
@@ -96,13 +96,13 @@ def vela_compile(tosa_graph, args: List[str], shape_order=None):
         block_name = block_name + b"\x00" * (16 - len(block_name))

         # We need the actual unpadded block lengths for hw setup
-        block_length = struct.pack("<iiii", len(bin_blocks[key]), 0, 0, 0)  # type: ignore[assignment]
+        block_length_bytes = struct.pack("<iiii", len(bin_blocks[key]), 0, 0, 0)

         # Pad block data to multiple of 16 bytes
         block_data = bin_blocks[key]
         block_data = block_data + b"\x00" * (15 - (len(block_data) - 1) % 16)

-        block = block_name + block_length + block_data  # type: ignore[operator]
+        block = block_name + block_length_bytes + block_data
         blocks = blocks + block

     return blocks
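For context, each Vela output block packed above consists of a 16-byte zero-padded name, a 16-byte header whose first int32 holds the unpadded payload length, and the payload padded to a multiple of 16 bytes. The sketch below reproduces that layout; the `pack_block` helper and the example block name are illustrative only, not part of the backend.

```python
import struct


def pack_block(name: bytes, data: bytes) -> bytes:
    # 16-byte block name, zero padded (assumes len(name) <= 16).
    block_name = name + b"\x00" * (16 - len(name))
    # 16-byte header; only the first little-endian int32 (the unpadded
    # payload length) is used, the remaining three are zero.
    block_length_bytes = struct.pack("<iiii", len(data), 0, 0, 0)
    # Pad the payload itself to a multiple of 16 bytes.
    block_data = data + b"\x00" * (15 - (len(data) - 1) % 16)
    return block_name + block_length_bytes + block_data


blob = pack_block(b"cmd_data", b"\x01\x02\x03")
assert len(blob) == 48 and len(blob) % 16 == 0
```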

backends/arm/operators/node_visitor.py

Lines changed: 1 addition & 1 deletion
@@ -44,7 +44,7 @@ def define_node(


 # container for all node visitors
-_node_visitor_dicts = {  # type: ignore[var-annotated]
+_node_visitor_dicts: Dict[TosaSpecification, Dict] = {
     TosaSpecification.create_from_string("TOSA-0.80+BI"): {},
     TosaSpecification.create_from_string("TOSA-0.80+MI"): {},
 }
backends/arm/test/passes/test_ioquantization_pass.py

Lines changed: 70 additions & 0 deletions
@@ -0,0 +1,70 @@
+# Copyright 2025 Arm Limited and/or its affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import unittest
+
+import torch
+
+from executorch.backends.arm.test import common
+
+from executorch.backends.arm.test.tester.arm_tester import ArmTester
+from executorch.exir.passes.quantize_io_pass import QuantizeInputs, QuantizeOutputs
+
+
+class SimpleModel(torch.nn.Module):
+    def forward(self, x, y):
+        return x + y
+
+    def get_inputs(self):
+        a = torch.rand(1, 2, 2, 1)
+        b = torch.rand(1, 2, 2, 1)
+        return (a, b)
+
+
+class TestIOQuantizationPass(unittest.TestCase):
+    """
+    Test that the executorch/exir/passes/quantize_io_pass passes work (meaning we don't get Q/DQ nodes) on a simple model
+    """
+
+    def test_ioquantisation_pass(self):
+        model = SimpleModel()
+        tester = (
+            ArmTester(
+                model,
+                example_inputs=model.get_inputs(),
+                compile_spec=common.get_u55_compile_spec(),
+            )
+            .quantize()
+            .export()
+            .to_edge()
+            .check_count(
+                {
+                    "executorch_exir_dialects_edge__ops_quantized_decomposed_quantize_per_tensor_default": 3
+                }
+            )
+            .check_count(
+                {
+                    "executorch_exir_dialects_edge__ops_quantized_decomposed_dequantize_per_tensor_default": 3
+                }
+            )
+            .partition()
+            .check_count(
+                {
+                    "executorch_exir_dialects_edge__ops_quantized_decomposed_quantize_per_tensor_default": 2
+                }
+            )
+            .check_count(
+                {
+                    "executorch_exir_dialects_edge__ops_quantized_decomposed_dequantize_per_tensor_default": 1
+                }
+            )
+        )
+        edge = tester.get_artifact()
+        edge.transform(
+            passes=[QuantizeInputs(edge, [0, 1]), QuantizeOutputs(edge, [0])]
+        )
+        tester.check_not(["edge__ops_quantized_decomposed_quantize_per_tensor"])
+        tester.check_not(["edge__ops_quantized_decomposed_dequantize_per_tensor"])

backends/arm/util/arm_model_evaluator.py

Lines changed: 2 additions & 2 deletions
@@ -59,7 +59,7 @@ def __init__(
         if tosa_output_path:
             self.tosa_output_path = tosa_output_path
         else:
-            self.tosa_output_path = None  # type: ignore[assignment]
+            self.tosa_output_path = ""

     def get_model_error(self) -> defaultdict:
         """
@@ -104,7 +104,7 @@ def get_compression_ratio(self) -> float:

         return compression_ratio

-    def evaluate(self) -> dict[Any]:  # type: ignore[type-arg]
+    def evaluate(self) -> dict[str, Any]:
         model_error_dict = self.get_model_error()

         output_metrics = {"name": self.model_name, "metrics": dict(model_error_dict)}
