
Commit 06cfdc9

Merge branch 'main' into add_hardsigmoid_op
2 parents 67853c4 + 9d4e1ee commit 06cfdc9

5 files changed, +87 -5 lines changed


backends/arm/README.md

Lines changed: 12 additions & 0 deletions
@@ -122,6 +122,18 @@ Then you can run the tests with
 pytest -c /dev/null -v -n auto backends/arm/test --arm_run_corstoneFVP
 ```

+## Passes
+
+With the default passes in the Arm Ethos-U backend, assuming the model lowers fully to the
+Ethos-U, the exported program is composed of a Quantize node, the Ethos-U custom delegate
+and a Dequantize node. In some circumstances you may want to feed quantized input to the Neural
+Network straight away, e.g. if you have a camera sensor outputting (u)int8 data and want to keep all the
+arithmetic of the application in the int8 domain. For these cases, you can apply the passes in
+`exir/passes/quantize_io_pass.py`. See the unit test in `executorch/backends/arm/
+test/passes/test_ioquantization_pass.py` for an example of how to feed quantized inputs and
+obtain quantized outputs.
+
+
 ### Code coverage

 To get code coverage:
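For reference, the sketch below condenses the unit test added in this commit into a standalone example of applying the I/O quantization passes after lowering for Ethos-U55. The tiny `Add` module and the input shapes are illustrative only; the actual test lives in `backends/arm/test/passes/test_ioquantization_pass.py` further down in this diff.

```python
import torch

from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.arm_tester import ArmTester
from executorch.exir.passes.quantize_io_pass import QuantizeInputs, QuantizeOutputs


# Illustrative model; any fully delegated int8 model would do.
class Add(torch.nn.Module):
    def forward(self, x, y):
        return x + y


example_inputs = (torch.rand(1, 2, 2, 1), torch.rand(1, 2, 2, 1))

# Quantize, export and partition for Ethos-U55 using the Arm test tooling.
tester = (
    ArmTester(
        Add(),
        example_inputs=example_inputs,
        compile_spec=common.get_u55_compile_spec(),
    )
    .quantize()
    .export()
    .to_edge()
    .partition()
)

# Fold away the remaining Q/DQ nodes at the graph boundary so the program
# accepts int8 data for inputs 0 and 1 and returns int8 data for output 0.
edge = tester.get_artifact()
edge.transform(passes=[QuantizeInputs(edge, [0, 1]), QuantizeOutputs(edge, [0])])
```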

backends/arm/arm_vela.py

Lines changed: 2 additions & 2 deletions
@@ -96,13 +96,13 @@ def vela_compile(tosa_graph, args: List[str], shape_order=None):
         block_name = block_name + b"\x00" * (16 - len(block_name))

         # We need the actual unpadded block lengths for hw setup
-        block_length = struct.pack("<iiii", len(bin_blocks[key]), 0, 0, 0)  # type: ignore[assignment]
+        block_length_bytes = struct.pack("<iiii", len(bin_blocks[key]), 0, 0, 0)

         # Pad block data to multiple of 16 bytes
         block_data = bin_blocks[key]
         block_data = block_data + b"\x00" * (15 - (len(block_data) - 1) % 16)

-        block = block_name + block_length + block_data  # type: ignore[operator]
+        block = block_name + block_length_bytes + block_data
         blocks = blocks + block

     return blocks
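For context, each Vela output block packed above consists of a 16-byte zero-padded name, a 16-byte header whose first int32 holds the unpadded payload length, and the payload padded to a multiple of 16 bytes. The sketch below reproduces that layout; the `pack_block` helper and the example block name are illustrative only, not part of the backend.

```python
import struct


def pack_block(name: bytes, data: bytes) -> bytes:
    # 16-byte block name, zero padded (assumes len(name) <= 16).
    block_name = name + b"\x00" * (16 - len(name))
    # 16-byte header; only the first little-endian int32 (the unpadded
    # payload length) is used, the remaining three are zero.
    block_length_bytes = struct.pack("<iiii", len(data), 0, 0, 0)
    # Pad the payload itself to a multiple of 16 bytes.
    block_data = data + b"\x00" * (15 - (len(data) - 1) % 16)
    return block_name + block_length_bytes + block_data


blob = pack_block(b"cmd_data", b"\x01\x02\x03")
assert len(blob) == 48 and len(blob) % 16 == 0
```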

backends/arm/operators/node_visitor.py

Lines changed: 1 addition & 1 deletion
@@ -44,7 +44,7 @@ def define_node(


 # container for all node visitors
-_node_visitor_dicts = {  # type: ignore[var-annotated]
+_node_visitor_dicts: Dict[TosaSpecification, Dict] = {
     TosaSpecification.create_from_string("TOSA-0.80+BI"): {},
     TosaSpecification.create_from_string("TOSA-0.80+MI"): {},
 }
backends/arm/test/passes/test_ioquantization_pass.py

Lines changed: 70 additions & 0 deletions
@@ -0,0 +1,70 @@
+# Copyright 2025 Arm Limited and/or its affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import unittest
+
+import torch
+
+from executorch.backends.arm.test import common
+
+from executorch.backends.arm.test.tester.arm_tester import ArmTester
+from executorch.exir.passes.quantize_io_pass import QuantizeInputs, QuantizeOutputs
+
+
+class SimpleModel(torch.nn.Module):
+    def forward(self, x, y):
+        return x + y
+
+    def get_inputs(self):
+        a = torch.rand(1, 2, 2, 1)
+        b = torch.rand(1, 2, 2, 1)
+        return (a, b)
+
+
+class TestIOQuantizationPass(unittest.TestCase):
+    """
+    Test that the executorch/exir/passes/quantize_io_pass passes work (meaning we don't get Q/DQ nodes) on a simple model
+    """
+
+    def test_ioquantisation_pass(self):
+        model = SimpleModel()
+        tester = (
+            ArmTester(
+                model,
+                example_inputs=model.get_inputs(),
+                compile_spec=common.get_u55_compile_spec(),
+            )
+            .quantize()
+            .export()
+            .to_edge()
+            .check_count(
+                {
+                    "executorch_exir_dialects_edge__ops_quantized_decomposed_quantize_per_tensor_default": 3
+                }
+            )
+            .check_count(
+                {
+                    "executorch_exir_dialects_edge__ops_quantized_decomposed_dequantize_per_tensor_default": 3
+                }
+            )
+            .partition()
+            .check_count(
+                {
+                    "executorch_exir_dialects_edge__ops_quantized_decomposed_quantize_per_tensor_default": 2
+                }
+            )
+            .check_count(
+                {
+                    "executorch_exir_dialects_edge__ops_quantized_decomposed_dequantize_per_tensor_default": 1
+                }
+            )
+        )
+        edge = tester.get_artifact()
+        edge.transform(
+            passes=[QuantizeInputs(edge, [0, 1]), QuantizeOutputs(edge, [0])]
+        )
+        tester.check_not(["edge__ops_quantized_decomposed_quantize_per_tensor"])
+        tester.check_not(["edge__ops_quantized_decomposed_dequantize_per_tensor"])

backends/arm/util/arm_model_evaluator.py

Lines changed: 2 additions & 2 deletions
@@ -59,7 +59,7 @@ def __init__(
         if tosa_output_path:
             self.tosa_output_path = tosa_output_path
         else:
-            self.tosa_output_path = None  # type: ignore[assignment]
+            self.tosa_output_path = ""

     def get_model_error(self) -> defaultdict:
         """
@@ -104,7 +104,7 @@ def get_compression_ratio(self) -> float:

         return compression_ratio

-    def evaluate(self) -> dict[Any]:  # type: ignore[type-arg]
+    def evaluate(self) -> dict[str, Any]:
         model_error_dict = self.get_model_error()

         output_metrics = {"name": self.model_name, "metrics": dict(model_error_dict)}
