Commit 4d7e3ee

Arm backend: Improve quantizer configuration in tests (pytorch#16073)
Updating the quantizer in test pipelines has been rather cumbersome. As we anticipate more tests with different quantization schemes, we want to make this easy. The quantizer can now be accessed and modified using pipeline.quantizer.set_[...]; see the patch for examples.

Signed-off-by: Erik Lundell <[email protected]>
1 parent 4860984 commit 4d7e3ee
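
In practice, the change replaces hand-built Quantize stages with in-place tweaks to the pipeline's own quantizer. A minimal before/after sketch, assuming pipeline is an already-constructed test pipeline such as TosaPipelineINT (the sketch itself is illustrative, not part of the patch):

```python
from executorch.backends.arm.quantizer import (
    get_symmetric_quantization_config,
    TOSAQuantizer,
)
from executorch.backends.arm.tosa import TosaSpecification
from executorch.backends.xnnpack.test.tester import Quantize

# Before: build a TOSAQuantizer and a Quantize stage by hand, then swap it
# into the pipeline.
quantizer = TOSAQuantizer(TosaSpecification.create_from_string("TOSA-1.0+INT"))
quantizer.set_global(get_symmetric_quantization_config())
pipeline.change_args(
    "quantize", Quantize(quantizer, get_symmetric_quantization_config())
)

# After: configure the pipeline's built-in quantizer directly before running.
pipeline.quantizer.set_global(get_symmetric_quantization_config())
pipeline.run()
```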

6 files changed (+67, -163 lines)

backends/arm/test/misc/test_quant_custom_meta.py
Lines changed: 4 additions & 19 deletions

```diff
@@ -5,13 +5,7 @@

 import pytest
 import torch
-from executorch.backends.arm.quantizer import (
-    get_symmetric_quantization_config,
-    TOSAQuantizer,
-)
 from executorch.backends.arm.test.tester.test_pipeline import TosaPipelineINT
-from executorch.backends.arm.tosa import TosaSpecification
-from executorch.backends.xnnpack.test.tester import Quantize


 class AddSigmoidMul(torch.nn.Module):
@@ -23,15 +17,6 @@ def forward(self, x, y):
         return self.sigmoid(x + y) * x


-def get_selective_quantizer(modules):
-    quantizer = TOSAQuantizer(TosaSpecification.create_from_string("TOSA-1.0+INT"))
-    quantizer.set_global(get_symmetric_quantization_config())
-    for module in modules:
-        quantizer.set_module_type(module, None)
-
-    return Quantize(quantizer, get_symmetric_quantization_config())
-
-
 @pytest.mark.parametrize("fp_extension", [True, False])
 def test_qdq_squeezed_fp_op(fp_extension: bool):
     """Test that a float operation surrounded by quantize-dequantize pairs
@@ -52,7 +37,7 @@ def test_qdq_squeezed_fp_op(fp_extension: bool):
         exir_op=exir_op,
         tosa_extensions=["FP"] if fp_extension else None,
     )
-    pipeline.change_args("quantize", get_selective_quantizer([torch.nn.Sigmoid]))
+    pipeline.quantizer.set_module_type(torch.nn.Sigmoid, None)  # type: ignore

     if not fp_extension:
         # In case we don't have the FP extension, the unquantized part of the
@@ -114,7 +99,7 @@ def test_quantized_to_float_transition(fp_extension: bool):
            "executorch_exir_dialects_edge__ops_quantized_decomposed_quantize_per_tensor_default": 2,
         },
     )
-    pipeline.change_args(
-        "quantize", get_selective_quantizer([torch.nn.Sigmoid, torch.nn.Conv1d])
-    )
+    pipeline.quantizer.set_module_type(torch.nn.Sigmoid, None)  # type: ignore
+    pipeline.quantizer.set_module_type(torch.nn.Conv1d, None)  # type: ignore
+
     pipeline.run()
```
backends/arm/test/models/test_lstm_arm.py
Lines changed: 12 additions & 30 deletions

```diff
@@ -9,10 +9,9 @@
 import torch
 from executorch.backends.arm.quantizer.arm_quantizer import (
     get_symmetric_a16w8_quantization_config,
-    TOSAQuantizer,
 )

-from executorch.backends.arm.test import common, conftest
+from executorch.backends.arm.test import common
 from executorch.backends.arm.test.tester.test_pipeline import (
     EthosU55PipelineINT,
     EthosU85PipelineINT,
@@ -21,9 +20,6 @@
     VgfPipeline,
 )

-from executorch.backends.arm.tosa import TosaSpecification
-from executorch.backends.xnnpack.test.tester import Quantize
-
 from torch.nn.quantizable.modules import rnn

 input_t = Tuple[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]  # (h0, c0)
@@ -144,27 +140,6 @@ def test_lstm_vgf_FP():
     pipeline.run()


-def get_symmetric_a16w8_lstm_quantizer(per_channel_quantization=False):
-    tosa_version = conftest.get_option("tosa_version")
-    tosa_profiles = {
-        "1.0": TosaSpecification.create_from_string("TOSA-1.0+INT+int16"),
-    }
-
-    quantizer = TOSAQuantizer(tosa_profiles[tosa_version])
-    quantizer.set_global(
-        get_symmetric_a16w8_quantization_config(
-            is_per_channel=per_channel_quantization, epsilon=2**-16
-        )
-    )
-
-    return Quantize(
-        quantizer,
-        get_symmetric_a16w8_quantization_config(
-            is_per_channel=per_channel_quantization, epsilon=2**-16
-        ),
-    )
-
-
 def test_lstm_16a8w_tosa_INT():
     """Test LSTM model with 16A8W quantization (16-bit activations, 8-bit weights)"""

@@ -177,8 +152,9 @@ def test_lstm_16a8w_tosa_INT():
         use_to_edge_transform_and_lower=True,
         tosa_extensions=["int16"],
     )
-
-    pipeline.change_args("quantize", get_symmetric_a16w8_lstm_quantizer())
+    pipeline.quantizer.set_global(
+        get_symmetric_a16w8_quantization_config(is_per_channel=False, epsilon=2**-16)
+    )
     pipeline.run()


@@ -195,7 +171,10 @@ def test_lstm_16a8w_u55_INT():
         use_to_edge_transform_and_lower=True,
     )

-    pipeline.change_args("quantize", get_symmetric_a16w8_lstm_quantizer())
+    pipeline.quantizer.set_global(
+        get_symmetric_a16w8_quantization_config(is_per_channel=False, epsilon=2**-16)
+    )
+
     pipeline.run()


@@ -208,5 +187,8 @@ def test_lstm_16a8w_u85_INT():
         exir_ops=[],
         use_to_edge_transform_and_lower=True,
     )
-    pipeline.change_args("quantize", get_symmetric_a16w8_lstm_quantizer())
+    pipeline.quantizer.set_global(
+        get_symmetric_a16w8_quantization_config(is_per_channel=False, epsilon=2**-16)
+    )
+
     pipeline.run()
```

backends/arm/test/ops/test_add.py
Lines changed: 10 additions & 49 deletions

```diff
@@ -5,24 +5,21 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.

-from typing import cast, Tuple
+from typing import Tuple

 import torch
 from executorch.backends.arm.quantizer import arm_quantizer
 from executorch.backends.arm.quantizer.arm_quantizer import (
     get_symmetric_a16w8_quantization_config,
-    TOSAQuantizer,
 )
-from executorch.backends.arm.test import common, conftest
+from executorch.backends.arm.test import common
 from executorch.backends.arm.test.tester.test_pipeline import (
     EthosU55PipelineINT,
     EthosU85PipelineINT,
     TosaPipelineFP,
     TosaPipelineINT,
     VgfPipeline,
 )
-from executorch.backends.arm.tosa import TosaSpecification
-from executorch.backends.xnnpack.test.tester import Quantize
 from torchao.quantization.pt2e import HistogramObserver
 from torchao.quantization.pt2e.quantizer import QuantizationSpec

@@ -101,14 +98,8 @@ def test_add_tensor_tosa_INT(test_data: input_t1):
 @common.parametrize("test_data", Add.test_data)
 def test_add_tensor_tosa_INT_i32(test_data: input_t1):
     pipeline = TosaPipelineINT[input_t1](Add(), test_data(), aten_op, exir_op)
-    tosa_version = cast(str, conftest.get_option("tosa_version"))
-    tosa_profiles = {
-        "1.0": TosaSpecification.create_from_string("TOSA-1.0+INT"),
-    }
-    # Create a quantizer with int8 quantization on the input and output but int32 on everything else.
-    quantizer = arm_quantizer.TOSAQuantizer(tosa_profiles[tosa_version])

-    quantizer.set_io(arm_quantizer.get_symmetric_quantization_config())
+    pipeline.quantizer.set_io(arm_quantizer.get_symmetric_quantization_config())
     observer_options = {"eps": 2**-16}
     observer = HistogramObserver.with_args(**observer_options)
     input_act_qspec = QuantizationSpec(
@@ -125,12 +116,10 @@ def test_add_tensor_tosa_INT_i32(test_data: input_t1):
         quant_max=2**31 - 1,
         quant_min=-(2**31),
     )
-    # This quantization_config will be set as global config.
     quantization_config = arm_quantizer.QuantizationConfig(
         input_act_qspec, output_act_qspec, None, None
     )
-    quantize_stage = Quantize(quantizer, quantization_config)
-    pipeline.change_args("quantize", quantize_stage)
+    pipeline.quantizer.set_global(quantization_config)

     # Check that we get the additional (dq -> q
     pipeline.add_stage_after(
@@ -239,25 +228,6 @@ def test_add_tensor_vgf_INT(test_data: input_t1):
     pipeline.run()


-def get_symmetric_a16w8_add_quantizer(per_channel_quantization=False):
-    tosa_version = conftest.get_option("tosa_version")
-    tosa_profiles = {
-        "1.0": TosaSpecification.create_from_string("TOSA-1.0+INT+int16"),
-    }
-
-    quantizer = TOSAQuantizer(tosa_profiles[tosa_version])
-    quantizer.set_global(
-        get_symmetric_a16w8_quantization_config(is_per_channel=per_channel_quantization)
-    )
-
-    return Quantize(
-        quantizer,
-        get_symmetric_a16w8_quantization_config(
-            is_per_channel=per_channel_quantization
-        ),
-    )
-
-
 @common.parametrize("test_data", Add.test_data)
 def test_add_tensor_16a8w_tosa_INT(test_data: input_t1):
     """Test add operation with 16A8W quantization (16-bit activations, 8-bit weights)"""
@@ -273,11 +243,8 @@ def test_add_tensor_16a8w_tosa_INT(test_data: input_t1):
         tosa_extensions=["int16"],
     )

-    pipeline.change_args(
-        "quantize",
-        get_symmetric_a16w8_add_quantizer(
-            per_channel_quantization=per_channel_quantization
-        ),
+    pipeline.quantizer.set_global(
+        get_symmetric_a16w8_quantization_config(is_per_channel=per_channel_quantization)
     )
     pipeline.run()

@@ -297,11 +264,8 @@ def test_add_tensor_16a8w_u55_INT16(test_data: input_t1):
         use_to_edge_transform_and_lower=True,
     )

-    pipeline.change_args(
-        "quantize",
-        get_symmetric_a16w8_add_quantizer(
-            per_channel_quantization=per_channel_quantization
-        ),
+    pipeline.quantizer.set_global(
+        get_symmetric_a16w8_quantization_config(is_per_channel=per_channel_quantization)
     )
     pipeline.run()

@@ -321,10 +285,7 @@ def test_add_tensor_16a8w_u85_INT16(test_data: input_t1):
         use_to_edge_transform_and_lower=True,
     )

-    pipeline.change_args(
-        "quantize",
-        get_symmetric_a16w8_add_quantizer(
-            per_channel_quantization=per_channel_quantization
-        ),
+    pipeline.quantizer.set_global(
+        get_symmetric_a16w8_quantization_config(is_per_channel=per_channel_quantization)
     )
     pipeline.run()
```

backends/arm/test/ops/test_cat.py
Lines changed: 8 additions & 41 deletions

```diff
@@ -11,9 +11,8 @@
 import torch
 from executorch.backends.arm.quantizer.arm_quantizer import (
     get_symmetric_a16w8_quantization_config,
-    TOSAQuantizer,
 )
-from executorch.backends.arm.test import common, conftest
+from executorch.backends.arm.test import common

 from executorch.backends.arm.test.tester.test_pipeline import (
     EthosU55PipelineINT,
@@ -22,8 +21,6 @@
     TosaPipelineINT,
     VgfPipeline,
 )
-from executorch.backends.arm.tosa.specification import TosaSpecification
-from executorch.backends.xnnpack.test.tester import Quantize

 input_t1 = Tuple[torch.Tensor]  # Input x

@@ -157,25 +154,6 @@ def test_cat_vgf_INT(test_data: Tuple):
     pipeline.run()


-def get_symmetric_a16w8_cat_quantizer(per_channel_quantization=False):
-    tosa_version = conftest.get_option("tosa_version")
-    tosa_profiles = {
-        "1.0": TosaSpecification.create_from_string("TOSA-1.0+INT+int16"),
-    }
-
-    quantizer = TOSAQuantizer(tosa_profiles[tosa_version])
-    quantizer.set_global(
-        get_symmetric_a16w8_quantization_config(is_per_channel=per_channel_quantization)
-    )
-
-    return Quantize(
-        quantizer,
-        get_symmetric_a16w8_quantization_config(
-            is_per_channel=per_channel_quantization
-        ),
-    )
-
-
 @common.parametrize("test_data", Cat.test_parameters)
 def test_cat_16a8w_tosa_INT(test_data: Tuple):
     """Test cat operation with 16A8W quantization (16-bit activations, 8-bit weights)"""
@@ -190,12 +168,8 @@ def test_cat_16a8w_tosa_INT(test_data: Tuple):
         use_to_edge_transform_and_lower=True,
         tosa_extensions=["int16"],
     )
-
-    pipeline.change_args(
-        "quantize",
-        get_symmetric_a16w8_cat_quantizer(
-            per_channel_quantization=per_channel_quantization
-        ),
+    pipeline.quantizer.set_global(
+        get_symmetric_a16w8_quantization_config(is_per_channel=per_channel_quantization)
     )
     pipeline.run()

@@ -214,13 +188,10 @@ def test_cat_16a8w_u55_INT16(test_data: Tuple):
         per_channel_quantization=per_channel_quantization,
         use_to_edge_transform_and_lower=True,
     )
-
-    pipeline.change_args(
-        "quantize",
-        get_symmetric_a16w8_cat_quantizer(
-            per_channel_quantization=per_channel_quantization
-        ),
+    pipeline.quantizer.set_global(
+        get_symmetric_a16w8_quantization_config(is_per_channel=per_channel_quantization)
     )
+
     pipeline.run()


@@ -238,11 +209,7 @@ def test_cat_16a8w_u85_INT16(test_data: Tuple):
         per_channel_quantization=per_channel_quantization,
         use_to_edge_transform_and_lower=True,
     )
-
-    pipeline.change_args(
-        "quantize",
-        get_symmetric_a16w8_cat_quantizer(
-            per_channel_quantization=per_channel_quantization
-        ),
+    pipeline.quantizer.set_global(
+        get_symmetric_a16w8_quantization_config(is_per_channel=per_channel_quantization)
     )
     pipeline.run()
```
