Arm backend: Make per-channel quantization default

Martin Lindström · Martin Lindström · commit 8175bb3e2ca7 · 2025-06-24T08:42:05.000+02:00
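Update the default quantization granularity to per-channel rather than
per-tensor. The per_channel_quantization default of the Arm test
pipelines is flipped to True as well, and the pipelines now always build
an explicit quantizer and Quantize stage instead of passing None when
neither quantization option was requested. The now-redundant
per_channel_quantization=True arguments in the MobileNetV2 tests are
removed, and the example compiler script and notebook are updated to use
per-channel quantization.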

Change-Id: I35d5c62741c7f93b916560874689245db96a588b
diff --git a/backends/arm/quantizer/arm_quantizer.py b/backends/arm/quantizer/arm_quantizer.py
@@ -60,7 +60,7 @@
 
 @functools.lru_cache
 def get_symmetric_quantization_config(
-    is_per_channel: bool = False,
+    is_per_channel: bool = True,
     is_qat: bool = False,
     is_dynamic: bool = False,
     act_qmin: int = -128,
diff --git a/backends/arm/test/models/test_mobilenet_v2_arm.py b/backends/arm/test/models/test_mobilenet_v2_arm.py
@@ -46,7 +46,6 @@ def test_mv2_tosa_BI():
         aten_op=[],
         exir_op=[],
         use_to_edge_transform_and_lower=True,
-        per_channel_quantization=True,
         atol=0.25,
         qtol=1,
     )
@@ -63,7 +62,6 @@ def test_mv2_u55_BI():
         exir_ops=[],
         run_on_fvp=True,
         use_to_edge_transform_and_lower=True,
-        per_channel_quantization=True,
         atol=0.25,
         qtol=1,
     )
@@ -80,7 +78,6 @@ def test_mv2_u85_BI():
         exir_ops=[],
         run_on_fvp=True,
         use_to_edge_transform_and_lower=True,
-        per_channel_quantization=True,
         atol=0.25,
         qtol=1,
     )
diff --git a/backends/arm/test/tester/test_pipeline.py b/backends/arm/test/tester/test_pipeline.py
@@ -299,7 +299,7 @@ def __init__(
         run_on_tosa_ref_model: bool = True,
         tosa_version: str = "TOSA-0.80+BI",
         symmetric_io_quantization: bool = False,
-        per_channel_quantization: bool = False,
+        per_channel_quantization: bool = True,
         use_to_edge_transform_and_lower: bool = True,
         custom_path: str = None,
         atol: float = 1e-03,
@@ -316,16 +316,14 @@ def __init__(
         compile_spec = common.get_tosa_compile_spec(
             tosa_profiles[tosa_version], custom_path=custom_path
         )
-        if symmetric_io_quantization or per_channel_quantization:
-            quantizer = TOSAQuantizer(tosa_profiles[tosa_version])
-            quantization_config = get_symmetric_quantization_config(
-                is_per_channel=per_channel_quantization
-            )
-            if symmetric_io_quantization:
-                quantizer.set_io(quantization_config)
-            quant_stage = Quantize(quantizer, quantization_config)
-        else:
-            quant_stage = None
+
+        quantizer = TOSAQuantizer(tosa_profiles[tosa_version])
+        quantization_config = get_symmetric_quantization_config(
+            is_per_channel=per_channel_quantization
+        )
+        if symmetric_io_quantization:
+            quantizer.set_io(quantization_config)
+        quant_stage = Quantize(quantizer, quantization_config)
 
         super().__init__(
             module,
@@ -474,24 +472,21 @@ def __init__(
         exir_ops: Optional[str | List[str]] = None,
         run_on_fvp: bool = True,
         symmetric_io_quantization: bool = False,
-        per_channel_quantization: bool = False,
+        per_channel_quantization: bool = True,
         use_to_edge_transform_and_lower: bool = True,
         custom_path: str = None,
         atol: float = 1e-03,
         rtol: float = 1e-03,
         qtol: int = 1,
     ):
         compile_spec = common.get_u55_compile_spec(custom_path=custom_path)
-        if symmetric_io_quantization or per_channel_quantization:
-            quantizer = EthosUQuantizer(compile_spec)
-            quantization_config = get_symmetric_quantization_config(
-                is_per_channel=per_channel_quantization
-            )
-            if symmetric_io_quantization:
-                quantizer.set_io(quantization_config)
-            quant_stage = Quantize(quantizer, quantization_config)
-        else:
-            quant_stage = None
+        quantizer = EthosUQuantizer(compile_spec)
+        quantization_config = get_symmetric_quantization_config(
+            is_per_channel=per_channel_quantization
+        )
+        if symmetric_io_quantization:
+            quantizer.set_io(quantization_config)
+        quant_stage = Quantize(quantizer, quantization_config)
 
         super().__init__(
             module,
@@ -564,24 +559,21 @@ def __init__(
         exir_ops: str | List[str] = None,
         run_on_fvp: bool = True,
         symmetric_io_quantization: bool = False,
-        per_channel_quantization: bool = False,
+        per_channel_quantization: bool = True,
         use_to_edge_transform_and_lower: bool = True,
         custom_path: str = None,
         atol: float = 1e-03,
         rtol: float = 1e-03,
         qtol: int = 1,
     ):
         compile_spec = common.get_u85_compile_spec(custom_path=custom_path)
-        if symmetric_io_quantization or per_channel_quantization:
-            quantizer = EthosUQuantizer(compile_spec)
-            quantization_config = get_symmetric_quantization_config(
-                is_per_channel=per_channel_quantization
-            )
-            if symmetric_io_quantization:
-                quantizer.set_io(quantization_config)
-            quant_stage = Quantize(quantizer, quantization_config)
-        else:
-            quant_stage = None
+        quantizer = EthosUQuantizer(compile_spec)
+        quantization_config = get_symmetric_quantization_config(
+            is_per_channel=per_channel_quantization
+        )
+        if symmetric_io_quantization:
+            quantizer.set_io(quantization_config)
+        quant_stage = Quantize(quantizer, quantization_config)
 
         super().__init__(
             module,
diff --git a/examples/arm/aot_arm_compiler.py b/examples/arm/aot_arm_compiler.py
@@ -160,8 +160,7 @@ def quantize(
     else:
         raise RuntimeError("Unsupported compilespecs for quantization!")
 
-    # if we set is_per_channel to True, we also need to add out_variant of quantize_per_channel/dequantize_per_channel
-    operator_config = get_symmetric_quantization_config(is_per_channel=False)
+    operator_config = get_symmetric_quantization_config()
     quantizer.set_global(operator_config)
     m = prepare_pt2e(model, quantizer)
 
diff --git a/examples/arm/ethos_u_minimal_example.ipynb b/examples/arm/ethos_u_minimal_example.ipynb
@@ -101,7 +101,7 @@
     "\n",
     "# Create and configure quantizer to use a symmetric quantization config globally on all nodes\n",
     "quantizer = EthosUQuantizer(compile_spec)\n",
-    "operator_config = get_symmetric_quantization_config(is_per_channel=False)\n",
+    "operator_config = get_symmetric_quantization_config(is_per_channel=True)\n",
     "quantizer.set_global(operator_config)\n",
     "\n",
     "# Post training quantization\n",