Arm backend: Fix shared qspec edge case (#11147)

AdrianLundell · web-flow · commit 8edf7ab467fd · 2025-05-27T21:19:27.000+02:00
Moving ops in the quantization annotator from _parent_shared_qspec to
_one_to_one_shared_input_or_input_act_qspec makes sure they are also
properly annotated when not preceded by an annotated node. This removes the
need for symmetric_io_quantization=True in the unit tests for these ops.

Signed-off-by: Adrian Lundell <adrian.lundell@arm.com>
diff --git a/backends/arm/quantizer/quantization_annotator.py b/backends/arm/quantizer/quantization_annotator.py
@@ -237,9 +237,8 @@ def _match_pattern(
     torch.ops.aten.clamp.Tensor,
 ]
 
-# Operators that can inherit the quantization specs from its parent node
-# as SharedQuantizationSpec.
-_parent_shared_qspec = [
+_one_to_one_shared_input_or_input_act_qspec = [
+    torch.ops.aten.clone.default,
     torch.ops.aten.hardtanh.default,
     torch.ops.aten.hardtanh_.default,
     torch.ops.aten.relu.default,
@@ -254,11 +253,6 @@ def _match_pattern(
     torch.ops.aten.flatten.using_ints,
     torch.ops.aten.dropout.default,
     torch.ops.aten.dropout_.default,
-    torch.ops.aten.where,
-    operator.getitem,
-]
-
-_one_to_one_shared_input_or_input_act_qspec = [
     torch.ops.aten.adaptive_avg_pool2d.default,
     torch.ops.aten.alias_copy.default,
 ]
@@ -404,6 +398,9 @@ def any_or_hardtanh_min_zero(n: Node):
         ]
         quant_properties.quant_output = _QuantProperty(0, shared_qspec)  # type: ignore[arg-type]
     elif node.target in _one_to_one_shared_input_or_input_act_qspec:
+        if not isinstance(node.args[0], Node):
+            return None
+
         input_qspec = (
             SharedQuantizationSpec(node.args[0])  # type: ignore[arg-type]
             if is_output_annotated(node.args[0])  # type: ignore
@@ -458,19 +455,16 @@ def any_or_hardtanh_min_zero(n: Node):
             ),
         ]
         quant_properties.quant_output = None
-    elif node.target in _parent_shared_qspec:
-        if not isinstance(node.args[0], Node):
-            return None
-
-        if not is_output_annotated(node.args[0]):  # type: ignore[attr-defined]
+    elif node.target in [torch.ops.aten.scalar_tensor.default]:
+        quant_properties.quant_inputs = []
+        quant_properties.quant_output = _QuantProperty(0, output_act_qspec)
+    elif node.target in [operator.getitem]:
+        if not is_output_annotated(node.args[0]):  # type: ignore[attr-defined, arg-type]
             return None
 
-        shared_qspec = SharedQuantizationSpec(node.args[0])
+        shared_qspec = SharedQuantizationSpec(node.args[0])  # type: ignore[arg-type]
         quant_properties.quant_inputs = [_QuantProperty(0, shared_qspec)]  # type: ignore[arg-type]
         quant_properties.quant_output = _QuantProperty(0, shared_qspec)  # type: ignore[arg-type]
-    elif node.target in [torch.ops.aten.scalar_tensor.default]:
-        quant_properties.quant_inputs = []
-        quant_properties.quant_output = _QuantProperty(0, output_act_qspec)
     else:
         return None
 
diff --git a/backends/arm/test/ops/test_avg_pool2d.py b/backends/arm/test/ops/test_avg_pool2d.py
@@ -99,7 +99,6 @@ def test_avg_pool2d_tosa_BI(test_module):
         input_tensor,
         aten_op,
         exir_op,
-        symmetric_io_quantization=True,
         run_on_tosa_ref_model=conftest.is_option_enabled("tosa_ref_model"),
     )
     if conftest.is_option_enabled("tosa_ref_model"):
@@ -118,7 +117,6 @@ def test_avg_pool2d_u55_BI(test_module):
         aten_op,
         exir_op,
         run_on_fvp=True,
-        symmetric_io_quantization=True,
     )
     pipeline.change_args("run_method_and_compare_outputs", qtol=1, atol=1, rtol=1)
     pipeline.run()
@@ -135,7 +133,6 @@ def test_avg_pool2d_u85_BI(test_module):
         aten_op,
         exir_op,
         run_on_fvp=True,
-        symmetric_io_quantization=True,
     )
     pipeline.change_args("run_method_and_compare_outputs", qtol=1, atol=1, rtol=1)
 
@@ -164,7 +161,7 @@ def test_avg_pool2d_u85_BI(test_module):
 
 
 @common.parametrize("reject_module", reject_modules)
-def test_avg_pool2d_tosa_BI_not_delegated(reject_module):
+def test_avg_pool2d_u55_BI_not_delegated(reject_module):
 
     model, test_data = reject_module()
 
@@ -174,5 +171,6 @@ def test_avg_pool2d_tosa_BI_not_delegated(reject_module):
         non_delegated_ops={},
         n_expected_delegates=0,
         quantize=True,
+        u55_subset=True,
     )
     pipeline.run()
diff --git a/backends/arm/test/ops/test_clamp.py b/backends/arm/test/ops/test_clamp.py
@@ -77,7 +77,6 @@ def test_clamp_tosa_BI(test_data):
         (input_tensor,),
         aten_op,
         exir_op,
-        symmetric_io_quantization=True,
     )
     pipeline.change_args("run_method_and_compare_outputs", qtol=1)
 
@@ -97,7 +96,6 @@ def test_clamp_u55_BI(test_data):
         aten_op,
         exir_op,
         run_on_fvp=True,
-        symmetric_io_quantization=True,
     )
 
     pipeline.change_args("run_method_and_compare_outputs", qtol=1)
@@ -117,7 +115,6 @@ def test_clamp_u85_BI(test_data):
         aten_op,
         exir_op,
         run_on_fvp=True,
-        symmetric_io_quantization=True,
     )
     pipeline.change_args("run_method_and_compare_outputs", qtol=1)
 
diff --git a/backends/arm/test/ops/test_clone.py b/backends/arm/test/ops/test_clone.py
@@ -65,7 +65,6 @@ def test_clone_tosa_BI(test_data):
         test_data(),
         aten_op,
         exir_op,
-        symmetric_io_quantization=True,
     )
     pipeline.run()
 
@@ -82,7 +81,6 @@ def test_clone_u55_BI(test_data):
         aten_op,
         exir_op,
         run_on_fvp=True,
-        symmetric_io_quantization=True,
     )
 
     pipeline.run()
@@ -100,7 +98,6 @@ def test_clone_u85_BI(test_data):
         aten_op,
         exir_op,
         run_on_fvp=True,
-        symmetric_io_quantization=True,
     )
 
     pipeline.run()
diff --git a/backends/arm/test/ops/test_hardtanh.py b/backends/arm/test/ops/test_hardtanh.py
@@ -58,7 +58,6 @@ def test_hardtanh_tosa_BI(test_data: torch.Tensor):
         (test_data(),),
         aten_op,
         exir_op,
-        symmetric_io_quantization=True,
     )
     pipeline.run()
 
@@ -72,7 +71,6 @@ def test_hardtanh_u55_BI(test_data: torch.Tensor):
         aten_op,
         exir_op,
         run_on_fvp=True,
-        symmetric_io_quantization=True,
     )
     pipeline.run()
 
@@ -86,6 +84,5 @@ def test_hardtanh_u85_BI(test_data: torch.Tensor):
         aten_op,
         exir_op,
         run_on_fvp=True,
-        symmetric_io_quantization=True,
     )
     pipeline.run()
diff --git a/backends/arm/test/ops/test_max_pool.py b/backends/arm/test/ops/test_max_pool.py
@@ -74,7 +74,6 @@ def test_max_pool2d_tosa_BI(test_data: torch.Tensor):
         (test_data,),
         aten_op,
         exir_op,
-        symmetric_io_quantization=True,
     )
     pipeline.run()
 
@@ -88,7 +87,6 @@ def test_max_pool2d_u55_BI(test_data: torch.Tensor):
         (test_data,),
         aten_op,
         exir_ops=[],
-        symmetric_io_quantization=True,
         run_on_fvp=True,
     ).run()
 
@@ -102,7 +100,6 @@ def test_max_pool2d_u85_BI(test_data: torch.Tensor):
         (test_data,),
         aten_op,
         exir_ops=[],
-        symmetric_io_quantization=True,
         run_on_fvp=True,
     ).run()
 
@@ -127,7 +124,6 @@ def test_max_pool2d_tosa_BI_mult_batches(test_data: torch.Tensor):
         (test_data,),
         aten_op,
         exir_op,
-        symmetric_io_quantization=True,
     )
     pipeline.run()
 
@@ -145,7 +141,6 @@ def test_max_pool2d_u55_BI_mult_batches(test_data: torch.Tensor):
         aten_op,
         exir_ops=[],
         run_on_fvp=True,
-        symmetric_io_quantization=True,
         use_to_edge_transform_and_lower=True,
     ).run()
 
@@ -160,7 +155,6 @@ def test_max_pool2d_u85_BI_mult_batches(test_data: torch.Tensor):
         aten_op,
         exir_op,
         run_on_fvp=True,
-        symmetric_io_quantization=True,
         use_to_edge_transform_and_lower=True,
     ).run()
 
@@ -182,7 +176,6 @@ def test_max_pool2d_u55_BI_failure_set(test_data: Tuple):
         aten_op,
         exir_op,
         run_on_fvp=False,
-        symmetric_io_quantization=True,
         use_to_edge_transform_and_lower=True,
     )
     pipeline.pop_stage("check_count.exir")
diff --git a/backends/arm/test/ops/test_permute.py b/backends/arm/test/ops/test_permute.py
@@ -67,7 +67,6 @@ def test_permute_tosa_BI(test_data: torch.Tensor):
         (test_data,),
         aten_op,
         exir_op,
-        symmetric_io_quantization=True,
     )
     pipeline.run()
 
@@ -88,7 +87,6 @@ def test_permute_u55_BI(test_data):
         aten_op,
         exir_ops="executorch_exir_dialects_edge__ops_aten_permute_copy_default",
         run_on_fvp=True,
-        symmetric_io_quantization=True,
     )
     pipeline.run()
 
@@ -104,6 +102,5 @@ def test_permute_u85_BI(test_data: torch.Tensor):
         aten_op,
         exir_ops="executorch_exir_dialects_edge__ops_aten_permute_copy_default",
         run_on_fvp=True,
-        symmetric_io_quantization=True,
     )
     pipeline.run()
diff --git a/backends/arm/test/ops/test_reciprocal.py b/backends/arm/test/ops/test_reciprocal.py
@@ -58,7 +58,6 @@ def test_reciprocal_tosa_BI(test_data: torch.Tensor):
         (test_data(),),
         aten_op,
         exir_op=[],
-        symmetric_io_quantization=True,
     )
     pipeline.run()
 
@@ -72,7 +71,6 @@ def test_reciprocal_u55_BI(test_data: torch.Tensor):
         aten_op,
         exir_ops=[],
         run_on_fvp=False,
-        symmetric_io_quantization=True,
     )
     pipeline.run()
 
diff --git a/backends/arm/test/ops/test_relu.py b/backends/arm/test/ops/test_relu.py
@@ -60,7 +60,6 @@ def test_relu_tosa_BI(test_data: torch.Tensor):
         (test_data(),),
         aten_op,
         exir_op,
-        symmetric_io_quantization=True,
     )
     pipeline.run()
 
@@ -73,7 +72,6 @@ def test_relu_u55_BI(test_data: torch.Tensor):
         aten_op,
         exir_op,
         run_on_fvp=False,
-        symmetric_io_quantization=True,
     )
     pipeline.run()
 
@@ -86,6 +84,5 @@ def test_relu_u85_BI(test_data: torch.Tensor):
         aten_op,
         exir_op,
         run_on_fvp=False,
-        symmetric_io_quantization=True,
     )
     pipeline.run()
diff --git a/backends/arm/test/ops/test_scalar_tensor.py b/backends/arm/test/ops/test_scalar_tensor.py
@@ -85,7 +85,6 @@ def test_scalar_tensor_u55_BI(test_data):
         ScalarTensor(scalar, dtype),
         tuple(data),
         ScalarTensor.aten_op,
-        symmetric_io_quantization=True,
         run_on_fvp=True,
     ).run()
 
@@ -98,6 +97,5 @@ def test_scalar_tensor_u85_BI(test_data):
         ScalarTensor(scalar, dtype),
         tuple(data),
         ScalarTensor.aten_op,
-        symmetric_io_quantization=True,
         run_on_fvp=True,
     ).run()
diff --git a/backends/arm/test/ops/test_var.py b/backends/arm/test/ops/test_var.py
@@ -175,7 +175,6 @@ def test_var_dim_tosa_BI_no_dim(test_data: Tuple):
         (test_data,),
         aten_op=[],
         exir_op=[],
-        symmetric_io_quantization=True,
     )
     pipeline.run()
 
@@ -190,7 +189,6 @@ def test_var_dim_u55_BI_no_dim(test_data: Tuple):
         aten_ops=[],
         exir_ops=[],
         run_on_fvp=True,
-        symmetric_io_quantization=True,
     )
     pipeline.run()
 
@@ -205,7 +203,6 @@ def test_var_dim_u85_BI_no_dim(test_data: Tuple):
         aten_ops=[],
         exir_ops=[],
         run_on_fvp=True,
-        symmetric_io_quantization=True,
     )
     pipeline.run()
 
@@ -231,7 +228,6 @@ def test_var_dim_tosa_BI(test_data: Tuple):
         (test_data,),
         aten_op=[],
         exir_op=[],
-        symmetric_io_quantization=True,
     )
     pipeline.run()
 
@@ -246,7 +242,6 @@ def test_var_dim_u55_BI(test_data: Tuple):
         aten_ops=[],
         exir_ops=[],
         run_on_fvp=True,
-        symmetric_io_quantization=True,
     )
     pipeline.run()
 
@@ -261,7 +256,6 @@ def test_var_dim_u85_BI(test_data: Tuple):
         aten_ops=[],
         exir_ops=[],
         run_on_fvp=True,
-        symmetric_io_quantization=True,
     )
     pipeline.run()
 
@@ -286,7 +280,6 @@ def test_var_dim_tosa_BI_correction(test_data: Tuple):
         (test_data,),
         aten_op=[],
         exir_op=[],
-        symmetric_io_quantization=True,
     )
     pipeline.run()
 
@@ -301,7 +294,6 @@ def test_var_dim_u55_BI_correction(test_data: Tuple):
         aten_ops=[],
         exir_ops=[],
         run_on_fvp=True,
-        symmetric_io_quantization=True,
     )
     pipeline.run()
 
@@ -316,6 +308,5 @@ def test_var_dim_u85_BI_correction(test_data: Tuple):
         aten_ops=[],
         exir_ops=[],
         run_on_fvp=True,
-        symmetric_io_quantization=True,
     )
     pipeline.run()

Original file line number	Diff line number	Diff line change
`@@ -77,7 +77,6 @@ def test_clamp_tosa_BI(test_data):`
`77`	`77`	`(input_tensor,),`
`78`	`78`	`aten_op,`
`79`	`79`	`exir_op,`
`80`		`- symmetric_io_quantization=True,`
`81`	`80`	`)`
`82`	`81`	`pipeline.change_args("run_method_and_compare_outputs", qtol=1)`
`83`	`82`
`@@ -97,7 +96,6 @@ def test_clamp_u55_BI(test_data):`
`97`	`96`	`aten_op,`
`98`	`97`	`exir_op,`
`99`	`98`	`run_on_fvp=True,`
`100`		`- symmetric_io_quantization=True,`
`101`	`99`	`)`
`102`	`100`
`103`	`101`	`pipeline.change_args("run_method_and_compare_outputs", qtol=1)`
`@@ -117,7 +115,6 @@ def test_clamp_u85_BI(test_data):`
`117`	`115`	`aten_op,`
`118`	`116`	`exir_op,`
`119`	`117`	`run_on_fvp=True,`
`120`		`- symmetric_io_quantization=True,`
`121`	`118`	`)`
`122`	`119`	`pipeline.change_args("run_method_and_compare_outputs", qtol=1)`
`123`	`120`
Original file line number	Diff line number	Diff line change
`@@ -65,7 +65,6 @@ def test_clone_tosa_BI(test_data):`
`65`	`65`	`test_data(),`
`66`	`66`	`aten_op,`
`67`	`67`	`exir_op,`
`68`		`- symmetric_io_quantization=True,`
`69`	`68`	`)`
`70`	`69`	`pipeline.run()`
`71`	`70`
`@@ -82,7 +81,6 @@ def test_clone_u55_BI(test_data):`
`82`	`81`	`aten_op,`
`83`	`82`	`exir_op,`
`84`	`83`	`run_on_fvp=True,`
`85`		`- symmetric_io_quantization=True,`
`86`	`84`	`)`
`87`	`85`
`88`	`86`	`pipeline.run()`
`@@ -100,7 +98,6 @@ def test_clone_u85_BI(test_data):`
`100`	`98`	`aten_op,`
`101`	`99`	`exir_op,`
`102`	`100`	`run_on_fvp=True,`
`103`		`- symmetric_io_quantization=True,`
`104`	`101`	`)`
`105`	`102`
`106`	`103`	`pipeline.run()`
Original file line number	Diff line number	Diff line change
`@@ -58,7 +58,6 @@ def test_hardtanh_tosa_BI(test_data: torch.Tensor):`
`58`	`58`	`(test_data(),),`
`59`	`59`	`aten_op,`
`60`	`60`	`exir_op,`
`61`		`- symmetric_io_quantization=True,`
`62`	`61`	`)`
`63`	`62`	`pipeline.run()`
`64`	`63`
`@@ -72,7 +71,6 @@ def test_hardtanh_u55_BI(test_data: torch.Tensor):`
`72`	`71`	`aten_op,`
`73`	`72`	`exir_op,`
`74`	`73`	`run_on_fvp=True,`
`75`		`- symmetric_io_quantization=True,`
`76`	`74`	`)`
`77`	`75`	`pipeline.run()`
`78`	`76`
`@@ -86,6 +84,5 @@ def test_hardtanh_u85_BI(test_data: torch.Tensor):`
`86`	`84`	`aten_op,`
`87`	`85`	`exir_op,`
`88`	`86`	`run_on_fvp=True,`
`89`		`- symmetric_io_quantization=True,`
`90`	`87`	`)`
`91`	`88`	`pipeline.run()`
Original file line number	Diff line number	Diff line change
`@@ -67,7 +67,6 @@ def test_permute_tosa_BI(test_data: torch.Tensor):`
`67`	`67`	`(test_data,),`
`68`	`68`	`aten_op,`
`69`	`69`	`exir_op,`
`70`		`- symmetric_io_quantization=True,`
`71`	`70`	`)`
`72`	`71`	`pipeline.run()`
`73`	`72`
`@@ -88,7 +87,6 @@ def test_permute_u55_BI(test_data):`
`88`	`87`	`aten_op,`
`89`	`88`	`exir_ops="executorch_exir_dialects_edge__ops_aten_permute_copy_default",`
`90`	`89`	`run_on_fvp=True,`
`91`		`- symmetric_io_quantization=True,`
`92`	`90`	`)`
`93`	`91`	`pipeline.run()`
`94`	`92`
`@@ -104,6 +102,5 @@ def test_permute_u85_BI(test_data: torch.Tensor):`
`104`	`102`	`aten_op,`
`105`	`103`	`exir_ops="executorch_exir_dialects_edge__ops_aten_permute_copy_default",`
`106`	`104`	`run_on_fvp=True,`
`107`		`- symmetric_io_quantization=True,`
`108`	`105`	`)`
`109`	`106`	`pipeline.run()`
Original file line number	Diff line number	Diff line change
`@@ -58,7 +58,6 @@ def test_reciprocal_tosa_BI(test_data: torch.Tensor):`
`58`	`58`	`(test_data(),),`
`59`	`59`	`aten_op,`
`60`	`60`	`exir_op=[],`
`61`		`- symmetric_io_quantization=True,`
`62`	`61`	`)`
`63`	`62`	`pipeline.run()`
`64`	`63`
`@@ -72,7 +71,6 @@ def test_reciprocal_u55_BI(test_data: torch.Tensor):`
`72`	`71`	`aten_op,`
`73`	`72`	`exir_ops=[],`
`74`	`73`	`run_on_fvp=False,`
`75`		`- symmetric_io_quantization=True,`
`76`	`74`	`)`
`77`	`75`	`pipeline.run()`
`78`	`76`
Original file line number	Diff line number	Diff line change
`@@ -60,7 +60,6 @@ def test_relu_tosa_BI(test_data: torch.Tensor):`
`60`	`60`	`(test_data(),),`
`61`	`61`	`aten_op,`
`62`	`62`	`exir_op,`
`63`		`- symmetric_io_quantization=True,`
`64`	`63`	`)`
`65`	`64`	`pipeline.run()`
`66`	`65`
`@@ -73,7 +72,6 @@ def test_relu_u55_BI(test_data: torch.Tensor):`
`73`	`72`	`aten_op,`
`74`	`73`	`exir_op,`
`75`	`74`	`run_on_fvp=False,`
`76`		`- symmetric_io_quantization=True,`
`77`	`75`	`)`
`78`	`76`	`pipeline.run()`
`79`	`77`
`@@ -86,6 +84,5 @@ def test_relu_u85_BI(test_data: torch.Tensor):`
`86`	`84`	`aten_op,`
`87`	`85`	`exir_op,`
`88`	`86`	`run_on_fvp=False,`
`89`		`- symmetric_io_quantization=True,`
`90`	`87`	`)`
`91`	`88`	`pipeline.run()`