
Commit a747e4d

Arm backend: Fix annotation of inplace ReLU (#14540)
The ResNet18 model uses many ReLUs with inplace=True. With the corrected annotation, the numerical accuracy check on ResNet18 passes with a lower atol.
1 parent f198c27 commit a747e4d
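For context, not part of the commit itself: with inplace=True, torch.nn.ReLU is captured as the in-place op torch.ops.aten.relu_.default in the ATen graph that the PT2E quantizer annotates, so annotation patterns that only list torch.ops.aten.relu.default miss it and the ReLU gets quantized separately from the preceding conv. Below is a minimal sketch for inspecting which variant ends up in the captured graph; the ConvReLU module and the use of torch.export.export_for_training here are illustrative assumptions, not code from this commit.

import torch


class ConvReLU(torch.nn.Module):
    # Mirrors the ResNet18 pattern: conv followed by ReLU(inplace=True).
    def __init__(self):
        super().__init__()
        self.conv = torch.nn.Conv2d(3, 8, kernel_size=3, padding=1)
        self.relu = torch.nn.ReLU(inplace=True)

    def forward(self, x):
        return self.relu(self.conv(x))


# Capture the pre-autograd ATen graph the way the PT2E quantization flow does.
ep = torch.export.export_for_training(ConvReLU(), (torch.randn(1, 3, 16, 16),))

# The printed graph is expected to show torch.ops.aten.relu_.default (note the
# trailing underscore) rather than aten.relu.default, which is why the annotator
# patterns in the diff below list both variants.
print(ep.module().graph)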

File tree

3 files changed: +63 −3 lines changed

backends/arm/quantizer/quantization_annotator.py
backends/arm/test/models/test_resnet18.py
backends/arm/test/ops/test_relu.py

backends/arm/quantizer/quantization_annotator.py

Lines changed: 11 additions & 2 deletions
@@ -392,7 +392,11 @@ def any_or_hardtanh_min_zero(n: Node):
                 torch.ops.aten.conv2d.padding,
             ],
             [torch.ops.aten.batch_norm.default, F.batch_norm],
-            [torch.ops.aten.relu.default, torch.ops.aten.hardtanh.default],
+            [
+                torch.ops.aten.relu.default,
+                torch.ops.aten.relu_.default,
+                torch.ops.aten.hardtanh.default,
+            ],
         ],
         filter_fn=any_or_hardtanh_min_zero,
     ):
@@ -408,6 +412,7 @@ def any_or_hardtanh_min_zero(n: Node):
         ]
     elif node.target in (
         torch.ops.aten.relu.default,
+        torch.ops.aten.relu_.default,
         torch.ops.aten.hardtanh.default,
     ):
         quant_properties.quant_output = _QuantProperty(0, output_act_qspec)
@@ -444,7 +449,11 @@ def any_or_hardtanh_min_zero(n: Node):
                 torch.ops.aten.linear.default,
                 torch.ops.aten.conv2d.padding,
             ],
-            [torch.ops.aten.relu.default, torch.ops.aten.hardtanh.default],
+            [
+                torch.ops.aten.relu.default,
+                torch.ops.aten.relu_.default,
+                torch.ops.aten.hardtanh.default,
+            ],
         ],
         any_or_hardtanh_min_zero,
     ):

backends/arm/test/models/test_resnet18.py

Lines changed: 1 addition & 1 deletion
@@ -54,7 +54,7 @@ def test_resnet_tosa_INT(per_channel_quantization):
         exir_op=[],
         use_to_edge_transform_and_lower=True,
         per_channel_quantization=per_channel_quantization,
-        atol=0.5,
+        atol=0.25,
         qtol=1,
     )
     pipeline.run()

backends/arm/test/ops/test_relu.py

Lines changed: 51 additions & 0 deletions
@@ -43,6 +43,28 @@ def forward(self, x):
         return self.relu(x)
 
 
+test_data_conv_relu = {
+    # (test_name, test_data)
+    "4d_randn_inplace=True": (lambda: (torch.randn(1, 64, 96, 96) * 1000, True)),
+    "4d_randn_inplace=False": (lambda: (torch.randn(1, 64, 96, 96) * 1000, False)),
+}
+
+
+class Conv2d_Relu_Add(torch.nn.Module):
+    def __init__(self, inplace: bool = True):
+        super().__init__()
+        self.conv1 = torch.nn.Conv2d(
+            in_channels=64, out_channels=64, kernel_size=7, padding="same"
+        )
+        self.relu = torch.nn.ReLU(inplace=inplace)
+
+    def forward(self, x: torch.Tensor):
+        y = self.conv1(x)
+        z = self.relu(y)
+        out = x + z
+        return out
+
+
 @common.parametrize("test_data", test_data_suite)
 def test_relu_tosa_FP(test_data: torch.Tensor):
     pipeline = TosaPipelineFP[input_t1](
@@ -54,6 +76,35 @@ def test_relu_tosa_FP(test_data: torch.Tensor):
     pipeline.run()
 
 
+# Test the folding of Conv2D with ReLU
+@common.parametrize("test_data", test_data_conv_relu)
+def test_conv_relu_folding_tosa_INT(test_data: torch.Tensor):
+    input_data, inplace = test_data()
+    pipeline = TosaPipelineINT[input_t1](
+        Conv2d_Relu_Add(inplace=inplace),
+        (input_data,),
+        [],
+        [],
+    )
+    # We should have:
+    # 3 quantize_per_tensor nodes: input activation, output of the conv-relu sequence, output of the add
+    # 4 dequantize_per_tensor nodes: into the conv2d input, into the add, output of the conv-relu sequence, before returning
+    # 2 dequantize_per_channel nodes: one for the weights and another one for the bias
+    # In case of incorrect annotation of the ReLU, we get separate Q/DQ pairs around both the conv2d and the ReLU and
+    # therefore more quantize_per_tensor and dequantize_per_tensor nodes.
+    pipeline.add_stage_after(
+        "quantize",
+        pipeline.tester.check_count,
+        {
+            "quantized_decomposed.quantize_per_tensor.default": 3,
+            "torch.ops.quantized_decomposed.dequantize_per_tensor.default": 4,
+            "quantized_decomposed.dequantize_per_channel.default": 2,
+        },
+        suffix="quant_nodes",
+    )
+    pipeline.run()
+
+
 @common.parametrize("test_data", test_data_suite)
 def test_relu_tosa_INT(test_data: torch.Tensor):
     pipeline = TosaPipelineINT[input_t1](
