Commit 2ab2b4e

Adjust tolerance for fp16 exp op to handle reasonable calculation discrepancies

1 parent: 75d4b2e

File tree

3 files changed: +57 −5


backends/xnnpack/test/ops/test_exp.py

Lines changed: 28 additions & 4 deletions
@@ -10,6 +10,23 @@
 from executorch.backends.xnnpack.test.tester import Tester
 
 
+def calculate_fp16_exp_tolerance(ref_output_tensor):
+    # Calculate mixed tolerance for float16 used in XNNPACK's float16 policy
+    fp16_epsilon = 9.77e-4
+    abs_tol = 2 * fp16_epsilon
+    rel_tol = 6 * fp16_epsilon
+
+    ref_abs = ref_output_tensor.abs()
+    mixed_tol = torch.maximum(
+        torch.full_like(ref_abs, abs_tol),
+        ref_abs * rel_tol,
+    )
+
+    final_atol = mixed_tol.max().item()
+
+    return final_atol, rel_tol
+
+
 class TestExp(unittest.TestCase):
     def setUp(self):
         torch._dynamo.reset()
@@ -22,6 +39,16 @@ def forward(self, x):
         return torch.exp(x)
 
     def run_exp_test(self, inputs):
+        input_tensor = inputs[0]
+
+        if input_tensor.dtype == torch.float16:
+            with torch.no_grad():
+                ref_output = torch.exp(input_tensor.to(torch.float32)).to(torch.float16)
+            atol, rtol = calculate_fp16_exp_tolerance(ref_output)
+        else:
+            atol = 1e-03
+            rtol = 1e-03
+
         (
             Tester(self.Exp(), inputs)
             .export()
@@ -31,12 +58,9 @@ def run_exp_test(self, inputs):
             .check_not(["executorch_exir_dialects_edge__ops_aten_exp_default"])
             .to_executorch()
             .serialize()
-            .run_method_and_compare_outputs()
+            .run_method_and_compare_outputs(atol=atol, rtol=rtol)
         )
 
-    # TODO (leafs1): Fix flaky tests. Land fix asap
-    # and cherry-pick onto release/0.7 branch
-    @unittest.skip(reason="For float16, numerical discepancies are too high")
     def test_fp16_exp(self):
         inputs = (torch.randn(20).to(torch.float16),)
         self.run_exp_test(inputs)
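
The helper encodes a mixed absolute/relative tolerance: fp16_epsilon (9.77e-4, roughly 2^-10, the half-precision machine epsilon) grants each element an allowance of max(2*eps, 6*eps * |ref|), and the largest per-element allowance is collapsed into a single scalar atol for the tester. The relative term matters for exp in particular: since d/dx exp(x) = exp(x), an input rounding error of delta produces a relative output error of about delta, so large outputs need proportionally more slack. A minimal standalone sketch of the comparison this enables (direct fp16 exp stands in here for the XNNPACK delegate output, which the real test obtains through Tester):

import torch

FP16_EPS = 9.77e-4  # half-precision machine epsilon, ~2**-10

def mixed_fp16_tolerance(ref):
    # Per-element allowance: max(2*eps absolute, 6*eps relative),
    # collapsed to one scalar atol as in calculate_fp16_exp_tolerance.
    abs_tol = 2 * FP16_EPS
    rel_tol = 6 * FP16_EPS
    mixed = torch.maximum(torch.full_like(ref.abs(), abs_tol), ref.abs() * rel_tol)
    return mixed.max().item(), rel_tol

x = torch.randn(20).to(torch.float16)
ref = torch.exp(x.to(torch.float32)).to(torch.float16)  # fp32 reference, rounded to fp16
out = torch.exp(x)  # stand-in for the delegate's fp16 output
atol, rtol = mixed_fp16_tolerance(ref)
ok = torch.allclose(out.to(torch.float32), ref.to(torch.float32), atol=atol, rtol=rtol)
print(ok)  # expected True: direct fp16 exp stays well inside the mixed tolerance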

backends/xnnpack/test/ops/test_gelu.py

Lines changed: 28 additions & 1 deletion
@@ -10,6 +10,21 @@
 from executorch.backends.xnnpack.test.tester import Tester
 
 
+def calculate_fp16_gelu_tolerance(ref_output_tensor):
+    fp16_epsilon = 9.77e-4
+    abs_tol = 2 * fp16_epsilon
+    rel_tol = 6 * fp16_epsilon
+
+    ref_abs = ref_output_tensor.abs()
+    mixed_tol = torch.maximum(
+        torch.full_like(ref_abs, abs_tol),
+        ref_abs * rel_tol,
+    )
+
+    final_atol = mixed_tol.max().item()
+    return final_atol, rel_tol
+
+
 class TestGelu(unittest.TestCase):
     def setUp(self):
         torch._dynamo.reset()
@@ -23,6 +38,18 @@ def forward(self, x):
         return self.gelu(x)
 
     def run_gelu_test(self, inputs):
+        input_tensor = inputs[0]
+
+        if input_tensor.dtype == torch.float16:
+            with torch.no_grad():
+                ref_output = torch.nn.functional.gelu(
+                    input_tensor.to(torch.float32)
+                ).to(torch.float16)
+            atol, rtol = calculate_fp16_gelu_tolerance(ref_output)
+        else:
+            atol = 1e-03
+            rtol = 1e-03
+
         (
             Tester(self.Gelu(), inputs)
             .export()
@@ -32,7 +59,7 @@ def run_gelu_test(self, inputs):
             .check_not(["executorch_exir_dialects_edge__ops_aten_gelu_default"])
             .to_executorch()
             .serialize()
-            .run_method_and_compare_outputs()
+            .run_method_and_compare_outputs(atol=atol, rtol=rtol)
        )
 
     def test_fp16_gelu(self):
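
The gelu change applies the same policy; only the reference computation differs (fp32 gelu, rounded back to fp16). For a feel for the numbers involved, a short sketch (input values are illustrative) printing the tolerances the helper would produce for a standard-normal fp16 input:

import torch
import torch.nn.functional as F

torch.manual_seed(0)
x = torch.randn(20).to(torch.float16)
ref = F.gelu(x.to(torch.float32)).to(torch.float16)

fp16_eps = 9.77e-4
mixed = torch.maximum(torch.full_like(ref.abs(), 2 * fp16_eps), ref.abs() * (6 * fp16_eps))
# rtol is fixed at 6*eps ~= 5.9e-3; atol scales with the largest |gelu(x)|,
# so for standard-normal inputs it typically lands on the order of 1e-2.
print(f"atol={mixed.max().item():.5f}, rtol={6 * fp16_eps:.5f}")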

test.json

Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
+{"version": "0", "xnodes": [{"xnode_union": {"input_id": 0, "output_id": 1, "flags": 0}, "xnode_union_type": "XNNConvert", "debug_handle": 14, "output_min_max": null}, {"xnode_union": {"num_dims": 4, "perm": [0, 2, 3, 1], "input_id": 1, "output_id": 2, "flags": 0}, "xnode_union_type": "XNNStaticTranspose", "debug_handle": 65535, "output_min_max": null}, {"xnode_union": {"padding_top": 0, "padding_right": 0, "padding_bottom": 0, "padding_left": 0, "kernel_height": 3, "kernel_width": 3, "subsampling_height": 1, "subsampling_width": 1, "dilation_height": 1, "dilation_width": 1, "group_input_channels": 16, "group_output_channels": 16, "groups": 1, "adjustment_height": 0, "adjustment_width": 0, "input1_id": 2, "filter_id": 3, "bias_id": 5, "output_id": 4, "flags": 0}, "xnode_union_type": "XNNConv2d", "debug_handle": 16, "output_min_max": null}, {"xnode_union": {"num_dims": 4, "perm": [0, 3, 1, 2], "input_id": 4, "output_id": 6, "flags": 0}, "xnode_union_type": "XNNStaticTranspose", "debug_handle": 65535, "output_min_max": null}, {"xnode_union": {"input_id": 6, "output_id": 7, "flags": 0}, "xnode_union_type": "XNNConvert", "debug_handle": 65535, "output_min_max": null}], "xvalues": [{"xvalue_union": {"datatype": 1, "num_dims": 4, "dims": [1, 16, 30, 30], "constant_buffer_idx": 0, "external_id": 0, "flags": 1, "id_out": 0}, "xvalue_union_type": "XNNTensorValue"}, {"xvalue_union": {"tensor_value": {"datatype": 3, "num_dims": 4, "dims": [1, 16, 30, 30], "constant_buffer_idx": 0, "external_id": 4294967295, "flags": 0, "id_out": 1}, "quant_params": {"scale": 0.019131116569042206, "zero_point": -5}, "quant_params_type": "PerTensorQuant"}, "xvalue_union_type": "XNNQuantizedTensorValue"}, {"xvalue_union": {"tensor_value": {"datatype": 3, "num_dims": 4, "dims": [1, 30, 30, 16], "constant_buffer_idx": 0, "external_id": 4294967295, "flags": 0, "id_out": 2}, "quant_params": {"scale": 0.019131116569042206, "zero_point": -5}, "quant_params_type": "PerTensorQuant"}, "xvalue_union_type": "XNNQuantizedTensorValue"}, {"xvalue_union": {"tensor_value": {"datatype": 3, "num_dims": 4, "dims": [16, 3, 3, 16], "constant_buffer_idx": 1, "external_id": 4294967295, "flags": 0, "id_out": 3}, "quant_params": {"scale": 0.0006561384652741253, "zero_point": 0}, "quant_params_type": "PerTensorQuant"}, "xvalue_union_type": "XNNQuantizedTensorValue"}, {"xvalue_union": {"tensor_value": {"datatype": 3, "num_dims": 4, "dims": [1, 28, 28, 16], "constant_buffer_idx": 0, "external_id": 4294967295, "flags": 0, "id_out": 4}, "quant_params": {"scale": 0.010861996561288834, "zero_point": -3}, "quant_params_type": "PerTensorQuant"}, "xvalue_union_type": "XNNQuantizedTensorValue"}, {"xvalue_union": {"tensor_value": {"datatype": 5, "num_dims": 1, "dims": [16], "constant_buffer_idx": 2, "external_id": 4294967295, "flags": 0, "id_out": 5}, "quant_params": {"scale": 1.2552661464591743e-05, "zero_point": 0}, "quant_params_type": "PerTensorQuant"}, "xvalue_union_type": "XNNQuantizedTensorValue"}, {"xvalue_union": {"tensor_value": {"datatype": 3, "num_dims": 4, "dims": [1, 16, 28, 28], "constant_buffer_idx": 0, "external_id": 4294967295, "flags": 0, "id_out": 6}, "quant_params": {"scale": 0.010861996561288834, "zero_point": -3}, "quant_params_type": "PerTensorQuant"}, "xvalue_union_type": "XNNQuantizedTensorValue"}, {"xvalue_union": {"datatype": 1, "num_dims": 4, "dims": [1, 16, 28, 28], "constant_buffer_idx": 0, "external_id": 1, "flags": 2, "id_out": 7}, "xvalue_union_type": "XNNTensorValue"}], "num_externs": 2, "input_ids": [0], "output_ids": [7], "constant_data": [{"offset": 0, "size": 0, "named_key": ""}, {"offset": 18446744073709551615, "size": 2304, "named_key": "0e117c3acc2f830d49e09083fbff890df96cd155674dd972b96c069044d0855b"}, {"offset": 18446744073709551615, "size": 64, "named_key": "d36077ae704360321d7b072ff740bfa5b23546d28621f83c8ab0ae9f97fe0d22"}]}
