Commit 2ab2b4e

Adjust tolerance for fp16 exp op to handle reasonable calculation discrepancies

1 parent: 75d4b2e

File tree

3 files changed: +57 −5


backends/xnnpack/test/ops/test_exp.py

Lines changed: 28 additions & 4 deletions
@@ -10,6 +10,23 @@
 from executorch.backends.xnnpack.test.tester import Tester
 
 
+def calculate_fp16_exp_tolerance(ref_output_tensor):
+    # Calculate mixed tolerance for float16 used in XNNPACK's float16 policy
+    fp16_epsilon = 9.77e-4
+    abs_tol = 2 * fp16_epsilon
+    rel_tol = 6 * fp16_epsilon
+
+    ref_abs = ref_output_tensor.abs()
+    mixed_tol = torch.maximum(
+        torch.full_like(ref_abs, abs_tol),
+        ref_abs * rel_tol,
+    )
+
+    final_atol = mixed_tol.max().item()
+
+    return final_atol, rel_tol
+
+
 class TestExp(unittest.TestCase):
     def setUp(self):
         torch._dynamo.reset()
@@ -22,6 +39,16 @@ def forward(self, x):
         return torch.exp(x)
 
     def run_exp_test(self, inputs):
+        input_tensor = inputs[0]
+
+        if input_tensor.dtype == torch.float16:
+            with torch.no_grad():
+                ref_output = torch.exp(input_tensor.to(torch.float32)).to(torch.float16)
+            atol, rtol = calculate_fp16_exp_tolerance(ref_output)
+        else:
+            atol = 1e-03
+            rtol = 1e-03
+
         (
             Tester(self.Exp(), inputs)
             .export()
@@ -31,12 +58,9 @@ def run_exp_test(self, inputs):
             .check_not(["executorch_exir_dialects_edge__ops_aten_exp_default"])
             .to_executorch()
             .serialize()
-            .run_method_and_compare_outputs()
+            .run_method_and_compare_outputs(atol=atol, rtol=rtol)
         )
 
-    # TODO (leafs1): Fix flaky tests. Land fix asap
-    # and cherry-pick onto release/0.7 branch
-    @unittest.skip(reason="For float16, numerical discepancies are too high")
     def test_fp16_exp(self):
         inputs = (torch.randn(20).to(torch.float16),)
         self.run_exp_test(inputs)
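
The helper encodes a mixed absolute/relative tolerance: fp16_epsilon (9.77e-4, roughly 2^-10, the half-precision machine epsilon) grants each element an allowance of max(2*eps, 6*eps * |ref|), and the largest per-element allowance is collapsed into a single scalar atol for the tester. The relative term matters for exp in particular: since d/dx exp(x) = exp(x), an input rounding error of delta produces a relative output error of about delta, so large outputs need proportionally more slack. A minimal standalone sketch of the comparison this enables (direct fp16 exp stands in here for the XNNPACK delegate output, which the real test obtains through Tester):

import torch

FP16_EPS = 9.77e-4  # half-precision machine epsilon, ~2**-10

def mixed_fp16_tolerance(ref):
    # Per-element allowance: max(2*eps absolute, 6*eps relative),
    # collapsed to one scalar atol as in calculate_fp16_exp_tolerance.
    abs_tol = 2 * FP16_EPS
    rel_tol = 6 * FP16_EPS
    mixed = torch.maximum(torch.full_like(ref.abs(), abs_tol), ref.abs() * rel_tol)
    return mixed.max().item(), rel_tol

x = torch.randn(20).to(torch.float16)
ref = torch.exp(x.to(torch.float32)).to(torch.float16)  # fp32 reference, rounded to fp16
out = torch.exp(x)  # stand-in for the delegate's fp16 output
atol, rtol = mixed_fp16_tolerance(ref)
ok = torch.allclose(out.to(torch.float32), ref.to(torch.float32), atol=atol, rtol=rtol)
print(ok)  # expected True: direct fp16 exp stays well inside the mixed tolerance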

backends/xnnpack/test/ops/test_gelu.py

Lines changed: 28 additions & 1 deletion
@@ -10,6 +10,21 @@
 from executorch.backends.xnnpack.test.tester import Tester
 
 
+def calculate_fp16_gelu_tolerance(ref_output_tensor):
+    fp16_epsilon = 9.77e-4
+    abs_tol = 2 * fp16_epsilon
+    rel_tol = 6 * fp16_epsilon
+
+    ref_abs = ref_output_tensor.abs()
+    mixed_tol = torch.maximum(
+        torch.full_like(ref_abs, abs_tol),
+        ref_abs * rel_tol,
+    )
+
+    final_atol = mixed_tol.max().item()
+    return final_atol, rel_tol
+
+
 class TestGelu(unittest.TestCase):
     def setUp(self):
         torch._dynamo.reset()
@@ -23,6 +38,18 @@ def forward(self, x):
         return self.gelu(x)
 
     def run_gelu_test(self, inputs):
+        input_tensor = inputs[0]
+
+        if input_tensor.dtype == torch.float16:
+            with torch.no_grad():
+                ref_output = torch.nn.functional.gelu(
+                    input_tensor.to(torch.float32)
+                ).to(torch.float16)
+            atol, rtol = calculate_fp16_gelu_tolerance(ref_output)
+        else:
+            atol = 1e-03
+            rtol = 1e-03
+
         (
             Tester(self.Gelu(), inputs)
             .export()
@@ -32,7 +59,7 @@ def run_gelu_test(self, inputs):
             .check_not(["executorch_exir_dialects_edge__ops_aten_gelu_default"])
             .to_executorch()
             .serialize()
-            .run_method_and_compare_outputs()
+            .run_method_and_compare_outputs(atol=atol, rtol=rtol)
        )
 
     def test_fp16_gelu(self):
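
The gelu change applies the same policy; only the reference computation differs (fp32 gelu, rounded back to fp16). For a feel for the numbers involved, a short sketch (input values are illustrative) printing the tolerances the helper would produce for a standard-normal fp16 input:

import torch
import torch.nn.functional as F

torch.manual_seed(0)
x = torch.randn(20).to(torch.float16)
ref = F.gelu(x.to(torch.float32)).to(torch.float16)

fp16_eps = 9.77e-4
mixed = torch.maximum(torch.full_like(ref.abs(), 2 * fp16_eps), ref.abs() * (6 * fp16_eps))
# rtol is fixed at 6*eps ~= 5.9e-3; atol scales with the largest |gelu(x)|,
# so for standard-normal inputs it typically lands on the order of 1e-2.
print(f"atol={mixed.max().item():.5f}, rtol={6 * fp16_eps:.5f}")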

test.json

Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
+{"version": "0", "xnodes": [{"xnode_union": {"input_id": 0, "output_id": 1, "flags": 0}, "xnode_union_type": "XNNConvert", "debug_handle": 14, "output_min_max": null}, {"xnode_union": {"num_dims": 4, "perm": [0, 2, 3, 1], "input_id": 1, "output_id": 2, "flags": 0}, "xnode_union_type": "XNNStaticTranspose", "debug_handle": 65535, "output_min_max": null}, {"xnode_union": {"padding_top": 0, "padding_right": 0, "padding_bottom": 0, "padding_left": 0, "kernel_height": 3, "kernel_width": 3, "subsampling_height": 1, "subsampling_width": 1, "dilation_height": 1, "dilation_width": 1, "group_input_channels": 16, "group_output_channels": 16, "groups": 1, "adjustment_height": 0, "adjustment_width": 0, "input1_id": 2, "filter_id": 3, "bias_id": 5, "output_id": 4, "flags": 0}, "xnode_union_type": "XNNConv2d", "debug_handle": 16, "output_min_max": null}, {"xnode_union": {"num_dims": 4, "perm": [0, 3, 1, 2], "input_id": 4, "output_id": 6, "flags": 0}, "xnode_union_type": "XNNStaticTranspose", "debug_handle": 65535, "output_min_max": null}, {"xnode_union": {"input_id": 6, "output_id": 7, "flags": 0}, "xnode_union_type": "XNNConvert", "debug_handle": 65535, "output_min_max": null}], "xvalues": [{"xvalue_union": {"datatype": 1, "num_dims": 4, "dims": [1, 16, 30, 30], "constant_buffer_idx": 0, "external_id": 0, "flags": 1, "id_out": 0}, "xvalue_union_type": "XNNTensorValue"}, {"xvalue_union": {"tensor_value": {"datatype": 3, "num_dims": 4, "dims": [1, 16, 30, 30], "constant_buffer_idx": 0, "external_id": 4294967295, "flags": 0, "id_out": 1}, "quant_params": {"scale": 0.019131116569042206, "zero_point": -5}, "quant_params_type": "PerTensorQuant"}, "xvalue_union_type": "XNNQuantizedTensorValue"}, {"xvalue_union": {"tensor_value": {"datatype": 3, "num_dims": 4, "dims": [1, 30, 30, 16], "constant_buffer_idx": 0, "external_id": 4294967295, "flags": 0, "id_out": 2}, "quant_params": {"scale": 0.019131116569042206, "zero_point": -5}, "quant_params_type": "PerTensorQuant"}, "xvalue_union_type": "XNNQuantizedTensorValue"}, {"xvalue_union": {"tensor_value": {"datatype": 3, "num_dims": 4, "dims": [16, 3, 3, 16], "constant_buffer_idx": 1, "external_id": 4294967295, "flags": 0, "id_out": 3}, "quant_params": {"scale": 0.0006561384652741253, "zero_point": 0}, "quant_params_type": "PerTensorQuant"}, "xvalue_union_type": "XNNQuantizedTensorValue"}, {"xvalue_union": {"tensor_value": {"datatype": 3, "num_dims": 4, "dims": [1, 28, 28, 16], "constant_buffer_idx": 0, "external_id": 4294967295, "flags": 0, "id_out": 4}, "quant_params": {"scale": 0.010861996561288834, "zero_point": -3}, "quant_params_type": "PerTensorQuant"}, "xvalue_union_type": "XNNQuantizedTensorValue"}, {"xvalue_union": {"tensor_value": {"datatype": 5, "num_dims": 1, "dims": [16], "constant_buffer_idx": 2, "external_id": 4294967295, "flags": 0, "id_out": 5}, "quant_params": {"scale": 1.2552661464591743e-05, "zero_point": 0}, "quant_params_type": "PerTensorQuant"}, "xvalue_union_type": "XNNQuantizedTensorValue"}, {"xvalue_union": {"tensor_value": {"datatype": 3, "num_dims": 4, "dims": [1, 16, 28, 28], "constant_buffer_idx": 0, "external_id": 4294967295, "flags": 0, "id_out": 6}, "quant_params": {"scale": 0.010861996561288834, "zero_point": -3}, "quant_params_type": "PerTensorQuant"}, "xvalue_union_type": "XNNQuantizedTensorValue"}, {"xvalue_union": {"datatype": 1, "num_dims": 4, "dims": [1, 16, 28, 28], "constant_buffer_idx": 0, "external_id": 1, "flags": 2, "id_out": 7}, "xvalue_union_type": "XNNTensorValue"}], "num_externs": 2, "input_ids": [0], "output_ids": [7], "constant_data": [{"offset": 0, "size": 0, "named_key": ""}, {"offset": 18446744073709551615, "size": 2304, "named_key": "0e117c3acc2f830d49e09083fbff890df96cd155674dd972b96c069044d0855b"}, {"offset": 18446744073709551615, "size": 64, "named_key": "d36077ae704360321d7b072ff740bfa5b23546d28621f83c8ab0ae9f97fe0d22"}]}
