From 46cf91db98d1c0d3770fa574784b5f2312b30d17 Mon Sep 17 00:00:00 2001
From: PaliC
Date: Thu, 21 Aug 2025 12:09:56 -0700
Subject: [PATCH 1/3] Filter out operators we cannot test correctness for

---
 BackendBench/scripts/dataset_filters.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/BackendBench/scripts/dataset_filters.py b/BackendBench/scripts/dataset_filters.py
index 0c897226..bcd70a5a 100644
--- a/BackendBench/scripts/dataset_filters.py
+++ b/BackendBench/scripts/dataset_filters.py
@@ -20,6 +20,13 @@
     "_fft_c2c.default",  # cuFFT only supports dimensions whose sizes are powers of two when computing in half precision
 ]
 
+RANDOM_OPERATORS = [
+    "empty_like",
+    "new_empty",
+    "new_empty_strided",
+    "bernoulli",
+]
+
 
 def apply_skip_ops_filter(ops):
     for op in ops:
@@ -28,6 +35,12 @@ def apply_skip_ops_filter(ops):
             op["why_excluded"].append("We cannot run this op on backendbench yet")
             op["runnable"] = False
 
+        if any(skip_op in op["op_name"] for skip_op in RANDOM_OPERATORS):
+            op["included_in_benchmark"] = False
+            op["why_excluded"].append(
+                "This op creates a random output, and therefore cannot be tested for correctness"
+            )
+
         if op["is_synthetic"]:
             op["included_in_benchmark"] = False
             op["why_excluded"].append(

From f8509f79382619659b726d6600837a99964ec1f4 Mon Sep 17 00:00:00 2001
From: Sahan Paliskara
Date: Thu, 21 Aug 2025 13:16:59 -0700
Subject: [PATCH 2/3] fix wording

---
 BackendBench/scripts/dataset_filters.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/BackendBench/scripts/dataset_filters.py b/BackendBench/scripts/dataset_filters.py
index bcd70a5a..03fe5ab0 100644
--- a/BackendBench/scripts/dataset_filters.py
+++ b/BackendBench/scripts/dataset_filters.py
@@ -20,7 +20,7 @@
     "_fft_c2c.default",  # cuFFT only supports dimensions whose sizes are powers of two when computing in half precision
 ]
 
-RANDOM_OPERATORS = [
+UNTESTABLE_OPERATORS = [
     "empty_like",
     "new_empty",
     "new_empty_strided",
@@ -35,10 +35,10 @@ def apply_skip_ops_filter(ops):
             op["why_excluded"].append("We cannot run this op on backendbench yet")
             op["runnable"] = False
 
-        if any(skip_op in op["op_name"] for skip_op in RANDOM_OPERATORS):
+        if any(skip_op in op["op_name"] for skip_op in UNTESTABLE_OPERATORS):
             op["included_in_benchmark"] = False
             op["why_excluded"].append(
-                "This op creates a random output, and therefore cannot be tested for correctness"
+                "This op creates a unpredictable output, and therefore cannot be tested for correctness"
             )
 
         if op["is_synthetic"]:

From b736062a2269f52c1c5712c4a10ba8b75e595e9f Mon Sep 17 00:00:00 2001
From: Sahan Paliskara
Date: Thu, 21 Aug 2025 16:01:39 -0700
Subject: [PATCH 3/3] fix wording

---
 BackendBench/scripts/dataset_filters.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/BackendBench/scripts/dataset_filters.py b/BackendBench/scripts/dataset_filters.py
index 03fe5ab0..710892f0 100644
--- a/BackendBench/scripts/dataset_filters.py
+++ b/BackendBench/scripts/dataset_filters.py
@@ -21,10 +21,10 @@
 ]
 
 UNTESTABLE_OPERATORS = [
-    "empty_like",
-    "new_empty",
-    "new_empty_strided",
-    "bernoulli",
+    "empty_like",  # We can check using metadata
+    "new_empty",  # We can check using metadata
+    "new_empty_strided",  # We can check using metadata
+    "bernoulli",  # We can write a custom test to verify this one (albeit not the randomness)
 ]
 
 
@@ -38,7 +38,7 @@ def apply_skip_ops_filter(ops):
         if any(skip_op in op["op_name"] for skip_op in UNTESTABLE_OPERATORS):
             op["included_in_benchmark"] = False
             op["why_excluded"].append(
-                "This op creates a unpredictable output, and therefore cannot be tested for correctness"
+                "BackendBench does not support correctness testing for this op yet"
             )
 
         if op["is_synthetic"]: