From 46cf91db98d1c0d3770fa574784b5f2312b30d17 Mon Sep 17 00:00:00 2001
From: PaliC <sahancpal@gmail.com>
Date: Thu, 21 Aug 2025 12:09:56 -0700
Subject: [PATCH 1/3] Filter out operators we cannot test correctness for

---
 BackendBench/scripts/dataset_filters.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/BackendBench/scripts/dataset_filters.py b/BackendBench/scripts/dataset_filters.py
index 0c897226..bcd70a5a 100644
--- a/BackendBench/scripts/dataset_filters.py
+++ b/BackendBench/scripts/dataset_filters.py
@@ -20,6 +20,13 @@
     "_fft_c2c.default",  # cuFFT only supports dimensions whose sizes are powers of two when computing in half precision
 ]
 
+RANDOM_OPERATORS = [
+    "empty_like",
+    "new_empty",
+    "new_empty_strided",
+    "bernoulli",
+]
+
 
 def apply_skip_ops_filter(ops):
     for op in ops:
@@ -28,6 +35,12 @@ def apply_skip_ops_filter(ops):
             op["why_excluded"].append("We cannot run this op on backendbench yet")
             op["runnable"] = False
 
+        if any(skip_op in op["op_name"] for skip_op in RANDOM_OPERATORS):
+            op["included_in_benchmark"] = False
+            op["why_excluded"].append(
+                "This op creates a random output, and therefore cannot be tested for correctness"
+            )
+
         if op["is_synthetic"]:
             op["included_in_benchmark"] = False
             op["why_excluded"].append(

From f8509f79382619659b726d6600837a99964ec1f4 Mon Sep 17 00:00:00 2001
From: Sahan Paliskara <sahanp@meta.com>
Date: Thu, 21 Aug 2025 13:16:59 -0700
Subject: [PATCH 2/3] Rename RANDOM_OPERATORS to UNTESTABLE_OPERATORS and fix wording

---
 BackendBench/scripts/dataset_filters.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/BackendBench/scripts/dataset_filters.py b/BackendBench/scripts/dataset_filters.py
index bcd70a5a..03fe5ab0 100644
--- a/BackendBench/scripts/dataset_filters.py
+++ b/BackendBench/scripts/dataset_filters.py
@@ -20,7 +20,7 @@
     "_fft_c2c.default",  # cuFFT only supports dimensions whose sizes are powers of two when computing in half precision
 ]
 
-RANDOM_OPERATORS = [
+UNTESTABLE_OPERATORS = [
     "empty_like",
     "new_empty",
     "new_empty_strided",
@@ -35,10 +35,10 @@ def apply_skip_ops_filter(ops):
             op["why_excluded"].append("We cannot run this op on backendbench yet")
             op["runnable"] = False
 
-        if any(skip_op in op["op_name"] for skip_op in RANDOM_OPERATORS):
+        if any(skip_op in op["op_name"] for skip_op in UNTESTABLE_OPERATORS):
             op["included_in_benchmark"] = False
             op["why_excluded"].append(
-                "This op creates a random output, and therefore cannot be tested for correctness"
+                "This op creates a unpredictable output, and therefore cannot be tested for correctness"
             )
 
         if op["is_synthetic"]:

From b736062a2269f52c1c5712c4a10ba8b75e595e9f Mon Sep 17 00:00:00 2001
From: Sahan Paliskara <sahanp@meta.com>
Date: Thu, 21 Aug 2025 16:01:39 -0700
Subject: [PATCH 3/3] Reword exclusion reason and note how each op could be tested

---
 BackendBench/scripts/dataset_filters.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/BackendBench/scripts/dataset_filters.py b/BackendBench/scripts/dataset_filters.py
index 03fe5ab0..710892f0 100644
--- a/BackendBench/scripts/dataset_filters.py
+++ b/BackendBench/scripts/dataset_filters.py
@@ -21,10 +21,10 @@
 ]
 
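 UNTESTABLE_OPERATORS = [
-    "empty_like",
-    "new_empty",
-    "new_empty_strided",
-    "bernoulli",
+    "empty_like",  # Unpredictable output values; correctness could be checked via metadata instead
+    "new_empty",  # Unpredictable output values; correctness could be checked via metadata instead
+    "new_empty_strided",  # Unpredictable output values; correctness could be checked via metadata instead
+    "bernoulli",  # Random output; a custom test could verify this one (though not the randomness itself)
 ]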
 
 
@@ -38,7 +38,7 @@ def apply_skip_ops_filter(ops):
         if any(skip_op in op["op_name"] for skip_op in UNTESTABLE_OPERATORS):
             op["included_in_benchmark"] = False
             op["why_excluded"].append(
-                "This op creates a unpredictable output, and therefore cannot be tested for correctness"
+                "BackendBench does not support correctness testing for this op yet"
             )
 
         if op["is_synthetic"]: