diff --git a/BackendBench/scripts/dataset_filters.py b/BackendBench/scripts/dataset_filters.py
index 0c897226..710892f0 100644
--- a/BackendBench/scripts/dataset_filters.py
+++ b/BackendBench/scripts/dataset_filters.py
@@ -20,6 +20,13 @@
     "_fft_c2c.default",  # cuFFT only supports dimensions whose sizes are powers of two when computing in half precision
 ]
 
+UNTESTABLE_OPERATORS = [  # ops excluded from the benchmark: no correctness testing support yet
+    "empty_like",  # not tested yet; could be checked via output metadata
+    "new_empty",  # not tested yet; could be checked via output metadata
+    "new_empty_strided",  # not tested yet; could be checked via output metadata
+    "bernoulli",  # not tested yet; a custom test could verify it (though not the randomness)
+]
+
 
 def apply_skip_ops_filter(ops):
     for op in ops:
@@ -28,6 +35,12 @@ def apply_skip_ops_filter(ops):
             op["why_excluded"].append("We cannot run this op on backendbench yet")
             op["runnable"] = False
 
+        if any(skip_op in op["op_name"] for skip_op in UNTESTABLE_OPERATORS):
+            op["included_in_benchmark"] = False
+            op["why_excluded"].append(
+                "BackendBench does not support correctness testing for this op yet"
+            )
+
         if op["is_synthetic"]:
             op["included_in_benchmark"] = False
             op["why_excluded"].append(