[EZ] Check for stable outputs #69

Open · wants to merge 4 commits into main
Changes from 3 commits
13 changes: 12 additions & 1 deletion BackendBench/eval.py
@@ -5,7 +5,7 @@
import triton.testing


from BackendBench.utils import uses_cuda_stream
from BackendBench.utils import uses_cuda_stream, check_for_stable_output
from BackendBench.utils import serialize_args

logger = logging.getLogger(__name__)
@@ -48,6 +48,11 @@ def eval_correctness_test(op, impl, test):
def eval_correctness(op, impl, tests):
correct, total = 0, 0
for test in tests:
if check_for_stable_output(op, serialize_args(test.args, test.kwargs)):
logger.warning(
f"Skipping {op.__name__} with args {serialize_args(test.args, test.kwargs)} because the output is always the same"
)
continue
logging.debug(f"Testing {op.__name__} with args {serialize_args(test.args, test.kwargs)}")
if eval_correctness_test(op, impl, test):
correct += 1
@@ -73,6 +78,11 @@ def eval_performance(op, impl, tests):
base_times = []
test_times = []
for test in tests:
if check_for_stable_output(op, serialize_args(test.args, test.kwargs)):
logger.warning(
f"Skipping {op.__name__} with args {serialize_args(test.args, test.kwargs)} because the output is always the same"
)
continue
logging.debug(
f"Benchmarking {op.__name__} with args {serialize_args(test.args, test.kwargs)}"
)
@@ -94,6 +104,7 @@ def eval_one_op(op, impl, correctness_tests, performance_tests):
if uses_cuda_stream(impl):
logger.warning(f"Skipping {op.__name__} because it uses CUDA stream")
return 0, 0

return eval_correctness(op, impl, correctness_tests), eval_performance(
op, impl, performance_tests
)
12 changes: 12 additions & 0 deletions BackendBench/utils.py
@@ -153,3 +153,15 @@ def deserialize_args(inps):
for key in dtype_abbrs_parsing:
inps = inps.replace(f"'{key}'", key)
return eval(inps.strip().strip("'").strip('"'), global_vals)


def check_for_stable_output(op, inps, n_iterations=10):
op_func = eval(f"torch.ops.{op}")
args, kwargs = deserialize_args(inps)
initial_output = op_func(*args, **kwargs)
for _ in range(n_iterations):
args, kwargs = deserialize_args(inps)
output = op_func(*args, **kwargs)
if not torch.allclose(initial_output, output, atol=1e-2, rtol=1e-2):
return False
return True
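
A minimal usage sketch of the new helper, assuming the op-name strings and serialized-argument format used in the tests further down; illustrative only, not part of the diff:

# Illustrative sketch: calls check_for_stable_output directly, outside the eval loop.
# Argument strings follow the serialize_args/deserialize_args format from test_utils.py.
from BackendBench.utils import check_for_stable_output

zeros_stable = check_for_stable_output(
    "aten.zeros", "(([3, 4],), {'dtype': torch.float32})", n_iterations=5
)  # expected True: aten.zeros returns the same tensor on every call

randn_stable = check_for_stable_output(
    "aten.randn", "(([3, 3],), {'dtype': torch.float32})", n_iterations=5
)  # expected False: aten.randn differs between calls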
21 changes: 21 additions & 0 deletions test/test_utils.py
@@ -7,6 +7,7 @@
deserialize_args,
_deserialize_tensor,
uses_cuda_stream,
check_for_stable_output,
)

# Check if CUDA is available
@@ -531,5 +532,25 @@ def test_integer_tensors(self):
assert tensor.shape == (10,)


class TestCheckForStableOutput:
"""Test cases for check_for_stable_output function"""

def test_stable_zeros_op(self):
"""Test that zeros creation is stable"""
op = "aten.zeros"
inps = "(([3, 4],), {'dtype': torch.float32})"

result = check_for_stable_output(op, inps, n_iterations=5)
assert result

def test_unstable_random_op(self):
"""Test that random operations are correctly detected as unstable"""
op = "aten.randn"
inps = "(([3, 3],), {'dtype': torch.float32})"

result = check_for_stable_output(op, inps, n_iterations=5)
assert not result


if __name__ == "__main__":
pytest.main([__file__])