
Commit 216e28f

xmfan authored and pytorchmergebot committed
[ca] run xfails up until their last passing backend (pytorch#153279)
Pull Request resolved: pytorch#153279
Approved by: https://github.com/jansel
ghstack dependencies: pytorch#153193, pytorch#153222
1 parent a80eb84 commit 216e28f
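
The change reroutes each xfailing test to the most capable backend it still passes under, in the order ca_eager -> eager -> aot_eager -> inductor; only tests that fail even under ca_eager stay marked as expected failures. A minimal, self-contained sketch of that selection logic, mirroring the lookup_backend helper added in the diff below (the sample set contents here are illustrative only; the real sets live in test/inductor/test_compiled_autograd.py):

    # Illustrative buckets: each key names the backend the test is expected to fail on.
    xfail_by_backend = {
        "ca_eager": {"test_lobpcg"},
        "eager": {"test_setitem"},
        "aot_eager": {"test_grad"},
        "inductor": set(),
    }


    def lookup_backend(test_name):
        """Pick the last backend in ca_eager -> eager -> aot_eager -> inductor
        that test_name is still expected to pass under."""
        if test_name in xfail_by_backend["inductor"]:
            return "aot_eager"
        elif test_name in xfail_by_backend["aot_eager"]:
            return "eager"
        elif test_name in xfail_by_backend["eager"]:
            return "ca_eager"
        else:
            # ca_eager failures are handled as unittest.expectedFailure upstream,
            # so they should never reach backend selection.
            assert test_name not in xfail_by_backend["ca_eager"]
            return "inductor"


    print(lookup_backend("test_grad"))      # eager
    print(lookup_backend("test_anything"))  # inductor

In the diff, wrap_test_class marks the ca_eager bucket (plus divergence-from-eager cases) as expectedFailure and passes lookup_backend(name) into make_compiler_fn for everything else.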


test/inductor/test_compiled_autograd.py

Lines changed: 126 additions & 95 deletions
@@ -52,25 +52,27 @@
 def make_compiler_fn(
     fullgraph=True, dynamic=True, backend="inductor", gm_hook=lambda gm: None
 ):
-    assert backend in ["inductor", "aot_eager", "ca_eager"]
+    assert backend in ["inductor", "aot_eager", "eager", "ca_eager"]
 
     def _compiler_fn(gm):
         """Same as torch.compile() but counts number of compiles"""
         gm_hook(gm)
 
+        _backend = backend
         if backend == "ca_eager":
             return gm
+        elif backend != "eager":
 
-        def _inner_compiler(gm_, example_inputs_):
-            counters["compiled_autograd"]["compiles"] += 1
-            if backend == "inductor":
-                return inductor.compile(gm_, example_inputs_)
-            elif backend == "aot_eager":
-                return aot_eager(gm_, example_inputs_)
+            def _inner_compiler(gm_, example_inputs_):
+                counters["compiled_autograd"]["compiles"] += 1
+                if backend == "inductor":
+                    return inductor.compile(gm_, example_inputs_)
+                elif backend == "aot_eager":
+                    return aot_eager(gm_, example_inputs_)
 
-        return torch.compile(
-            gm, backend=_inner_compiler, fullgraph=fullgraph, dynamic=dynamic
-        )
+            _backend = _inner_compiler
+
+        return torch.compile(gm, backend=_backend, fullgraph=fullgraph, dynamic=dynamic)
 
     return _compiler_fn
 
@@ -4182,18 +4184,38 @@ def wrapped(self):
     return wrapped
 
 
+def lookup_backend(test_name):
+    if test_name in xfail_by_backend["inductor"]:
+        return "aot_eager"
+    elif test_name in xfail_by_backend["aot_eager"]:
+        return "eager"
+    elif test_name in xfail_by_backend["eager"]:
+        return "ca_eager"
+    else:
+        assert test_name not in xfail_by_backend["ca_eager"]
+        return "inductor"
+
+
 def wrap_test_class(orig_cls):
     dct = orig_cls.__dict__.copy()
     for name in list(dct.keys()):
         fn = dct[name]
         if not callable(fn) or name in skipped_tests:
             continue
-        elif known_failures_re.match(name) or name in known_failing_tests:
+        elif (
+            xfail_re.match(name)
+            or name in xfail_by_backend["ca_eager"]
+            or name in xfail_divergence_from_eager
+        ):
             dct[name] = unittest.expectedFailure
         elif name.startswith("test_"):
-            fullgraph = name not in known_graph_breaks_tests
             ctxs = [
-                compiled_autograd._enable(make_compiler_fn(fullgraph=fullgraph)),
+                compiled_autograd._enable(
+                    make_compiler_fn(
+                        backend=lookup_backend(name),
+                        fullgraph=name not in known_graph_breaks_tests,
+                    )
+                ),
                 test_contexts.get(name, contextlib.nullcontext()),
             ]
             dct[name] = make_wrapped(fn, ctxs)
@@ -4282,6 +4304,7 @@ def wrap_test_class(orig_cls):
     "test_create_graph_and_full_backward_hook_cycle",  # _pack_with_none
     "test_full_backward_hook_double_backward",  # _pack_with_none
     "test_grad_mode_restored_reentrant",  # assertTrue
+    "test_multi_grad_any_hooks",  # register_multi_grad_hook
 }
 
 test_contexts = {
@@ -4292,85 +4315,94 @@ def wrap_test_class(orig_cls):
 }
 
 # These groups of tests aren't supported yet
-known_failures_re = re.compile(r"^test_(sparse|profiler|gradcheck|named_tensor)")
-
-# Bugs needing investigation:
-skipped_tests = {
-    "test_callback_propagates_errors_from_device_thread",  # fullgraph for queue_callback, but graph break for RuntimeError
+xfail_re = re.compile(r"^test_(sparse|profiler|gradcheck|named_tensor)")
+
+# Tests fail at different stages, we categorize them wrt to their backends
+# We run only the last passing backend in this order:
+# ca_eager -> eager -> aot_eager -> inductor
+xfail_by_backend = {
+    "ca_eager": {  # xfail
+        "test_callback_propagates_errors_from_device_thread",  # fullgraph for queue_callback, but graph break for RuntimeError
+        "test_reentrant_with_callbacks_both_depths",  # queue_callback
+        "test_reentrant_with_callbacks_depth_0",  # queue_callback
+        "test_reentrant_with_callbacks_depth_1",  # queue_callback
+        "test_current_graph_task_execution_order",  # nodes are already freed by the time dynamo traces the lifted hook
+        "test_autograd_inplace_views_cross_dtype",  # view_fn not supported by compiled autograd
+        "test_current_node",  # TorchDispatchMode not yet implemented for compiled autograd
+        "test_post_accumulate_grad_hook_ordering",  # accuracy error
+        "test_current_graph_task_id",  # autograd state already cleared once dynamo is called
+        "test_custom_function_forward_mode_forward_is_no_op",  # forward AD
+        "test_custom_function_forward_mode_inplace_checks",  # forward AD
+        "test_custom_function_forward_mode_view_checks",  # forward AD
+        "test_custom_function_forward_mode_wrong_formula",  # forward AD
+        "test_node_post_hook_registered_during_unpack_hook",  # 'NoneType' object has no attribute 'register_hook'
+        "test_custom_function_error",  # forward AD
+        "test_custom_function_save_for_forward",  # forward AD
+        "test_dont_materialize_grads",  # undefined grad
+        "test_no_grad_copy",  # setting static member in lifted backward
+        "test_no_grad_copy_sparse",  # setting static member in lifted backward
+        "test_node_ordering_when_none_returned",  # torch._dynamo.exc.Unsupported: TypeError <built-in method clone
+        "test_save_output_nr",  # output_nr grad passed as None
+        # IndexError: list index out of range (NB: x.grad = y where both x and y are input tensors)
+        "test_grad_nonleaf_register_hook",
+        "test_backward_twice_without_saved_values",  # https://github.com/pytorch/pytorch/issues/129938
+        # Category: Higher Order Gradients
+        "test_default_saved_tensors_hooks_double_backward",  # wrong when pack hook returns non-leaf
+        "test_saved_variable_packing_unpacking_saved_original_with_hooks",  # wrong when pack hook returns non-leaf
+        "test_nested_anomaly_detect_nan",  # nested anomaly
+        "test_select_sum",  # batched gradients
+        "test_custom_autograd_no_early_free",  # batched gradients
+        "test_grad_batched_grad",  # batched gradients
+        # Uncategorized
+        "test_lobpcg",  # NaNs
+        "test_autograd_simple_views_python",  # gradient is None
+        "test_function_returns_undefined_tensor",  # gradient is None
+        "test_input_buffer_accum",  # add(sparse, dense)
+        "test_return_duplicate",  # batched gradients
+        "test_return_duplicate_inplace",  # batched gradients
+        "test_naughty_autograd_function_stashing_ctx",  # error not raised
+        "test_unrelated_inputs",  # batched gradients
+    },
+    "eager": {  # will be run without torch.compiling the CA graph
+        "test_setup_context_when_forward_has_default_args",  # autograd.Function with class methods
+        "test_accumulate_grad_tensor_reference",  # Out of bounds: frame_state_entry.stride[i] is None
+        "test_custom_function_exception",  # torch.no_grad(), torch._dynamo.exc.Unsupported: missing: WITH_EXCEPT_START
+        "test_to_sparse_backward",  # Out of bounds: frame_state_entry.stride[i] is None
+        "test_custom_function_non_tensor_inputs_outputs",  # gradient batching rule not implemented for aten::sym_size.int
+        "test_setitem",  # CopySlices accuracy error
+        "test_save_on_cpu_and_checkpoint",  # https://github.com/pytorch/pytorch/issues/147565
+        "test_checkpoint_detects_non_determinism",  # different error
+        "test_checkpointing_non_reentrant_autocast_cpu",  # saved != recompute
+        "test_checkpointing_non_reentrant_autocast_gpu",  # saved != recompute
+        "test_checkpointing_without_reentrant_saved_object_identity",  # same as https://github.com/pytorch/pytorch/issues/136193
+        "test_saved_variable_packing_unpacking_did_not_save_original_with_hooks",  # register_hooks multiple times
+        "test_saved_variable_saved_original_inplace_detach",  # RuntimeError not raised
+        "test_access_saved_tensor_twice_without_recomputation_works",  # saved != recompute
+        "test_checkpointing_without_reentrant_dataparallel",  # https://github.com/pytorch/pytorch/issues/127115
+        "test_checkpointing",  # takes very very long
+        "test_checkpointing_without_reentrant_input_requires_grad_False",  # takes very very long
+        "test_checkpointing_without_reentrant_input_requires_grad_True",  # takes very very long
+        "test_checkpointing_without_reentrant_memory_savings",  # takes very very long
+        "test_dtensor_different_gradient_placement",  # Dynamo failed to run FX node with fake tensors
+        "test_dtensor_noncontiguous_output",  # Dynamo failed to run FX node with fake tensors
+        "test_dtensor_partial_placement_graph_output",  # Dynamo failed to run FX node with fake tensors
+        "test_unwrap_async_collective_tensor_tangent",  # AttributeError: 'PlainTensorMeta' object has no attribute 'attrs'
+        "test_graph_save_on_cpu",  # torch.save should no-op and be recorded in the graph
+        "test_saving_variable_to_disk",  # torch.save should no-op and be recorded in the graph
+    },
+    "aot_eager": {  # will be run with torch.compile(backend="eager")
+        # Category: FakeTensor
+        "test_wrapped_number_saved_tensors_hooks",  # Proxy tensor should carryover is_wrapped_number_ of its original
+        "test_scalar_grad_mixed_device",  # Fake Tensors aren't propagating device properly for 0-dim grads
+        "test_grad",  # AOT backward higher order gradients
+        "test_grad_materialize_grads",  # AOT backward higher order gradients
+    },
+    "inductor": {},  # will be run with torch.compile(backend="aot_eager")
+    # tests not present in this dict will be run with torch.compile(backend="inductor")
 }
 
-known_failing_tests = {
-    # Category: Compiled autograd
-    "test_reentrant_with_callbacks_both_depths",  # queue_callback
-    "test_reentrant_with_callbacks_depth_0",  # queue_callback
-    "test_reentrant_with_callbacks_depth_1",  # queue_callback
-    "test_current_graph_task_execution_order",  # nodes are already freed by the time dynamo traces the lifted hook
-    "test_autograd_inplace_views_cross_dtype",  # view_fn not supported by compiled autograd
-    "test_current_node",  # TorchDispatchMode not yet implemented for compiled autograd
-    "test_post_accumulate_grad_hook_ordering",  # accuracy error
-    "test_current_graph_task_id",  # autograd state already cleared once dynamo is called
-    "test_custom_function_forward_mode_forward_is_no_op",  # forward AD
-    "test_custom_function_forward_mode_inplace_checks",  # forward AD
-    "test_custom_function_forward_mode_view_checks",  # forward AD
-    "test_custom_function_forward_mode_wrong_formula",  # forward AD
-    "test_node_post_hook_registered_during_unpack_hook",  # 'NoneType' object has no attribute 'register_hook'
-    "test_multi_grad_any_hooks",  # register_multi_grad_hook
-    "test_custom_function_error",  # vjp
-    "test_custom_function_save_for_forward",  # vjp
-    "test_dont_materialize_grads",  # undefined grad
-    "test_no_grad_copy",  # setting static member in lifted backward
-    "test_no_grad_copy_sparse",  # setting static member in lifted backward
-    "test_node_ordering_when_none_returned",  # torch._dynamo.exc.Unsupported: TypeError <built-in method clone
-    "test_save_output_nr",  # output_nr grad passed as None
-    "test_setup_context_when_forward_has_default_args",  # autograd.Function with class methods
-    # IndexError: list index out of range (NB: x.grad = y where both x and y are input tensors)
-    "test_grad_nonleaf_register_hook",
-    "test_backward_twice_without_saved_values",  # https://github.com/pytorch/pytorch/issues/129938
-    # Category: Higher Order Gradients
-    "test_default_saved_tensors_hooks_double_backward",  # wrong when pack hook returns non-leaf
-    "test_saved_variable_packing_unpacking_saved_original_with_hooks",  # wrong when pack hook returns non-leaf
-    "test_nested_anomaly_detect_nan",  # nested anomaly
-    "test_select_sum",  # batched gradients
-    "test_custom_autograd_no_early_free",  # batched gradients
-    "test_lobpcg",  # NaNs
-    # Category: Dynamo (pass when directly running CA graph)
-    "test_accumulate_grad_tensor_reference",  # Out of bounds: frame_state_entry.stride[i] is None
-    "test_custom_function_exception",  # torch.no_grad(), torch._dynamo.exc.Unsupported: missing: WITH_EXCEPT_START
-    "test_to_sparse_backward",  # Out of bounds: frame_state_entry.stride[i] is None
-    "test_autograd_simple_views_python",  # gradient is None
-    "test_function_returns_undefined_tensor",  # gradient is None
-    "test_naughty_autograd_function_stashing_ctx",  # bytecode issue
-    "test_unrelated_inputs",  # gradient batching rule not implemented for aten::sym_size.int
-    "test_custom_function_non_tensor_inputs_outputs",  # gradient batching rule not implemented for aten::sym_size.int
-    "test_return_duplicate",  # gradient batching rule not implemented for aten::sym_size.int
-    "test_return_duplicate_inplace",  # gradient batching rule not implemented for aten::sym_size.int
-    "test_setitem",  # CopySlices accuracy error
-    "test_save_on_cpu_and_checkpoint",  # https://github.com/pytorch/pytorch/issues/147565
-    "test_checkpoint_detects_non_determinism",  # different error
-    "test_checkpointing_non_reentrant_autocast_cpu",  # saved != recompute
-    "test_checkpointing_non_reentrant_autocast_gpu",  # saved != recompute
-    "test_checkpointing_without_reentrant_saved_object_identity",  # same as https://github.com/pytorch/pytorch/issues/136193
-    "test_saved_variable_packing_unpacking_did_not_save_original_with_hooks",  # register_hooks multiple times
-    "test_saved_variable_saved_original_inplace_detach",  # RuntimeError not raised
-    "test_access_saved_tensor_twice_without_recomputation_works",  # saved != recompute
-    "test_checkpointing_without_reentrant_dataparallel",  # https://github.com/pytorch/pytorch/issues/127115
-    "test_checkpointing",  # takes very very long
-    "test_checkpointing_without_reentrant_input_requires_grad_False",  # takes very very long
-    "test_checkpointing_without_reentrant_input_requires_grad_True",  # takes very very long
-    "test_checkpointing_without_reentrant_memory_savings",  # takes very very long
-    "test_dtensor_different_gradient_placement",  # Dynamo failed to run FX node with fake tensors
-    "test_dtensor_noncontiguous_output",  # Dynamo failed to run FX node with fake tensors
-    "test_dtensor_partial_placement_graph_output",  # Dynamo failed to run FX node with fake tensors
-    "test_unwrap_async_collective_tensor_tangent",  # AttributeError: 'PlainTensorMeta' object has no attribute 'attrs'
-    # Category: Inductor (pass on backend="aot_eager")
-    "test_input_buffer_accum",  # does not support sparse_grad=True: https://github.com/pytorch/pytorch/issues/120267
-    "test_graph_save_on_cpu",  # does not support pin_memory: https://github.com/pytorch/pytorch/issues/134173
-    # Category: FakeTensor
-    "test_saving_variable_to_disk",  # torch.save should no-op and be recorded in the graph
-    "test_wrapped_number_saved_tensors_hooks",  # Proxy tensor should carryover is_wrapped_number_ of its original
-    "test_grad_batched_grad",  # torch._subclasses.fake_tensor.UnsupportedFakeTensorException: meta converter nyi
-    "test_scalar_grad_mixed_device",  # Fake Tensors aren't propagating device properly for 0-dim grads
-    # Category: Divergence from eager
+# These tests fail due to difference in semantics that we won't fix
+xfail_divergence_from_eager = {
     "test_invalid_gradients",  # can't give autograd error due to inaccurate output metadata of lifted backward
     "test_autograd_node_isinstance",  # backward ctx is a fake cls and not directly a Node instance
     "test_backward_hook_relative_ordering",  # compiled autograd collects breadth first, and module backward hook not supported
@@ -4382,18 +4414,17 @@ def wrap_test_class(orig_cls):
     "test_function",  # different node name: CompiledFunctionBackward
     "test_inplace_on_view_backward",  # different node name: CompiledFunctionBackward
     "test_nested_anomaly_printstack_cleanup",  # anomaly NaN error message different
-    # Uncategorized
     "test_not_implemented_grad",  # Dynamo changes the types of exceptions
-    "test_grad",  # AOT backward higher order gradients
-    "test_grad_materialize_grads",  # AOT backward higher order gradients
 }
 
+skipped_tests = set()
+
 if not HAS_CUDA:
     # Found Tesla M60 which is too old to be supported by the triton GPU compiler
-    known_failing_tests.add("test_type_conversions")
+    skipped_tests.add("test_type_conversions")
 
 if IS_S390X:
-    known_failing_tests.add("test_deep_reentrant")
+    skipped_tests.add("test_deep_reentrant")
 
 test_autograd = load_test_module("test_autograd")
 test_custom_ops = load_test_module("test_custom_ops")