
Commit 216e28f

xmfan authored and pytorchmergebot committed
[ca] run xfails up until their last passing backend (pytorch#153279)
Pull Request resolved: pytorch#153279
Approved by: https://github.com/jansel
ghstack dependencies: pytorch#153193, pytorch#153222
1 parent a80eb84 commit 216e28f
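
The change reroutes each xfailing test to the most capable backend it still passes under, in the order ca_eager -> eager -> aot_eager -> inductor; only tests that fail even under ca_eager stay marked as expected failures. A minimal, self-contained sketch of that selection logic, mirroring the lookup_backend helper added in the diff below (the sample set contents here are illustrative only; the real sets live in test/inductor/test_compiled_autograd.py):

    # Illustrative buckets: each key names the backend the test is expected to fail on.
    xfail_by_backend = {
        "ca_eager": {"test_lobpcg"},
        "eager": {"test_setitem"},
        "aot_eager": {"test_grad"},
        "inductor": set(),
    }


    def lookup_backend(test_name):
        """Pick the last backend in ca_eager -> eager -> aot_eager -> inductor
        that test_name is still expected to pass under."""
        if test_name in xfail_by_backend["inductor"]:
            return "aot_eager"
        elif test_name in xfail_by_backend["aot_eager"]:
            return "eager"
        elif test_name in xfail_by_backend["eager"]:
            return "ca_eager"
        else:
            # ca_eager failures are handled as unittest.expectedFailure upstream,
            # so they should never reach backend selection.
            assert test_name not in xfail_by_backend["ca_eager"]
            return "inductor"


    print(lookup_backend("test_grad"))      # eager
    print(lookup_backend("test_anything"))  # inductor

In the diff, wrap_test_class marks the ca_eager bucket (plus divergence-from-eager cases) as expectedFailure and passes lookup_backend(name) into make_compiler_fn for everything else.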


test/inductor/test_compiled_autograd.py

Lines changed: 126 additions & 95 deletions
@@ -52,25 +52,27 @@
 def make_compiler_fn(
     fullgraph=True, dynamic=True, backend="inductor", gm_hook=lambda gm: None
 ):
-    assert backend in ["inductor", "aot_eager", "ca_eager"]
+    assert backend in ["inductor", "aot_eager", "eager", "ca_eager"]
 
     def _compiler_fn(gm):
         """Same as torch.compile() but counts number of compiles"""
         gm_hook(gm)
 
+        _backend = backend
         if backend == "ca_eager":
             return gm
+        elif backend != "eager":
 
-        def _inner_compiler(gm_, example_inputs_):
-            counters["compiled_autograd"]["compiles"] += 1
-            if backend == "inductor":
-                return inductor.compile(gm_, example_inputs_)
-            elif backend == "aot_eager":
-                return aot_eager(gm_, example_inputs_)
+            def _inner_compiler(gm_, example_inputs_):
+                counters["compiled_autograd"]["compiles"] += 1
+                if backend == "inductor":
+                    return inductor.compile(gm_, example_inputs_)
+                elif backend == "aot_eager":
+                    return aot_eager(gm_, example_inputs_)
 
-        return torch.compile(
-            gm, backend=_inner_compiler, fullgraph=fullgraph, dynamic=dynamic
-        )
+            _backend = _inner_compiler
+
+        return torch.compile(gm, backend=_backend, fullgraph=fullgraph, dynamic=dynamic)
 
     return _compiler_fn
 
@@ -4182,18 +4184,38 @@ def wrapped(self):
     return wrapped
 
 
+def lookup_backend(test_name):
+    if test_name in xfail_by_backend["inductor"]:
+        return "aot_eager"
+    elif test_name in xfail_by_backend["aot_eager"]:
+        return "eager"
+    elif test_name in xfail_by_backend["eager"]:
+        return "ca_eager"
+    else:
+        assert test_name not in xfail_by_backend["ca_eager"]
+        return "inductor"
+
+
 def wrap_test_class(orig_cls):
     dct = orig_cls.__dict__.copy()
     for name in list(dct.keys()):
         fn = dct[name]
         if not callable(fn) or name in skipped_tests:
             continue
-        elif known_failures_re.match(name) or name in known_failing_tests:
+        elif (
+            xfail_re.match(name)
+            or name in xfail_by_backend["ca_eager"]
+            or name in xfail_divergence_from_eager
+        ):
             dct[name] = unittest.expectedFailure
         elif name.startswith("test_"):
-            fullgraph = name not in known_graph_breaks_tests
             ctxs = [
-                compiled_autograd._enable(make_compiler_fn(fullgraph=fullgraph)),
+                compiled_autograd._enable(
+                    make_compiler_fn(
+                        backend=lookup_backend(name),
+                        fullgraph=name not in known_graph_breaks_tests,
+                    )
+                ),
                 test_contexts.get(name, contextlib.nullcontext()),
             ]
             dct[name] = make_wrapped(fn, ctxs)
@@ -4282,6 +4304,7 @@ def wrap_test_class(orig_cls):
     "test_create_graph_and_full_backward_hook_cycle",  # _pack_with_none
     "test_full_backward_hook_double_backward",  # _pack_with_none
     "test_grad_mode_restored_reentrant",  # assertTrue
+    "test_multi_grad_any_hooks",  # register_multi_grad_hook
 }
 
 test_contexts = {
@@ -4292,85 +4315,94 @@ def wrap_test_class(orig_cls):
 }
 
 # These groups of tests aren't supported yet
-known_failures_re = re.compile(r"^test_(sparse|profiler|gradcheck|named_tensor)")
-
-# Bugs needing investigation:
-skipped_tests = {
-    "test_callback_propagates_errors_from_device_thread",  # fullgraph for queue_callback, but graph break for RuntimeError
+xfail_re = re.compile(r"^test_(sparse|profiler|gradcheck|named_tensor)")
+
+# Tests fail at different stages, we categorize them wrt to their backends
+# We run only the last passing backend in this order:
+# ca_eager -> eager -> aot_eager -> inductor
+xfail_by_backend = {
+    "ca_eager": {  # xfail
+        "test_callback_propagates_errors_from_device_thread",  # fullgraph for queue_callback, but graph break for RuntimeError
+        "test_reentrant_with_callbacks_both_depths",  # queue_callback
+        "test_reentrant_with_callbacks_depth_0",  # queue_callback
+        "test_reentrant_with_callbacks_depth_1",  # queue_callback
+        "test_current_graph_task_execution_order",  # nodes are already freed by the time dynamo traces the lifted hook
+        "test_autograd_inplace_views_cross_dtype",  # view_fn not supported by compiled autograd
+        "test_current_node",  # TorchDispatchMode not yet implemented for compiled autograd
+        "test_post_accumulate_grad_hook_ordering",  # accuracy error
+        "test_current_graph_task_id",  # autograd state already cleared once dynamo is called
+        "test_custom_function_forward_mode_forward_is_no_op",  # forward AD
+        "test_custom_function_forward_mode_inplace_checks",  # forward AD
+        "test_custom_function_forward_mode_view_checks",  # forward AD
+        "test_custom_function_forward_mode_wrong_formula",  # forward AD
+        "test_node_post_hook_registered_during_unpack_hook",  # 'NoneType' object has no attribute 'register_hook'
+        "test_custom_function_error",  # forward AD
+        "test_custom_function_save_for_forward",  # forward AD
+        "test_dont_materialize_grads",  # undefined grad
+        "test_no_grad_copy",  # setting static member in lifted backward
+        "test_no_grad_copy_sparse",  # setting static member in lifted backward
+        "test_node_ordering_when_none_returned",  # torch._dynamo.exc.Unsupported: TypeError <built-in method clone
+        "test_save_output_nr",  # output_nr grad passed as None
+        # IndexError: list index out of range (NB: x.grad = y where both x and y are input tensors)
+        "test_grad_nonleaf_register_hook",
+        "test_backward_twice_without_saved_values",  # https://github.com/pytorch/pytorch/issues/129938
+        # Category: Higher Order Gradients
+        "test_default_saved_tensors_hooks_double_backward",  # wrong when pack hook returns non-leaf
+        "test_saved_variable_packing_unpacking_saved_original_with_hooks",  # wrong when pack hook returns non-leaf
+        "test_nested_anomaly_detect_nan",  # nested anomaly
+        "test_select_sum",  # batched gradients
+        "test_custom_autograd_no_early_free",  # batched gradients
+        "test_grad_batched_grad",  # batched gradients
+        # Uncategorized
+        "test_lobpcg",  # NaNs
+        "test_autograd_simple_views_python",  # gradient is None
+        "test_function_returns_undefined_tensor",  # gradient is None
+        "test_input_buffer_accum",  # add(sparse, dense)
+        "test_return_duplicate",  # batched gradients
+        "test_return_duplicate_inplace",  # batched gradients
+        "test_naughty_autograd_function_stashing_ctx",  # error not raised
+        "test_unrelated_inputs",  # batched gradients
+    },
+    "eager": {  # will be run without torch.compiling the CA graph
+        "test_setup_context_when_forward_has_default_args",  # autograd.Function with class methods
+        "test_accumulate_grad_tensor_reference",  # Out of bounds: frame_state_entry.stride[i] is None
+        "test_custom_function_exception",  # torch.no_grad(), torch._dynamo.exc.Unsupported: missing: WITH_EXCEPT_START
+        "test_to_sparse_backward",  # Out of bounds: frame_state_entry.stride[i] is None
+        "test_custom_function_non_tensor_inputs_outputs",  # gradient batching rule not implemented for aten::sym_size.int
+        "test_setitem",  # CopySlices accuracy error
+        "test_save_on_cpu_and_checkpoint",  # https://github.com/pytorch/pytorch/issues/147565
+        "test_checkpoint_detects_non_determinism",  # different error
+        "test_checkpointing_non_reentrant_autocast_cpu",  # saved != recompute
+        "test_checkpointing_non_reentrant_autocast_gpu",  # saved != recompute
+        "test_checkpointing_without_reentrant_saved_object_identity",  # same as https://github.com/pytorch/pytorch/issues/136193
+        "test_saved_variable_packing_unpacking_did_not_save_original_with_hooks",  # register_hooks multiple times
+        "test_saved_variable_saved_original_inplace_detach",  # RuntimeError not raised
+        "test_access_saved_tensor_twice_without_recomputation_works",  # saved != recompute
+        "test_checkpointing_without_reentrant_dataparallel",  # https://github.com/pytorch/pytorch/issues/127115
+        "test_checkpointing",  # takes very very long
+        "test_checkpointing_without_reentrant_input_requires_grad_False",  # takes very very long
+        "test_checkpointing_without_reentrant_input_requires_grad_True",  # takes very very long
+        "test_checkpointing_without_reentrant_memory_savings",  # takes very very long
+        "test_dtensor_different_gradient_placement",  # Dynamo failed to run FX node with fake tensors
+        "test_dtensor_noncontiguous_output",  # Dynamo failed to run FX node with fake tensors
+        "test_dtensor_partial_placement_graph_output",  # Dynamo failed to run FX node with fake tensors
+        "test_unwrap_async_collective_tensor_tangent",  # AttributeError: 'PlainTensorMeta' object has no attribute 'attrs'
+        "test_graph_save_on_cpu",  # torch.save should no-op and be recorded in the graph
+        "test_saving_variable_to_disk",  # torch.save should no-op and be recorded in the graph
+    },
+    "aot_eager": {  # will be run with torch.compile(backend="eager")
+        # Category: FakeTensor
+        "test_wrapped_number_saved_tensors_hooks",  # Proxy tensor should carryover is_wrapped_number_ of its original
+        "test_scalar_grad_mixed_device",  # Fake Tensors aren't propagating device properly for 0-dim grads
+        "test_grad",  # AOT backward higher order gradients
+        "test_grad_materialize_grads",  # AOT backward higher order gradients
+    },
+    "inductor": {},  # will be run with torch.compile(backend="aot_eager")
+    # tests not present in this dict will be run with torch.compile(backend="inductor")
 }
 
-known_failing_tests = {
-    # Category: Compiled autograd
-    "test_reentrant_with_callbacks_both_depths",  # queue_callback
-    "test_reentrant_with_callbacks_depth_0",  # queue_callback
-    "test_reentrant_with_callbacks_depth_1",  # queue_callback
-    "test_current_graph_task_execution_order",  # nodes are already freed by the time dynamo traces the lifted hook
-    "test_autograd_inplace_views_cross_dtype",  # view_fn not supported by compiled autograd
-    "test_current_node",  # TorchDispatchMode not yet implemented for compiled autograd
-    "test_post_accumulate_grad_hook_ordering",  # accuracy error
-    "test_current_graph_task_id",  # autograd state already cleared once dynamo is called
-    "test_custom_function_forward_mode_forward_is_no_op",  # forward AD
-    "test_custom_function_forward_mode_inplace_checks",  # forward AD
-    "test_custom_function_forward_mode_view_checks",  # forward AD
-    "test_custom_function_forward_mode_wrong_formula",  # forward AD
-    "test_node_post_hook_registered_during_unpack_hook",  # 'NoneType' object has no attribute 'register_hook'
-    "test_multi_grad_any_hooks",  # register_multi_grad_hook
-    "test_custom_function_error",  # vjp
-    "test_custom_function_save_for_forward",  # vjp
-    "test_dont_materialize_grads",  # undefined grad
-    "test_no_grad_copy",  # setting static member in lifted backward
-    "test_no_grad_copy_sparse",  # setting static member in lifted backward
-    "test_node_ordering_when_none_returned",  # torch._dynamo.exc.Unsupported: TypeError <built-in method clone
-    "test_save_output_nr",  # output_nr grad passed as None
-    "test_setup_context_when_forward_has_default_args",  # autograd.Function with class methods
-    # IndexError: list index out of range (NB: x.grad = y where both x and y are input tensors)
-    "test_grad_nonleaf_register_hook",
-    "test_backward_twice_without_saved_values",  # https://github.com/pytorch/pytorch/issues/129938
-    # Category: Higher Order Gradients
-    "test_default_saved_tensors_hooks_double_backward",  # wrong when pack hook returns non-leaf
-    "test_saved_variable_packing_unpacking_saved_original_with_hooks",  # wrong when pack hook returns non-leaf
-    "test_nested_anomaly_detect_nan",  # nested anomaly
-    "test_select_sum",  # batched gradients
-    "test_custom_autograd_no_early_free",  # batched gradients
-    "test_lobpcg",  # NaNs
-    # Category: Dynamo (pass when directly running CA graph)
-    "test_accumulate_grad_tensor_reference",  # Out of bounds: frame_state_entry.stride[i] is None
-    "test_custom_function_exception",  # torch.no_grad(), torch._dynamo.exc.Unsupported: missing: WITH_EXCEPT_START
-    "test_to_sparse_backward",  # Out of bounds: frame_state_entry.stride[i] is None
-    "test_autograd_simple_views_python",  # gradient is None
-    "test_function_returns_undefined_tensor",  # gradient is None
-    "test_naughty_autograd_function_stashing_ctx",  # bytecode issue
-    "test_unrelated_inputs",  # gradient batching rule not implemented for aten::sym_size.int
-    "test_custom_function_non_tensor_inputs_outputs",  # gradient batching rule not implemented for aten::sym_size.int
-    "test_return_duplicate",  # gradient batching rule not implemented for aten::sym_size.int
-    "test_return_duplicate_inplace",  # gradient batching rule not implemented for aten::sym_size.int
-    "test_setitem",  # CopySlices accuracy error
-    "test_save_on_cpu_and_checkpoint",  # https://github.com/pytorch/pytorch/issues/147565
-    "test_checkpoint_detects_non_determinism",  # different error
-    "test_checkpointing_non_reentrant_autocast_cpu",  # saved != recompute
-    "test_checkpointing_non_reentrant_autocast_gpu",  # saved != recompute
-    "test_checkpointing_without_reentrant_saved_object_identity",  # same as https://github.com/pytorch/pytorch/issues/136193
-    "test_saved_variable_packing_unpacking_did_not_save_original_with_hooks",  # register_hooks multiple times
-    "test_saved_variable_saved_original_inplace_detach",  # RuntimeError not raised
-    "test_access_saved_tensor_twice_without_recomputation_works",  # saved != recompute
-    "test_checkpointing_without_reentrant_dataparallel",  # https://github.com/pytorch/pytorch/issues/127115
-    "test_checkpointing",  # takes very very long
-    "test_checkpointing_without_reentrant_input_requires_grad_False",  # takes very very long
-    "test_checkpointing_without_reentrant_input_requires_grad_True",  # takes very very long
-    "test_checkpointing_without_reentrant_memory_savings",  # takes very very long
-    "test_dtensor_different_gradient_placement",  # Dynamo failed to run FX node with fake tensors
-    "test_dtensor_noncontiguous_output",  # Dynamo failed to run FX node with fake tensors
-    "test_dtensor_partial_placement_graph_output",  # Dynamo failed to run FX node with fake tensors
-    "test_unwrap_async_collective_tensor_tangent",  # AttributeError: 'PlainTensorMeta' object has no attribute 'attrs'
-    # Category: Inductor (pass on backend="aot_eager")
-    "test_input_buffer_accum",  # does not support sparse_grad=True: https://github.com/pytorch/pytorch/issues/120267
-    "test_graph_save_on_cpu",  # does not support pin_memory: https://github.com/pytorch/pytorch/issues/134173
-    # Category: FakeTensor
-    "test_saving_variable_to_disk",  # torch.save should no-op and be recorded in the graph
-    "test_wrapped_number_saved_tensors_hooks",  # Proxy tensor should carryover is_wrapped_number_ of its original
-    "test_grad_batched_grad",  # torch._subclasses.fake_tensor.UnsupportedFakeTensorException: meta converter nyi
-    "test_scalar_grad_mixed_device",  # Fake Tensors aren't propagating device properly for 0-dim grads
-    # Category: Divergence from eager
+# These tests fail due to difference in semantics that we won't fix
+xfail_divergence_from_eager = {
     "test_invalid_gradients",  # can't give autograd error due to inaccurate output metadata of lifted backward
     "test_autograd_node_isinstance",  # backward ctx is a fake cls and not directly a Node instance
     "test_backward_hook_relative_ordering",  # compiled autograd collects breadth first, and module backward hook not supported
@@ -4382,18 +4414,17 @@ def wrap_test_class(orig_cls):
     "test_function",  # different node name: CompiledFunctionBackward
     "test_inplace_on_view_backward",  # different node name: CompiledFunctionBackward
     "test_nested_anomaly_printstack_cleanup",  # anomaly NaN error message different
-    # Uncategorized
     "test_not_implemented_grad",  # Dynamo changes the types of exceptions
-    "test_grad",  # AOT backward higher order gradients
-    "test_grad_materialize_grads",  # AOT backward higher order gradients
 }
 
+skipped_tests = set()
+
 if not HAS_CUDA:
     # Found Tesla M60 which is too old to be supported by the triton GPU compiler
-    known_failing_tests.add("test_type_conversions")
+    skipped_tests.add("test_type_conversions")
 
 if IS_S390X:
-    known_failing_tests.add("test_deep_reentrant")
+    skipped_tests.add("test_deep_reentrant")
 
 test_autograd = load_test_module("test_autograd")
 test_custom_ops = load_test_module("test_custom_ops")