@@ -309,7 +309,6 @@ def is_inplace(op, variant):
     skip('pca_lowrank', ''),  # fails on cuda, runs okay on cpu
     skip('svd_lowrank', ''),  # fails on cuda, runs okay on cpu
     skip('nn.functional.dropout2d', ''),  # fails on cuda, runs okay on cpu
-    xfail('__getitem__', device_type='cuda'),
 }


@@ -318,18 +317,6 @@ class TestOperators(TestCase):
     @skipOps('TestOperators', 'test_grad', vjp_fail.union({
         skip('nn.functional.fractional_max_pool2d'),  # fails on cuda, runs okay on cpu
         skip('nn.functional.fractional_max_pool3d'),  # fails on cuda, runs okay on cpu
-        xfail('__getitem__', 'functorch', device_type='cuda'),
-        xfail('_masked.amax', device_type='cuda'),
-        xfail('_masked.amin', device_type='cuda'),
-        xfail('_masked.log_softmax', device_type='cuda'),
-        xfail('_masked.mean', device_type='cuda'),
-        xfail('_masked.norm', device_type='cuda'),
-        xfail('_masked.prod', device_type='cuda'),
-        xfail('_masked.softmax', device_type='cuda'),
-        xfail('_masked.softmin', device_type='cuda'),
-        xfail('_masked.std', device_type='cuda'),
-        xfail('_masked.sum', device_type='cuda'),
-        xfail('_masked.var', device_type='cuda'),
     }))
     @opsToleranceOverride('TestOperators', 'test_grad', (
         tol1('nn.functional.binary_cross_entropy_with_logits',
@@ -409,16 +396,6 @@ def wrapped_fn(*args, **kwargs):
         skip('nn.functional.max_unpool1d'),  # fails everywhere except on mac
         skip('nn.functional.max_unpool2d'),  # fails everywhere except on windows
         xfail('nn.functional.max_unpool3d'),
-        xfail('__getitem__', device_type='cuda'),
-        xfail('_masked.log_softmax', device_type='cuda'),
-        xfail('_masked.mean', device_type='cuda'),
-        xfail('_masked.norm', device_type='cuda'),
-        xfail('_masked.prod', device_type='cuda'),
-        xfail('_masked.softmax', device_type='cuda'),
-        xfail('_masked.softmin', device_type='cuda'),
-        xfail('_masked.std', device_type='cuda'),
-        xfail('_masked.sum', device_type='cuda'),
-        xfail('_masked.var', device_type='cuda'),
     }))
     @opsToleranceOverride('TestOperators', 'test_jvp', (
         tol1('nn.functional.conv_transpose3d',
@@ -466,19 +443,6 @@ def test_jvp(self, device, dtype, op):
         xfail('nn.functional.dropout2d', ''),
         xfail('nn.functional.feature_alpha_dropout', 'without_train'),
         xfail('svd_lowrank', ''),
-
-        xfail('__getitem__', 'functorch', device_type='cuda'),
-        xfail('_masked.amax', device_type='cuda'),
-        xfail('_masked.amin', device_type='cuda'),
-        xfail('_masked.log_softmax', device_type='cuda'),
-        xfail('_masked.mean', device_type='cuda'),
-        xfail('_masked.norm', device_type='cuda'),
-        xfail('_masked.prod', device_type='cuda'),
-        xfail('_masked.softmax', device_type='cuda'),
-        xfail('_masked.softmin', device_type='cuda'),
-        xfail('_masked.std', device_type='cuda'),
-        xfail('_masked.sum', device_type='cuda'),
-        xfail('_masked.var', device_type='cuda'),
     }))
     @opsToleranceOverride('TestOperators', 'test_vjp', (
         tol1('nn.functional.conv_transpose3d',
@@ -524,19 +488,6 @@ def _test(_op):
         skip('nn.functional.fractional_max_pool2d'),  # randomness
         skip('nn.functional.fractional_max_pool3d'),  # randomness
         xfail('nn.functional.binary_cross_entropy'),  # testing problem
-
-        xfail('__getitem__', 'functorch', device_type='cuda'),
-        xfail('_masked.amax', device_type='cuda'),
-        xfail('_masked.amin', device_type='cuda'),
-        xfail('_masked.log_softmax', device_type='cuda'),
-        xfail('_masked.mean', device_type='cuda'),
-        xfail('_masked.norm', device_type='cuda'),
-        xfail('_masked.prod', device_type='cuda'),
-        xfail('_masked.softmax', device_type='cuda'),
-        xfail('_masked.softmin', device_type='cuda'),
-        xfail('_masked.std', device_type='cuda'),
-        xfail('_masked.sum', device_type='cuda'),
-        xfail('_masked.var', device_type='cuda'),
     }))
     @opsToleranceOverride('TestOperators', 'test_vjpvjp', (
         tol1('nn.functional.conv_transpose3d',
@@ -672,19 +623,6 @@ def vjp_of_vjp(*args_and_cotangents):
         # NYI: querying is_contiguous inside of vmap for memory_format other than torch.contiguous_format
         xfail('nn.functional.max_unpool2d'),
         xfail('nn.functional.max_unpool2d', 'grad'),
-
-        xfail('__getitem__', 'functorch', device_type='cuda'),
-        xfail('_masked.amax', device_type='cuda'),
-        xfail('_masked.amin', device_type='cuda'),
-        xfail('_masked.log_softmax', device_type='cuda'),
-        xfail('_masked.mean', device_type='cuda'),
-        xfail('_masked.norm', device_type='cuda'),
-        xfail('_masked.prod', device_type='cuda'),
-        xfail('_masked.softmax', device_type='cuda'),
-        xfail('_masked.softmin', device_type='cuda'),
-        xfail('_masked.std', device_type='cuda'),
-        xfail('_masked.sum', device_type='cuda'),
-        xfail('_masked.var', device_type='cuda'),
     })

     @ops(functorch_lagging_op_db + additional_op_db, allowed_dtypes=(torch.float,))
@@ -774,18 +712,8 @@ def test_vmapvjp(self, device, dtype, op):
         xfail('nn.functional.max_unpool2d'),
         xfail('nn.functional.max_unpool3d'),

-        xfail('__getitem__', device_type='cuda'),
-        xfail('_masked.amax', device_type='cuda'),
-        xfail('_masked.amin', device_type='cuda'),
-        xfail('_masked.log_softmax', device_type='cuda'),
-        xfail('_masked.mean', device_type='cuda'),
-        xfail('_masked.norm', device_type='cuda'),
-        xfail('_masked.prod', device_type='cuda'),
-        xfail('_masked.softmax', device_type='cuda'),
-        xfail('_masked.softmin', device_type='cuda'),
-        xfail('_masked.std', device_type='cuda'),
-        xfail('_masked.sum', device_type='cuda'),
-        xfail('_masked.var', device_type='cuda'),
+        xfail('nn.functional.embedding'),  # embedding_renorm_ does not support fwd AD
+        xfail('put'),  # calls put_ during vmap with only vmaps over other, not self
     })
     def test_vmapjvp(self, device, dtype, op):
         if is_inplace(op, op.get_op()):
@@ -820,15 +748,13 @@ def test_vmapjvp(self, device, dtype, op):

         # The following are bugs that we should fix
         skip('nn.functional.max_pool1d'),  # fails on cpu, runs on cuda
-        xfail('_masked.mean', device_type='cuda'),
-        xfail('_masked.prod', device_type='cuda'),
         xfail('nn.functional.batch_norm', device_type='cuda'),
         xfail('nn.functional.batch_norm', 'without_cudnn', device_type='cuda'),
         xfail('nn.functional.hinge_embedding_loss', device_type='cuda'),
+        xfail('_masked.mean'),
+        xfail('_masked.prod'),

         # Causing issues with multiple cpu levels of forward mode AD
-        xfail('_masked.mean', device_type='cpu'),
-        xfail('_masked.prod', device_type='cpu'),
         xfail('nn.functional.batch_norm', device_type='cpu'),
         xfail('nn.functional.hinge_embedding_loss', device_type='cpu'),

@@ -863,18 +789,9 @@ def test_vmapjvp(self, device, dtype, op):
         xfail('nn.functional.max_unpool2d'),
         xfail('nn.functional.max_unpool3d'),

-        xfail('__getitem__', device_type='cuda'),
-        xfail('_masked.amax', device_type='cuda'),
-        xfail('_masked.amin', device_type='cuda'),
-        xfail('_masked.log_softmax', device_type='cuda'),
-        xfail('_masked.mean', device_type='cuda'),
-        xfail('_masked.norm', device_type='cuda'),
-        xfail('_masked.prod', device_type='cuda'),
-        xfail('_masked.softmax', device_type='cuda'),
-        xfail('_masked.softmin', device_type='cuda'),
-        xfail('_masked.std', device_type='cuda'),
-        xfail('_masked.sum', device_type='cuda'),
-        xfail('_masked.var', device_type='cuda'),
+        xfail('nn.functional.embedding'),  # embedding_renorm_ does not support fwd AD
+        xfail('put'),  # calls put_ during vmap with only vmaps over other, not self
+        xfail('nn.functional.prelu'),  # Call Tensor.as_strided
     }

     @ops(functorch_lagging_op_db, allowed_dtypes=(torch.float,))
@@ -962,6 +879,7 @@ def test_vmapjvpall(self, device, dtype, op):
         xfail('nn.functional.max_unpool1d', 'grad'),
         xfail('lu_unpack'),
         xfail('nn.functional.glu'),
+        xfail('nn.functional.bilinear'),  # trilinear doesn't have batching rule
     }))
     @toleranceOverride({torch.float32: tol(atol=1e-04, rtol=1e-04)})
     def test_vmapjvpall_has_batch_rule(self, device, dtype, op):
@@ -1222,11 +1140,9 @@ def test_vjpvmap(self, device, dtype, op):
         xfail('nansum', ''),
         xfail('nn.functional.batch_norm', ''),
         xfail('nn.functional.batch_norm', 'without_cudnn', device_type='cuda'),
-        xfail('nn.functional.bilinear', ''),
         xfail('nn.functional.embedding', ''),
         xfail('nn.functional.embedding', 'functorch'),
         xfail('nn.functional.embedding_bag', ''),
-        xfail('nn.functional.glu', ''),
         xfail('nn.functional.grid_sample', ''),
         xfail('nn.functional.hardsigmoid', ''),
         xfail('nn.functional.hardswish', ''),
@@ -1239,11 +1155,9 @@ def test_vjpvmap(self, device, dtype, op):
         xfail('nn.functional.softmin', ''),
         xfail('nn.functional.softmin', 'with_dtype'),
         xfail('nn.functional.softplus', ''),
-        xfail('put', ''),
         xfail('renorm', ''),
         xfail('std_mean', ''),
         xfail('symeig', ''),
-        xfail('take', ''),
         xfail('var_mean', ''),
         xfail('nn.functional.feature_alpha_dropout', 'with_train'),
         xfail('nn.functional.kl_div', ''),
@@ -1264,18 +1178,6 @@ def test_vjpvmap(self, device, dtype, op):
         xfail('scatter_reduce', 'prod'),
         skip('linalg.householder_product', '', device_type='cuda'),  # flaky, I'm not sure why
         xfail('nn.functional.binary_cross_entropy_with_logits'),
-        xfail('__getitem__', 'functorch', device_type='cuda'),
-        xfail('_masked.amax', device_type='cuda'),
-        xfail('_masked.amin', device_type='cuda'),
-        xfail('_masked.log_softmax', device_type='cuda'),
-        xfail('_masked.mean', device_type='cuda'),
-        xfail('_masked.norm', device_type='cuda'),
-        xfail('_masked.prod', device_type='cuda'),
-        xfail('_masked.softmax', device_type='cuda'),
-        xfail('_masked.softmin', device_type='cuda'),
-        xfail('_masked.std', device_type='cuda'),
-        xfail('_masked.sum', device_type='cuda'),
-        xfail('_masked.var', device_type='cuda'),
     }))
     def test_jvpvjp(self, device, dtype, op):
         if not op.supports_autograd:
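A note on the markers touched by this diff: skip() prevents a test instance from running at all, xfail() runs it and requires it to fail, and device_type restricts a marker to one backend, which is why deleting the CUDA-only _masked.* entries does not change CPU runs. The sketch below is a minimal, self-contained illustration of that pattern; the helper signatures and tuple layout are assumptions for illustration, not functorch's actual skipOps implementation.

import unittest

# Simplified stand-ins for the markers above (assumed shapes, not the real helpers):
# each marker records an op name, a variant, an optional device_type, and whether
# the test is an expected failure (xfail) or should be skipped entirely.
def xfail(op_name, variant_name='', *, device_type=None):
    return (op_name, variant_name, device_type, True)

def skip(op_name, variant_name='', *, device_type=None):
    return (op_name, variant_name, device_type, False)

def decorators_for(markers, op_name, variant_name, device_type):
    # Yield the unittest decorators that apply to one (op, variant, device) combination.
    for name, variant, dev, is_xfail in markers:
        if name != op_name or variant != variant_name:
            continue
        if dev is not None and dev != device_type:
            continue  # device-restricted marker does not apply to this backend
        yield unittest.expectedFailure if is_xfail else unittest.skip('listed in skip set')

# A CUDA-only xfail (like the entries removed in this diff) only affects CUDA instances:
markers = {
    xfail('_masked.mean', device_type='cuda'),
    skip('pca_lowrank', ''),  # fails on cuda, runs okay on cpu
}
print(len(list(decorators_for(markers, '_masked.mean', '', 'cuda'))))  # 1
print(len(list(decorators_for(markers, '_masked.mean', '', 'cpu'))))   # 0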