
Commit 5497d75

Make it so that vmap tests generate with bdim=-1 as well as 0 (#204)
Fixes #62. There are a lot of new xfails.
1 parent 66a05dc commit 5497d75
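
As a rough illustration of what the new coverage exercises (not part of the commit itself): the same logical tensor batched at dim 0 and at dim -1, fed through vmap with matching in_dims. The bdim=-1 construction mirrors the updated add_batch_dim helper in test/common_utils.py below; the snippet assumes a functorch install.

import torch
from functorch import vmap

# Illustrative only: the same logical tensor batched at dim 0 vs. dim -1.
# The bdim=-1 construction mirrors add_batch_dim in test/common_utils.py.
x = torch.randn(2, 5)
batch_size = 3

batched_front = x.unsqueeze(0).expand(batch_size, *x.shape).contiguous()  # bdim=0
batched_back = x.unsqueeze(-1).expand(*x.shape, batch_size).contiguous()  # bdim=-1

# Both placements should agree with looping over the batch slices.
out_front = vmap(torch.sin, in_dims=0)(batched_front)   # shape (3, 2, 5)
out_back = vmap(torch.sin, in_dims=-1)(batched_back)    # shape (3, 2, 5)
assert torch.allclose(out_front, out_back)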

7 files changed: +85, -29 lines

7 files changed

+85
-29
lines changed

functorch/csrc/BatchRulesLinearAlgebra.cpp

Lines changed: 1 addition & 0 deletions

@@ -175,6 +175,7 @@ TORCH_LIBRARY_IMPL(aten, FT_BATCHED_KEY, m) {
   VARIADIC_BDIMS(logdet);
   VARIADIC_BDIMS(matrix_exp);
   VARIADIC_BDIMS(pinverse);
+  VARIADIC_BDIMS(inverse);
   VARIADIC_BDIMS_BOXED(slogdet);
   VARIADIC_BDIMS_BOXED(_svd_helper);
   VARIADIC_BDIMS_BOXED(solve);

functorch/csrc/BatchRulesReduceOps.cpp

Lines changed: 2 additions & 2 deletions

@@ -271,8 +271,8 @@ std::tuple<Tensor, optional<int64_t>, Tensor, optional<int64_t>> aminmax_batchin
     dim = maybe_wrap_dim(dim.value(), logical_rank) + 1;
   } else {
     // flatten the input except for batch-dim
-    auto bsize = self.size(0);
-    self_ = self.view({bsize, -1});
+    auto bsize = self_.size(0);
+    self_ = self_.view({bsize, -1});
     dim = 1;
   }
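
The two-line fix above flattens self_ (the tensor whose batch dimension has already been moved to the front) instead of self. A minimal Python sketch of that reduction path, with illustrative shapes only:

import torch

# Sketch of the "reduce over all dims" path of the batched aminmax:
# flatten everything except the leading batch dimension, then reduce over
# dim=1. Flattening a tensor whose batch dim is not already at the front
# would mix batch and data elements, which is what the fix avoids.
self_ = torch.randn(5, 3, 4)             # batch dim already moved to the front
bsize = self_.size(0)
flat = self_.view(bsize, -1)             # shape (5, 12)
mins, maxs = torch.aminmax(flat, dim=1)  # each of shape (5,)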

functorch/csrc/BatchRulesUnaryOps.cpp

Lines changed: 0 additions & 1 deletion

@@ -39,7 +39,6 @@ TORCH_LIBRARY_IMPL(aten, FT_BATCHED_KEY, m) {
   UNARY_POINTWISE_ALL(floor);
   UNARY_POINTWISE_ALL(frac);
   UNARY_POINTWISE(glu);
-  UNARY_POINTWISE(inverse);
   UNARY_POINTWISE(isfinite);
   UNARY_POINTWISE(isnan);
   UNARY_POINTWISE(isposinf);
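
Together with the one-line addition in BatchRulesLinearAlgebra.cpp above, this moves inverse from the pointwise rule to the variadic-bdims rule: inverse consumes whole matrices, so its batch rule must cope with a batch dimension in any position rather than being applied elementwise. A hedged usage sketch (assumes functorch is installed):

import torch
from functorch import vmap

# Sketch: vmapping torch.inverse with the batch dimension last.
# Each slice A[..., i] is a 3x3 matrix; the result stacks the inverses
# along a new leading batch dimension.
A = torch.randn(3, 3, 7)                  # 7 matrices, batch dim at -1
out = vmap(torch.inverse, in_dims=-1)(A)
assert out.shape == (7, 3, 3)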

functorch/csrc/BatchRulesViews.cpp

Lines changed: 19 additions & 8 deletions

@@ -11,7 +11,7 @@
 #include <functorch/csrc/BatchedFallback.h>
 #include <ATen/core/dispatch/Dispatcher.h>
 #include <c10/util/SmallBuffer.h>
-
+#include <ATen/InferSize.h>
 
 namespace at { namespace functorch {
 
@@ -134,10 +134,21 @@ std::tuple<Tensor,optional<int64_t>> _unsafe_view_batch_rule(
     const Tensor& self,
     optional<int64_t> self_bdim,
     IntArrayRef size) {
+  auto self_ = moveBatchDimToFront(self, self_bdim);
   VmapDimVector view_size(size);
-  view_size.insert(view_size.begin() + *self_bdim, self.size(*self_bdim));
-
-  return std::make_tuple(at::_unsafe_view(self, view_size), self_bdim);
+  view_size.insert(view_size.begin(), self_.size(0));
+
+  // See if the view is valid. If it's not, then we copy.
+  // It's OK to copy, because _unsafe_view(x) guarantees that x isn't used
+  // anymore.
+  const at::DimVector inferred_size = at::infer_size_dv(view_size, self_.numel());
+  const auto stride = at::detail::computeStride(self_.sizes(),
+                                                self_.strides(),
+                                                inferred_size);
+  if (!stride.has_value()) {
+    self_ = self_.contiguous();
+  }
+  return std::make_tuple(at::_unsafe_view(self_, view_size), 0);
 }
 
 Tensor trace_decomp(const Tensor& self) {
@@ -276,11 +287,11 @@ std::tuple<Tensor, optional<int64_t>> _reshape_alias_batch_rule(const Tensor& se
   (void) strides;
   TORCH_INTERNAL_ASSERT(bdim.has_value());
 
+  auto self_ = moveBatchDimToFront(self, bdim);
   c10::SmallBuffer<int64_t, 5> new_shape(shape.size() + 1);
-  new_shape[*bdim] = self.size(*bdim);
-  std::copy(shape.begin(), shape.begin() + *bdim, new_shape.begin());
-  std::copy(shape.begin() + *bdim, shape.end(), new_shape.begin() + *bdim + 1);
-  return std::make_tuple(at::reshape(self, new_shape), bdim);
+  new_shape[0] = self_.size(0);
+  std::copy(shape.begin(), shape.end(), new_shape.begin() + 1);
+  return std::make_tuple(at::reshape(self_, new_shape), 0);
 }
 
 std::tuple<Tensor, optional<int64_t>> roll_batch_rule(const Tensor& self, optional<int64_t> bdim, IntArrayRef shifts, IntArrayRef dims) {
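
The _unsafe_view change is the one most directly exposed by bdim=-1: after moveBatchDimToFront the tensor is generally non-contiguous, so the requested view may not be representable with strides alone, and the computeStride check above falls back to a contiguous copy. An illustrative PyTorch-level sketch of the same situation (not functorch internals):

import torch

# After moving a non-leading batch dimension to the front, the tensor is
# usually non-contiguous and some reshapes can no longer be expressed as a
# view. The batch rule detects this (computeStride has no answer) and makes
# a contiguous copy, which is safe because _unsafe_view's input is
# guaranteed not to be used again.
B = 5
x = torch.randn(3, B, 4)       # per-example shape (3, 4), batch dim in the middle
x_front = x.movedim(1, 0)      # shape (B, 3, 4), non-contiguous

try:
    out = x_front.view(B, 12)  # raises: strides cannot represent this view
except RuntimeError:
    out = x_front.contiguous().view(B, 12)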

test/common_utils.py

Lines changed: 25 additions & 18 deletions

@@ -37,32 +37,39 @@ def loop(op, in_dims, out_dim, batch_size, *batched_args, **kwarg_values):

 def get_exhaustive_batched_inputs(arg_values, kwarg_values, batch_size=3):
     def add_batch_dim(arg, bdim, batch_size=3):
+        assert bdim == 0 or bdim == -1
         if isinstance(arg, torch.Tensor):
-            shape = [1] * len(arg.shape)
-            shape.insert(bdim, batch_size)
-            return (arg.repeat(shape), bdim)
+            if bdim == 0:
+                shape = [1] * len(arg.shape)
+                shape.insert(bdim, batch_size)
+                return (arg.repeat(shape), bdim)
+            if bdim == -1:
+                arg = arg.unsqueeze(-1).expand(*arg.shape, batch_size).contiguous()
+                return (arg, bdim)
+            assert False
         else:
             return (arg, None)

-    batch_choices = []
-    def add_batch_choices(a):
-        if isinstance(a, torch.Tensor):
-            batched_val = add_batch_dim(a, 0, batch_size)
-            batch_choices.append((batched_val, (a, None)))
-        else:
-            batch_choices.append(((a, None),))
+    for bdim in [0, -1]:
+        batch_choices = []
+        def add_batch_choices(a):
+            if isinstance(a, torch.Tensor):
+                batched_val = add_batch_dim(a, bdim, batch_size)
+                batch_choices.append((batched_val, (a, None)))
+            else:
+                batch_choices.append(((a, None),))

-    flat_args, arg_spec = pytree.tree_flatten(tuple(arg_values))
-    for arg in flat_args:
-        add_batch_choices(arg)
+        flat_args, arg_spec = pytree.tree_flatten(tuple(arg_values))
+        for arg in flat_args:
+            add_batch_choices(arg)

-    for batched_values in itertools.product(*batch_choices):
-        batched_args, in_dims = zip(*batched_values)
+        for batched_values in itertools.product(*batch_choices):
+            batched_args, in_dims = zip(*batched_values)

-        if all([i is None for i in in_dims]):
-            continue
+            if all([i is None for i in in_dims]):
+                continue

-        yield pytree.tree_unflatten(batched_args, arg_spec), pytree.tree_unflatten(in_dims, arg_spec), kwarg_values
+            yield pytree.tree_unflatten(batched_args, arg_spec), pytree.tree_unflatten(in_dims, arg_spec), kwarg_values


 def get_fallback_and_vmap_exhaustive(op, arg_values, kwarg_values, compute_loop_out=True):
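
With the new loop over bdim in [0, -1], the generator yields every batching choice twice, once per batch-dim placement. A hedged usage sketch of the helper shown above (the import path assumes you are running from the test directory):

import torch
from common_utils import get_exhaustive_batched_inputs  # helper shown above

# Each tensor argument is now offered with its batch dim at 0 and again at
# -1, so in_dims cycles through both placements (default batch_size=3).
args = (torch.randn(2, 3),)
for batched_args, in_dims, kwargs in get_exhaustive_batched_inputs(args, {}):
    print(in_dims, batched_args[0].shape)
    # expected: (0,) with shape (3, 2, 3), then (-1,) with shape (2, 3, 3)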

test/test_ops.py

Lines changed: 25 additions & 0 deletions

@@ -307,12 +307,24 @@ def vjp_of_vjp(*args_and_cotangents):

 @ops(functorch_lagging_op_db + additional_op_db, allowed_dtypes=(torch.float,))
 @skipOps('TestOperators', 'test_vmapvjp', vjp_fail.union({
+    # All of the following are bugs and need to be fixed
     xfail('clamp', ''),
     xfail('diag_embed'),
     xfail('eig'),
+    xfail('matrix_exp'),
+    xfail('nn.functional.conv_transpose2d'),
+    xfail('nn.functional.pad', 'constant'),
+    xfail('view_as_complex'),
+    xfail('fft.fft'),
+    xfail('fft.ifft'),
+    xfail('fft.ihfft'),
     xfail('fft.ihfft'),
     xfail('fft.rfft'),
+    xfail('fft.rfft'),
+    xfail('fft.fftn'),
     xfail('fft.rfftn'),
+    xfail('fft.ifftn'),
+    xfail('cdist'),
     xfail('fmax'),
     xfail('fmin'),
     xfail('index_add'),
@@ -373,6 +385,8 @@ def test_vmapvjp(self, device, dtype, op):

 @ops(functorch_lagging_op_db + additional_op_db, allowed_dtypes=(torch.float,))
 @skipOps('TestOperators', 'test_vmapvjp_has_batch_rule', {
+    xfail('nn.functional.pad', 'constant'),
+    xfail('view_as_complex'),
     xfail('__getitem__'),
     xfail('__rpow__'),
     xfail('cdist'),
@@ -388,9 +402,14 @@ def test_vmapvjp(self, device, dtype, op):
     xfail('diag'),
     xfail('diag_embed'),
     xfail('eig'),
+    xfail('fft.fft'),
+    xfail('fft.fftn'),
+    xfail('fft.ifft'),
+    xfail('fft.ifftn'),
     xfail('fft.ihfft'),
     xfail('fft.rfft'),
     xfail('fft.rfftn'),
+    xfail('cdist'),
     xfail('fill_'),
     xfail('float_power'),
     xfail('fmax'),
@@ -500,6 +519,7 @@ def test():

 @ops(functorch_lagging_op_db + additional_op_db, allowed_dtypes=(torch.float,))
 @skipOps('TestOperators', 'test_vjpvmap', vjp_fail.union({
+    # All of the following are bugs and need to be fixed
     xfail('__getitem__'),
     xfail('clamp', ''),
     xfail('dsplit'),
@@ -518,6 +538,11 @@ def test():
     xfail('block_diag'),
     xfail('nn.functional.batch_norm'),
     xfail('nn.functional.nll_loss'),
+    xfail('cdist'),
+    xfail('lu_solve'),
+    xfail('lu_unpack'),
+    xfail('matrix_exp'),
+    xfail('view_as_complex'),
 }))
 def test_vjpvmap(self, device, dtype, op):
     # NB: there is no vjpvmap_has_batch_rule test because that is almost

test/test_vmap.py

Lines changed: 13 additions & 0 deletions

@@ -2989,6 +2989,7 @@ class TestVmapOperatorsOpInfo(TestCase):
     xfail('gradient'),
     xfail('hsplit'),
     xfail('nn.functional.pad', 'circular'),
+    xfail('resize_'),
     xfail('resize_as_'),
     xfail('tensor_split'),
     xfail('to_sparse'),
@@ -3000,15 +3001,24 @@ class TestVmapOperatorsOpInfo(TestCase):
     xfail('nanmean'),
     xfail('block_diag'),
     xfail('nn.functional.dropout'),
+    xfail('view_as_complex'),

     # entries in here don't work and need to be fixed.
     # Each one of these is a bug
     xfail('unfold'),
     xfail('svd', device_type='cuda'),
     xfail('linalg.svd', device_type='cuda'),
     xfail('index_put'),
+    xfail('matrix_exp'),
+    xfail('fft.fft'),
+    xfail('fft.ifft'),
+    xfail('fft.ihfft'),
+    xfail('fft.rfft'),
+    xfail('fft.rfftn'),
     xfail('nn.functional.batch_norm'),
     xfail('nn.functional.nll_loss'),
+    xfail('lu_unpack'),
+    xfail('nn.functional.pad', 'constant'),
 })
 def test_vmap_exhaustive(self, device, dtype, op):
     sample_inputs_itr = op.sample_inputs(device, dtype, requires_grad=False)
@@ -3105,6 +3115,9 @@ def test_vmap_exhaustive(self, device, dtype, op):
     xfail('nn.functional.dropout'),
     xfail('nn.functional.conv2d', ''),
     xfail('nn.functional.batch_norm'),
+    xfail('resize_'),
+    xfail('view_as_complex'),
+    xfail('matrix_exp'),
 })
 def test_op_has_batch_rule(self, device, dtype, op):
     def test():
