This repository was archived by the owner on Aug 21, 2025. It is now read-only.

Commit 5304c81

Fix convolution batch rule in the transpose case (#345)
We were making wrong assumptions about where the input_channels / output_channels were in the weight tensor and where the groups dimension gets included.

Test Plan:
- run tests
1 parent 67075bd commit 5304c81

File tree: 4 files changed, +14 -16 lines
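For context on the commit message above: in PyTorch, a regular convolution weight is laid out as (out_channels, in_channels / groups, *kernel), while a transposed convolution weight is (in_channels, out_channels / groups, *kernel), so the first two dims swap roles and groups divides a different one. A minimal sketch of that layout difference (illustration only, not part of this diff):

import torch

# Conv2d weight:          (out_channels, in_channels / groups, kH, kW)
# ConvTranspose2d weight: (in_channels, out_channels / groups, kH, kW)
conv = torch.nn.Conv2d(4, 6, kernel_size=3, groups=2)
convT = torch.nn.ConvTranspose2d(4, 6, kernel_size=3, groups=2)
print(conv.weight.shape)   # torch.Size([6, 2, 3, 3])
print(convT.weight.shape)  # torch.Size([4, 3, 3, 3])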

functorch/csrc/BatchRulesDecompositions.cpp

Lines changed: 3 additions & 0 deletions

@@ -224,6 +224,9 @@ TORCH_LIBRARY_IMPL(aten, FT_BATCHED_KEY, m) {
   OP_DECOMPOSE(arctan2);
   OP_DECOMPOSE(layer_norm);
   OP_DECOMPOSE(diag_backward);
+  OP_DECOMPOSE(conv_transpose1d);
+  OP_DECOMPOSE2(conv_transpose2d, input);
+  OP_DECOMPOSE2(conv_transpose3d, input);
   DECOMPOSE_FUNCTIONAL(diag_embed);
   DECOMPOSE_FUNCTIONAL(block_diag);
 }
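The three new registrations make conv_transpose1d/2d/3d decompose into the general at::convolution call with transposed=True, so the convolution batch rule patched below covers them instead of each op needing its own rule. A small Python sketch of the equivalence those decompositions rely on (illustration only; the real decomposition lives inside PyTorch):

import torch
import torch.nn.functional as F

x = torch.randn(2, 4, 8)   # (N, C_in, L)
w = torch.randn(4, 3, 5)   # transposed-conv weight: (C_in, C_out / groups, kL)

out1 = F.conv_transpose1d(x, w, stride=2, padding=1, output_padding=1)
# aten::convolution is the general entry point the batch rule targets
out2 = torch.ops.aten.convolution(
    x, w, None,
    [2],    # stride
    [1],    # padding
    [1],    # dilation
    True,   # transposed
    [1],    # output_padding
    1,      # groups
)
assert torch.allclose(out1, out2)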

functorch/csrc/BatchRulesModules.cpp

Lines changed: 11 additions & 5 deletions

@@ -15,10 +15,14 @@ namespace at { namespace functorch {
 // Does not support batch_group_count (needed for convolution backwards)
 std::tuple<Tensor,optional<int64_t>>
 convolution_batch_rule(const Tensor& lhs, optional<int64_t> lhs_bdim, const Tensor& rhs, optional<int64_t> rhs_bdim, const optional<Tensor>& bias, optional<int64_t> bias_bdim, IntArrayRef stride, IntArrayRef padding, IntArrayRef dilation, bool transposed, IntArrayRef output_padding, int64_t groups) {
-  std::vector<int64_t> lhs_spec(stride.size() + 2);
+  DimVector lhs_spec(stride.size() + 2);
   std::iota(lhs_spec.begin(), lhs_spec.end(), 0);
-  std::vector<int64_t> rhs_spec = lhs_spec;
-  std::vector<int64_t> out_spec = lhs_spec;
+  DimVector rhs_spec = lhs_spec;
+  DimVector out_spec = lhs_spec;
+  if (transposed) {
+    rhs_spec[0] = 1;
+    rhs_spec[1] = 0;
+  }

   // If we have a batched bias or weight, we need to perform the computation separately.
   optional<Tensor> unbatched_bias;
@@ -45,7 +49,8 @@ convolution_batch_rule(const Tensor& lhs, optional<int64_t> lhs_bdim, const Tens
       out = reshape_dim_outof(out_spec[1], rhs.sizes()[*rhs_bdim], out);
       result = std::make_tuple(out, out_spec[1]);
     } else {
-      auto new_w = reshape_dim_outof(rhs_spec[0] + (*rhs_bdim <= rhs_spec[0]), groups, rhs);
+      auto dim_with_groups = transposed ? 1 : 0;
+      auto new_w = reshape_dim_outof(rhs_spec[dim_with_groups] + (*rhs_bdim <= rhs_spec[0]), groups, rhs);
       new_w = reshape_dim_into(*rhs_bdim + (rhs_spec[0] < rhs_bdim), rhs_spec[0] + 1, new_w);
       new_w = reshape_dim_into(rhs_spec[0], rhs_spec[0], new_w);
       auto out = at::convolution(lhs, new_w, unbatched_bias, stride, padding, dilation, transposed, output_padding, groups);
@@ -57,7 +62,8 @@ convolution_batch_rule(const Tensor& lhs, optional<int64_t> lhs_bdim, const Tens
   } else if (lhs_bdim && rhs_bdim) {
     auto new_x = reshape_dim_into(*lhs_bdim, lhs_spec[1], lhs);
     groups *= lhs.sizes()[*lhs_bdim];
-    auto new_w = reshape_dim_into(*rhs_bdim, rhs_spec[0], rhs);
+    auto dim_with_groups = transposed ? 1 : 0;
+    auto new_w = reshape_dim_into(*rhs_bdim, rhs_spec[dim_with_groups], rhs);
     auto out = at::convolution(new_x, new_w, unbatched_bias, stride, padding, dilation, transposed, output_padding, groups);
     out = reshape_dim_outof(out_spec[1], lhs.sizes()[*lhs_bdim], out);
     result = std::make_tuple(out, out_spec[1]);
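The rhs_spec swap and the dim_with_groups selection encode the layout difference noted above: for a transposed convolution, the dimension that carries the groups factor is dim 1 of the weight, not dim 0. A sketch of the case this fixes, assuming the functorch vmap API of this era (batched weight, groups > 1):

import torch
import torch.nn.functional as F
from functorch import vmap

x = torch.randn(2, 4, 8, 8)      # (N, C_in, H, W)
ws = torch.randn(5, 4, 3, 2, 2)  # 5 weights, each (C_in, C_out / groups, kH, kW)

# Batching over the weight's leading dim hits the rhs_bdim branch of
# convolution_batch_rule; groups=2 exercises the dim_with_groups fix.
batched = vmap(lambda w: F.conv_transpose2d(x, w, groups=2))(ws)
expected = torch.stack([F.conv_transpose2d(x, w, groups=2) for w in ws])
assert torch.allclose(batched, expected)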

test/test_ops.py

Lines changed: 0 additions & 8 deletions

@@ -467,7 +467,6 @@ def vjp_of_vjp(*args_and_cotangents):
     xfail('symeig'),
     xfail('take'),
     xfail('linalg.tensorinv'),
-    xfail('nn.functional.conv_transpose2d', device_type='cuda'),
     xfail('nanmean'),
     xfail('block_diag'),
     xfail('nn.functional.dropout'),
@@ -487,13 +486,6 @@ def vjp_of_vjp(*args_and_cotangents):
     xfail('nn.functional.fractional_max_pool3d'),
     xfail('as_strided'),
     xfail('nn.functional.fractional_max_pool2d'),
-
-    # PyTorch changed its convolution recently.
-    # Maybe it is responsible for all of the following changes.
-    xfail('nn.functional.conv_transpose1d'),
-    xfail('nn.functional.conv_transpose2d'),
-    xfail('nn.functional.conv_transpose3d'),
-
 })
 @ops(functorch_lagging_op_db + additional_op_db, allowed_dtypes=(torch.float,))
 @skipOps('TestOperators', 'test_vmapvjp', vmapvjp_fail)

test/test_vmap.py

Lines changed: 0 additions & 3 deletions

@@ -3197,7 +3197,6 @@ def test_vmap_exhaustive(self, device, dtype, op):
     xfail('masked_scatter'),
     xfail('masked_select'),
     xfail('nanquantile'),
-    xfail('nn.functional.conv_transpose2d'),
     xfail('norm', 'fro'),
     xfail('norm', 'nuc'),
     xfail('ormqr'),
@@ -3265,7 +3264,6 @@ def test_vmap_exhaustive(self, device, dtype, op):
     xfail('nn.functional.poisson_nll_loss'),
     xfail('nn.functional.max_pool3d'),
     xfail('histc'),
-    xfail('nn.functional.conv_transpose1d'),
     xfail('as_strided'),
     xfail('istft'),
     xfail('nonzero'),
@@ -3282,7 +3280,6 @@ def test_vmap_exhaustive(self, device, dtype, op):
     xfail('isclose'),
     xfail('cartesian_prod'),
     xfail('nn.functional.fractional_max_pool3d'),
-    xfail('nn.functional.conv_transpose3d'),
     xfail('nn.functional.rrelu'),
     xfail('nn.functional.bilinear'),
     xfail('nn.functional.embedding_bag'),
