
Commit 254d4e6

max_pool2d_backward batch rule
1 parent 731e05f commit 254d4e6

4 files changed, +90 -54 lines changed

functorch/csrc/BatchRulesHelper.h

Lines changed: 89 additions & 0 deletions
@@ -264,6 +264,71 @@ inline void boxed_existing_bdim_all_batch_rule(
 #define EXISTING_BDIM_ALL_BOXED(op) \
   m.impl(#op, torch::CppFunction::makeFromBoxedFunction<boxed_existing_bdim_all_batch_rule>());

+template <int64_t feature_rank>
+inline void boxed_all_tensors_have_optional_bdim(
+    const c10::OperatorHandle& op, torch::jit::Stack* stack) {
+  const auto& schema = op.schema();
+  const auto num_returns = schema.returns().size();
+  const auto num_arguments = schema.arguments().size();
+
+  c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey);
+  auto maybe_layer = maybeCurrentDynamicLayer();
+  TORCH_INTERNAL_ASSERT(maybe_layer.has_value());
+  int64_t cur_level = maybe_layer->layerId();
+
+  int64_t args_begin = stack->size() - num_arguments;
+  SmallVector<UnpackedBatchedTensor, 5> tensor_inputs;
+  SmallVector<int64_t, 5> tensor_pos;
+  int64_t batch_size;
+
+  find_and_unpack_tensors(
+      stack, num_arguments, cur_level,
+      &tensor_inputs, &tensor_pos, &batch_size);
+
+  optional<bool> is_no_batch_dim_case;
+
+  for (const auto tensor_idx : c10::irange(0, tensor_inputs.size())) {
+    const auto& value = std::get<0>(tensor_inputs[tensor_idx]);
+    auto bdim = std::get<1>(tensor_inputs[tensor_idx]);
+    const auto logical_rank = rankWithoutBatchDim(value, bdim);
+
+    if (!is_no_batch_dim_case.has_value()) {
+      is_no_batch_dim_case = (logical_rank == feature_rank);
+    }
+    auto value_ = ensure_has_bdim(value, bdim.has_value(), batch_size);
+    if (!bdim.has_value()) {
+      bdim = 0;
+    }
+    if (*is_no_batch_dim_case) {
+      TORCH_INTERNAL_ASSERT(logical_rank == feature_rank);
+      (*stack)[args_begin + tensor_pos[tensor_idx]] = moveBatchDimToFront(value_, bdim);
+      continue;
+    }
+    TORCH_INTERNAL_ASSERT(logical_rank == feature_rank + 1);
+    (*stack)[args_begin + tensor_pos[tensor_idx]] = reshape_dim_into(*bdim, 0, value_);
+  }
+
+  op.callBoxed(stack);
+
+  for (const auto idx : c10::irange(args_begin, args_begin + num_returns)) {
+    const auto& ret = (*stack)[idx];
+    TORCH_INTERNAL_ASSERT(ret.isTensor(),
+        "This boxed batching rule does not currently support ops that return non-tensor values");
+    if (*is_no_batch_dim_case) {
+      (*stack)[idx] = makeBatched(ret.toTensor(), 0, cur_level);
+    } else {
+      (*stack)[idx] = makeBatched(reshape_dim_outof(0, batch_size, ret.toTensor()), 0, cur_level);
+    }
+  }
+}
+
+// Useful for many NN operators.
+// The operator must satisfy the following:
+// - All arguments must accept an optional batch dim.
+// - All arguments must be the same rank
+#define ALL_TENSORS_HAVE_OPTIONAL_BDIM_BOXED(feature_rank, op) \
+  m.impl(#op, torch::CppFunction::makeFromBoxedFunction<boxed_all_tensors_have_optional_bdim<feature_rank>>());
 template <typename A, A a, typename C>
 struct ExistingBdimBatchRuleHelper;

@@ -304,5 +369,29 @@ Tensor& unary_inplace_batch_rule(Tensor& self, optional<int64_t>, ExtraArgs... e
   return self;
 }

+inline int64_t get_bdim_size3(
+    const Tensor& a_value, optional<int64_t> a_bdim,
+    const Tensor& b_value, optional<int64_t> b_bdim,
+    const Tensor& c_value, optional<int64_t> c_bdim) {
+  if (a_bdim)
+    return a_value.size(*a_bdim);
+  if (b_bdim)
+    return b_value.size(*b_bdim);
+  if (c_bdim)
+    return c_value.size(*c_bdim);
+  TORCH_INTERNAL_ASSERT(false);
+}
+
+inline int64_t get_bdim_size2(
+    const Tensor& a_value, optional<int64_t> a_bdim,
+    const Tensor& b_value, optional<int64_t> b_bdim) {
+  if (a_bdim)
+    return a_value.size(*a_bdim);
+  if (b_bdim)
+    return b_value.size(*b_bdim);
+  TORCH_INTERNAL_ASSERT(false);
+}
+
+
}}
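
The boxed rule above keys off the feature_rank template parameter: if every tensor argument has logical rank equal to feature_rank, the inputs are treated as unbatched per example and the vmap dimension is simply moved to the front; if the rank is feature_rank + 1, the vmap dimension is folded into the existing batch dimension (dim 0) before the op runs and split back out of the result. Below is a rough Python sketch of the two rank cases, using the forward max_pool2d purely as an illustration (shapes are made up; unbatched pooling inputs are (C, H, W), which is why the registration later in this commit uses feature_rank = 3):

import torch
import torch.nn.functional as F
from functorch import vmap

# Case 1: per-example inputs are unbatched 3-D (C, H, W) tensors,
# i.e. logical rank == feature_rank (3); the rule only needs to move
# the vmap dim to the front.
x = torch.randn(5, 3, 8, 8)                   # 5 examples to vmap over
out = vmap(lambda t: F.max_pool2d(t, 2))(x)   # -> (5, 3, 4, 4)

# Case 2: per-example inputs already carry a batch dim, 4-D (N, C, H, W),
# i.e. logical rank == feature_rank + 1; the vmap dim is folded into N
# before the call and split back out of the result.
y = torch.randn(5, 2, 3, 8, 8)
out2 = vmap(lambda t: F.max_pool2d(t, 2))(y)  # -> (5, 2, 3, 4, 4)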

functorch/csrc/BatchRulesPooling.cpp

Lines changed: 1 addition & 52 deletions
@@ -11,57 +11,6 @@

 namespace at { namespace functorch {

-std::tuple<Tensor,int64_t> max_pool2d_with_indices_backward_batch_rule(
-    const Tensor & grad_output, optional<int64_t> grad_output_bdim,
-    const Tensor & self, optional<int64_t> self_bdim,
-    IntArrayRef kernel_size, IntArrayRef stride, IntArrayRef padding,
-    IntArrayRef dilation, bool ceil_mode,
-    const Tensor & indices, optional<int64_t> indices_bdim) {
-  TORCH_INTERNAL_ASSERT(grad_output_bdim && self_bdim && indices_bdim);
-
-  auto bdim_size = self.size(*self_bdim);
-  auto grad_output_ = reshape_dim_into(*grad_output_bdim, 0, grad_output);
-  auto self_ = reshape_dim_into(*self_bdim, 0, self);
-  auto indices_ = reshape_dim_into(*indices_bdim, 0, indices);
-
-  auto result = at::max_pool2d_with_indices_backward(
-      grad_output_, self_, kernel_size, stride, padding, dilation, ceil_mode,
-      indices_);
-
-  result = reshape_dim_outof(0, bdim_size, result);
-  return std::make_tuple(result, 0);
-}
-
-Tensor max_pool2d_with_indices_backward_plumbing(const Tensor & grad_output, const Tensor & self, IntArrayRef kernel_size, IntArrayRef stride, IntArrayRef padding, IntArrayRef dilation, bool ceil_mode, const Tensor & indices) {
-  auto maybe_layer = maybeCurrentDynamicLayer();
-  TORCH_INTERNAL_ASSERT(maybe_layer.has_value());
-  int64_t cur_level = maybe_layer->layerId();
-
-  Tensor grad_output_value;
-  optional<int64_t> grad_output_bdim;
-  std::tie(grad_output_value, grad_output_bdim) = unwrapTensorAtLevel(grad_output, cur_level);
-  Tensor self_value;
-  optional<int64_t> self_bdim;
-  std::tie(self_value, self_bdim) = unwrapTensorAtLevel(self, cur_level);
-  Tensor indices_value;
-  optional<int64_t> indices_bdim;
-  std::tie(indices_value, indices_bdim) = unwrapTensorAtLevel(indices, cur_level);
-
-  if (self_bdim && grad_output_bdim && indices_bdim) {
-    c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey);
-    auto result = max_pool2d_with_indices_backward_batch_rule(
-        grad_output_value, grad_output_bdim,
-        self_value, self_bdim,
-        kernel_size, stride, padding, dilation, ceil_mode,
-        indices_value, indices_bdim);
-    return makeBatched(std::get<0>(result), std::get<1>(result), cur_level);
-  }
-
-  static auto op = c10::Dispatcher::singleton()
-      .findSchemaOrThrow("aten::max_pool2d_with_indices_backward", "");
-  return slow_fallback<Tensor>(op, { grad_output, self, kernel_size, stride, padding, dilation, ceil_mode, indices });
-}
-
 std::tuple<Tensor,optional<int64_t>,Tensor,optional<int64_t>>
 max_pool2d_with_indices_batch_rule(
     const Tensor& self, optional<int64_t> self_bdim,
@@ -89,9 +38,9 @@ max_pool2d_with_indices_batch_rule(
 TORCH_LIBRARY_IMPL(aten, FT_BATCHED_KEY, m) {
   EXISTING_BDIM(_adaptive_avg_pool2d);
   EXISTING_BDIM(avg_pool2d);
-  m.impl("max_pool2d_with_indices_backward", max_pool2d_with_indices_backward_plumbing);
   EXISTING_BDIM_ALL_BOXED(avg_pool2d_backward);
   VMAP_SUPPORT("max_pool2d_with_indices", max_pool2d_with_indices_batch_rule);
+  ALL_TENSORS_HAVE_OPTIONAL_BDIM_BOXED(3, max_pool2d_with_indices_backward);
 }

}}
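
For reference, the xfail removed from test/test_ops.py below corresponds to this kind of use: vmapping a VJP of max_pool2d dispatches to max_pool2d_with_indices_backward, which now hits the boxed batch rule instead of the hand-written plumbing (and its slow fallback) deleted above. A minimal sketch with made-up shapes:

import torch
import torch.nn.functional as F
from functorch import vmap, vjp

x = torch.randn(4, 3, 8, 8)   # 4 unbatched (C, H, W) examples

def pool_vjp(inp):
    # The backward of max_pool2d goes through max_pool2d_with_indices_backward.
    out, vjp_fn = vjp(lambda t: F.max_pool2d(t, kernel_size=2), inp)
    (grad_inp,) = vjp_fn(torch.ones_like(out))
    return grad_inp

per_example_grads = vmap(pool_vjp)(x)   # -> (4, 3, 8, 8), same shape as x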

test/test_ops.py

Lines changed: 0 additions & 1 deletion
@@ -473,7 +473,6 @@ def test_vmapvjp(self, device, dtype, op):
     xfail('nn.functional.nll_loss'),
     xfail('block_diag'),
     xfail('nn.functional.dropout'),
-    xfail('nn.functional.max_pool2d'),
     xfail('nn.functional.batch_norm'),
 })
 def test_vmapvjp_has_batch_rule(self, device, dtype, op):

test/test_vmap.py

Lines changed: 0 additions & 1 deletion
@@ -3083,7 +3083,6 @@ def test_vmap_exhaustive(self, device, dtype, op):
     xfail('renorm'),
     xfail('repeat_interleave'),
     xfail('resize_as_'),
-    xfail('scatter'),
     xfail('take'),
     xfail('take_along_dim'),
     xfail('tensor_split'),
