
Commit 1fb0a0f

Batch rule for avg_pool2d_backward
Boxed kernels take a lot of lines to write... Maybe we can make these easier to write.
1 parent: ae54dad

File tree

4 files changed: +104 −20 lines

    functorch/csrc/BatchRulesHelper.h
    functorch/csrc/BatchRulesPooling.cpp
    functorch/csrc/BatchRulesScatterOps.cpp
    test/test_ops.py

functorch/csrc/BatchRulesHelper.h

Lines changed: 103 additions & 7 deletions
@@ -32,6 +32,19 @@ void vmapIncompatibleInplaceError(const char* schema_name);
 
 Tensor maybePadToLogicalRank(const Tensor& tensor, optional<int64_t> has_bdim, int64_t logical_rank);
 
+inline Tensor ensure_has_bdim(const Tensor& tensor, bool has_bdim, int64_t batch_size) {
+  if (has_bdim) {
+    return tensor;
+  }
+  const auto sizes = tensor.sizes();
+  DimVector expanded_shape;
+  expanded_shape.reserve(sizes.size());
+  expanded_shape.emplace_back(batch_size);
+  expanded_shape.insert(expanded_shape.end(), sizes.begin(), sizes.end());
+  return tensor.expand(expanded_shape);
+}
+
+
 #define VMAP_SUPPORT(op, batch_rule) \
   m.impl(op, PrimBatchRule7< \
     decltype(&batch_rule), &batch_rule, to_operator_t<decltype(batch_rule)> \
@@ -166,7 +179,8 @@ inline void handle_variadic_bdims(std::vector<std::pair<Tensor, optional<int64_t
 #define VARIADIC_BDIMS_BOXED(op) \
   m.impl(#op, torch::CppFunction::makeFromBoxedFunction<boxed_tensor_inputs_batch_rule<decltype(&handle_variadic_bdims), &handle_variadic_bdims>>());
 
-inline void boxed_existing_bdim_batch_rule(const c10::OperatorHandle& op, torch::jit::Stack* stack) {
+inline void boxed_existing_bdim_all_batch_rule(
+    const c10::OperatorHandle& op, torch::jit::Stack* stack) {
   const auto& schema = op.schema();
   const auto num_returns = schema.returns().size();
   const auto num_arguments = schema.arguments().size();
@@ -177,19 +191,101 @@ inline void boxed_existing_bdim_batch_rule(const c10::OperatorHandle& op, torch:
   TORCH_INTERNAL_ASSERT(maybe_layer.has_value());
   int64_t cur_level = maybe_layer->layerId();
 
+  std::vector<std::pair<Tensor, optional<int64_t>>> tensor_inputs;
+  std::vector<int64_t> tensor_pos;
+  for (const auto idx : c10::irange(0, num_arguments)) {
+    const auto& ivalue = arguments[idx];
+    if (!ivalue.isTensor()) {
+      continue;
+    }
+    Tensor tensor_value;
+    optional<int64_t> tensor_bdim;
+    std::tie(tensor_value, tensor_bdim) = unwrapTensorAtLevel(ivalue.toTensor(), cur_level);
+    tensor_inputs.push_back(std::make_pair(tensor_value, tensor_bdim));
+    tensor_pos.push_back(idx);
+  }
+
+  // compute batch size...
+  int64_t batch_size = -1;
+  for (const auto& tensor_input : tensor_inputs) {
+    const auto& value = tensor_input.first;
+    const auto& bdim = tensor_input.second;
+    if (!bdim) {
+      continue;
+    }
+    if (batch_size == -1) {
+      batch_size = value.size(*bdim);
+    }
+    TORCH_INTERNAL_ASSERT(batch_size == value.size(*bdim));
+  }
+
+  // for each tensor, ensure it has a bdim and reshape it.
+  for (auto& tensor_input : tensor_inputs) {
+    auto value = tensor_input.first;
+    auto bdim = tensor_input.second;
+    value = ensure_has_bdim(value, bdim.has_value(), batch_size);
+    if (!bdim.has_value()) {
+      bdim = 0;
+    }
+    tensor_input.first = reshape_dim_into(*bdim, 0, value);
+  }
+
+  size_t tensor_idx = 0;
+  TORCH_INTERNAL_ASSERT(tensor_pos.size() > 0);
+  for (const auto arg_idx : c10::irange(0, num_arguments)) {
+    if (tensor_idx >= tensor_pos.size() || (int64_t)arg_idx != tensor_pos[tensor_idx]) {
+      torch::jit::push(stack, arguments[arg_idx]);
+    } else {
+      TORCH_INTERNAL_ASSERT(tensor_idx < tensor_inputs.size());
+      torch::jit::push(stack, tensor_inputs[tensor_idx].first);
+      tensor_idx++;
+    }
+  }
+
+  op.callBoxed(stack);
+  const auto returns = torch::jit::pop(*stack, num_returns);
+  for (const auto& ret : returns) {
+    if (ret.isTensor()) {
+      torch::jit::push(stack, makeBatched(reshape_dim_outof(0, batch_size, ret.toTensor()), 0, cur_level));
+    } else {
+      TORCH_INTERNAL_ASSERT(false, "This boxed batching rule does not currently support ops that return non-tensor values");
+    }
+  }
+}
+
+// Use when all tensor arguments accept one (normal) batch dim.
+// This batching rule expands the batch dim on all Tensors, reshapes it into
+// dim 0, calls the op, and then reshapes the batch dim out of dim 0.
+// This is not the most efficient thing; if there are alternatives, please try
+// to use them. Use this only as a last resort.
+#define EXISTING_BDIM_ALL_BOXED(op) \
+  m.impl(#op, torch::CppFunction::makeFromBoxedFunction<boxed_existing_bdim_all_batch_rule>());
+
+inline void boxed_existing_bdim_batch_rule(const c10::OperatorHandle& op, torch::jit::Stack* stack) {
+  const auto& schema = op.schema();
+  const auto num_returns = schema.returns().size();
+  const auto num_arguments = schema.arguments().size();
+  auto arguments = torch::jit::pop(*stack, num_arguments);
+
+  c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey);
+  auto maybe_layer = maybeCurrentDynamicLayer();
+  TORCH_INTERNAL_ASSERT(maybe_layer.has_value());
+  int64_t cur_level = maybe_layer->layerId();
 
   std::vector<std::pair<Tensor, optional<int64_t>>> tensor_inputs;
   std::vector<int64_t> tensor_pos;
   for (const auto idx : c10::irange(0, num_arguments)) {
     const auto& ivalue = arguments[idx];
-    if (ivalue.isTensor()) {
-      Tensor tensor_value;
-      optional<int64_t> tensor_bdim;
-      std::tie(tensor_value, tensor_bdim) = unwrapTensorAtLevel(ivalue.toTensor(), cur_level);
-      tensor_inputs.push_back(std::make_pair(tensor_value, tensor_bdim));
-      tensor_pos.push_back(idx);
+    if (!ivalue.isTensor()) {
+      continue;
     }
+    Tensor tensor_value;
+    optional<int64_t> tensor_bdim;
+    std::tie(tensor_value, tensor_bdim) = unwrapTensorAtLevel(ivalue.toTensor(), cur_level);
+    tensor_inputs.push_back(std::make_pair(tensor_value, tensor_bdim));
+    tensor_pos.push_back(idx);
   }
+
   int64_t batch_size = -1;
   for (auto& tensor_input : tensor_inputs) {
     if (tensor_input.second) {
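
To make the shape bookkeeping of the new rule concrete, here is a minimal standalone sketch (not part of this commit) of the flow for avg_pool2d_backward under vmap with batch size B. It assumes the libtorch C++ API; the plain reshape calls stand in for the reshape_dim_into / reshape_dim_outof helpers used above, and all shapes are made-up example values.

// Hypothetical illustration only; not code from this commit.
#include <torch/torch.h>
#include <iostream>

int main() {
  const int64_t B = 4;
  // grad_output carries a vmap batch dim at dim 0; input does not.
  auto grad_output = torch::randn({B, 2, 3, 8, 8});
  auto input       = torch::randn({2, 3, 16, 16});

  // ensure_has_bdim: expand the unbatched input so it also carries B.
  auto input_b = input.expand({B, 2, 3, 16, 16});

  // reshape_dim_into(0, 0, value): fold the vmap dim into dim 0, i.e.
  // (B, N, C, H, W) -> (B*N, C, H, W), so the regular kernel sees
  // ordinary 4-D tensors.
  auto go_folded = grad_output.reshape({B * 2, 3, 8, 8});
  auto in_folded = input_b.reshape({B * 2, 3, 16, 16});

  auto grad_input = at::avg_pool2d_backward(
      go_folded, in_folded,
      /*kernel_size=*/{2, 2}, /*stride=*/{2, 2}, /*padding=*/{0, 0},
      /*ceil_mode=*/false, /*count_include_pad=*/true,
      /*divisor_override=*/c10::nullopt);

  // reshape_dim_outof(0, B, result): split dim 0 back into (B, N, ...)
  // before the result is re-wrapped as a BatchedTensor at the vmap level.
  auto grad_input_batched = grad_input.reshape({B, 2, 3, 16, 16});
  std::cout << grad_input_batched.sizes() << "\n";  // [4, 2, 3, 16, 16]
}

The same fold-call-unfold pattern is what lets a single boxed rule cover any op whose tensor arguments all accept a leading batch dim.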

functorch/csrc/BatchRulesPooling.cpp

Lines changed: 1 addition & 0 deletions
@@ -67,6 +67,7 @@ TORCH_LIBRARY_IMPL(aten, FT_BATCHED_KEY, m) {
   EXISTING_BDIM(_adaptive_avg_pool2d);
   EXISTING_BDIM(avg_pool2d);
   m.impl("max_pool2d_with_indices_backward", max_pool2d_with_indices_backward_plumbing);
+  EXISTING_BDIM_ALL_BOXED(avg_pool2d_backward);
 }
 
 }}
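
Given the EXISTING_BDIM_ALL_BOXED definition added in BatchRulesHelper.h above, this one-line registration is shorthand for roughly the following (a sketch of the macro expansion, not literal preprocessor output):

  // #op stringizes to "avg_pool2d_backward"
  m.impl("avg_pool2d_backward",
         torch::CppFunction::makeFromBoxedFunction<boxed_existing_bdim_all_batch_rule>());

so calls to aten::avg_pool2d_backward under vmap at this dispatch key are routed through the generic boxed rule instead of a hand-written batch rule.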

functorch/csrc/BatchRulesScatterOps.cpp

Lines changed: 0 additions & 12 deletions
@@ -156,18 +156,6 @@ Tensor& index_put__plumbing(Tensor & self, const List<optional<Tensor>> & indice
   return self;
 }
 
-Tensor ensure_has_bdim(const Tensor& tensor, bool has_bdim, int64_t batch_size) {
-  if (has_bdim) {
-    return tensor;
-  }
-  const auto sizes = tensor.sizes();
-  DimVector expanded_shape;
-  expanded_shape.reserve(sizes.size());
-  expanded_shape.emplace_back(batch_size);
-  expanded_shape.insert(expanded_shape.end(), sizes.begin(), sizes.end());
-  return tensor.expand(expanded_shape);
-}
-
 int64_t bdim_size(
     const Tensor& a, optional<int64_t> a_bdim,
     const Tensor& b, optional<int64_t> b_bdim,

test/test_ops.py

Lines changed: 0 additions & 1 deletion
@@ -437,7 +437,6 @@ def test_vmapvjp(self, device, dtype, op):
     xfail('nanmedian'),
     xfail('nanquantile'),
     xfail('nn.functional.adaptive_avg_pool2d'),
-    xfail('nn.functional.avg_pool2d'),
     xfail('nn.functional.conv_transpose2d'),
     xfail('nn.functional.cross_entropy', 'mean'),
     xfail('nn.functional.cross_entropy', 'none'),
