add aminmax batching rule (#180)

kshitij12345 · web-flow · commit dc82a738e542 · 2021-10-05T13:16:49.000-07:00
* add aminmax batching rule

* special case for cuda

* update code and add comment
diff --git a/functorch/csrc/BatchRulesReduceOps.cpp b/functorch/csrc/BatchRulesReduceOps.cpp
@@ -255,6 +255,38 @@ std::tuple<Tensor,optional<int64_t>> _log_softmax_backward_batch_rule(
   return std::make_tuple(at::_log_softmax_backward_data(grad_output_, output_, dim, input_dtype), 0);
 }
 
+// aminmax has divergent behavior for 0-d tensors.
+// reference: https://github.com/pytorch/pytorch/issues/64008
+// TODO: Once the divergent behavior for 0-d scalar is fixed, we should use REDUCTION_BOXED_ARGS
+std::tuple<Tensor, optional<int64_t>, Tensor, optional<int64_t>> aminmax_batching_rule(
+    const Tensor &self, optional<int64_t> self_bdim, optional<int64_t> dim, bool keep_dim)
+{
+  auto self_ = moveBatchDimToFront(self, self_bdim);
+  auto logical_rank = rankWithoutBatchDim(self_, self_bdim);
+  if (logical_rank == 0) {
+    self_ = self_.unsqueeze(-1);
+  }
+  // From here on only self_ (batch dim moved to the front) is used, never self.
+  if (dim.has_value()) {
+    dim = maybe_wrap_dim(dim.value(), logical_rank) + 1;
+  } else {
+    // flatten the input except for batch-dim; reshape because self_ may be non-contiguous
+    auto bsize = self_.size(0);
+    self_ = self_.reshape({bsize, -1});
+    dim = 1;
+  }
+  // Reduce over a non-batch dim; both results are returned with batch dim 0.
+  Tensor min, max;
+  std::tie(min, max) = at::aminmax(self_, dim, keep_dim);
+
+  if (logical_rank == 0 && self_.device().is_cuda()) {
+    // behaviour diverges between cpu and cuda
+    min = min.squeeze(-1);
+    max = max.squeeze(-1);
+  }
+  return std::make_tuple(min, 0, max, 0);
+}
+
 TORCH_LIBRARY_IMPL(aten, FT_BATCHED_KEY, m) {
   REDUCTION_BOXED(_fft_r2c);
   REDUCTION_BOXED(_fft_c2r);
@@ -306,6 +338,7 @@ TORCH_LIBRARY_IMPL(aten, FT_BATCHED_KEY, m) {
   REDUCTION_BOXED(var_mean.correction);
   REDUCTION_BOXED(_log_softmax);
   REDUCTION_BOXED_ARGS(rot90, 2);
+  VMAP_SUPPORT("aminmax", aminmax_batching_rule);
 
   VMAP_SUPPORT("_log_softmax_backward_data", _log_softmax_backward_batch_rule);
   VMAP_SUPPORT("_softmax_backward_data", _softmax_backward_batch_rule);
diff --git a/test/test_vmap.py b/test/test_vmap.py
@@ -3023,7 +3023,6 @@ def test_vmap_exhaustive(self, device, dtype, op):
     @ops(functorch_lagging_op_db + additional_op_db, allowed_dtypes=(torch.float,))
     @skipOps('TestVmapOperatorsOpInfo', 'test_op_has_batch_rule', {
         # xfail('__getitem__'),
-        xfail('aminmax'),
         xfail('broadcast_to'),
         xfail('cdist'),
         xfail('complex'),