Skip to content
This repository was archived by the owner on Aug 21, 2025. It is now read-only.

Commit 4038cc4

Browse files
authored
add cosine_similarity batching rule (#171)
* add cosine_similarity batching rule * update test file * update comment * add rule for clamp_min_ and clamp_max_ * update test * update xfail in test_ops * undo line change in BatchRulesLoss
1 parent d1ec060 commit 4038cc4

File tree

4 files changed

+38
-4
lines changed

4 files changed

+38
-4
lines changed

functorch/csrc/BatchRulesBinaryOps.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -178,8 +178,10 @@ TORCH_LIBRARY_IMPL(aten, FT_BATCHED_KEY, m) {
178178
POINTWISE_BOXED(clamp.Tensor);
179179
BINARY_POINTWISE2(clamp_min, Tensor);
180180
UNARY_POINTWISE(clamp_min);
181+
POINTWISE_BOXED(clamp_min_);
181182
BINARY_POINTWISE2(clamp_max, Tensor);
182183
UNARY_POINTWISE(clamp_max);
184+
POINTWISE_BOXED(clamp_max_);
183185

184186
// Commented out so we have a test op
185187
// BINARY_SCALAR_2(copysign, Tensor, Scalar);
@@ -263,6 +265,10 @@ TORCH_LIBRARY_IMPL(aten, FT_BATCHED_KEY, m) {
263265
m.impl("div_.Scalar", inplacePlumbing1<
264266
DECLTYPE_AUTO(&unary_inplace_batch_rule<ScalarInplaceT, &Tensor::div_, const Scalar&>),
265267
const Scalar&>);
268+
m.impl("clamp_min_.Tensor", inplacePlumbing2<
269+
DECLTYPE_AUTO(&binary_pointwise_inplace_batch_rule<TensorInplaceT, &Tensor::clamp_min_>)>);
270+
m.impl("clamp_max_.Tensor", inplacePlumbing2<
271+
DECLTYPE_AUTO(&binary_pointwise_inplace_batch_rule<TensorInplaceT, &Tensor::clamp_max_>)>);
266272

267273
m.impl("masked_fill_.Scalar", inplacePlumbing2<
268274
DECLTYPE_AUTO(&binary_pointwise_inplace_batch_rule<TensorScalarInplaceT, &Tensor::masked_fill_, const Scalar&>), const Scalar&>);

functorch/csrc/BatchRulesStopDecomposition.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -182,7 +182,6 @@ TORCH_LIBRARY_IMPL(aten, FT_BATCHED_KEY, m) {
182182
STOP_DECOMPOSE(conv_transpose1d);
183183
STOP_DECOMPOSE(conv_transpose3d.input);
184184
STOP_DECOMPOSE(cosine_embedding_loss);
185-
STOP_DECOMPOSE(cosine_similarity);
186185
STOP_DECOMPOSE(ctc_loss.IntList);
187186
STOP_DECOMPOSE(ctc_loss.Tensor);
188187
STOP_DECOMPOSE(cudnn_is_acceptable);

test/test_ops.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -471,7 +471,6 @@ def test_vmapvjp(self, device, dtype, op):
471471
xfail('vdot'),
472472
xfail('view_as_complex'),
473473
xfail('nanmean'),
474-
xfail('nn.functional.cosine_similarity'),
475474
xfail('nn.functional.layer_norm'),
476475
xfail('nn.functional.nll_loss'),
477476
xfail('block_diag'),

test/test_vmap.py

Lines changed: 32 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1234,6 +1234,37 @@ def clone_contiguous(x):
12341234
with self.assertRaisesRegex(RuntimeError, msg):
12351235
vmap(lambda x: x.clone(memory_format=torch.channels_last_3d))(torch.randn(B0))
12361236

1237+
@parametrize("case",
1238+
(
1239+
(torch.clamp_min_, TensorFactory.randn),
1240+
(torch.clamp_max_, TensorFactory.randn),
1241+
), name_fn=lambda x: x[0].__name__)
1242+
def test_clamp_inplace_variant(self, case):
1243+
test = self._vmap_test
1244+
1245+
def get_number(getter):
1246+
return getter([]).item()
1247+
1248+
op, getter = case
1249+
device = 'cpu'
1250+
B0, B1 = 7, 11
1251+
1252+
# Single vmap: op(Tensor, Tensor)
1253+
test(op, (getter([B0, 3], device), getter([B0, 3], device)), check_propagates_grad=False)
1254+
test(op, (getter([B0], device), getter([B0], device)), check_propagates_grad=False)
1255+
test(op, (getter([2, B0, 3], device), getter([2, B0, 3], device)), in_dims=(1, 1), check_propagates_grad=False)
1256+
test(op, (getter([B0, 2, 3], device), getter([2, B0, 3], device)),
1257+
in_dims=(0, 1), out_dims=1, check_propagates_grad=False)
1258+
test(op, (getter([B0, 2, 3], device), getter([1, 1], device)), in_dims=(0, None), check_propagates_grad=False)
1259+
test(op, (getter([B0, 3], device), getter([B0, 3], device)), in_dims=(0, 0), check_propagates_grad=False)
1260+
1261+
# Nested vmap: op(Tensor, Tensor)
1262+
test(vmap(op), (getter([B0, B1, 2, 3], device), getter([B0, B1, 1, 3], device)), check_propagates_grad=False)
1263+
1264+
# Python number overload: op(Tensor, Number)
1265+
number = get_number(getter)
1266+
self._test_unary(lambda t: op(t, number), getter, device, check_propagates_grad=False)
1267+
12371268
@parametrize('case', [
12381269
subtest(_make_case(torch.clamp_min), name='clamp_min'),
12391270
subtest(_make_case(torch.clamp_max), name='clamp_max'),
@@ -1255,7 +1286,7 @@ def get_number(getter):
12551286
test(op, (getter([B0], device), getter([2, B0, 3], device)),
12561287
in_dims=(0, 1), out_dims=1)
12571288
test(op, (getter([B0], device), getter([2, 3], device)), in_dims=(0, None))
1258-
test(op, (getter([2, 3], device), getter([B0, 3], device)), in_dims=(0, None))
1289+
test(op, (getter([2, 3], device), getter([B0, 3], device)), in_dims=(None, 0))
12591290

12601291
# Nested vmap: op(Tensor, Tensor)
12611292
test(vmap(op), (getter([B0, B1, 2, 3], device), getter([B0, B1, 3], device)))
@@ -3069,7 +3100,6 @@ def test_vmap_exhaustive(self, device, dtype, op):
30693100
xfail('hstack'),
30703101
xfail('linalg.multi_dot'),
30713102
xfail('nanmean'),
3072-
xfail('nn.functional.cosine_similarity'),
30733103
xfail('nn.functional.layer_norm'),
30743104
xfail('nn.functional.nll_loss'),
30753105
xfail('vstack'),

0 commit comments

Comments (0)