[MPS] Fix type promotion for torch.floor_divide (pytorch#149233)

malfet · pytorchmergebot · commit 08af311fc22f · 2025-03-15T00:00:42.000Z
This change also deletes some duplicated glue code by relying on the `div_floor_stub` dispatch stub.

After this change, `torch.arange(10, device = 'mps') // torch.arange(10., device='mps')` returns a tensor of floats, which is the common dtype for a float + integral operation, rather than a tensor of ints.

Checked by the `test_div2` inductor test.

Pull Request resolved: pytorch#149233
Approved by: https://github.com/atalman
ghstack dependencies: pytorch#149216
diff --git a/aten/src/ATen/native/mps/operations/BinaryOps.mm b/aten/src/ATen/native/mps/operations/BinaryOps.mm
@@ -14,7 +14,6 @@
 #include <ATen/ops/atan2_native.h>
 #include <ATen/ops/div_native.h>
 #include <ATen/ops/eq_native.h>
-#include <ATen/ops/floor_divide_native.h>
 #include <ATen/ops/fmod_native.h>
 #include <ATen/ops/ge_native.h>
 #include <ATen/ops/gt_native.h>
@@ -447,19 +446,8 @@ static void add_sub_lerp_template(const Tensor& self,
   }
 }
 
-Tensor& floor_divide_out_mps(const Tensor& self, const Tensor& other, Tensor& result) {
-  mps::div_mode_template(self, other, "floor", result, "floor_divide_out");
-  return result;
-}
-
-Tensor floor_divide_mps(const Tensor& self, const Tensor& other) {
-  Tensor output = at::empty_like(self);
-  mps::div_mode_template(self, other, "floor", output, "floor_divide");
-  return output;
-}
-
-Tensor& floor_divide_mps_(Tensor& self, const Tensor& other) {
-  return floor_divide_out_mps(self, other, self);
+static void div_floor_kernel_mps(TensorIteratorBase& iter) {
+  mps::div_mode_template(iter.input(0), iter.input(1), "floor", iter.output(0), "floor_divide_out");
 }
 
 TORCH_IMPL_FUNC(remainder_out_mps)(const Tensor& self, const Tensor& other, const Tensor& output) {
@@ -538,4 +526,6 @@ Tensor floor_divide_mps(const Tensor& self, const Tensor& other) {
 TORCH_IMPL_FUNC(lerp_Scalar_mps)(const Tensor& self, const Tensor& end, const Scalar& weight, const Tensor& out) {
   mps::add_sub_lerp_template(self, end, weight, out, "lerp");
 }
+
+REGISTER_DISPATCH(div_floor_stub, &div_floor_kernel_mps);
 } // namespace at::native
diff --git a/aten/src/ATen/native/native_functions.yaml b/aten/src/ATen/native/native_functions.yaml
@@ -2749,23 +2749,20 @@
   device_check: NoCheck   # TensorIterator
   variants: function, method
   dispatch:
-    CPU, CUDA: floor_divide
-    MPS: floor_divide_mps
+    CPU, CUDA, MPS: floor_divide
     SparseCPU, SparseCUDA: floor_divide_sparse
 
 - func: floor_divide_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
   device_check: NoCheck   # TensorIterator
   variants: method
   dispatch:
-    CPU, CUDA: floor_divide_
-    MPS: floor_divide_mps_
+    CPU, CUDA, MPS: floor_divide_
     SparseCPU, SparseCUDA: floor_divide_sparse_
 
 - func: floor_divide.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
   device_check: NoCheck   # TensorIterator
   dispatch:
-    CPU, CUDA: floor_divide_out
-    MPS: floor_divide_out_mps
+    CPU, CUDA, MPS: floor_divide_out
     SparseCPU, SparseCUDA: floor_divide_out_sparse_zerodim
 
 - func: floor_divide.Scalar(Tensor self, Scalar other) -> Tensor
diff --git a/test/inductor/test_mps_basic.py b/test/inductor/test_mps_basic.py
@@ -190,6 +190,7 @@ def fn(a):
     "test_cumsum_inf",
     "test_custom_op_2",
     "test_div1",
+    "test_div2",
     "test_div3",
     "test_erfinv",
     "test_floordiv",