
Commit 1a3161a

[Cherry-pick] Fix copysign + scalar correctness issue (pytorch#153098)
* [Testing] Add copysign from scalar regression test (pytorch#152997)
  Instead of adding it just for the MPS backend, add it to OpInfo.
  Fixes pytorch#152582
  Pull Request resolved: pytorch#152997
  Approved by: https://github.com/wdvr
  (cherry picked from commit 9919d6b)
* Spiritual cherry-pick of 52cbcac
* [CI] Skip test_copy_large_tensor on M2-15 runners (pytorch#150377)
  They have more than 12Gb of memory, but running this test may cause OOM in CI.
  Pull Request resolved: pytorch#150377
  Approved by: https://github.com/atalman
1 parent 27e9ca5 commit 1a3161a
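
For context, here is a minimal Python sketch (not part of the commit) of the regression class this cherry-pick addresses: copysign with a plain Python scalar on the MPS backend should match the CPU result. The exact failing values are in the linked issue; this just illustrates the shape of the check.

    import torch

    # Hypothetical repro sketch for the copysign-from-scalar regression
    # (see pytorch#152582): compare the MPS result against a CPU reference.
    if torch.backends.mps.is_available():
        x = torch.tensor([1.0, -2.0, 3.0], device="mps")
        cpu_ref = torch.copysign(x.cpu(), -3.14)
        mps_out = torch.copysign(x, -3.14)
        torch.testing.assert_close(mps_out.cpu(), cpu_ref)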

File tree

3 files changed: +21 -0 lines


aten/src/ATen/native/mps/operations/BinaryKernel.mm

Lines changed: 14 additions & 0 deletions
@@ -34,10 +34,24 @@
 static void binary_mps_impl(TensorIteratorBase& iter, const std::string func_name, bool supports_dense = true) {
   TORCH_CHECK(iter.common_dtype() != at::kDouble, "float64 is not supported on MPS");

+  auto convert_double_scalar = [](Tensor& t) {
+    if (t.dim() != 0) {
+      return;
+    }
+    if (t.scalar_type() == kDouble) {
+      t = t.to(kFloat);
+    } else if (t.scalar_type() == kComplexDouble) {
+      t = t.to(kComplexFloat);
+    }
+  };
+
   Tensor input = iter.input(0);
   Tensor other = iter.input(1);
   Tensor out = iter.output();

+  convert_double_scalar(input);
+  convert_double_scalar(other);
+
   id<MTLDevice> device = MPSDevice::getInstance()->device();
   MPSStream* mpsStream = getCurrentMPSStream();
   const uint32_t nDim = iter.ndim();
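
Why the new convert_double_scalar lambda targets only 0-dim tensors: when a Python scalar participates in a binary op, ATen wraps it as a 0-dim float64 tensor, and Metal has no float64 type, so the wrapped operand must be downcast before the kernel runs. A hedged sketch of that behavior from the Python side (the wrapping itself happens inside TensorIterator):

    import torch

    # A 0-dim double tensor stands in for ATen's wrapped-scalar operand;
    # the lambda above converts exactly this dim/dtype combination.
    wrapped = torch.tensor(-3.14, dtype=torch.float64)
    assert wrapped.dim() == 0
    converted = wrapped.to(torch.float32)   # what convert_double_scalar produces
    print(converted.dtype)                  # torch.float32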

test/test_mps.py

Lines changed: 1 addition & 0 deletions
@@ -7480,6 +7480,7 @@ def compare_mm(m, n, k, dtype=torch.float):

     @unittest.skipIf(total_memory < 12_000_000_000, "Needs at least 12Gb RAM to run the test")
     @unittest.skipIf(MACOS_VERSION < 14.0, "Can't allocate 4Gb tensor on MacOS 13")
+    @unittest.skipIf(IS_CI, "May be fixes https://github.com/pytorch/pytorch/issues/149999")
     def test_copy_large(self):
         """ Test that copy of 4Gb+ tensors works """
         x = torch.ones((2**30 + 11,), dtype=torch.float32)
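
A quick back-of-the-envelope check on the 12Gb skip threshold above: with float32 elements, the source tensor alone is just over 4 GiB, and the copy needs room for a destination as well.

    n = 2**30 + 11          # element count from the test above
    print(n * 4 / 2**30)    # ~4.0 GiB per float32 tensor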

torch/testing/_internal/common_methods_invocations.py

Lines changed: 6 additions & 0 deletions
@@ -6165,6 +6165,11 @@ def _generate_correlation_inputs(device, dtype, requires_grad, **kwargs):
 def sample_inputs_corrcoef(op_info, device, dtype, requires_grad, **kwargs):
     return (SampleInput(t) for t in _generate_correlation_inputs(device, dtype, requires_grad))

+def sample_inputs_copysign(op_info, device, dtype, requires_grad, **kwargs):
+    yield from sample_inputs_elementwise_binary(op_info, device, dtype, requires_grad, **kwargs)
+    if dtype.is_floating_point:
+        yield SampleInput(make_tensor(5, dtype=dtype, device=device, requires_grad=requires_grad), -3.14)
+

 def sample_inputs_cov(op_info, device, dtype, requires_grad, **kwargs):
     for t in _generate_correlation_inputs(device, dtype, requires_grad):
@@ -12882,6 +12887,7 @@ def sample_inputs_alias_copy(op_info, device, dtype, requires_grad, **kwargs):
         DecorateInfo(unittest.skip("Skipped!"), 'TestCommon', 'test_out', device_type='mps'),
         DecorateInfo(unittest.expectedFailure, 'TestMeta', 'test_binary_ufuncs_mixed_dtype'),)),
     BinaryUfuncInfo('copysign',
+                    sample_inputs_func=sample_inputs_copysign,
                     dtypes=all_types_and(torch.bool, torch.half, torch.bfloat16),
                     dtypesIfHpu=custom_types(torch.float32, torch.bfloat16),
                     promotes_int_to_float=True,
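
A hedged sketch of what the new sample_inputs_copysign sample exercises when the OpInfo tests run: copysign with a tensor input and a plain Python scalar other, the exact pairing that regressed on MPS. make_tensor here is torch.testing.make_tensor, the same helper the OpInfo code uses.

    import torch
    from torch.testing import make_tensor

    t = make_tensor(5, dtype=torch.float32, device="cpu", requires_grad=True)
    out = torch.copysign(t, -3.14)   # scalar `other`, as in the new SampleInput
    print(out)                       # every element now carries a negative sign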
