Commit 03f9013

Properly handle data that's already on the GPU
Signed-off-by: Rostan Tabet <rtabet@nvidia.com>
1 parent 9319fa9 commit 03f9013

2 files changed: +43 −8 lines

dali/python/nvidia/dali/experimental/dynamic/_arithmetic.py

Lines changed: 5 additions & 7 deletions
@@ -33,15 +33,13 @@ def _arithm_op(name: str, *args):
     new_args = []
     for arg in args:
         if not isinstance(arg, (Tensor, Batch)):
-            if gpu and not _implicitly_convertible(arg):
-                raise ValueError(f"Type {type(arg)} is not implicitly copyable to the GPU.")
+            if gpu and _implicitly_convertible(arg):
+                arg = as_tensor(arg, device="gpu")
+            arg = as_tensor(arg)
 
-            device = "gpu" if gpu else None
-            arg = as_tensor(arg, device=device)
+        if (arg.device.device_type == "gpu") != gpu:
+            raise ValueError("Cannot mix GPU and CPU inputs.")
 
         new_args.append(arg)
 
-    if any((arg.device.device_type == "gpu") != gpu for arg in new_args):
-        raise ValueError("Cannot mix GPU and CPU inputs.")
-
     return _arithmetic_generic_op(*new_args, expression_desc=f"{name}({argsstr})")
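
The net effect of this hunk: implicitly convertible Python values are promoted to GPU tensors when the expression has GPU operands, and the device check now runs per argument instead of in a second pass over new_args. The following is a minimal usage sketch of the intended behavior, not part of the commit; it assumes ndd tensors overload the Python arithmetic operators (the tests below apply them through apply_bin_op), that plain Python scalars count as implicitly convertible, and that .cpu() returns a host-side copy.

# Hedged sketch, not part of the diff: illustrates the behavior the reworked
# _arithm_op is aiming for.  Operator overloading on ndd tensors and the
# convertibility of Python scalars are assumptions, not confirmed by this commit.
import nvidia.dali.experimental.dynamic as ndd

gpu_t = ndd.tensor([1, 2, 3], device="gpu")

# A Python scalar is assumed implicitly convertible, so it is promoted to a
# GPU tensor and the expression stays on the GPU.
result = gpu_t + 2

# A CPU tensor operand now fails fast with the per-argument device check.
cpu_t = ndd.tensor([4, 5, 6], device="cpu")
try:
    gpu_t + cpu_t
except ValueError as err:
    print(err)  # "Cannot mix GPU and CPU inputs."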

dali/test/python/experimental_mode/test_arithm_ops.py

Lines changed: 38 additions & 1 deletion
@@ -17,7 +17,7 @@
 import numpy as np
 import nvidia.dali.experimental.dynamic as ndd
 from nose2.tools import params
-from nose_utils import assert_raises
+from nose_utils import assert_raises, attr
 from test_tensor import asnumpy
 
 
@@ -140,10 +140,47 @@ def test_binary_scalars(device: str, op: str, batch_size: int | None):
         raise AssertionError(msg)
 
 
+@attr("pytorch")
+@params(*binary_ops)
+def test_binary_pytorch_gpu(op: str):
+    import torch
+
+    a = torch.tensor([1, 2, 3], device="cuda")
+    b = ndd.as_tensor(a)
+
+    result = apply_bin_op(op, a, b)
+    result_rev = apply_bin_op(op, b, a)
+    expected = apply_bin_op(op, a, a)
+    np.testing.assert_array_equal(result.cpu(), expected.cpu())
+    np.testing.assert_array_equal(result_rev.cpu(), expected.cpu())
+
+
 @params(*binary_ops)
 def test_incompatible_devices(op: str):
     a = ndd.tensor([1, 2, 3], device="cpu")
     b = ndd.tensor([4, 5, 6], device="gpu")
 
     with assert_raises(ValueError, regex="[CG]PU and [CG]PU"):
         apply_bin_op(op, a, b)
+    with assert_raises(ValueError, regex="[CG]PU and [CG]PU"):
+        apply_bin_op(op, b, a)
+
+
+@attr("pytorch")
+@params(*binary_ops)
+def test_binary_pytorch_incompatible(op: str):
+    import torch
+
+    devices = [
+        ("cpu", "gpu"),
+        ("cuda", "cpu"),
+    ]
+
+    for torch_device, ndd_device in devices:
+        a = torch.tensor([1, 2, 3], device=torch_device)
+        b = ndd.tensor([1, 2, 3], device=ndd_device)
+
+        with assert_raises(ValueError, regex="[CG]PU and [CG]PU"):
+            apply_bin_op(op, a, b)
+        with assert_raises(ValueError, regex="[CG]PU and [CG]PU"):
+            apply_bin_op(op, b, a)
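
The new tests exercise the case named in the commit title: data that already lives on the GPU, here a torch CUDA tensor, is wrapped with ndd.as_tensor and combined with other GPU operands, while any CPU operand is rejected. Below is a hedged, stand-alone sketch of that flow, not part of the commit, under the same assumptions as above (operator overloading on ndd tensors, .cpu() returning a host-side, array-like copy).

# Hedged sketch, not part of the diff: the user-level flow the new tests cover.
import numpy as np
import torch
import nvidia.dali.experimental.dynamic as ndd

src = torch.tensor([1, 2, 3], device="cuda")  # data that is already on the GPU
wrapped = ndd.as_tensor(src)                  # wrapped as a GPU tensor, no host round trip assumed

total = wrapped + wrapped                     # assumed operator overloading; stays on the GPU
np.testing.assert_array_equal(total.cpu(), [2, 4, 6])

cpu_side = ndd.tensor([4, 5, 6], device="cpu")
try:
    wrapped + cpu_side                        # mixed devices are rejected up front
except ValueError as err:
    assert "GPU and CPU" in str(err)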
