Commit e2913e3: Add support for GPU ops.
1 parent afb5212, commit e2913e3

12 files changed, +216 -47 lines

pixi.toml

Lines changed: 5 additions & 0 deletions
@@ -83,10 +83,15 @@ test-finch = "ci/test_Finch.sh"
 [feature.mlir.activation.env]
 SPARSE_BACKEND = "MLIR"
 
+[feature.cuda-12.target.linux-64.pypi-dependencies]
+cupy-cuda12x = ">=13"
+array-api-compat = ">=1.11"
+
 [environments]
 test = ["test", "extra"]
 doc = ["doc", "extra"]
 mlir-dev = {features = ["test", "mlir"], no-default-feature = true}
 finch-dev = {features = ["test", "finch"], no-default-feature = true}
 notebooks = ["extra", "mlir", "finch", "notebooks"]
 barebones = {features = ["barebones"], no-default-feature = true}
+cuda-12 = ["cuda-12"]

sparse/numba_backend/_common.py

Lines changed: 32 additions & 1 deletion
@@ -11,6 +11,7 @@
 import numpy as np
 
 from ._coo import as_coo
+from ._settings import SUPPORTED_ARRAY_TYPE
 from ._sparse_array import SparseArray
 from ._utils import (
     _zero_of_dtype,
@@ -30,6 +31,13 @@ def _is_scipy_sparse_obj(x):
     return bool(hasattr(x, "__module__") and x.__module__.startswith("scipy.sparse"))
 
 
+def _coerce_to_supported_dense(x) -> SUPPORTED_ARRAY_TYPE:
+    if isinstance(x, SUPPORTED_ARRAY_TYPE):
+        return x
+
+    return np.asarray(x)
+
+
 def _check_device(func):
     @wraps(func)
     def wrapped(*args, **kwargs):
@@ -84,11 +92,16 @@ def check_class_nan(test):
     """
     from ._compressed import GCXS
     from ._coo import COO
+    from ._settings import NUMPY_DEVICE
 
     if isinstance(test, GCXS | COO):
-        return nan_check(test.fill_value, test.data)
+        if test.device == NUMPY_DEVICE:
+            return nan_check(test.fill_value, test.data)
+        return np.isnan(test.fill_value) or np.isnan(np.min(test.data))
     if _is_scipy_sparse_obj(test):
         return nan_check(test.data)
+    if type(test).__name__ == "ndarray" and not isinstance(test, np.ndarray):
+        return np.isnan(np.min(test))
     return nan_check(test)
 
 
@@ -238,13 +251,31 @@ def matmul(a, b):
     - [`numpy.matmul`][] : NumPy equivalent function.
     - `COO.__matmul__`: Equivalent function for COO objects.
     """
+    from ._coo import COO
+
     check_zero_fill_value(a, b)
     if not hasattr(a, "ndim") or not hasattr(b, "ndim"):
         raise TypeError(f"Cannot perform dot product on types {type(a)}, {type(b)}")
 
     if check_class_nan(a) or check_class_nan(b):
         warnings.warn("Nan will not be propagated in matrix multiplication", RuntimeWarning, stacklevel=1)
 
+    from ._settings import NUMPY_DEVICE
+
+    if a.device != NUMPY_DEVICE or b.device != NUMPY_DEVICE:
+        import cupyx.scipy.sparse as cps
+
+        if isinstance(a, COO):
+            a = a.to_scipy_sparse()
+        if isinstance(b, COO):
+            b = b.to_scipy_sparse()
+
+        cp_res = a @ b
+        if isinstance(cp_res, cps.spmatrix):
+            return COO.from_scipy_sparse(cp_res.asformat("coo"))
+
+        return cp_res
+
     # When b is 2-d, it is equivalent to dot
     if b.ndim <= 2:
         return dot(a, b)
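A usage sketch (not part of the diff) of the GPU branch added to matmul above; it assumes a CUDA-capable machine, the new cuda-12 environment, and the device-aware COO constructors from the core.py hunks in this same commit.

# Hedged usage sketch of the new GPU matmul path; assumes CuPy and a CUDA device.
import cupy as cp
import cupyx.scipy.sparse as cps

import sparse

# Random sparse operands that live on the GPU.
a_gpu = cps.random(500, 500, density=0.01, format="coo", dtype=cp.float64)
b_gpu = cps.random(500, 500, density=0.01, format="coo", dtype=cp.float64)

# from_scipy_sparse keeps the CuPy-backed data/coords on the device (see the core.py hunks).
a = sparse.COO.from_scipy_sparse(a_gpu)
b = sparse.COO.from_scipy_sparse(b_gpu)

# Because a.device/b.device differ from NUMPY_DEVICE, matmul converts the operands to
# cupyx.scipy.sparse, multiplies on the GPU, and wraps the COO-format result back up.
c = a @ b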

sparse/numba_backend/_compressed/compressed.py

Lines changed: 3 additions & 1 deletion
@@ -132,6 +132,8 @@ class GCXS(SparseArray, NDArrayOperatorsMixin):
 
     __array_priority__ = 12
 
+    __array_members__ = ("data", "indices", "indptr", "fill_value")
+
     def __init__(
         self,
         arg,
@@ -178,7 +180,7 @@ def __init__(
         self.shape = shape
 
         if fill_value is None:
-            fill_value = _zero_of_dtype(self.data.dtype)
+            fill_value = _zero_of_dtype(self.data.dtype, self.data.device)
 
         self._compressed_axes = tuple(compressed_axes) if isinstance(compressed_axes, Iterable) else None
         self.fill_value = self.data.dtype.type(fill_value)
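_zero_of_dtype gains a device argument here and in the indexing.py and numba_extension.py hunks below, but its new body is outside this diff. A rough sketch of what a device-aware variant could look like, purely as an assumption:

# Hypothetical sketch only; NOT the library's actual _zero_of_dtype.
import numpy as np

def _zero_of_dtype_sketch(dtype, device):
    # NumPy arrays report the string "cpu" as their device (NumPy >= 2.0),
    # and the numba_extension hunk below passes the literal "cpu".
    if str(device) == "cpu":
        return np.zeros((), dtype=dtype)[()]

    # Otherwise assume a CuPy device and build the zero there.
    import cupy as cp

    return cp.zeros((), dtype=dtype)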

sparse/numba_backend/_coo/common.py

Lines changed: 5 additions & 2 deletions
@@ -55,14 +55,17 @@ def asCOO(x, name="asCOO", check=True):
 
 
 def linear_loc(coords, shape):
+    import array_api_compat
+
+    namespace = array_api_compat.array_namespace(coords)
     if shape == () and len(coords) == 0:
         # `np.ravel_multi_index` is not aware of arrays, so cannot produce a
         # sensible result here (https://github.com/numpy/numpy/issues/15690).
         # Since `coords` is an array and not a sequence, we know the correct
         # dimensions.
-        return np.zeros(coords.shape[1:], dtype=np.intp)
+        return namespace.zeros(coords.shape[1:], dtype=namespace.intp)
 
-    return np.ravel_multi_index(coords, shape)
+    return namespace.ravel_multi_index(coords, shape)
 
 
 def kron(a, b):
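For context (not part of the diff): array_api_compat.array_namespace picks the array library that owns its arguments, which is what lets the rewritten linear_loc run unchanged on CPU or GPU coordinates.

# Illustration of the dispatch linear_loc now relies on; the CuPy branch is optional.
import numpy as np
import array_api_compat

xp = array_api_compat.array_namespace(np.arange(6))
print(xp.__name__)  # a NumPy-compatible namespace, so allocations stay on the host

try:
    import cupy as cp

    xp_gpu = array_api_compat.array_namespace(cp.arange(6))
    print(xp_gpu.__name__)  # a CuPy-compatible namespace, so the same code allocates on the GPU
except ImportError:
    pass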

sparse/numba_backend/_coo/core.py

Lines changed: 32 additions & 17 deletions
@@ -195,6 +195,8 @@ class COO(SparseArray, NDArrayOperatorsMixin):  # lgtm [py/missing-equals]
 
     __array_priority__ = 12
 
+    __array_members__ = ("data", "coords", "fill_value")
+
     def __init__(
         self,
         coords,
@@ -207,6 +209,8 @@ def __init__(
         fill_value=None,
         idx_dtype=None,
     ):
+        from .._common import _coerce_to_supported_dense
+
         if isinstance(coords, COO):
             self._make_shallow_copy_of(coords)
             if data is not None or shape is not None:
@@ -226,8 +230,8 @@ def __init__(
                 self.enable_caching()
             return
 
-        self.data = np.asarray(data)
-        self.coords = np.asarray(coords)
+        self.data = _coerce_to_supported_dense(data)
+        self.coords = _coerce_to_supported_dense(coords)
 
         if self.coords.ndim == 1:
             if self.coords.size == 0 and shape is not None:
@@ -236,7 +240,7 @@ def __init__(
                 self.coords = self.coords[None, :]
 
         if self.data.ndim == 0:
-            self.data = np.broadcast_to(self.data, self.coords.shape[1])
+            self.data = self._component_namespace.broadcast_to(self.data, self.coords.shape[1])
 
         if self.data.ndim != 1:
             raise ValueError("`data` must be a scalar or 1-dimensional.")
@@ -251,7 +255,9 @@ def __init__(
             shape = tuple(shape)
 
         if shape and not self.coords.size:
-            self.coords = np.zeros((len(shape) if isinstance(shape, Iterable) else 1, 0), dtype=np.intp)
+            self.coords = self._component_namespace.zeros(
+                (len(shape) if isinstance(shape, Iterable) else 1, 0), dtype=np.intp
+            )
         super().__init__(shape, fill_value=fill_value)
         if idx_dtype:
             if not can_store(idx_dtype, max(shape)):
@@ -369,7 +375,7 @@ def from_numpy(cls, x, fill_value=None, idx_dtype=None):
         x = np.asanyarray(x).view(type=np.ndarray)
 
         if fill_value is None:
-            fill_value = _zero_of_dtype(x.dtype) if x.shape else x
+            fill_value = _zero_of_dtype(x.dtype, x.device) if x.shape else x
 
         coords = np.atleast_2d(np.flatnonzero(~equivalent(x, fill_value)))
         data = x.ravel()[tuple(coords)]
@@ -407,7 +413,9 @@ def todense(self):
         >>> np.array_equal(x, x2)
         True
         """
-        x = np.full(self.shape, self.fill_value, self.dtype)
+        x = self._component_namespace.full(
+            self.shape, fill_value=self.fill_value, dtype=self.dtype, device=self.data.device
+        )
 
         coords = tuple([self.coords[i, :] for i in range(self.ndim)])
         data = self.data
@@ -446,14 +454,16 @@ def from_scipy_sparse(cls, x, /, *, fill_value=None):
         >>> np.array_equal(x.todense(), s.todense())
         True
         """
+        import array_api_compat
+
         x = x.asformat("coo")
         if not x.has_canonical_format:
             x.eliminate_zeros()
             x.sum_duplicates()
 
-        coords = np.empty((2, x.nnz), dtype=x.row.dtype)
-        coords[0, :] = x.row
-        coords[1, :] = x.col
+        xp = array_api_compat.array_namespace(x.data)
+
+        coords = xp.stack((x.row, x.col))
         return COO(
             coords,
             x.data,
@@ -1184,14 +1194,19 @@ def to_scipy_sparse(self, /, *, accept_fv=None):
         - [`sparse.COO.tocsr`][] : Convert to a [`scipy.sparse.csr_matrix`][].
         - [`sparse.COO.tocsc`][] : Convert to a [`scipy.sparse.csc_matrix`][].
         """
-        import scipy.sparse
+        from .._settings import NUMPY_DEVICE
+
+        if self.device == NUMPY_DEVICE:
+            import scipy.sparse as sps
+        else:
+            import cupyx.scipy.sparse as sps
 
         check_fill_value(self, accept_fv=accept_fv)
 
         if self.ndim != 2:
             raise ValueError("Can only convert a 2-dimensional array to a Scipy sparse matrix.")
 
-        result = scipy.sparse.coo_matrix((self.data, (self.coords[0], self.coords[1])), shape=self.shape)
+        result = sps.coo_matrix((self.data, (self.coords[0], self.coords[1])), shape=self.shape)
         result.has_canonical_format = True
         return result
 
@@ -1307,10 +1322,10 @@ def _sort_indices(self):
         """
         linear = self.linear_loc()
 
-        if (np.diff(linear) >= 0).all():  # already sorted
+        if (self._component_namespace.diff(linear) >= 0).all():  # already sorted
             return
 
-        order = np.argsort(linear, kind="mergesort")
+        order = self._component_namespace.argsort(linear, kind="mergesort")
         self.coords = self.coords[:, order]
         self.data = self.data[order]
 
@@ -1336,16 +1351,16 @@ def _sum_duplicates(self):
         # Inspired by scipy/sparse/coo.py::sum_duplicates
         # See https://github.com/scipy/scipy/blob/main/LICENSE.txt
         linear = self.linear_loc()
-        unique_mask = np.diff(linear) != 0
+        unique_mask = self._component_namespace.diff(linear) != 0
 
         if unique_mask.sum() == len(unique_mask):  # already unique
             return
 
-        unique_mask = np.append(True, unique_mask)
+        unique_mask = self._component_namespace.append(True, unique_mask)
 
         coords = self.coords[:, unique_mask]
-        (unique_inds,) = np.nonzero(unique_mask)
-        data = np.add.reduceat(self.data, unique_inds, dtype=self.data.dtype)
+        (unique_inds,) = self._component_namespace.nonzero(unique_mask)
+        data = self._component_namespace.add.reduceat(self.data, unique_inds, dtype=self.data.dtype)
 
         self.data = data
         self.coords = coords
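The hunks above lean on a _component_namespace property that is not defined anywhere in this diff. A plausible sketch, assuming it simply resolves to the array module backing self.data:

# Assumed shape of COO._component_namespace; the real definition is outside this diff.
@property
def _component_namespace(self):
    import numpy as np

    if isinstance(self.data, np.ndarray):
        return np

    # Anything else accepted by _coerce_to_supported_dense is a CuPy array here.
    import cupy as cp

    return cp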

sparse/numba_backend/_coo/indexing.py

Lines changed: 1 addition & 1 deletion
@@ -40,7 +40,7 @@ def getitem(x, index):
         coords.extend(idx[1:])
 
     fill_value_idx = np.asarray(x.fill_value[index]).flatten()
-    fill_value = fill_value_idx[0] if fill_value_idx.size else _zero_of_dtype(data.dtype)[()]
+    fill_value = fill_value_idx[0] if fill_value_idx.size else _zero_of_dtype(data.dtype, data.device)
 
     if not equivalent(fill_value, fill_value_idx).all():
         raise ValueError("Fill-values in the array are inconsistent.")

sparse/numba_backend/_coo/numba_extension.py

Lines changed: 1 addition & 1 deletion
@@ -99,7 +99,7 @@ def impl_COO(context, builder, sig, args):
     coo.coords = coords
     coo.data = data
     coo.shape = shape
-    coo.fill_value = context.get_constant_generic(builder, typ.fill_value_type, _zero_of_dtype(typ.data_dtype))
+    coo.fill_value = context.get_constant_generic(builder, typ.fill_value_type, _zero_of_dtype(typ.data_dtype, "cpu"))
     return impl_ret_borrowed(context, builder, sig.return_type, coo._getvalue())
 
 
sparse/numba_backend/_settings.py

Lines changed: 17 additions & 0 deletions
@@ -1,3 +1,4 @@
+import importlib.util
 import os
 
 import numpy as np
@@ -17,4 +18,20 @@ def __array_function__(self, *args, **kwargs):
         return False
 
 
+def _supported_array_type() -> type[np.ndarray]:
+    try:
+        import cupy as cp
+
+        return np.ndarray | cp.ndarray
+    except ImportError:
+        return np.ndarray
+
+
+def _cupy_available() -> bool:
+    return importlib.util.find_spec("cupy") is not None
+
+
 NEP18_ENABLED = _is_nep18_enabled()
+NUMPY_DEVICE = np.asarray(5).device
+SUPPORTED_ARRAY_TYPE = _supported_array_type()
+CUPY_AVAILABLE = _cupy_available()
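A short sketch (not in the diff) of how the new module constants fit together; it assumes NumPy >= 2.0, where ndarray.device exists, which is what NUMPY_DEVICE = np.asarray(5).device relies on.

# Hedged usage sketch of the new settings constants.
import numpy as np

from sparse.numba_backend._settings import CUPY_AVAILABLE, NUMPY_DEVICE, SUPPORTED_ARRAY_TYPE

x = np.arange(10)
assert isinstance(x, SUPPORTED_ARRAY_TYPE)  # np.ndarray is always supported
assert x.device == NUMPY_DEVICE             # host ("cpu") device, so CPU code paths run

if CUPY_AVAILABLE:
    import cupy as cp

    g = cp.arange(10)
    assert isinstance(g, SUPPORTED_ARRAY_TYPE)  # cupy.ndarray is included when CuPy is installed
    assert g.device != NUMPY_DEVICE             # a CUDA device, so the GPU branches are taken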
