
Commit 889ac33

depr, chore: Enforce deprecations and clean up warnings. (#742)
1 parent 847023e commit 889ac33

9 files changed: +76, -185 lines changed

examples/sparse_finch.ipynb

Lines changed: 2 additions & 4 deletions
@@ -24,7 +24,7 @@
     "import os\n",
     "\n",
     "os.environ[\"SPARSE_BACKEND\"] = \"Finch\"\n",
-    "CI_MODE = os.getenv(\"CI_MODE\", default=False)"
+    "CI_MODE = bool(int(os.getenv(\"CI_MODE\", default=\"0\")))"
    ]
   },
   {
@@ -42,9 +42,7 @@
     "\n",
     "import numpy as np\n",
     "import scipy.sparse as sps\n",
-    "import scipy.sparse.linalg as splin\n",
-    "\n",
-    "assert sparse.BackendType.Finch == sparse.BACKEND"
+    "import scipy.sparse.linalg as splin"
    ]
   },
   {
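
The old `os.getenv("CI_MODE", default=False)` returned the raw environment string, which is truthy for any non-empty value (including "0"), and the bool `False` only when the variable is unset. A minimal standalone sketch of the new parsing (the `CI_MODE` name comes from the notebook; the values are illustrative):

    import os

    # unset -> "0" -> 0 -> False; exporting CI_MODE=1 -> "1" -> 1 -> True
    CI_MODE = bool(int(os.getenv("CI_MODE", default="0")))
    print(CI_MODE)

The second hunk also drops the `assert sparse.BackendType.Finch == sparse.BACKEND` check, which no longer works now that the backend enum is private (see the `sparse/__init__.py` change below).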

sparse/__init__.py

Lines changed: 12 additions & 7 deletions
@@ -7,32 +7,37 @@
 __array_api_version__ = "2022.12"


-class BackendType(Enum):
+class _BackendType(Enum):
     Numba = "Numba"
     Finch = "Finch"


 _ENV_VAR_NAME = "SPARSE_BACKEND"

+
+class SparseFutureWarning(FutureWarning):
+    pass
+
+
 if os.environ.get(_ENV_VAR_NAME, "") != "":
     warnings.warn(
         "Changing back-ends is a development feature, please do not rely on it in production.",
-        FutureWarning,
+        SparseFutureWarning,
         stacklevel=1,
     )
     _backend_name = os.environ[_ENV_VAR_NAME]
 else:
-    _backend_name = BackendType.Numba.value
+    _backend_name = _BackendType.Numba.value

-if _backend_name not in {BackendType.Numba.value, BackendType.Finch.value}:
+if _backend_name not in {v.value for v in _BackendType}:
     warnings.warn(f"Invalid backend identifier: {_backend_name}. Selecting Numba backend.", UserWarning, stacklevel=1)
-    BACKEND = BackendType.Numba
+    _BACKEND = _BackendType.Numba
 else:
-    BACKEND = BackendType[_backend_name]
+    _BACKEND = _BackendType[_backend_name]

 del _backend_name

-if BackendType.Finch == BACKEND:
+if _BackendType.Finch == _BACKEND:
     from sparse.finch_backend import *  # noqa: F403
     from sparse.finch_backend import __all__
 else:
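
With the rename, `BackendType` and `BACKEND` leave the public namespace, and the development-only backend switch now emits the dedicated `SparseFutureWarning` subclass instead of a bare `FutureWarning`. A sketch of how a caller might opt in while silencing just that warning (assuming `SparseFutureWarning` remains importable from the top-level `sparse` package, as the diff suggests):

    import os
    import warnings

    # must be set before the first `import sparse`
    os.environ["SPARSE_BACKEND"] = "Finch"

    with warnings.catch_warnings():
        # SparseFutureWarning subclasses FutureWarning, so this filter covers it
        warnings.simplefilter("ignore", category=FutureWarning)
        import sparse  # noqa: F401

Because the subclass is distinct, test suites can filter `SparseFutureWarning` specifically without hiding unrelated `FutureWarning`s raised by other libraries.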

sparse/numba_backend/_common.py

Lines changed: 19 additions & 11 deletions
@@ -10,7 +10,7 @@

 import numpy as np

-from ._coo.common import asCOO
+from ._coo import as_coo
 from ._sparse_array import SparseArray
 from ._utils import (
     _zero_of_dtype,
@@ -19,6 +19,9 @@
     normalize_axis,
 )

+_EINSUM_SYMBOLS = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
+_EINSUM_SYMBOLS_SET = set(_EINSUM_SYMBOLS)
+

 def _is_scipy_sparse_obj(x):
     """
@@ -189,7 +192,12 @@ def tensordot(a, b, axes=2, *, return_type=None):
     oldb = [bs[axis] for axis in notin]

     if builtins.any(dim == 0 for dim in chain(newshape_a, newshape_b)):
-        res = asCOO(np.empty(olda + oldb), check=False)
+        from sparse import COO
+
+        dt = np.result_type(a.dtype, b.dtype)
+        res = COO(
+            np.empty((len(olda) + len(oldb), 0), dtype=np.uintp), data=np.empty(0, dtype=dt), shape=tuple(olda + oldb)
+        )
         if isinstance(a, np.ndarray) or isinstance(b, np.ndarray):
             res = res.todense()

@@ -309,9 +317,9 @@ def dot(a, b):

     if a.ndim == 1 and b.ndim == 1:
         if isinstance(a, SparseArray):
-            a = asCOO(a)
+            a = as_coo(a)
         if isinstance(b, SparseArray):
-            b = asCOO(b)
+            b = as_coo(b)
         return (a * b).sum()

     a_axis = -1
@@ -1182,7 +1190,7 @@ def _parse_einsum_input(operands):
         for s in subscripts:
             if s in ".,->":
                 continue
-            if s not in np.core.einsumfunc.einsum_symbols:
+            if not s.isalpha():
                 raise ValueError(f"Character {s} is not a valid symbol.")

     else:
@@ -1206,7 +1214,7 @@ def _parse_einsum_input(operands):
                         s = index(s)
                     except TypeError as e:
                         raise TypeError("For this input type lists must contain either int or Ellipsis") from e
-                    subscripts += np.core.einsumfunc.einsum_symbols[s]
+                    subscripts += _EINSUM_SYMBOLS[s]
             if num != last:
                 subscripts += ","

@@ -1220,7 +1228,7 @@ def _parse_einsum_input(operands):
                         s = index(s)
                     except TypeError as e:
                         raise TypeError("For this input type lists must contain either int or Ellipsis") from e
-                    subscripts += np.core.einsumfunc.einsum_symbols[s]
+                    subscripts += _EINSUM_SYMBOLS[s]
     # Check for proper "->"
     if ("-" in subscripts) or (">" in subscripts):
         invalid = (subscripts.count("-") > 1) or (subscripts.count(">") > 1)
@@ -1230,7 +1238,7 @@ def _parse_einsum_input(operands):
     # Parse ellipses
     if "." in subscripts:
         used = subscripts.replace(".", "").replace(",", "").replace("->", "")
-        unused = list(np.core.einsumfunc.einsum_symbols_set - set(used))
+        unused = list(_EINSUM_SYMBOLS_SET - set(used))
         ellipse_inds = "".join(unused)
         longest = 0

@@ -1275,7 +1283,7 @@ def _parse_einsum_input(operands):
             output_subscript = ""
             tmp_subscripts = subscripts.replace(",", "")
             for s in sorted(set(tmp_subscripts)):
-                if s not in (np.core.einsumfunc.einsum_symbols):
+                if not s.isalpha():
                     raise ValueError(f"Character {s} is not a valid symbol.")
                 if tmp_subscripts.count(s) == 1:
                     output_subscript += s
@@ -1292,7 +1300,7 @@ def _parse_einsum_input(operands):
         tmp_subscripts = subscripts.replace(",", "")
         output_subscript = ""
         for s in sorted(set(tmp_subscripts)):
-            if s not in np.core.einsumfunc.einsum_symbols:
+            if not s.isalpha():
                 raise ValueError(f"Character {s} is not a valid symbol.")
             if tmp_subscripts.count(s) == 1:
                 output_subscript += s
@@ -1340,7 +1348,7 @@ def _einsum_single(lhs, rhs, operand):

     # else require COO for operations, but check if should convert back
     to_output_format = getattr(operand, "from_coo", lambda x: x)
-    operand = asCOO(operand)
+    operand = as_coo(operand)

     # check if repeated / 'trace' indices mean we are only taking a subset
     where = {}
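
The `np.core.einsumfunc.einsum_symbols` lookups are replaced, presumably because `np.core` is an internal namespace (renamed to `np._core` in NumPy 2.0); the module now carries its own 52-letter symbol table and validates characters with `str.isalpha()`. A standalone sketch of the replacement logic (the helper name below is hypothetical; only the two constants mirror the diff):

    _EINSUM_SYMBOLS = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
    _EINSUM_SYMBOLS_SET = set(_EINSUM_SYMBOLS)

    def _check_subscripts(subscripts: str) -> None:
        # mirror of the new validity check: alphabetic characters are legal symbols,
        # while ".", ",", "-", ">" are structural and skipped
        for s in subscripts:
            if s in ".,->":
                continue
            if not s.isalpha():
                raise ValueError(f"Character {s} is not a valid symbol.")

    _check_subscripts("ij,jk->ik")  # passes
    # _check_subscripts("i j")      # would raise: " " is not a valid symbol

Note that `str.isalpha()` accepts any Unicode letter, so the check is slightly looser than membership in the 52-symbol table; the table itself is still what ellipsis expansion draws unused symbols from. The `tensordot` hunk similarly sidesteps a deprecated path by building the empty result as a `COO` with an explicit dtype and shape instead of densifying via `asCOO(np.empty(...))`.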

sparse/numba_backend/_coo/core.py

Lines changed: 13 additions & 63 deletions
@@ -207,17 +207,18 @@ def __init__(
         fill_value=None,
         idx_dtype=None,
     ):
+        if isinstance(coords, COO):
+            self._make_shallow_copy_of(coords)
+            if data is not None or shape is not None:
+                raise ValueError("If `coords` is `COO`, then no other arguments should be provided.")
+            if fill_value is not None:
+                self.fill_value = self.data.dtype.type(fill_value)
+            return
+
         self._cache = None
         if cache:
             self.enable_caching()

-        if not isinstance(coords, np.ndarray):
-            warnings.warn(
-                "coords should be an ndarray. This will raise a ValueError in the future.",
-                DeprecationWarning,
-                stacklevel=1,
-            )
-
         if data is None:
             arr = as_coo(coords, shape=shape, fill_value=fill_value, idx_dtype=idx_dtype)
             self._make_shallow_copy_of(arr)
@@ -238,15 +239,10 @@ def __init__(
             self.data = np.broadcast_to(self.data, self.coords.shape[1])

         if self.data.ndim != 1:
-            raise ValueError("data must be a scalar or 1-dimensional.")
+            raise ValueError("`data` must be a scalar or 1-dimensional.")

         if shape is None:
-            warnings.warn(
-                "shape should be provided. This will raise a ValueError in the future.",
-                DeprecationWarning,
-                stacklevel=1,
-            )
-            shape = tuple(self.coords.max(axis=1) + 1) if self.coords.nbytes else ()
+            raise ValueError("`shape` was not provided.")

         if not isinstance(shape, Iterable):
             shape = (shape,)
@@ -256,7 +252,6 @@ def __init__(

         if shape and not self.coords.size:
             self.coords = np.zeros((len(shape) if isinstance(shape, Iterable) else 1, 0), dtype=np.intp)
-
         super().__init__(shape, fill_value=fill_value)
         if idx_dtype:
             if not can_store(idx_dtype, max(shape)):
@@ -417,11 +412,12 @@ def todense(self):
         coords = tuple([self.coords[i, :] for i in range(self.ndim)])
         data = self.data

-        if coords != ():
+        if len(coords) != 0:
             x[coords] = data
         else:
             if len(data) != 0:
-                x[coords] = data
+                assert data.shape == (1,)
+                x[...] = data[0]

         return x

@@ -1157,52 +1153,6 @@ def squeeze(self, axis=None):
             fill_value=self.fill_value,
         )

-    def resize(self, *args, refcheck=True, coords_dtype=np.intp):
-        """
-        This method changes the shape and size of an array in-place.
-        Parameters
-        ----------
-        args : tuple, or series of integers
-            The desired shape of the output array.
-
-        See Also
-        --------
-        [`numpy.ndarray.resize`][] : The equivalent Numpy function.
-
-        """
-        warnings.warn("resize is deprecated on all SpraseArray objects.", DeprecationWarning, stacklevel=1)
-        if len(args) == 1 and isinstance(args[0], tuple):
-            shape = args[0]
-        elif all(isinstance(arg, int) for arg in args):
-            shape = tuple(args)
-        else:
-            raise ValueError("Invalid input")
-
-        if any(d < 0 for d in shape):
-            raise ValueError("negative dimensions not allowed")
-
-        new_size = reduce(operator.mul, shape, 1)
-
-        # TODO: this self.size enforces a 2**64 limit to array size
-        linear_loc = self.linear_loc()
-        end_idx = np.searchsorted(linear_loc, new_size, side="left")
-        linear_loc = linear_loc[:end_idx]
-
-        idx_dtype = self.coords.dtype
-        if shape != () and not can_store(idx_dtype, max(shape)):
-            idx_dtype = np.min_scalar_type(max(shape))
-        coords = np.empty((len(shape), len(linear_loc)), dtype=idx_dtype)
-        strides = 1
-        for i, d in enumerate(shape[::-1]):
-            coords[-(i + 1), :] = (linear_loc // strides) % d
-            strides *= d
-
-        self.shape = shape
-        self.coords = coords
-
-        if len(self.data) != len(linear_loc):
-            self.data = self.data[:end_idx].copy()
-
     def to_scipy_sparse(self, /, *, accept_fv=None):
         """
         Converts this [`sparse.COO`][] object into a [`scipy.sparse.coo_matrix`][].
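
The constructor changes drop the old "coords should be an ndarray" `DeprecationWarning` (a `COO` passed as `coords` now takes a dedicated shallow-copy path), and a missing `shape` alongside explicit `data` now raises instead of warning. A sketch of the resulting calling convention (array values are illustrative):

    import numpy as np
    import sparse

    coords = np.array([[0, 1], [0, 1]])  # 2 x nnz index array
    data = np.array([1, 2])

    s = sparse.COO(coords, data, shape=(2, 2))  # shape is now mandatory here
    t = sparse.COO(s)                           # shallow copy; also passing data= or shape= raises
    # sparse.COO(coords, data)                  # ValueError: `shape` was not provided.

The deprecated `resize` method is removed outright rather than kept behind a warning, in line with the commit's goal of enforcing previously announced deprecations.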

sparse/numba_backend/_sparse_array.py

Lines changed: 3 additions & 1 deletion
@@ -170,7 +170,9 @@ def density(self):
         >>> s.density
         0.125
         """
-        return self.nnz / self.size
+        with warnings.catch_warnings():
+            warnings.filterwarnings("ignore", category=RuntimeWarning)
+            return float(np.float64(self.nnz) / np.float64(self.size))

     def _repr_html_(self):
         """

sparse/numba_backend/_utils.py

Lines changed: 5 additions & 8 deletions
@@ -1,8 +1,6 @@
 import functools
-import operator
 import warnings
 from collections.abc import Iterable
-from functools import reduce
 from numbers import Integral

 import numba
@@ -476,14 +474,12 @@ def html_table(arr):
     table = ["<table><tbody>"]
     headings = ["Format", "Data Type", "Shape", "nnz", "Density", "Read-only"]

-    density = np.float64(arr.nnz) / np.float64(arr.size)
-
     info = [
         type(arr).__name__.lower(),
         str(arr.dtype),
         str(arr.shape),
         str(arr.nnz),
-        str(density),
+        str(arr.density),
     ]

     # read-only
@@ -493,9 +489,10 @@ def html_table(arr):
     headings.append("Size")
     info.append(human_readable_size(arr.nbytes))
     headings.append("Storage ratio")
-    info.append(
-        f"{np.float64(arr.nbytes) / np.float64(reduce(operator.mul, arr.shape, 1) * arr.dtype.itemsize):.2f}"
-    )
+    with warnings.catch_warnings():
+        warnings.simplefilter("ignore", category=RuntimeWarning)
+        ratio = float(np.float64(arr.nbytes) / np.float64(arr.size * arr.dtype.itemsize))
+        info.append(f"{ratio:.2f}")

     # compressed_axes
     if type(arr).__name__ == "GCXS":
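
`html_table` now reuses the `density` property and computes the storage ratio from `arr.size` instead of re-multiplying the shape, again with the zero-size division warning silenced. A hypothetical standalone version of the ratio calculation:

    import warnings

    import numpy as np

    def storage_ratio(nbytes: int, size: int, itemsize: int) -> float:
        # bytes actually stored vs. bytes an equivalent dense array would need
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=RuntimeWarning)
            return float(np.float64(nbytes) / np.float64(size * itemsize))

    print(f"{storage_ratio(nbytes=96, size=64, itemsize=8):.2f}")  # 0.19

Dropping the local `reduce(operator.mul, arr.shape, 1)` computation is what lets the `import operator` and `from functools import reduce` lines at the top of the file go away.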
