diff --git a/pandas/_libs/sparse.pyi b/pandas/_libs/sparse.pyi index 7ba82f3bc1268..f1f3efb4d3096 100644 --- a/pandas/_libs/sparse.pyi +++ b/pandas/_libs/sparse.pyi @@ -1,9 +1,11 @@ -from collections.abc import Sequence from typing import Self import numpy as np -from pandas._typing import npt +from pandas._typing import ( + TakeIndexer, + npt, +) class SparseIndex: length: int @@ -26,7 +28,7 @@ class SparseIndex: class IntIndex(SparseIndex): indices: npt.NDArray[np.int32] def __init__( - self, length: int, indices: Sequence[int], check_integrity: bool = ... + self, length: int, indices: TakeIndexer, check_integrity: bool = ... ) -> None: ... class BlockIndex(SparseIndex): diff --git a/pandas/core/arrays/sparse/accessor.py b/pandas/core/arrays/sparse/accessor.py index 7dde03b30cd6a..5d065765a4570 100644 --- a/pandas/core/arrays/sparse/accessor.py +++ b/pandas/core/arrays/sparse/accessor.py @@ -440,10 +440,10 @@ def to_coo(self) -> spmatrix: rows.append(row) data.append(sp_arr.sp_values.astype(dtype, copy=False)) - cols = np.concatenate(cols) - rows = np.concatenate(rows) - data = np.concatenate(data) - return coo_matrix((data, (rows, cols)), shape=self._parent.shape) + cols_arr = np.concatenate(cols) + rows_arr = np.concatenate(rows) + data_arr = np.concatenate(data) + return coo_matrix((data_arr, (rows_arr, cols_arr)), shape=self._parent.shape) @property def density(self) -> float: diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index ab5569537dc55..c04f3716f4739 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -92,13 +92,26 @@ Sequence, ) from enum import Enum + from typing import ( + Protocol, + type_check_only, + ) class ellipsis(Enum): Ellipsis = "..." Ellipsis = ellipsis.Ellipsis - from scipy.sparse import spmatrix + from scipy.sparse import ( + csc_array, + csc_matrix, + ) + + @type_check_only + class _SparseMatrixLike(Protocol): + @property + def shape(self, /) -> tuple[int, int]: ... + def tocsc(self, /) -> csc_array | csc_matrix: ... from pandas._typing import NumpySorter @@ -120,6 +133,7 @@ class ellipsis(Enum): from pandas import Series + else: ellipsis = type(Ellipsis) @@ -511,7 +525,7 @@ def _simple_new( return new @classmethod - def from_spmatrix(cls, data: spmatrix) -> Self: + def from_spmatrix(cls, data: _SparseMatrixLike) -> Self: """ Create a SparseArray from a scipy.sparse matrix. @@ -543,10 +557,10 @@ def from_spmatrix(cls, data: spmatrix) -> Self: # our sparse index classes require that the positions be strictly # increasing. So we need to sort loc, and arr accordingly. - data = data.tocsc() - data.sort_indices() - arr = data.data - idx = data.indices + data_csc = data.tocsc() + data_csc.sort_indices() + arr = data_csc.data + idx = data_csc.indices zero = np.array(0, dtype=arr.dtype).item() dtype = SparseDtype(arr.dtype, zero) @@ -1217,10 +1231,7 @@ def _concat_same_type(cls, to_concat: Sequence[Self]) -> Self: data = np.concatenate(values) indices_arr = np.concatenate(indices) - # error: Argument 2 to "IntIndex" has incompatible type - # "ndarray[Any, dtype[signedinteger[_32Bit]]]"; - # expected "Sequence[int]" - sp_index = IntIndex(length, indices_arr) # type: ignore[arg-type] + sp_index = IntIndex(length, indices_arr) else: # when concatenating block indices, we don't claim that you'll diff --git a/pandas/tests/arrays/sparse/test_libsparse.py b/pandas/tests/arrays/sparse/test_libsparse.py index 7a77a2064e7e0..ff41fa0c461c3 100644 --- a/pandas/tests/arrays/sparse/test_libsparse.py +++ b/pandas/tests/arrays/sparse/test_libsparse.py @@ -209,12 +209,10 @@ def test_intersect_empty(self): @pytest.mark.parametrize( "case", [ - # Argument 2 to "IntIndex" has incompatible type "ndarray[Any, - # dtype[signedinteger[_32Bit]]]"; expected "Sequence[int]" - IntIndex(5, np.array([1, 2], dtype=np.int32)), # type: ignore[arg-type] - IntIndex(5, np.array([0, 2, 4], dtype=np.int32)), # type: ignore[arg-type] - IntIndex(0, np.array([], dtype=np.int32)), # type: ignore[arg-type] - IntIndex(5, np.array([], dtype=np.int32)), # type: ignore[arg-type] + IntIndex(5, np.array([1, 2], dtype=np.int32)), + IntIndex(5, np.array([0, 2, 4], dtype=np.int32)), + IntIndex(0, np.array([], dtype=np.int32)), + IntIndex(5, np.array([], dtype=np.int32)), ], ) def test_intersect_identical(self, case):