Skip to content

Commit b9a0154

Browse files
authored
Testing improvements and minor fixes. (#808)
* Testing improvements and minor fixes.
* Add more thorough testing for dense format.
* Simplify 3D COO test.
* Simplify sparse vector test.
* Add tests for mixed-format sparse-sparse and sparse-dense `add`.
* Simplify CSF test.
1 parent 4b314c2 commit b9a0154

File tree

4 files changed: 147 additions and 79 deletions.

sparse/mlir_backend/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,7 @@
1010
) from e
1111

1212
from . import levels
13+
from ._array import Array
1314
from ._conversions import asarray, from_constituent_arrays, to_numpy, to_scipy
1415
from ._dtypes import (
1516
asdtype,
@@ -30,6 +31,7 @@
3031
from ._ops import add, reshape
3132

3233
__all__ = [
34+
"Array",
3335
"add",
3436
"asarray",
3537
"asdtype",

sparse/mlir_backend/_conversions.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -154,7 +154,10 @@ def asarray(arr, copy: bool | None = None) -> Array:
154154
arr = arr.copy()
155155
return arr
156156

157-
return _from_numpy(np.asarray(arr, copy=copy), copy=None)
157+
if copy is not None and not copy and not isinstance(arr, np.ndarray):
158+
raise ValueError("Cannot non-copy convert this object.")
159+
160+
return _from_numpy(np.asarray(arr), copy=copy)
158161

159162

160163
def from_constituent_arrays(*, format: StorageFormat, arrays: tuple[np.ndarray, ...], shape: tuple[int, ...]) -> Array:

sparse/mlir_backend/_ops.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -182,7 +182,10 @@ def add(x1: Array, x2: Array, /) -> Array:
182182

183183

184184
def asformat(x: Array, /, format: StorageFormat) -> Array:
185-
if x.format == format:
185+
if format.rank != x.ndim:
186+
raise ValueError(f"`format.rank != `self.ndim`, {format.rank=}, {x.ndim=}")
187+
188+
if format == x.format:
186189
return x
187190

188191
out_tensor_type = format._get_mlir_type(shape=x.shape)

sparse/mlir_backend/tests/test_simple.py

Lines changed: 137 additions & 77 deletions
Original file line number | Diff line number | Diff line change
@@ -31,16 +31,36 @@
3131
)
3232

3333

34+
def parametrize_scipy_fmt_with_arg(name: str) -> pytest.MarkDecorator:
35+
return pytest.mark.parametrize(
36+
name,
37+
["csr", "csc", "coo"],
38+
)
39+
40+
41+
parametrize_scipy_fmt = parametrize_scipy_fmt_with_arg("format")
42+
43+
3444
def assert_sps_equal(
3545
expected: sps.csr_array | sps.csc_array | sps.coo_array,
3646
actual: sps.csr_array | sps.csc_array | sps.coo_array,
47+
/,
48+
*,
49+
check_canonical=False,
50+
check_dtype=True,
3751
) -> None:
52+
assert expected.shape == actual.shape
3853
assert expected.format == actual.format
39-
expected.eliminate_zeros()
40-
expected.sum_duplicates()
4154

42-
actual.eliminate_zeros()
43-
actual.sum_duplicates()
55+
if check_dtype:
56+
assert expected.dtype == actual.dtype
57+
58+
if check_canonical:
59+
expected.eliminate_zeros()
60+
expected.sum_duplicates()
61+
62+
actual.eliminate_zeros()
63+
actual.sum_duplicates()
4464

4565
if expected.format != "coo":
4666
np.testing.assert_array_equal(expected.indptr, actual.indptr)
@@ -108,93 +128,136 @@ def test_dense_format(dtype, shape):
108128
np.testing.assert_equal(actual, data)
109129

110130

131+
def assert_array_equal(
132+
expected: sparse.Array,
133+
actual: sparse.Array,
134+
/,
135+
*,
136+
same_format: bool = True,
137+
same_dtype: bool = True,
138+
data_test_fn: typing.Callable[[np.ndarray, np.ndarray], None] = np.testing.assert_array_equal,
139+
) -> None:
140+
if same_format:
141+
assert expected.format == actual.format
142+
143+
if same_dtype:
144+
assert expected.dtype == actual.dtype
145+
146+
assert expected.shape == actual.shape
147+
actual = actual.asformat(expected.format)
148+
149+
carrs_expected = expected.get_constituent_arrays()
150+
carrs_actual = actual.get_constituent_arrays()
151+
152+
for e, a in zip(carrs_expected[:-1], carrs_actual[:-1], strict=True):
153+
assert e.dtype == a.dtype
154+
np.testing.assert_equal(e, a)
155+
156+
data_test_fn(carrs_expected[-1], carrs_actual[-1])
157+
158+
111159
@parametrize_dtypes
112-
def test_2d_constructors(rng, dtype):
160+
@parametrize_scipy_fmt
161+
def test_roundtrip(rng, dtype, format):
113162
SHAPE = (80, 100)
114163
DENSITY = 0.6
115164
sampler = generate_sampler(dtype, rng)
116-
csr = sps.random_array(SHAPE, density=DENSITY, format="csr", dtype=dtype, random_state=rng, data_sampler=sampler)
117-
csc = sps.random_array(SHAPE, density=DENSITY, format="csc", dtype=dtype, random_state=rng, data_sampler=sampler)
118-
dense = np.arange(math.prod(SHAPE), dtype=dtype).reshape(SHAPE)
119-
coo = sps.random_array(SHAPE, density=DENSITY, format="coo", dtype=dtype, random_state=rng, data_sampler=sampler)
120-
coo.sum_duplicates()
165+
sps_arr = sps.random_array(
166+
SHAPE, density=DENSITY, format=format, dtype=dtype, random_state=rng, data_sampler=sampler
167+
)
168+
169+
sp_arr = sparse.asarray(sps_arr)
170+
sps_roundtripped = sparse.to_scipy(sp_arr)
171+
assert_sps_equal(sps_arr, sps_roundtripped)
121172

122-
csr_tensor = sparse.asarray(csr)
123-
csc_tensor = sparse.asarray(csc)
124-
dense_tensor = sparse.asarray(dense)
125-
coo_tensor = sparse.asarray(coo)
126-
dense_2_tensor = sparse.asarray(np.arange(100, dtype=dtype).reshape((25, 4)) + 10)
173+
sp_arr_roundtripped = sparse.asarray(sps_roundtripped)
127174

128-
csr_retured = sparse.to_scipy(csr_tensor)
129-
assert_sps_equal(csr_retured, csr)
175+
assert_array_equal(sp_arr, sp_arr_roundtripped)
130176

131-
csc_retured = sparse.to_scipy(csc_tensor)
132-
assert_sps_equal(csc_retured, csc)
133177

134-
dense_returned = sparse.to_numpy(dense_tensor)
135-
np.testing.assert_equal(dense_returned, dense)
178+
@parametrize_dtypes
179+
@pytest.mark.parametrize("shape", [(80, 100), (200,), (10, 20, 30)])
180+
def test_roundtrip_dense(rng, dtype, shape):
181+
sampler = generate_sampler(dtype, rng)
182+
np_arr = sampler(shape)
183+
184+
sp_arr = sparse.asarray(np_arr)
185+
np_roundtripped = sparse.to_numpy(sp_arr)
186+
assert np_arr.dtype == np_roundtripped.dtype
187+
np.testing.assert_array_equal(np_arr, np_roundtripped)
136188

137-
coo_returned = sparse.to_scipy(coo_tensor)
138-
np.testing.assert_equal(coo_returned.todense(), coo.todense())
189+
sp_arr_roundtripped = sparse.asarray(np_roundtripped)
139190

140-
dense_2_returned = sparse.to_numpy(dense_2_tensor)
141-
np.testing.assert_equal(dense_2_returned, np.arange(100, dtype=dtype).reshape((25, 4)) + 10)
191+
assert_array_equal(sp_arr, sp_arr_roundtripped)
142192

143193

144194
@parametrize_dtypes
145-
def test_add(rng, dtype):
195+
@parametrize_scipy_fmt_with_arg("format1")
196+
@parametrize_scipy_fmt_with_arg("format2")
197+
def test_add(rng, dtype, format1, format2):
198+
if format1 == "coo" or format2 == "coo":
199+
pytest.xfail(reason="https://github.com/llvm/llvm-project/issues/116012")
200+
146201
SHAPE = (100, 50)
147202
DENSITY = 0.5
148203
sampler = generate_sampler(dtype, rng)
204+
sps_arr1 = sps.random_array(
205+
SHAPE, density=DENSITY, format=format1, dtype=dtype, random_state=rng, data_sampler=sampler
206+
)
207+
sps_arr2 = sps.random_array(
208+
SHAPE, density=DENSITY, format=format2, dtype=dtype, random_state=rng, data_sampler=sampler
209+
)
149210

150-
csr = sps.random_array(SHAPE, density=DENSITY, format="csr", dtype=dtype, random_state=rng, data_sampler=sampler)
151-
csr_2 = sps.random_array(SHAPE, density=DENSITY, format="csr", dtype=dtype, random_state=rng, data_sampler=sampler)
152-
csc = sps.random_array(SHAPE, density=DENSITY, format="csc", dtype=dtype, random_state=rng, data_sampler=sampler)
153-
dense = np.arange(math.prod(SHAPE), dtype=dtype).reshape(SHAPE)
154-
coo = sps.random_array(SHAPE, density=DENSITY, format="coo", dtype=dtype, random_state=rng)
155-
coo.sum_duplicates()
211+
sp_arr1 = sparse.asarray(sps_arr1)
212+
sp_arr2 = sparse.asarray(sps_arr2)
156213

157-
csr_tensor = sparse.asarray(csr)
158-
csr_2_tensor = sparse.asarray(csr_2)
159-
csc_tensor = sparse.asarray(csc)
160-
dense_tensor = sparse.asarray(dense)
161-
coo_tensor = sparse.asarray(coo)
214+
expected = sps_arr1 + sps_arr2
215+
actual = sparse.add(sp_arr1, sp_arr2)
216+
actual_sps = sparse.to_scipy(actual.asformat(sparse.asarray(expected).format))
162217

163-
actual = sparse.to_scipy(sparse.add(csr_tensor, csr_2_tensor))
164-
expected = csr + csr_2
165-
assert_sps_equal(expected, actual)
218+
assert_sps_equal(expected, actual_sps, check_canonical=True)
166219

167-
actual = sparse.to_scipy(sparse.add(csc_tensor, csc_tensor))
168-
expected = csc + csc
169-
assert_sps_equal(expected, actual)
170220

171-
actual = sparse.to_scipy(sparse.add(csc_tensor, csr_tensor))
172-
expected = (csc + csr).asformat("csr")
173-
assert_sps_equal(expected, actual)
221+
@parametrize_dtypes
222+
@pytest.mark.parametrize("shape", [(80, 100), (200,), (10, 20, 30)])
223+
def test_add_dense(rng, dtype, shape):
224+
sampler = generate_sampler(dtype, rng)
225+
np_arr1 = sampler(shape)
226+
np_arr2 = sampler(shape)
174227

175-
actual = sparse.to_numpy(sparse.add(csr_tensor, dense_tensor))
176-
expected = csr + dense
177-
np.testing.assert_array_equal(actual, expected)
228+
sp_arr1 = sparse.asarray(np_arr1)
229+
sp_arr2 = sparse.asarray(np_arr2)
178230

179-
actual = sparse.to_numpy(sparse.add(dense_tensor, csr_tensor))
180-
expected = csr + dense
181-
assert isinstance(actual, np.ndarray)
182-
np.testing.assert_array_equal(actual, expected)
231+
expected = np_arr1 + np_arr2
232+
actual = sparse.add(sp_arr1, sp_arr2)
233+
actual_np = sparse.to_numpy(actual)
183234

184-
actual = sparse.to_numpy(sparse.add(dense_tensor, dense_tensor))
185-
expected = dense + dense
186-
assert isinstance(actual, np.ndarray)
187-
np.testing.assert_array_equal(actual, expected)
235+
np.testing.assert_array_equal(expected, actual_np)
188236

189-
actual = sparse.to_scipy(sparse.add(csr_2_tensor, coo_tensor))
190-
expected = csr_2 + coo
191-
assert_sps_equal(expected, actual)
192237

193-
# This ends up being DCSR, not COO
194-
actual_tensor = sparse.add(coo_tensor, coo_tensor)
195-
actual = sparse.to_scipy(actual_tensor.asformat(coo_tensor.format))
196-
expected = coo + coo
197-
np.testing.assert_array_equal(actual.todense(), expected.todense())
238+
@parametrize_dtypes
239+
@parametrize_scipy_fmt
240+
def test_add_dense_sparse(rng, dtype, format):
241+
if format == "coo":
242+
pytest.xfail(reason="https://github.com/llvm/llvm-project/issues/116012")
243+
sampler = generate_sampler(dtype, rng)
244+
245+
SHAPE = (100, 50)
246+
DENSITY = 0.5
247+
248+
np_arr1 = sampler(SHAPE)
249+
sps_arr2 = sps.random_array(
250+
SHAPE, density=DENSITY, format=format, dtype=dtype, random_state=rng, data_sampler=sampler
251+
)
252+
253+
sp_arr1 = sparse.asarray(np_arr1)
254+
sp_arr2 = sparse.asarray(sps_arr2)
255+
256+
expected = np_arr1 + sps_arr2
257+
actual = sparse.add(sp_arr1, sp_arr2)
258+
actual_np = sparse.to_numpy(actual.asformat(sp_arr1.format))
259+
260+
np.testing.assert_array_equal(expected, actual_np)
198261

199262

200263
@parametrize_dtypes
@@ -220,10 +283,9 @@ def test_csf_format(dtype):
220283
for actual, expected in zip(result_arrays, constituent_arrays, strict=True):
221284
np.testing.assert_array_equal(actual, expected)
222285

223-
res_arrays = sparse.add(csf_array, csf_array).get_constituent_arrays()
224-
expected_arrays = (pos_1, crd_1, pos_2, crd_2, data * 2)
225-
for actual, expected in zip(res_arrays, expected_arrays, strict=True):
226-
np.testing.assert_array_equal(actual, expected)
286+
actual = sparse.add(csf_array, csf_array)
287+
expected = sparse.from_constituent_arrays(format=format, arrays=(pos_1, crd_1, pos_2, crd_2, data * 2), shape=SHAPE)
288+
assert_array_equal(expected, actual)
227289

228290

229291
@parametrize_dtypes
@@ -254,10 +316,9 @@ def test_coo_3d_format(dtype):
254316
for actual, expected in zip(result, carrs, strict=True):
255317
np.testing.assert_array_equal(actual, expected)
256318

257-
result_arrays = sparse.add(coo_array, coo_array).asformat(coo_array.format).get_constituent_arrays()
258-
constituent_arrays = (pos, *crd, data * 2)
259-
for actual, expected in zip(result_arrays, constituent_arrays, strict=True):
260-
np.testing.assert_array_equal(actual, expected)
319+
actual = sparse.add(coo_array, coo_array).asformat(coo_array.format)
320+
expected = sparse.from_constituent_arrays(format=actual.format, arrays=(pos, *crd, data * 2), shape=SHAPE)
321+
assert_array_equal(expected, actual)
261322

262323

263324
@parametrize_dtypes
@@ -281,10 +342,9 @@ def test_sparse_vector_format(dtype):
281342
for actual, expected in zip(result, carrs, strict=True):
282343
np.testing.assert_array_equal(actual, expected)
283344

284-
res_arrs = sparse.add(sv_array, sv_array).get_constituent_arrays()
285-
sv2_expected = (pos, crd, data * 2)
286-
for actual, expected in zip(res_arrs, sv2_expected, strict=True):
287-
np.testing.assert_array_equal(actual, expected)
345+
actual = sparse.add(sv_array, sv_array)
346+
expected = sparse.from_constituent_arrays(format=actual.format, arrays=(pos, crd, data * 2), shape=SHAPE)
347+
assert_array_equal(expected, actual)
288348

289349
dense = np.array([1, 2, 3, 0, 0, 0, 4, 0, 5, 6], dtype=dtype)
290350
dense_array = sparse.asarray(dense)

0 commit comments

Comments
 (0)