Skip to content

Commit bcf317e

Browse files
quantresearch1dloneytupuiperimosocordiaedschult
authored
ENH: sparse: add nonzero functionality to min, max, argmin, argmax (scipy#16467)
* Added sparse nonzero functionality Added nonzero parsing functionality into sparse matrix methods Added sparse nonzero functionality Removed redundant else block Changed working and removed enforced summation Corrected formatting issues * fix linting issues * fold explicit into test_minmax_axis * test explicit into existing tests * change argmax_min explicit behaviour * add explicit to test_argmax_overflow * Update scipy/sparse/_data.py Co-authored-by: Pamphile Roy <[email protected]> * Update scipy/sparse/_data.py Co-authored-by: Pamphile Roy <[email protected]> * Update scipy/sparse/_data.py Co-authored-by: Pamphile Roy <[email protected]> * cleanup * incorporate tupui comments * benchmark for argmax * fix lint errors * non canonical test case * add density to argmax benchmark * add back sum_duplicates * cleanup code * merge main * update tests after merge with main * update doc_strings * update docs to argmin/argmax --------- Co-authored-by: Drew Allan Loney <[email protected]> Co-authored-by: Pamphile Roy <[email protected]> Co-authored-by: CJ Carey <[email protected]> Co-authored-by: Dan Schult <[email protected]>
1 parent 7b7fd01 commit bcf317e

File tree

3 files changed

+237
-96
lines changed

3 files changed

+237
-96
lines changed

benchmarks/benchmarks/sparse.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -504,3 +504,18 @@ def setup(self, density):
504504
def time_rand(self, density):
505505
sparse.rand(self.nrows, self.ncols,
506506
format=self.format, density=density)
507+
508+
509+
class Argmax(Benchmark):
510+
params = [[0.01, 0.1, 0.5], ['csr', 'csc', 'coo'], [True, False]]
511+
param_names = ['density', 'format', 'explicit']
512+
513+
def setup(self, density, format, explicit):
514+
n = 1000
515+
516+
warnings.simplefilter('ignore', SparseEfficiencyWarning)
517+
518+
self.X = sparse.rand(n, n, format=format, density=density)
519+
520+
def time_argmax(self, density, format, explicit):
521+
self.X.argmax(explicit=explicit)

scipy/sparse/_data.py

Lines changed: 113 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -171,7 +171,7 @@ class _minmax_mixin:
171171
These are not implemented for dia_matrix, hence the separate class.
172172
"""
173173

174-
def _min_or_max_axis(self, axis, min_or_max):
174+
def _min_or_max_axis(self, axis, min_or_max, explicit):
175175
N = self.shape[axis]
176176
if N == 0:
177177
raise ValueError("zero-size array to reduction operation")
@@ -182,8 +182,9 @@ def _min_or_max_axis(self, axis, min_or_max):
182182
mat.sum_duplicates()
183183

184184
major_index, value = mat._minor_reduce(min_or_max)
185-
not_full = np.diff(mat.indptr)[major_index] < N
186-
value[not_full] = min_or_max(value[not_full], 0)
185+
if not explicit:
186+
not_full = np.diff(mat.indptr)[major_index] < N
187+
value[not_full] = min_or_max(value[not_full], 0)
187188

188189
mask = value != 0
189190
major_index = np.compress(mask, major_index)
@@ -205,7 +206,7 @@ def _min_or_max_axis(self, axis, min_or_max):
205206
dtype=self.dtype, shape=(M, 1)
206207
)
207208

208-
def _min_or_max(self, axis, out, min_or_max):
209+
def _min_or_max(self, axis, out, min_or_max, explicit):
209210
if out is not None:
210211
raise ValueError("Sparse arrays do not support an 'out' parameter.")
211212

@@ -223,19 +224,19 @@ def _min_or_max(self, axis, out, min_or_max):
223224
if self.nnz == 0:
224225
return zero
225226
m = min_or_max.reduce(self._deduped_data().ravel())
226-
if self.nnz != math.prod(self.shape):
227+
if self.nnz != math.prod(self.shape) and not explicit:
227228
m = min_or_max(zero, m)
228229
return m
229230

230231
if axis < 0:
231232
axis += 2
232233

233234
if (axis == 0) or (axis == 1):
234-
return self._min_or_max_axis(axis, min_or_max)
235+
return self._min_or_max_axis(axis, min_or_max, explicit)
235236
else:
236237
raise ValueError("axis out of range")
237238

238-
def _arg_min_or_max_axis(self, axis, argmin_or_argmax, compare):
239+
def _arg_min_or_max_axis(self, axis, argmin_or_argmax, compare, explicit):
239240
if self.shape[axis] == 0:
240241
raise ValueError("Cannot apply the operation along a zero-sized dimension.")
241242

@@ -257,14 +258,18 @@ def _arg_min_or_max_axis(self, axis, argmin_or_argmax, compare):
257258
indices = mat.indices[p:q]
258259
extreme_index = argmin_or_argmax(data)
259260
extreme_value = data[extreme_index]
260-
if compare(extreme_value, zero) or q - p == line_size:
261-
ret[i] = indices[extreme_index]
261+
if explicit:
262+
if q - p > 0:
263+
ret[i] = indices[extreme_index]
262264
else:
263-
zero_ind = _find_missing_index(indices, line_size)
264-
if extreme_value == zero:
265-
ret[i] = min(extreme_index, zero_ind)
265+
if compare(extreme_value, zero) or q - p == line_size:
266+
ret[i] = indices[extreme_index]
266267
else:
267-
ret[i] = zero_ind
268+
zero_ind = _find_missing_index(indices, line_size)
269+
if extreme_value == zero:
270+
ret[i] = min(extreme_index, zero_ind)
271+
else:
272+
ret[i] = zero_ind
268273

269274
if isinstance(self, sparray):
270275
return ret
@@ -274,7 +279,7 @@ def _arg_min_or_max_axis(self, axis, argmin_or_argmax, compare):
274279

275280
return self._ascontainer(ret)
276281

277-
def _arg_min_or_max(self, axis, out, argmin_or_argmax, compare):
282+
def _arg_min_or_max(self, axis, out, argmin_or_argmax, compare, explicit):
278283
if out is not None:
279284
raise ValueError("Sparse types do not support an 'out' parameter.")
280285

@@ -286,19 +291,24 @@ def _arg_min_or_max(self, axis, out, argmin_or_argmax, compare):
286291
axis = None # avoid calling special axis case. no impact on 1d
287292

288293
if axis is not None:
289-
return self._arg_min_or_max_axis(axis, argmin_or_argmax, compare)
294+
return self._arg_min_or_max_axis(axis, argmin_or_argmax, compare, explicit)
290295

291296
if 0 in self.shape:
292297
raise ValueError("Cannot apply the operation to an empty matrix.")
293298

294299
if self.nnz == 0:
300+
if explicit:
301+
raise ValueError("Cannot apply the operation to zero matrix "
302+
"when explicit=True.")
295303
return 0
296304

297305
zero = self.dtype.type(0)
298306
mat = self.tocoo()
299307
# Convert to canonical form: no duplicates, sorted indices.
300308
mat.sum_duplicates()
301309
extreme_index = argmin_or_argmax(mat.data)
310+
if explicit:
311+
return extreme_index
302312
extreme_value = mat.data[extreme_index]
303313
num_col = mat.shape[-1]
304314

@@ -322,10 +332,11 @@ def _arg_min_or_max(self, axis, out, argmin_or_argmax, compare):
322332
return min(first_implicit_zero_index, extreme_index)
323333
return first_implicit_zero_index
324334

325-
def max(self, axis=None, out=None):
326-
"""
327-
Return the maximum of the array/matrix or maximum along an axis.
328-
This takes all elements into account, not just the non-zero ones.
335+
def max(self, axis=None, out=None, *, explicit=False):
336+
"""Return the maximum of the array/matrix or maximum along an axis.
337+
338+
By default, all elements are taken into account, not just the non-zero ones.
339+
But with `explicit` set, only the stored elements are considered.
329340
330341
Parameters
331342
----------
@@ -339,25 +350,33 @@ def max(self, axis=None, out=None):
339350
compatibility reasons. Do not pass in anything except
340351
for the default value, as this argument is not used.
341352
353+
explicit : {False, True}, optional (default: False)
354+
When set to True, only the stored elements will be considered.
355+
If a row/column is empty, the sparse.coo_array returned
356+
has no stored element (i.e. an implicit zero) for that row/column.
357+
358+
.. versionadded:: 1.15.0
359+
342360
Returns
343361
-------
344-
amax : coo_matrix or scalar
362+
amax : coo_array or scalar
345363
Maximum of `a`. If `axis` is None, the result is a scalar value.
346-
If `axis` is given, the result is a sparse.coo_matrix of dimension
364+
If `axis` is given, the result is a sparse.coo_array of dimension
347365
``a.ndim - 1``.
348366
349367
See Also
350368
--------
351369
min : The minimum value of a sparse array/matrix along a given axis.
352-
numpy.matrix.max : NumPy's implementation of 'max' for matrices
370+
numpy.max : NumPy's implementation of 'max'
353371
354372
"""
355-
return self._min_or_max(axis, out, np.maximum)
373+
return self._min_or_max(axis, out, np.maximum, explicit)
356374

357-
def min(self, axis=None, out=None):
358-
"""
359-
Return the minimum of the array/matrix or maximum along an axis.
360-
This takes all elements into account, not just the non-zero ones.
375+
def min(self, axis=None, out=None, *, explicit=False):
376+
"""Return the minimum of the array/matrix or minimum along an axis.
377+
378+
By default, all elements are taken into account, not just the non-zero ones.
379+
But with `explicit` set, only the stored elements are considered.
361380
362381
Parameters
363382
----------
@@ -371,26 +390,34 @@ def min(self, axis=None, out=None):
371390
compatibility reasons. Do not pass in anything except for
372391
the default value, as this argument is not used.
373392
393+
explicit : {False, True}, optional (default: False)
394+
When set to True, only the stored elements will be considered.
395+
If a row/column is empty, the sparse.coo_array returned
396+
has no stored element (i.e. an implicit zero) for that row/column.
397+
398+
.. versionadded:: 1.15.0
399+
374400
Returns
375401
-------
376402
amin : coo_array or scalar
377403
Minimum of `a`. If `axis` is None, the result is a scalar value.
378-
If `axis` is given, the result is a sparse.coo_matrix of dimension
404+
If `axis` is given, the result is a sparse.coo_array of dimension
379405
``a.ndim - 1``.
380406
381407
See Also
382408
--------
383409
max : The maximum value of a sparse array/matrix along a given axis.
384-
numpy.matrix.min : NumPy's implementation of 'min' for matrices
410+
numpy.min : NumPy's implementation of 'min'
385411
386412
"""
387-
return self._min_or_max(axis, out, np.minimum)
413+
return self._min_or_max(axis, out, np.minimum, explicit)
388414

389-
def nanmax(self, axis=None, out=None):
390-
"""
391-
Return the maximum of the array/matrix or maximum along an axis, ignoring any
392-
NaNs. This takes all elements into account, not just the non-zero
393-
ones.
415+
def nanmax(self, axis=None, out=None, *, explicit=False):
416+
"""Return the maximum, ignoring any NaNs, along an axis.
417+
418+
Return the maximum, ignoring any NaNs, of the array/matrix along an axis.
419+
By default this takes all elements into account, but with `explicit` set,
420+
only stored elements are considered.
394421
395422
.. versionadded:: 1.11.0
396423
@@ -406,11 +433,18 @@ def nanmax(self, axis=None, out=None):
406433
compatibility reasons. Do not pass in anything except
407434
for the default value, as this argument is not used.
408435
436+
explicit : {False, True}, optional (default: False)
437+
When set to True, only the stored elements will be considered.
438+
If a row/column is empty, the sparse.coo_array returned
439+
has no stored element (i.e. an implicit zero) for that row/column.
440+
441+
.. versionadded:: 1.15.0
442+
409443
Returns
410444
-------
411-
amax : coo_matrix or scalar
445+
amax : coo_array or scalar
412446
Maximum of `a`. If `axis` is None, the result is a scalar value.
413-
If `axis` is given, the result is a sparse.coo_matrix of dimension
447+
If `axis` is given, the result is a sparse.coo_array of dimension
414448
``a.ndim - 1``.
415449
416450
See Also
@@ -422,13 +456,14 @@ def nanmax(self, axis=None, out=None):
422456
numpy.nanmax : NumPy's implementation of 'nanmax'.
423457
424458
"""
425-
return self._min_or_max(axis, out, np.fmax)
459+
return self._min_or_max(axis, out, np.fmax, explicit)
426460

427-
def nanmin(self, axis=None, out=None):
428-
"""
429-
Return the minimum of the array/matrix or minimum along an axis, ignoring any
430-
NaNs. This takes all elements into account, not just the non-zero
431-
ones.
461+
def nanmin(self, axis=None, out=None, *, explicit=False):
462+
"""Return the minimum, ignoring any NaNs, along an axis.
463+
464+
Return the minimum, ignoring any NaNs, of the array/matrix along an axis.
465+
By default this takes all elements into account, but with `explicit` set,
466+
only stored elements are considered.
432467
433468
.. versionadded:: 1.11.0
434469
@@ -444,11 +479,18 @@ def nanmin(self, axis=None, out=None):
444479
compatibility reasons. Do not pass in anything except for
445480
the default value, as this argument is not used.
446481
482+
explicit : {False, True}, optional (default: False)
483+
When set to True, only the stored elements will be considered.
484+
If a row/column is empty, the sparse.coo_array returned
485+
has no stored element (i.e. an implicit zero) for that row/column.
486+
487+
.. versionadded:: 1.15.0
488+
447489
Returns
448490
-------
449-
amin : coo_matrix or scalar
491+
amin : coo_array or scalar
450492
Minimum of `a`. If `axis` is None, the result is a scalar value.
451-
If `axis` is given, the result is a sparse.coo_matrix of dimension
493+
If `axis` is given, the result is a sparse.coo_array of dimension
452494
``a.ndim - 1``.
453495
454496
See Also
@@ -460,50 +502,68 @@ def nanmin(self, axis=None, out=None):
460502
numpy.nanmin : NumPy's implementation of 'nanmin'.
461503
462504
"""
463-
return self._min_or_max(axis, out, np.fmin)
505+
return self._min_or_max(axis, out, np.fmin, explicit)
464506

465-
def argmax(self, axis=None, out=None):
507+
def argmax(self, axis=None, out=None, *, explicit=False):
466508
"""Return indices of maximum elements along an axis.
467509
468-
Implicit zero elements are also taken into account. If there are
469-
several maximum values, the index of the first occurrence is returned.
510+
By default, implicit zero elements are taken into account. If there are
511+
several maximum values, the index of the first occurrence is returned.
512+
If `explicit` is set, only explicitly stored elements will be considered.
470513
471514
Parameters
472515
----------
473516
axis : {-2, -1, 0, 1, None}, optional
474517
Axis along which the argmax is computed. If None (default), index
475518
of the maximum element in the flattened data is returned.
519+
476520
out : None, optional
477521
This argument is in the signature *solely* for NumPy
478522
compatibility reasons. Do not pass in anything except for
479523
the default value, as this argument is not used.
480524
525+
explicit : {False, True}, optional (default: False)
526+
When set to True, only explicitly stored elements will be considered.
527+
If axis is not None and a row/column has no stored elements, argmax
528+
is undefined, so the index ``0`` is returned for that row/column.
529+
530+
.. versionadded:: 1.15.0
531+
481532
Returns
482533
-------
483534
ind : numpy.matrix or int
484535
Indices of maximum elements. If matrix, its size along `axis` is 1.
485536
"""
486-
return self._arg_min_or_max(axis, out, np.argmax, np.greater)
537+
return self._arg_min_or_max(axis, out, np.argmax, np.greater, explicit)
487538

488-
def argmin(self, axis=None, out=None):
539+
def argmin(self, axis=None, out=None, *, explicit=False):
489540
"""Return indices of minimum elements along an axis.
490541
491-
Implicit zero elements are also taken into account. If there are
542+
By default, implicit zero elements are taken into account. If there are
492543
several minimum values, the index of the first occurrence is returned.
544+
If `explicit` is set, only explicitly stored elements will be considered.
493545
494546
Parameters
495547
----------
496548
axis : {-2, -1, 0, 1, None}, optional
497549
Axis along which the argmin is computed. If None (default), index
498550
of the minimum element in the flattened data is returned.
551+
499552
out : None, optional
500553
This argument is in the signature *solely* for NumPy
501554
compatibility reasons. Do not pass in anything except for
502555
the default value, as this argument is not used.
503556
557+
explicit : {False, True}, optional (default: False)
558+
When set to True, only explicitly stored elements will be considered.
559+
If axis is not None and a row/column has no stored elements, argmin
560+
is undefined, so the index ``0`` is returned for that row/column.
561+
562+
.. versionadded:: 1.15.0
563+
504564
Returns
505565
-------
506566
ind : numpy.matrix or int
507567
Indices of minimum elements. If matrix, its size along `axis` is 1.
508568
"""
509-
return self._arg_min_or_max(axis, out, np.argmin, np.less)
569+
return self._arg_min_or_max(axis, out, np.argmin, np.less, explicit)

0 commit comments

Comments
 (0)