Skip to content

Commit 81763d5

Browse files
lorentzenchrsebergogrisel
authored
BUG: weighted nanpercentile, nanquantile and multi-dim q (numpy#26582)
* TST add test_nan_value_with_weight * FIX weighted nanquantile * CLN appease linter * Update numpy/lib/tests/test_nanfunctions.py Co-authored-by: Olivier Grisel <[email protected]> * Simplify code, expand old and add new ndim test with weights * BUG: Expand test and fix multi-dimensional q in the normal path... --------- Co-authored-by: Sebastian Berg <[email protected]> Co-authored-by: Olivier Grisel <[email protected]> Co-authored-by: Sebastian Berg <[email protected]>
1 parent a9d1ef9 commit 81763d5

File tree

2 files changed

+112
-17
lines changed

2 files changed

+112
-17
lines changed

numpy/lib/_nanfunctions_impl.py

Lines changed: 53 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -141,7 +141,7 @@ def _copyto(a, val, mask):
141141
return a
142142

143143

144-
def _remove_nan_1d(arr1d, overwrite_input=False):
144+
def _remove_nan_1d(arr1d, second_arr1d=None, overwrite_input=False):
145145
"""
146146
Equivalent to arr1d[~arr1d.isnan()], but in a different order
147147
@@ -151,13 +151,17 @@ def _remove_nan_1d(arr1d, overwrite_input=False):
151151
----------
152152
arr1d : ndarray
153153
Array to remove nans from
154+
second_arr1d : ndarray or None
155+
A second array which will have the same positions removed as arr1d.
154156
overwrite_input : bool
155157
True if `arr1d` can be modified in place
156158
157159
Returns
158160
-------
159161
res : ndarray
160162
Array with nan elements removed
163+
second_res : ndarray or None
164+
Second array with nan element positions of first array removed.
161165
overwrite_input : bool
162166
True if `res` can be modified in place, given the constraint on the
163167
input
@@ -172,9 +176,12 @@ def _remove_nan_1d(arr1d, overwrite_input=False):
172176
if s.size == arr1d.size:
173177
warnings.warn("All-NaN slice encountered", RuntimeWarning,
174178
stacklevel=6)
175-
return arr1d[:0], True
179+
if second_arr1d is None:
180+
return arr1d[:0], None, True
181+
else:
182+
return arr1d[:0], second_arr1d[:0], True
176183
elif s.size == 0:
177-
return arr1d, overwrite_input
184+
return arr1d, second_arr1d, overwrite_input
178185
else:
179186
if not overwrite_input:
180187
arr1d = arr1d.copy()
@@ -183,7 +190,15 @@ def _remove_nan_1d(arr1d, overwrite_input=False):
183190
# fill nans in beginning of array with non-nans of end
184191
arr1d[s[:enonan.size]] = enonan
185192

186-
return arr1d[:-s.size], True
193+
if second_arr1d is None:
194+
return arr1d[:-s.size], None, True
195+
else:
196+
if not overwrite_input:
197+
second_arr1d = second_arr1d.copy()
198+
enonan = second_arr1d[-s.size:][~c[-s.size:]]
199+
second_arr1d[s[:enonan.size]] = enonan
200+
201+
return arr1d[:-s.size], second_arr1d[:-s.size], True
187202

188203

189204
def _divide_by_count(a, b, out=None):
@@ -1061,7 +1076,7 @@ def _nanmedian1d(arr1d, overwrite_input=False):
10611076
Private function for rank 1 arrays. Compute the median ignoring NaNs.
10621077
See nanmedian for parameter usage
10631078
"""
1064-
arr1d_parsed, overwrite_input = _remove_nan_1d(
1079+
arr1d_parsed, _, overwrite_input = _remove_nan_1d(
10651080
arr1d, overwrite_input=overwrite_input,
10661081
)
10671082

@@ -1650,13 +1665,36 @@ def _nanquantile_ureduce_func(
16501665
wgt = None if weights is None else weights.ravel()
16511666
result = _nanquantile_1d(part, q, overwrite_input, method, weights=wgt)
16521667
else:
1653-
result = np.apply_along_axis(_nanquantile_1d, axis, a, q,
1654-
overwrite_input, method, weights)
1655-
# apply_along_axis fills in collapsed axis with results.
1656-
# Move that axis to the beginning to match percentile's
1657-
# convention.
1658-
if q.ndim != 0:
1659-
result = np.moveaxis(result, axis, 0)
1668+
# Note that this code could try to fill in `out` right away
1669+
if weights is None:
1670+
result = np.apply_along_axis(_nanquantile_1d, axis, a, q,
1671+
overwrite_input, method, weights)
1672+
# apply_along_axis fills in collapsed axis with results.
1673+
# Move those axes to the beginning to match percentile's
1674+
# convention.
1675+
if q.ndim != 0:
1676+
from_ax = [axis + i for i in range(q.ndim)]
1677+
result = np.moveaxis(result, from_ax, list(range(q.ndim)))
1678+
else:
1679+
# We need to apply along axis over 2 arrays, a and weights.
1680+
# move operation axes to end for simplicity:
1681+
a = np.moveaxis(a, axis, -1)
1682+
if weights is not None:
1683+
weights = np.moveaxis(weights, axis, -1)
1684+
if out is not None:
1685+
result = out
1686+
else:
1687+
# weights are limited to `inverted_cdf` so the result dtype
1688+
# is known to be identical to that of `a` here:
1689+
result = np.empty_like(a, shape=q.shape + a.shape[:-1])
1690+
1691+
for ii in np.ndindex(a.shape[:-1]):
1692+
result[(...,) + ii] = _nanquantile_1d(
1693+
a[ii], q, weights=weights[ii],
1694+
overwrite_input=overwrite_input, method=method,
1695+
)
1696+
# This path dealt with `out` already...
1697+
return result
16601698

16611699
if out is not None:
16621700
out[...] = result
@@ -1670,8 +1708,9 @@ def _nanquantile_1d(
16701708
Private function for rank 1 arrays. Compute quantile ignoring NaNs.
16711709
See nanpercentile for parameter usage
16721710
"""
1673-
arr1d, overwrite_input = _remove_nan_1d(arr1d,
1674-
overwrite_input=overwrite_input)
1711+
# TODO: What to do when arr1d = [1, np.nan] and weights = [0, 1]?
1712+
arr1d, weights, overwrite_input = _remove_nan_1d(arr1d,
1713+
second_arr1d=weights, overwrite_input=overwrite_input)
16751714
if arr1d.size == 0:
16761715
# convert to scalar
16771716
return np.full(q.shape, np.nan, dtype=arr1d.dtype)[()]

numpy/lib/tests/test_nanfunctions.py

Lines changed: 59 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1144,7 +1144,8 @@ def test_complex(self):
11441144
assert_raises(TypeError, np.nanpercentile, arr_c, 0.5)
11451145

11461146
@pytest.mark.parametrize("weighted", [False, True])
1147-
def test_result_values(self, weighted):
1147+
@pytest.mark.parametrize("use_out", [False, True])
1148+
def test_result_values(self, weighted, use_out):
11481149
if weighted:
11491150
percentile = partial(np.percentile, method="inverted_cdf")
11501151
nanpercentile = partial(np.nanpercentile, method="inverted_cdf")
@@ -1160,13 +1161,16 @@ def gen_weights(d):
11601161
return None
11611162

11621163
tgt = [percentile(d, 28, weights=gen_weights(d)) for d in _rdat]
1163-
res = nanpercentile(_ndat, 28, axis=1, weights=gen_weights(_ndat))
1164+
out = np.empty_like(tgt) if use_out else None
1165+
res = nanpercentile(_ndat, 28, axis=1,
1166+
weights=gen_weights(_ndat), out=out)
11641167
assert_almost_equal(res, tgt)
11651168
# Transpose the array to fit the output convention of numpy.percentile
11661169
tgt = np.transpose([percentile(d, (28, 98), weights=gen_weights(d))
11671170
for d in _rdat])
1171+
out = np.empty_like(tgt) if use_out else None
11681172
res = nanpercentile(_ndat, (28, 98), axis=1,
1169-
weights=gen_weights(_ndat))
1173+
weights=gen_weights(_ndat), out=out)
11701174
assert_almost_equal(res, tgt)
11711175

11721176
@pytest.mark.parametrize("axis", [None, 0, 1])
@@ -1242,6 +1246,58 @@ def test_multiple_percentiles(self):
12421246
np.nanpercentile(megamat, perc, axis=(1, 2)).shape, (2, 3, 6)
12431247
)
12441248

1249+
@pytest.mark.parametrize("nan_weight", [0, 1, 2, 3, 1e200])
1250+
def test_nan_value_with_weight(self, nan_weight):
1251+
x = [1, np.nan, 2, 3]
1252+
result = np.float64(2.0)
1253+
q_unweighted = np.nanpercentile(x, 50, method="inverted_cdf")
1254+
assert_equal(q_unweighted, result)
1255+
1256+
# The weight value at the nan position should not matter.
1257+
w = [1.0, nan_weight, 1.0, 1.0]
1258+
q_weighted = np.nanpercentile(x, 50, weights=w, method="inverted_cdf")
1259+
assert_equal(q_weighted, result)
1260+
1261+
@pytest.mark.parametrize("axis", [0, 1, 2])
1262+
def test_nan_value_with_weight_ndim(self, axis):
1263+
# Create a multi-dimensional array to test
1264+
np.random.seed(1)
1265+
x_no_nan = np.random.random(size=(100, 99, 2))
1266+
# Set some places to NaN (not particularly smart) so there is always
1267+
# some non-Nan.
1268+
x = x_no_nan.copy()
1269+
x[np.arange(99), np.arange(99), 0] = np.nan
1270+
1271+
p = np.array([[20., 50., 30], [70, 33, 80]])
1272+
1273+
# We just use ones as weights, but replace it with 0 or 1e200 at the
1274+
# NaN positions below.
1275+
weights = np.ones_like(x)
1276+
1277+
# For comparison use weighted normal percentile with nan weights at
1278+
# 0 (and no NaNs); not sure this is strictly identical but should be
1279+
# sufficiently so (if a percentile lies exactly on a 0 value).
1280+
weights[np.isnan(x)] = 0
1281+
p_expected = np.percentile(
1282+
x_no_nan, p, axis=axis, weights=weights, method="inverted_cdf")
1283+
1284+
p_unweighted = np.nanpercentile(
1285+
x, p, axis=axis, method="inverted_cdf")
1286+
# The normal and unweighted versions should be identical:
1287+
assert_equal(p_unweighted, p_expected)
1288+
1289+
weights[np.isnan(x)] = 1e200 # huge value, shouldn't matter
1290+
p_weighted = np.nanpercentile(
1291+
x, p, axis=axis, weights=weights, method="inverted_cdf")
1292+
assert_equal(p_weighted, p_expected)
1293+
# Also check with out passed:
1294+
out = np.empty_like(p_weighted)
1295+
res = np.nanpercentile(
1296+
x, p, axis=axis, weights=weights, out=out, method="inverted_cdf")
1297+
1298+
assert res is out
1299+
assert_equal(out, p_expected)
1300+
12451301

12461302
class TestNanFunctions_Quantile:
12471303
# most of this is already tested by TestPercentile

0 commit comments

Comments
 (0)