Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
108 changes: 108 additions & 0 deletions dpnp/dpnp_iface_nanfunctions.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
import warnings

import dpnp
from dpnp.dpnp_utils.dpnp_utils_statistics import dpnp_nanmedian

__all__ = [
"nanargmax",
Expand All @@ -48,6 +49,7 @@
"nancumsum",
"nanmax",
"nanmean",
"nanmedian",
"nanmin",
"nanprod",
"nanstd",
Expand Down Expand Up @@ -568,6 +570,112 @@ def nanmean(a, axis=None, dtype=None, out=None, keepdims=False, *, where=True):
return avg


def nanmedian(a, axis=None, out=None, overwrite_input=False, keepdims=False):
"""
Compute the median along the specified axis, while ignoring NaNs.

For full documentation refer to :obj:`numpy.nanmedian`.

Parameters
----------
a : {dpnp.ndarray, usm_ndarray}
Input array.
axis : {None, int, tuple or list of ints}, optional
Axis or axes along which the medians are computed. The default,
``axis=None``, will compute the median along a flattened version of
the array. If a sequence of axes, the array is first flattened along
the given axes, then the median is computed along the resulting
flattened axis.
Default: ``None``.
out : {None, dpnp.ndarray, usm_ndarray}, optional
Alternative output array in which to place the result. It must have
the same shape as the expected output but the type (of the calculated
values) will be cast if necessary.
Default: ``None``.
overwrite_input : bool, optional
If ``True``, then allow use of memory of input array `a` for
calculations. The input array will be modified by the call to
:obj:`dpnp.nanmedian`. This will save memory when you do not need to
preserve the contents of the input array. Treat the input as undefined,
but it will probably be fully or partially sorted.
Default: ``False``.
keepdims : bool, optional
If ``True``, the reduced axes (dimensions) are included in the result
as singleton dimensions, so that the returned array remains
compatible with the input array according to Array Broadcasting
rules. Otherwise, if ``False``, the reduced axes are not included in
the returned array.
Default: ``False``.

Returns
-------
out : dpnp.ndarray
A new array holding the result. If `a` has a floating-point data type,
the returned array will have the same data type as `a`. If `a` has a
boolean or integral data type, the returned array will have the
default floating point data type for the device where input array `a`
is allocated.

See Also
--------
:obj:`dpnp.mean` : Compute the arithmetic mean along the specified axis.
:obj:`dpnp.median` : Compute the median along the specified axis.
:obj:`dpnp.percentile` : Compute the q-th percentile of the data
along the specified axis.

Notes
-----
Given a vector ``V`` of length ``N``, the median of ``V`` is the
middle value of a sorted copy of ``V``, ``V_sorted`` - i.e.,
``V_sorted[(N-1)/2]``, when ``N`` is odd, and the average of the
two middle values of ``V_sorted`` when ``N`` is even.

Examples
--------
>>> import dpnp as np
>>> a = np.array([[10.0, 7, 4], [3, 2, 1]])
>>> a[0, 1] = np.nan
>>> a
array([[10., nan, 4.],
[ 3., 2., 1.]])
>>> np.median(a)
array(nan)
>>> np.nanmedian(a)
array(3.)

>>> np.nanmedian(a, axis=0)
array([6.5, 2., 2.5])
>>> np.nanmedian(a, axis=1)
array([7., 2.])

>>> b = a.copy()
>>> np.nanmedian(b, axis=1, overwrite_input=True)
array([7., 2.])
>>> assert not np.all(a==b)
>>> b = a.copy()
>>> np.nanmedian(b, axis=None, overwrite_input=True)
array(3.)
>>> assert not np.all(a==b)

"""

dpnp.check_supported_arrays_type(a)
if dpnp.issubdtype(a.dtype, dpnp.inexact):
# apply_along_axis in _nanmedian doesn't handle empty arrays well,
# so deal them upfront
if a.size == 0:
return dpnp.nanmean(a, axis, out=out, keepdims=keepdims)
return dpnp_nanmedian(
a,
keepdims=keepdims,
axis=axis,
out=out,
overwrite_input=overwrite_input,
)

return dpnp.median(a, axis, out, overwrite_input, keepdims)


def nanmin(a, axis=None, out=None, keepdims=False, initial=None, where=True):
"""
Return the minimum of an array or minimum along an axis, ignoring any NaNs.
Expand Down
12 changes: 6 additions & 6 deletions dpnp/dpnp_iface_statistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -765,7 +765,7 @@ def median(a, axis=None, out=None, overwrite_input=False, keepdims=False):
preserve the contents of the input array. Treat the input as undefined,
but it will probably be fully or partially sorted.
Default: ``False``.
keepdims : {None, bool}, optional
keepdims : bool, optional
If ``True``, the reduced axes (dimensions) are included in the result
as singleton dimensions, so that the returned array remains
compatible with the input array according to Array Broadcasting
Expand All @@ -775,7 +775,7 @@ def median(a, axis=None, out=None, overwrite_input=False, keepdims=False):

Returns
-------
dpnp.median : dpnp.ndarray
out : dpnp.ndarray
A new array holding the result. If `a` has a floating-point data type,
the returned array will have the same data type as `a`. If `a` has a
boolean or integral data type, the returned array will have the
Expand Down Expand Up @@ -808,20 +808,20 @@ def median(a, axis=None, out=None, overwrite_input=False, keepdims=False):
>>> np.median(a, axis=0)
array([6.5, 4.5, 2.5])
>>> np.median(a, axis=1)
array([7., 2.])
array([7., 2.])
>>> np.median(a, axis=(0, 1))
array(3.5)

>>> m = np.median(a, axis=0)
>>> out = np.zeros_like(m)
>>> np.median(a, axis=0, out=m)
array([6.5, 4.5, 2.5])
array([6.5, 4.5, 2.5])
>>> m
array([6.5, 4.5, 2.5])
array([6.5, 4.5, 2.5])

>>> b = a.copy()
>>> np.median(b, axis=1, overwrite_input=True)
array([7., 2.])
array([7., 2.])
>>> assert not np.all(a==b)
>>> b = a.copy()
>>> np.median(b, axis=None, overwrite_input=True)
Expand Down
136 changes: 135 additions & 1 deletion dpnp/dpnp_utils/dpnp_utils_statistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,94 @@
# *****************************************************************************


import warnings

from dpctl.tensor._numpy_helper import normalize_axis_tuple

import dpnp
from dpnp.dpnp_utils import get_usm_allocations, map_dtype_to_device

__all__ = ["dpnp_cov"]
__all__ = ["dpnp_cov", "dpnp_nanmedian"]


def _calc_nanmedian(a, axis=None, out=None, overwrite_input=False):
"""
Private function that doesn't support extended axis or keepdims.
These methods are extended to this function using _ureduce
See nanmedian for parameter usage

"""
if axis is None or a.ndim == 1:
part = dpnp.ravel(a)
if out is None:
return _nanmedian1d(part, overwrite_input)
else:
out[...] = _nanmedian1d(part, overwrite_input)
return out
else:
result = dpnp.apply_along_axis(_nanmedian1d, axis, a, overwrite_input)
if out is not None:
out[...] = result
return result


def _nanmedian1d(arr1d, overwrite_input=False):
"""
Private function for rank 1 arrays. Compute the median ignoring NaNs.
See nanmedian for parameter usage
"""
arr1d_parsed, overwrite_input = _remove_nan_1d(
arr1d,
overwrite_input=overwrite_input,
)

if arr1d_parsed.size == 0:
# Ensure that a nan-esque scalar of the appropriate type (and unit)
# is returned for `complexfloating`
return arr1d[-1]

return dpnp.median(arr1d_parsed, overwrite_input=overwrite_input)


def _remove_nan_1d(arr1d, overwrite_input=False):
"""
Equivalent to arr1d[~arr1d.isnan()], but in a different order

Presumably faster as it incurs fewer copies

Parameters
----------
arr1d : ndarray
Array to remove nans from
overwrite_input : bool
True if `arr1d` can be modified in place

Returns
-------
res : ndarray
Array with nan elements removed
overwrite_input : bool
True if `res` can be modified in place, given the constraint on the
input
"""

mask = dpnp.isnan(arr1d)

s = dpnp.nonzero(mask)[0]
if s.size == arr1d.size:
warnings.warn("All-NaN slice encountered", RuntimeWarning, stacklevel=6)
return arr1d[:0], True
elif s.size == 0:
return arr1d, overwrite_input
else:
if not overwrite_input:
arr1d = arr1d.copy()
# select non-nans at end of array
enonan = arr1d[-s.size :][~mask[-s.size :]]
# fill nans in beginning of array with non-nans of end
arr1d[s[: enonan.size]] = enonan

return arr1d[: -s.size], True


def dpnp_cov(m, y=None, rowvar=True, dtype=None):
Expand Down Expand Up @@ -90,3 +174,53 @@ def _get_2dmin_array(x, dtype):
c *= 1 / fact if fact != 0 else dpnp.nan

return dpnp.squeeze(c)


def dpnp_nanmedian(
a, keepdims=False, axis=None, out=None, overwrite_input=False
):
"""Internal Function."""

nd = a.ndim
if axis is not None:
_axis = normalize_axis_tuple(axis, nd)

if keepdims:
if out is not None:
index_out = tuple(
0 if i in _axis else slice(None) for i in range(nd)
)
out = out[(Ellipsis,) + index_out]

if len(_axis) == 1:
axis = _axis[0]
else:
keep = set(range(nd)) - set(_axis)
nkeep = len(keep)
# swap axis that should not be reduced to front
for i, s in enumerate(sorted(keep)):
a = dpnp.swapaxes(a, i, s)
# merge reduced axis
a = a.reshape(a.shape[:nkeep] + (-1,))
axis = -1
else:
if keepdims:
if out is not None:
index_out = (0,) * nd
out = out[(Ellipsis,) + index_out]

r = _calc_nanmedian(a, axis=axis, out=out, overwrite_input=overwrite_input)

if out is not None:
return out

if keepdims:
if axis is None:
index_r = (dpnp.newaxis,) * nd
else:
index_r = tuple(
dpnp.newaxis if i in _axis else slice(None) for i in range(nd)
)
r = r[(Ellipsis,) + index_r]

return r
Loading
Loading