IntelPython · vtavana · Nov 5, 2024 · Dec 4, 2024
@@ -40,6 +40,7 @@
 import warnings
 
 import dpnp
+from dpnp.dpnp_utils.dpnp_utils_statistics import dpnp_nanmedian
 
 __all__ = [
     "nanargmax",
@@ -48,6 +49,7 @@
     "nancumsum",
     "nanmax",
     "nanmean",
+    "nanmedian",
     "nanmin",
     "nanprod",
     "nanstd",
@@ -568,6 +570,112 @@ def nanmean(a, axis=None, dtype=None, out=None, keepdims=False, *, where=True):
     return avg
 
 
+def nanmedian(a, axis=None, out=None, overwrite_input=False, keepdims=False):
+    """
+    Compute the median along the specified axis, while ignoring NaNs.
+
+    For full documentation refer to :obj:`numpy.nanmedian`.
+
+    Parameters
+    ----------
+    a : {dpnp.ndarray, usm_ndarray}
+        Input array.
+    axis : {None, int, tuple or list of ints}, optional
+        Axis or axes along which the medians are computed. The default,
+        ``axis=None``, will compute the median along a flattened version of
+        the array. If a sequence of axes, the array is first flattened along
+        the given axes, then the median is computed along the resulting
+        flattened axis.
+        Default: ``None``.
+    out : {None, dpnp.ndarray, usm_ndarray}, optional
+        Alternative output array in which to place the result. It must have
+        the same shape as the expected output but the type (of the calculated
+        values) will be cast if necessary.
+        Default: ``None``.
+    overwrite_input : bool, optional
+       If ``True``, then allow use of memory of input array `a` for
+       calculations. The input array will be modified by the call to
+       :obj:`dpnp.nanmedian`. This will save memory when you do not need to
+       preserve the contents of the input array. Treat the input as undefined,
+       but it will probably be fully or partially sorted.
+       Default: ``False``.
+    keepdims : bool, optional
+        If ``True``, the reduced axes (dimensions) are included in the result
+        as singleton dimensions, so that the returned array remains
+        compatible with the input array according to Array Broadcasting
+        rules. Otherwise, if ``False``, the reduced axes are not included in
+        the returned array.
+        Default: ``False``.
+
+    Returns
+    -------
+    out : dpnp.ndarray
+        A new array holding the result. If `a` has a floating-point data type,
+        the returned array will have the same data type as `a`. If `a` has a
+        boolean or integral data type, the returned array will have the
+        default floating point data type for the device where input array `a`
+        is allocated.
+
+    See Also
+    --------
+    :obj:`dpnp.mean` : Compute the arithmetic mean along the specified axis.
+    :obj:`dpnp.median` : Compute the median along the specified axis.
+    :obj:`dpnp.percentile` : Compute the q-th percentile of the data
+                             along the specified axis.
+
+    Notes
+    -----
+    Given a vector ``V`` of length ``N``, the median of ``V`` is the
+    middle value of a sorted copy of ``V``, ``V_sorted`` - i.e.,
+    ``V_sorted[(N-1)/2]``, when ``N`` is odd, and the average of the
+    two middle values of ``V_sorted`` when ``N`` is even.
+
+    Examples
+    --------
+    >>> import dpnp as np
+    >>> a = np.array([[10.0, 7, 4], [3, 2, 1]])
+    >>> a[0, 1] = np.nan
+    >>> a
+    array([[10., nan,  4.],
+           [ 3.,  2.,  1.]])
+    >>> np.median(a)
+    array(nan)
+    >>> np.nanmedian(a)
+    array(3.)
+
+    >>> np.nanmedian(a, axis=0)
+    array([6.5, 2., 2.5])
+    >>> np.nanmedian(a, axis=1)
+    array([7., 2.])
+
+    >>> b = a.copy()
+    >>> np.nanmedian(b, axis=1, overwrite_input=True)
+    array([7., 2.])
+    >>> assert not np.all(a==b)
+    >>> b = a.copy()
+    >>> np.nanmedian(b, axis=None, overwrite_input=True)
+    array(3.)
+    >>> assert not np.all(a==b)
+
+    """
+
+    dpnp.check_supported_arrays_type(a)
+    if dpnp.issubdtype(a.dtype, dpnp.inexact):
+        # apply_along_axis in _nanmedian doesn't handle empty arrays well,
+        # so deal them upfront
+        if a.size == 0:
+            return dpnp.nanmean(a, axis, out=out, keepdims=keepdims)
+        return dpnp_nanmedian(
+            a,
+            keepdims=keepdims,
+            axis=axis,
+            out=out,
+            overwrite_input=overwrite_input,
+        )
+
+    return dpnp.median(a, axis, out, overwrite_input, keepdims)
+
+
 def nanmin(a, axis=None, out=None, keepdims=False, initial=None, where=True):
     """
     Return the minimum of an array or minimum along an axis, ignoring any NaNs.

@@ -765,7 +765,7 @@ def median(a, axis=None, out=None, overwrite_input=False, keepdims=False):
        preserve the contents of the input array. Treat the input as undefined,
        but it will probably be fully or partially sorted.
        Default: ``False``.
-    keepdims : {None, bool}, optional
+    keepdims : bool, optional
         If ``True``, the reduced axes (dimensions) are included in the result
         as singleton dimensions, so that the returned array remains
         compatible with the input array according to Array Broadcasting
@@ -775,7 +775,7 @@ def median(a, axis=None, out=None, overwrite_input=False, keepdims=False):
 
     Returns
     -------
-    dpnp.median : dpnp.ndarray
+    out : dpnp.ndarray
         A new array holding the result. If `a` has a floating-point data type,
         the returned array will have the same data type as `a`. If `a` has a
         boolean or integral data type, the returned array will have the
@@ -808,20 +808,20 @@ def median(a, axis=None, out=None, overwrite_input=False, keepdims=False):
     >>> np.median(a, axis=0)
     array([6.5, 4.5, 2.5])
     >>> np.median(a, axis=1)
-    array([7.,  2.])
+    array([7., 2.])
     >>> np.median(a, axis=(0, 1))
     array(3.5)
 
     >>> m = np.median(a, axis=0)
     >>> out = np.zeros_like(m)
     >>> np.median(a, axis=0, out=m)
-    array([6.5,  4.5,  2.5])
+    array([6.5, 4.5, 2.5])
     >>> m
-    array([6.5,  4.5,  2.5])
+    array([6.5, 4.5, 2.5])
 
     >>> b = a.copy()
     >>> np.median(b, axis=1, overwrite_input=True)
-    array([7.,  2.])
+    array([7., 2.])
     >>> assert not np.all(a==b)
     >>> b = a.copy()
     >>> np.median(b, axis=None, overwrite_input=True)

@@ -24,10 +24,94 @@
 # *****************************************************************************
 
 
+import warnings
+
+from dpctl.tensor._numpy_helper import normalize_axis_tuple
+
 import dpnp
 from dpnp.dpnp_utils import get_usm_allocations, map_dtype_to_device
 
-__all__ = ["dpnp_cov"]
+__all__ = ["dpnp_cov", "dpnp_nanmedian"]
+
+
+def _calc_nanmedian(a, axis=None, out=None, overwrite_input=False):
+    """
+    Private function that doesn't support extended axis or keepdims.
+    These methods are extended to this function using _ureduce
+    See nanmedian for parameter usage
+
+    """
+    if axis is None or a.ndim == 1:
+        part = dpnp.ravel(a)
+        if out is None:
+            return _nanmedian1d(part, overwrite_input)
+        else:
+            out[...] = _nanmedian1d(part, overwrite_input)
+            return out
+    else:
+        result = dpnp.apply_along_axis(_nanmedian1d, axis, a, overwrite_input)
+        if out is not None:
+            out[...] = result
+        return result
+
+
+def _nanmedian1d(arr1d, overwrite_input=False):
+    """
+    Private function for rank 1 arrays. Compute the median ignoring NaNs.
+    See nanmedian for parameter usage
+    """
+    arr1d_parsed, overwrite_input = _remove_nan_1d(
+        arr1d,
+        overwrite_input=overwrite_input,
+    )
+
+    if arr1d_parsed.size == 0:
+        # Ensure that a nan-esque scalar of the appropriate type (and unit)
+        # is returned for `complexfloating`
+        return arr1d[-1]
+
+    return dpnp.median(arr1d_parsed, overwrite_input=overwrite_input)
+
+
+def _remove_nan_1d(arr1d, overwrite_input=False):
+    """
+    Equivalent to arr1d[~arr1d.isnan()], but in a different order
+
+    Presumably faster as it incurs fewer copies
+
+    Parameters
+    ----------
+    arr1d : ndarray
+        Array to remove nans from
+    overwrite_input : bool
+        True if `arr1d` can be modified in place
+
+    Returns
+    -------
+    res : ndarray
+        Array with nan elements removed
+    overwrite_input : bool
+        True if `res` can be modified in place, given the constraint on the
+        input
+    """
+
+    mask = dpnp.isnan(arr1d)
+
+    s = dpnp.nonzero(mask)[0]
+    if s.size == arr1d.size:
+        warnings.warn("All-NaN slice encountered", RuntimeWarning, stacklevel=6)
+        return arr1d[:0], True
+    elif s.size == 0:
+        return arr1d, overwrite_input
+    else:
+        if not overwrite_input:
+            arr1d = arr1d.copy()
+        # select non-nans at end of array
+        enonan = arr1d[-s.size :][~mask[-s.size :]]
+        # fill nans in beginning of array with non-nans of end
+        arr1d[s[: enonan.size]] = enonan
+
+        return arr1d[: -s.size], True
 
 
 def dpnp_cov(m, y=None, rowvar=True, dtype=None):
@@ -90,3 +174,53 @@ def _get_2dmin_array(x, dtype):
     c *= 1 / fact if fact != 0 else dpnp.nan
 
     return dpnp.squeeze(c)
+
+
+def dpnp_nanmedian(
+    a, keepdims=False, axis=None, out=None, overwrite_input=False
+):
+    """Internal Function."""
+
+    nd = a.ndim
+    if axis is not None:
+        _axis = normalize_axis_tuple(axis, nd)
+
+        if keepdims:
+            if out is not None:
+                index_out = tuple(
+                    0 if i in _axis else slice(None) for i in range(nd)
+                )
+                out = out[(Ellipsis,) + index_out]
+
+        if len(_axis) == 1:
+            axis = _axis[0]
+        else:
+            keep = set(range(nd)) - set(_axis)
+            nkeep = len(keep)
+            # swap axis that should not be reduced to front
+            for i, s in enumerate(sorted(keep)):
+                a = dpnp.swapaxes(a, i, s)
+            # merge reduced axis
+            a = a.reshape(a.shape[:nkeep] + (-1,))
+            axis = -1
+    else:
+        if keepdims:
+            if out is not None:
+                index_out = (0,) * nd
+                out = out[(Ellipsis,) + index_out]
+
+    r = _calc_nanmedian(a, axis=axis, out=out, overwrite_input=overwrite_input)
+
+    if out is not None:
+        return out
+
+    if keepdims:
+        if axis is None:
+            index_r = (dpnp.newaxis,) * nd
+        else:
+            index_r = tuple(
+                dpnp.newaxis if i in _axis else slice(None) for i in range(nd)
+            )
+        r = r[(Ellipsis,) + index_r]
+
+    return r