BUG: Allow np.percentile to operate on float16 data (numpy#29105)

eendebakpt · web-flow · commit 884aec9750f0 · 2025-11-09T08:12:49.000-05:00
* BUG: Allow np.percentile to operate on float16 data

* add an extra regression test

* add an extra regression test

* remove unused default value

* add release note

* review comments: part 1

* review comments: part 2

* review comments: part 3
diff --git a/doc/release/upcoming_changes/29105.change.rst b/doc/release/upcoming_changes/29105.change.rst
@@ -0,0 +1 @@
+* The accuracy of ``np.quantile`` and ``np.percentile`` for 16- and 32-bit floating point input data has been improved.
diff --git a/numpy/lib/_function_base_impl.py b/numpy/lib/_function_base_impl.py
@@ -4219,9 +4219,7 @@ def percentile(a,
     if a.dtype.kind == "c":
         raise TypeError("a must be an array of real numbers")
 
-    # Use dtype of array if possible (e.g., if q is a python int or float)
-    # by making the divisor have the dtype of the data array.
-    q = np.true_divide(q, a.dtype.type(100) if a.dtype.kind == "f" else 100, out=...)
+    q = np.true_divide(q, 100, out=...)
     if not _quantile_is_valid(q):
         raise ValueError("Percentiles must be in the range [0, 100]")
 
@@ -4469,11 +4467,7 @@ def quantile(a,
     if a.dtype.kind == "c":
         raise TypeError("a must be an array of real numbers")
 
-    # Use dtype of array if possible (e.g., if q is a python int or float).
-    if isinstance(q, (int, float)) and a.dtype.kind == "f":
-        q = np.asanyarray(q, dtype=a.dtype)
-    else:
-        q = np.asanyarray(q)
+    q = np.asanyarray(q)
 
     if not _quantile_is_valid(q):
         raise ValueError("Quantiles must be in the range [0, 1]")
@@ -4549,7 +4543,7 @@ def _compute_virtual_index(n, quantiles, alpha: float, beta: float):
     ) - 1
 
 
-def _get_gamma(virtual_indexes, previous_indexes, method):
+def _get_gamma(virtual_indexes, previous_indexes, method, dtype):
     """
     Compute gamma (a.k.a 'm' or 'weight') for the linear interpolation
     of quantiles.
@@ -4570,7 +4564,7 @@ def _get_gamma(virtual_indexes, previous_indexes, method):
     gamma = method["fix_gamma"](gamma, virtual_indexes)
     # Ensure both that we have an array, and that we keep the dtype
     # (which may have been matched to the input array).
-    return np.asanyarray(gamma, dtype=virtual_indexes.dtype)
+    return np.asanyarray(gamma, dtype=dtype)
 
 
 def _lerp(a, b, t, out=None):
@@ -4788,7 +4782,16 @@ def _quantile(
             previous = arr[previous_indexes]
             next = arr[next_indexes]
             # --- Linear interpolation
-            gamma = _get_gamma(virtual_indexes, previous_indexes, method_props)
+            if arr.dtype.kind in "iu":
+                gtype = None
+            elif arr.dtype.kind == "f":
+                # make sure the return value matches the input array type
+                gtype = arr.dtype
+            else:
+                gtype = virtual_indexes.dtype
+
+            gamma = _get_gamma(virtual_indexes, previous_indexes,
+                               method_props, gtype)
             result_shape = virtual_indexes.shape + (1,) * (arr.ndim - 1)
             gamma = gamma.reshape(result_shape)
             result = _lerp(previous,
diff --git a/numpy/lib/tests/test_function_base.py b/numpy/lib/tests/test_function_base.py
@@ -3277,6 +3277,16 @@ def test_period(self):
         assert_almost_equal(np.interp(x, xp, fp, period=360), y)
 
 
+quantile_methods = [
+    'inverted_cdf', 'averaged_inverted_cdf', 'closest_observation',
+    'interpolated_inverted_cdf', 'hazen', 'weibull', 'linear',
+    'median_unbiased', 'normal_unbiased', 'nearest', 'lower', 'higher',
+    'midpoint']
+
+
+methods_supporting_weights = ["inverted_cdf"]
+
+
 class TestPercentile:
 
     def test_basic(self):
@@ -3870,15 +3880,38 @@ def test_nat_basic(self, dtype, pos):
         res = np.percentile(a, 30, axis=0)
         assert_array_equal(np.isnat(res), [False, True, False])
 
-
-quantile_methods = [
-    'inverted_cdf', 'averaged_inverted_cdf', 'closest_observation',
-    'interpolated_inverted_cdf', 'hazen', 'weibull', 'linear',
-    'median_unbiased', 'normal_unbiased', 'nearest', 'lower', 'higher',
-    'midpoint']
-
-
-methods_supporting_weights = ["inverted_cdf"]
+    @pytest.mark.parametrize("qtype", [np.float16, np.float32])
+    @pytest.mark.parametrize("method", quantile_methods)
+    def test_percentile_gh_29003(self, qtype, method):
+        # test that with float16 or float32 input we do not get overflow
+        zero = qtype(0)
+        one = qtype(1)
+        a = np.zeros(65521, qtype)
+        a[:20_000] = one
+        z = np.percentile(a, 50, method=method)
+        assert z == zero
+        assert z.dtype == a.dtype
+        z = np.percentile(a, 99, method=method)
+        assert z == one
+        assert z.dtype == a.dtype
+
+    def test_percentile_gh_29003_Fraction(self):
+        zero = Fraction(0)
+        one = Fraction(1)
+        a = np.array([zero] * 65521)
+        a[:20_000] = one
+        z = np.percentile(a, 50)
+        assert z == zero
+        z = np.percentile(a, Fraction(50))
+        assert z == zero
+        assert np.array(z).dtype == a.dtype
+
+        z = np.percentile(a, 99)
+        assert z == one
+        # test that with only Fraction input the return type is a Fraction
+        z = np.percentile(a, Fraction(99))
+        assert z == one
+        assert np.array(z).dtype == a.dtype
 
 
 class TestQuantile:
@@ -4244,6 +4277,21 @@ def test_closest_observation(self):
         assert_equal(4, np.quantile(arr[0:9], q, method=m))
         assert_equal(5, np.quantile(arr, q, method=m))
 
+    def test_quantile_gh_29003_Fraction(self):
+        r = np.quantile([1, 2], q=Fraction(1))
+        assert r == Fraction(2)
+        assert isinstance(r, Fraction)
+
+        r = np.quantile([1, 2], q=Fraction(.5))
+        assert r == Fraction(3, 2)
+        assert isinstance(r, Fraction)
+
+    def test_float16_gh_29003(self):
+        a = np.arange(50_001, dtype=np.float16)
+        q = .999
+        value = np.quantile(a, q)
+        assert value == q * 50_000
+        assert value.dtype == np.float16
 
 class TestLerp:
     @hypothesis.given(t0=st.floats(allow_nan=False, allow_infinity=False,

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	+* The accuracy of ``np.quantile`` and ``np.percentile`` for 16- and 32-bit floating point input data has been improved.