pandas-dev · ZenithClown · Dec 13, 2024 · Dec 14, 2024 · Dec 14, 2024 · Dec 14, 2024
diff --git a/doc/source/whatsnew/v2.3.0.rst b/doc/source/whatsnew/v2.3.0.rst
@@ -36,7 +36,8 @@ Other enhancements
   when using ``np.array()`` or ``np.asarray()`` on pandas objects) has been
   updated to work correctly with NumPy >= 2 (:issue:`57739`)
 - The :meth:`~Series.sum` reduction is now implemented for ``StringDtype`` columns (:issue:`59853`)
--
+- :meth:`~Series.describe` no longer adds the median to explicitly passed
+  ``percentiles``; it is only added when an empty list is passed (:issue:`60550`).
 
 .. ---------------------------------------------------------------------------
 .. _whatsnew_230.notable_bug_fixes:

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -10795,7 +10795,8 @@ def describe(
             The percentiles to include in the output. All should
             fall between 0 and 1. The default is
             ``[.25, .5, .75]``, which returns the 25th, 50th, and
-            75th percentiles.
+            75th percentiles. If an empty list is passed, only the 50th
+            percentile is returned.
         include : 'all', list-like of dtypes or None (default), optional
             A white list of data types to include in the result. Ignored
             for ``Series``. Here are the options:

diff --git a/pandas/core/methods/describe.py b/pandas/core/methods/describe.py
@@ -74,7 +74,8 @@ def describe_ndframe(
     percentiles : list-like of numbers, optional
         The percentiles to include in the output. All should fall between 0 and 1.
         The default is ``[.25, .5, .75]``, which returns the 25th, 50th, and
-        75th percentiles.
+        75th percentiles. If an empty list is passed, only the 50th
+        percentile is returned.
 
     Returns
     -------
@@ -351,13 +352,13 @@ def _refine_percentiles(
     # explicit conversion of `percentiles` to list
     percentiles = list(percentiles)
 
+    # the median is added implicitly only when an empty iterable is passed
+    if len(percentiles) == 0:
+        return np.array([0.5])
+
     # get them all to be in [0, 1]
     validate_percentile(percentiles)
 
-    # median should always be included
-    if 0.5 not in percentiles:
-        percentiles.append(0.5)
-
     percentiles = np.asarray(percentiles)
 
     # sort and check for duplicates

diff --git a/pandas/tests/frame/methods/test_describe.py b/pandas/tests/frame/methods/test_describe.py
@@ -413,3 +413,34 @@ def test_describe_exclude_pa_dtype(self):
             dtype=pd.ArrowDtype(pa.float64()),
         )
         tm.assert_frame_equal(result, expected)
+
+    def test_refine_percentiles(self):
+        # GH#60550: the median is only added implicitly for an empty list
+        df = DataFrame({"a": np.arange(0, 10, 1)})
+
+        # percentiles omitted: the default [0.25, 0.5, 0.75] is reported
+        result = df.describe()
+        expected = DataFrame(
+            {"a": [10, df.a.mean(), df.a.std(), 0, 2.25, 4.5, 6.75, 9]},
+            index=["count", "mean", "std", "min", "25%", "50%", "75%", "max"],
+        )
+
+        tm.assert_frame_equal(result, expected)
+
+        # empty list: only the median ([0.5]) is reported
+        result = df.describe(percentiles=[])
+        expected = DataFrame(
+            {"a": [10, df.a.mean(), df.a.std(), 0, 4.5, 9]},
+            index=["count", "mean", "std", "min", "50%", "max"],
+        )
+
+        tm.assert_frame_equal(result, expected)
+
+        # non-empty list: used as given, with no "50%" row added implicitly
+        result = df.describe(percentiles=[0.2])
+        expected = DataFrame(
+            {"a": [10, df.a.mean(), df.a.std(), 0, 1.8, 9]},
+            index=["count", "mean", "std", "min", "20%", "max"],
+        )
+
+        tm.assert_frame_equal(result, expected)