-
-
Notifications
You must be signed in to change notification settings - Fork 19.1k
Fix bug in Series.describe
where the median is included any time the percentiles
argument is not None
#61158
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 2 commits
e3b0b5d
8bb3cf3
a0a0c63
bf1effa
28756ad
5ed786c
c57dabb
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -10818,9 +10818,12 @@ def describe( | |
---------- | ||
percentiles : list-like of numbers, optional | ||
The percentiles to include in the output. All should | ||
fall between 0 and 1. The default is | ||
``[.25, .5, .75]``, which returns the 25th, 50th, and | ||
75th percentiles. | ||
fall between 0 and 1. Here are the options: | ||
|
||
- A list-like of numbers : To include the percentiles listed. If | ||
that list is empty, no percentiles will be returned. | ||
- None (default) : To include the default percentiles, which are the | ||
25th, 50th, and 75th ones. | ||
|
||
include : 'all', list-like of dtypes or None (default), optional | ||
A white list of data types to include in the result. Ignored | ||
for ``Series``. Here are the options: | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1565,6 +1565,9 @@ def format_percentiles( | |
>>> format_percentiles([0, 0.5, 0.02001, 0.5, 0.666666, 0.9999]) | ||
['0%', '50%', '2.0%', '50%', '66.67%', '99.99%'] | ||
""" | ||
if len(percentiles) == 0: | ||
return [] | ||
|
||
Comment on lines
+1568
to
+1570
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This is backward-compatible, as it only extends the range of values that the input parameter can take. |
||
percentiles = np.asarray(percentiles) | ||
|
||
# It checks for np.nan as well | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -413,3 +413,33 @@ def test_describe_exclude_pa_dtype(self): | |
dtype=pd.ArrowDtype(pa.float64()), | ||
) | ||
tm.assert_frame_equal(result, expected) | ||
|
||
@pytest.mark.parametrize("percentiles", [None, [], [0.2]]) | ||
def test_refine_percentiles(self, percentiles): | ||
""" | ||
Test that the percentiles are returned correctly depending on the `percentiles` | ||
argument. | ||
- The default behavior is to return the 25th, 50th, and 75th percentiles | ||
- If `percentiles` is an empty list, no percentiles are returned | ||
- If `percentiles` is a non-empty list, only those percentiles are returned | ||
""" | ||
# GH#60550 | ||
df = DataFrame({"a": np.arange(0, 10, 1)}) | ||
|
||
result = df.describe(percentiles=percentiles) | ||
|
||
if percentiles is None: | ||
percentiles = [0.25, 0.5, 0.75] | ||
|
||
expected = Series( | ||
{ | ||
"count": len(df.a), | ||
"mean": df.a.mean(), | ||
"std": df.a.std(), | ||
"min": df.a.min(), | ||
**{f"{p:.0%}": df.a.quantile(p) for p in percentiles}, | ||
"max": df.a.max(), | ||
}, | ||
).to_frame(name="a") | ||
|
||
|
||
tm.assert_frame_equal(result, expected) |
Uh oh!
There was an error while loading. Please reload this page.