Skip to content

Commit cf821b2

Browse files
committed
Update documentation for sort_values and natural sorting
1 parent e4a03b6 commit cf821b2

File tree

2 files changed

+50
-37
lines changed

2 files changed

+50
-37
lines changed

pandas/core/frame.py

Lines changed: 26 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -7173,35 +7173,40 @@ def sort_values(
71737173
`natural sorting <https://en.wikipedia.org/wiki/Natural_sort_order>`__.
71747174
This can be done using
71757175
``natsort`` `package <https://github.com/SethMMorton/natsort>`__,
7176-
which provides sorted indices according
7177-
to their natural order, as shown below:
7176+
which provides a function to generate a key
7177+
to sort data in their natural order:
71787178
71797179
>>> df = pd.DataFrame(
71807180
... {
7181-
... "time": ["0hr", "128hr", "72hr", "48hr", "96hr"],
7182-
... "value": [10, 20, 30, 40, 50],
7181+
... "hours": ["0hr", "128hr", "0hr", "64hr", "64hr", "128hr"],
7182+
... "mins": ["10mins", "40mins", "40mins", "40mins", "10mins", "10mins"],
7183+
... "value": [10, 20, 30, 40, 50, 60],
71837184
... }
71847185
... )
71857186
>>> df
7186-
time value
7187-
0 0hr 10
7188-
1 128hr 20
7189-
2 72hr 30
7190-
3 48hr 40
7191-
4 96hr 50
7192-
>>> from natsort import index_natsorted
7193-
>>> index_natsorted(df["time"])
7194-
[0, 3, 2, 4, 1]
7187+
hours mins value
7188+
0 0hr 10mins 10
7189+
1 128hr 40mins 20
7190+
2 0hr 40mins 30
7191+
3 64hr 40mins 40
7192+
4 64hr 10mins 50
7193+
5 128hr 10mins 60
7194+
>>> from natsort import natsort_keygen
7195+
>>> natsort_keygen()(df["hours"])
7196+
(('', 0, 'hr'), ('', 128, 'hr'), ('', 0, 'hr'), ('', 64, 'hr'), ('', 64, 'hr'), ('', 128, 'hr'))
7197+
>>> natsort_keygen()(df["mins"])
7198+
(('', 10, 'mins'), ('', 40, 'mins'), ('', 40, 'mins'), ('', 40, 'mins'), ('', 10, 'mins'), ('', 10, 'mins'))
71957199
>>> df.sort_values(
7196-
... by="time",
7197-
... key=lambda x: np.argsort(index_natsorted(x)),
7200+
... by=["hours", "mins"],
7201+
... key=natsort_keygen(),
71987202
... )
7199-
time value
7200-
0 0hr 10
7201-
3 48hr 40
7202-
2 72hr 30
7203-
4 96hr 50
7204-
1 128hr 20
7203+
hours mins value
7204+
0 0hr 10mins 10
7205+
2 0hr 40mins 30
7206+
4 64hr 10mins 50
7207+
3 64hr 40mins 40
7208+
5 128hr 10mins 60
7209+
1 128hr 40mins 20
72057210
"""
72067211
inplace = validate_bool_kwarg(inplace, "inplace")
72077212
axis = self._get_axis_number(axis)

pandas/core/generic.py

Lines changed: 24 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -5004,27 +5004,35 @@ def sort_values(
50045004
50055005
>>> df = pd.DataFrame(
50065006
... {
5007-
... "time": ["0hr", "128hr", "72hr", "48hr", "96hr"],
5008-
... "value": [10, 20, 30, 40, 50],
5007+
... "hours": ["0hr", "128hr", "0hr", "64hr", "64hr", "128hr"],
5008+
... "mins": ["10mins", "40mins", "40mins", "40mins", "10mins", "10mins"],
5009+
... "value": [10, 20, 30, 40, 50, 60],
50095010
... }
50105011
... )
50115012
>>> df
5012-
time value
5013-
0 0hr 10
5014-
1 128hr 20
5015-
2 72hr 30
5016-
3 48hr 40
5017-
4 96hr 50
5018-
>>> from natsort import index_natsorted
5013+
hours mins value
5014+
0 0hr 10mins 10
5015+
1 128hr 40mins 20
5016+
2 0hr 40mins 30
5017+
3 64hr 40mins 40
5018+
4 64hr 10mins 50
5019+
5 128hr 10mins 60
5020+
>>> from natsort import natsort_keygen
5021+
>>> natsort_keygen()(df["hours"])
5022+
(('', 0, 'hr'), ('', 128, 'hr'), ('', 0, 'hr'), ('', 64, 'hr'), ('', 64, 'hr'), ('', 128, 'hr'))
5023+
>>> natsort_keygen()(df["mins"])
5024+
(('', 10, 'mins'), ('', 40, 'mins'), ('', 40, 'mins'), ('', 40, 'mins'), ('', 10, 'mins'), ('', 10, 'mins'))
50195025
>>> df.sort_values(
5020-
... by="time", key=lambda x: np.argsort(index_natsorted(df["time"]))
5026+
... by=["hours", "mins"],
5027+
... key=natsort_keygen(),
50215028
... )
5022-
time value
5023-
0 0hr 10
5024-
3 48hr 40
5025-
2 72hr 30
5026-
4 96hr 50
5027-
1 128hr 20
5029+
hours mins value
5030+
0 0hr 10mins 10
5031+
2 0hr 40mins 30
5032+
4 64hr 10mins 50
5033+
3 64hr 40mins 40
5034+
5 128hr 10mins 60
5035+
1 128hr 40mins 20
50285036
"""
50295037
raise AbstractMethodError(self)
50305038

0 commit comments

Comments
 (0)