Skip to content

Commit fc30c10

Browse files
committed
Update documentation for sort_values and natural sorting
1 parent e4a03b6 commit fc30c10

File tree

2 files changed

+68
-37
lines changed

2 files changed

+68
-37
lines changed

pandas/core/frame.py

Lines changed: 35 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -7173,35 +7173,49 @@ def sort_values(
71737173
`natural sorting <https://en.wikipedia.org/wiki/Natural_sort_order>`__.
71747174
This can be done using
71757175
``natsort`` `package <https://github.com/SethMMorton/natsort>`__,
7176-
which provides sorted indices according
7177-
to their natural order, as shown below:
7176+
which provides a function to generate a key
7177+
to sort data in their natural order:
71787178
71797179
>>> df = pd.DataFrame(
71807180
... {
7181-
... "time": ["0hr", "128hr", "72hr", "48hr", "96hr"],
7182-
... "value": [10, 20, 30, 40, 50],
7181+
... "hours": ["0hr", "128hr", "0hr", "64hr", "64hr", "128hr"],
7182+
... "mins": [
7183+
... "10mins", "40mins", "40mins", "40mins", "10mins", "10mins"
7184+
... ],
7185+
... "value": [10, 20, 30, 40, 50, 60],
71837186
... }
71847187
... )
71857188
>>> df
7186-
time value
7187-
0 0hr 10
7188-
1 128hr 20
7189-
2 72hr 30
7190-
3 48hr 40
7191-
4 96hr 50
7192-
>>> from natsort import index_natsorted
7193-
>>> index_natsorted(df["time"])
7194-
[0, 3, 2, 4, 1]
7189+
hours mins value
7190+
0 0hr 10mins 10
7191+
1 128hr 40mins 20
7192+
2 0hr 40mins 30
7193+
3 64hr 40mins 40
7194+
4 64hr 10mins 50
7195+
5 128hr 10mins 60
7196+
>>> from natsort import natsort_keygen
7197+
>>> natsort_keygen()(df["hours"])
7198+
(('', 0, 'hr'), ('', 128, 'hr'), ('', 0, 'hr'), ('', 64, 'hr'), ('', 64, 'hr'), ('', 128, 'hr'))
7199+
>>> natsort_keygen()(df["mins"])
7200+
(
7201+
('', 10, 'mins'),
7202+
('', 40, 'mins'),
7203+
('', 40, 'mins'),
7204+
('', 40, 'mins'),
7205+
('', 10, 'mins'),
7206+
('', 10, 'mins'),
7207+
)
71957208
>>> df.sort_values(
7196-
... by="time",
7197-
... key=lambda x: np.argsort(index_natsorted(x)),
7209+
... by=["hours", "mins"],
7210+
... key=natsort_keygen(),
71987211
... )
7199-
time value
7200-
0 0hr 10
7201-
3 48hr 40
7202-
2 72hr 30
7203-
4 96hr 50
7204-
1 128hr 20
7212+
hours mins value
7213+
0 0hr 10mins 10
7214+
2 0hr 40mins 30
7215+
4 64hr 10mins 50
7216+
3 64hr 40mins 40
7217+
5 128hr 10mins 60
7218+
1 128hr 40mins 20
72057219
"""
72067220
inplace = validate_bool_kwarg(inplace, "inplace")
72077221
axis = self._get_axis_number(axis)

pandas/core/generic.py

Lines changed: 33 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -5004,27 +5004,44 @@ def sort_values(
50045004
50055005
>>> df = pd.DataFrame(
50065006
... {
5007-
... "time": ["0hr", "128hr", "72hr", "48hr", "96hr"],
5008-
... "value": [10, 20, 30, 40, 50],
5007+
... "hours": ["0hr", "128hr", "0hr", "64hr", "64hr", "128hr"],
5008+
... "mins": [
5009+
... "10mins", "40mins", "40mins", "40mins", "10mins", "10mins"
5010+
... ],
5011+
... "value": [10, 20, 30, 40, 50, 60],
50095012
... }
50105013
... )
50115014
>>> df
5012-
time value
5013-
0 0hr 10
5014-
1 128hr 20
5015-
2 72hr 30
5016-
3 48hr 40
5017-
4 96hr 50
5018-
>>> from natsort import index_natsorted
5015+
hours mins value
5016+
0 0hr 10mins 10
5017+
1 128hr 40mins 20
5018+
2 0hr 40mins 30
5019+
3 64hr 40mins 40
5020+
4 64hr 10mins 50
5021+
5 128hr 10mins 60
5022+
>>> from natsort import natsort_keygen
5023+
>>> natsort_keygen()(df["hours"])
5024+
(('', 0, 'hr'), ('', 128, 'hr'), ('', 0, 'hr'), ('', 64, 'hr'), ('', 64, 'hr'), ('', 128, 'hr'))
5025+
>>> natsort_keygen()(df["mins"])
5026+
(
5027+
('', 10, 'mins'),
5028+
('', 40, 'mins'),
5029+
('', 40, 'mins'),
5030+
('', 40, 'mins'),
5031+
('', 10, 'mins'),
5032+
('', 10, 'mins'),
5033+
)
50195034
>>> df.sort_values(
5020-
... by="time", key=lambda x: np.argsort(index_natsorted(df["time"]))
5035+
... by=["hours", "mins"],
5036+
... key=natsort_keygen(),
50215037
... )
5022-
time value
5023-
0 0hr 10
5024-
3 48hr 40
5025-
2 72hr 30
5026-
4 96hr 50
5027-
1 128hr 20
5038+
hours mins value
5039+
0 0hr 10mins 10
5040+
2 0hr 40mins 30
5041+
4 64hr 10mins 50
5042+
3 64hr 40mins 40
5043+
5 128hr 10mins 60
5044+
1 128hr 40mins 20
50285045
"""
50295046
raise AbstractMethodError(self)
50305047

0 commit comments

Comments
 (0)