@@ -7447,6 +7447,160 @@ def value_counts(
74477447
74487448 return counts
74497449
def nsorted(
    self,
    n: int,
    columns: IndexLabel,
    ascending: bool | Sequence[bool],
    keep: NsmallestNlargestKeep = "first",
) -> DataFrame:
    """
    Return the first `n` rows after ordering by `columns` as directed by
    `ascending`.

    Only the columns named in `columns` take part in the ordering; every
    other column is carried along in the result unchanged.

    The result matches
    ``df.sort_values(columns, ascending=ascending).head(n)``, but this
    method is more performant.

    Parameters
    ----------
    n : int
        Number of rows to return.
    columns : label or list of labels
        Column label(s) to order by.
    ascending : bool or list of bools
        Whether to sort in ascending or descending order.
        If a list, must be the same length as `columns`.
    keep : {'first', 'last', 'all'}, default 'first'
        How to choose among rows holding duplicate values:

        - ``first`` : prioritize the first occurrence(s)
        - ``last`` : prioritize the last occurrence(s)
        - ``all`` : keep every row tied with the last selected item, even
          if that means returning more than ``n`` rows.

    Returns
    -------
    DataFrame
        The first `n` rows ordered by the given columns in the order given
        in `ascending`.

    See Also
    --------
    DataFrame.nlargest : Return the first `n` rows ordered by `columns` in
        descending order.
    DataFrame.nsmallest : Return the first `n` rows ordered by `columns` in
        ascending order.
    DataFrame.sort_values : Sort DataFrame by the values.
    DataFrame.head : Return the first `n` rows without re-ordering.

    Notes
    -----
    Not every column type is supported. For example, specifying columns
    with `object` or `category` dtypes raises ``TypeError``.

    Examples
    --------
    >>> df = pd.DataFrame(
    ...     {
    ...         "population": [
    ...             59000000,
    ...             65000000,
    ...             434000,
    ...             434000,
    ...             434000,
    ...             337000,
    ...             11300,
    ...             11300,
    ...             11300,
    ...         ],
    ...         "GDP": [1937894, 2583560, 12011, 4520, 12128, 17036, 182, 38, 311],
    ...         "alpha-2": ["IT", "FR", "MT", "MV", "BN", "IS", "NR", "TV", "AI"],
    ...     },
    ...     index=[
    ...         "Italy",
    ...         "France",
    ...         "Malta",
    ...         "Maldives",
    ...         "Brunei",
    ...         "Iceland",
    ...         "Nauru",
    ...         "Tuvalu",
    ...         "Anguilla",
    ...     ],
    ... )
    >>> df
              population      GDP alpha-2
    Italy       59000000  1937894      IT
    France      65000000  2583560      FR
    Malta         434000    12011      MT
    Maldives      434000     4520      MV
    Brunei        434000    12128      BN
    Iceland       337000    17036      IS
    Nauru          11300      182      NR
    Tuvalu         11300       38      TV
    Anguilla       11300      311      AI

    Select the three rows with the largest values in column "population":

    >>> df.nsorted(3, "population", ascending=False)
            population      GDP alpha-2
    France    65000000  2583560      FR
    Italy     59000000  1937894      IT
    Malta       434000    12011      MT

    With ``keep='last'``, ties are resolved in reverse order:

    >>> df.nsorted(3, "population", ascending=False, keep="last")
            population      GDP alpha-2
    France    65000000  2583560      FR
    Italy     59000000  1937894      IT
    Brunei      434000    12128      BN

    With ``keep='all'``, more than ``n`` rows may be returned when the last
    selected value is duplicated. All the ties are kept:

    >>> df.nsorted(3, "population", ascending=False, keep="all")
              population      GDP alpha-2
    France      65000000  2583560      FR
    Italy       59000000  1937894      IT
    Malta         434000    12011      MT
    Maldives      434000     4520      MV
    Brunei        434000    12128      BN

    However, ``nsorted`` does not keep ``n`` distinct largest elements:

    >>> df.nsorted(5, "population", ascending=False, keep="all")
              population      GDP alpha-2
    France      65000000  2583560      FR
    Italy       59000000  1937894      IT
    Malta         434000    12011      MT
    Maldives      434000     4520      MV
    Brunei        434000    12128      BN

    To order by the largest "population" and break ties using the smallest
    "GDP", pass several columns together with one direction per column:

    >>> df.nsorted(3, ["population", "GDP"], ascending=[False, True])
              population      GDP alpha-2
    France      65000000  2583560      FR
    Italy       59000000  1937894      IT
    Maldives      434000     4520      MV
    """
    # Build the selector for this frame first, then hand it the sort
    # direction(s); the selection itself is delegated to SelectNFrame.
    selector = selectn.SelectNFrame(self, n=n, keep=keep, columns=columns)
    return selector.nsorted(ascending=ascending)
7603+
74507604 def nlargest (
74517605 self , n : int , columns : IndexLabel , keep : NsmallestNlargestKeep = "first"
74527606 ) -> DataFrame :
@@ -7457,6 +7611,9 @@ def nlargest(
74577611 descending order. The columns that are not specified are returned as
74587612 well, but not used for ordering.
74597613
7614+ This method is equivalent to
7615+ ``df.nsorted(n, columns, ascending=False)``.
7616+
74607617 This method is equivalent to
74617618 ``df.sort_values(columns, ascending=False).head(n)``, but more
74627619 performant.
@@ -7485,6 +7642,8 @@ def nlargest(
74857642 --------
74867643 DataFrame.nsmallest : Return the first `n` rows ordered by `columns` in
74877644 ascending order.
7645+ DataFrame.nsorted : Return the first `n` rows ordered by `columns` in
7646+ the order given in `ascending`.
74887647 DataFrame.sort_values : Sort DataFrame by the values.
74897648 DataFrame.head : Return the first `n` rows without re-ordering.
74907649
@@ -7553,7 +7712,7 @@ def nlargest(
75537712 Italy 59000000 1937894 IT
75547713 Brunei 434000 12128 BN
75557714
7556- When using ``keep='all'``, the number of element kept can go beyond ``n``
7715+ When using ``keep='all'``, the number of elements kept can go beyond ``n``
75577716 if there are duplicate values for the smallest element, all the
75587717 ties are kept:
75597718
@@ -7584,7 +7743,7 @@ def nlargest(
75847743 Italy 59000000 1937894 IT
75857744 Brunei 434000 12128 BN
75867745 """
7587- return selectn . SelectNFrame ( self , n = n , keep = keep , columns = columns ). nlargest ( )
7746+ return self . nsorted ( n = n , columns = columns , ascending = False , keep = keep )
75887747
75897748 def nsmallest (
75907749 self , n : int , columns : IndexLabel , keep : NsmallestNlargestKeep = "first"
@@ -7596,6 +7755,9 @@ def nsmallest(
75967755 ascending order. The columns that are not specified are returned as
75977756 well, but not used for ordering.
75987757
7758+ This method is equivalent to
7759+ ``df.nsorted(n, columns, ascending=True)``.
7760+
75997761 This method is equivalent to
76007762 ``df.sort_values(columns, ascending=True).head(n)``, but more
76017763 performant.
@@ -7623,6 +7785,8 @@ def nsmallest(
76237785 --------
76247786 DataFrame.nlargest : Return the first `n` rows ordered by `columns` in
76257787 descending order.
7788+ DataFrame.nsorted : Return the first `n` rows ordered by `columns` in
7789+ the order given in `ascending`.
76267790 DataFrame.sort_values : Sort DataFrame by the values.
76277791 DataFrame.head : Return the first `n` rows without re-ordering.
76287792
@@ -7715,7 +7879,7 @@ def nsmallest(
77157879 Anguilla 11300 311 AI
77167880 Nauru 337000 182 NR
77177881 """
7718- return selectn . SelectNFrame ( self , n = n , keep = keep , columns = columns ). nsmallest ( )
7882+ return self . nsorted ( n = n , columns = columns , ascending = True , keep = keep )
77197883
77207884 def swaplevel (self , i : Axis = - 2 , j : Axis = - 1 , axis : Axis = 0 ) -> DataFrame :
77217885 """
0 commit comments