@@ -7447,6 +7447,160 @@ def value_counts(
7447
7447
7448
7448
return counts
7449
7449
7450
def nsorted(
    self,
    n: int,
    columns: IndexLabel,
    ascending: bool | Sequence[bool],
    keep: NsmallestNlargestKeep = "first",
) -> DataFrame:
    """
    Return the first `n` rows ordered by `columns` as directed by `ascending`.

    Rows are ranked by `columns`, each column following its own sort
    direction, and the leading `n` rows of that ranking are returned.
    Columns that are not listed in `columns` are returned as well, but
    play no part in the ordering.

    This method is equivalent to
    ``df.sort_values(columns, ascending=ascending).head(n)``, but more
    performant.

    Parameters
    ----------
    n : int
        Number of rows to return.
    columns : label or list of labels
        Column label(s) to order by.
    ascending : bool or list of bools
        Whether to sort in ascending or descending order.
        If a list, must be the same length as `columns`.
    keep : {'first', 'last', 'all'}, default 'first'
        Where there are duplicate values:

        - ``first`` : prioritize the first occurrence(s)
        - ``last`` : prioritize the last occurrence(s)
        - ``all`` : keep all the ties of the last selected item even if it
          means selecting more than ``n`` items.

    Returns
    -------
    DataFrame
        The first `n` rows ordered by the given columns in the order given
        in `ascending`.

    See Also
    --------
    DataFrame.nlargest : Return the first `n` rows ordered by `columns` in
        descending order.
    DataFrame.nsmallest : Return the first `n` rows ordered by `columns` in
        ascending order.
    DataFrame.sort_values : Sort DataFrame by the values.
    DataFrame.head : Return the first `n` rows without re-ordering.

    Notes
    -----
    This function cannot be used with all column types. For example, when
    specifying columns with `object` or `category` dtypes, ``TypeError`` is
    raised.

    Examples
    --------
    >>> df = pd.DataFrame(
    ...     {
    ...         "population": [
    ...             59000000,
    ...             65000000,
    ...             434000,
    ...             434000,
    ...             434000,
    ...             337000,
    ...             11300,
    ...             11300,
    ...             11300,
    ...         ],
    ...         "GDP": [1937894, 2583560, 12011, 4520, 12128, 17036, 182, 38, 311],
    ...         "alpha-2": ["IT", "FR", "MT", "MV", "BN", "IS", "NR", "TV", "AI"],
    ...     },
    ...     index=[
    ...         "Italy",
    ...         "France",
    ...         "Malta",
    ...         "Maldives",
    ...         "Brunei",
    ...         "Iceland",
    ...         "Nauru",
    ...         "Tuvalu",
    ...         "Anguilla",
    ...     ],
    ... )
    >>> df
              population      GDP alpha-2
    Italy       59000000  1937894      IT
    France      65000000  2583560      FR
    Malta         434000    12011      MT
    Maldives      434000     4520      MV
    Brunei        434000    12128      BN
    Iceland       337000    17036      IS
    Nauru          11300      182      NR
    Tuvalu         11300       38      TV
    Anguilla       11300      311      AI

    In the following example, we will use ``nsorted`` to select the three
    rows having the largest values in column "population".

    >>> df.nsorted(3, "population", ascending=False)
            population      GDP alpha-2
    France    65000000  2583560      FR
    Italy     59000000  1937894      IT
    Malta       434000    12011      MT

    When using ``keep='last'``, ties are resolved in reverse order:

    >>> df.nsorted(3, "population", ascending=False, keep="last")
            population      GDP alpha-2
    France    65000000  2583560      FR
    Italy     59000000  1937894      IT
    Brunei      434000    12128      BN

    When using ``keep='all'``, the number of elements kept can go beyond ``n``
    if there are duplicate values for the last selected element. All the
    ties are kept:

    >>> df.nsorted(3, "population", ascending=False, keep="all")
              population      GDP alpha-2
    France      65000000  2583560      FR
    Italy       59000000  1937894      IT
    Malta         434000    12011      MT
    Maldives      434000     4520      MV
    Brunei        434000    12128      BN

    However, ``nsorted`` does not keep ``n`` distinct largest elements:

    >>> df.nsorted(5, "population", ascending=False, keep="all")
              population      GDP alpha-2
    France      65000000  2583560      FR
    Italy       59000000  1937894      IT
    Malta         434000    12011      MT
    Maldives      434000     4520      MV
    Brunei        434000    12128      BN

    To order by the largest values in column "population" and break ties
    according to the smallest values in column "GDP", we can specify
    multiple columns and ascending orders like in the next example.

    >>> df.nsorted(3, ["population", "GDP"], ascending=[False, True])
              population      GDP alpha-2
    France      65000000  2583560      FR
    Italy       59000000  1937894      IT
    Maldives      434000     4520      MV
    """
    # Configure the selector with the frame, row count, tie policy and
    # ordering columns, then ask it for the requested direction(s).
    selector = selectn.SelectNFrame(self, n=n, keep=keep, columns=columns)
    return selector.nsorted(ascending=ascending)
7603
+
7450
7604
def nlargest (
7451
7605
self , n : int , columns : IndexLabel , keep : NsmallestNlargestKeep = "first"
7452
7606
) -> DataFrame :
@@ -7457,6 +7611,9 @@ def nlargest(
7457
7611
descending order. The columns that are not specified are returned as
7458
7612
well, but not used for ordering.
7459
7613
7614
+ This method is equivalent to
7615
+ ``df.nsorted(n, columns, ascending=False)``.
7616
+
7460
7617
It is also equivalent to
7461
7618
``df.sort_values(columns, ascending=False).head(n)``, but more
7462
7619
performant.
@@ -7485,6 +7642,8 @@ def nlargest(
7485
7642
--------
7486
7643
DataFrame.nsmallest : Return the first `n` rows ordered by `columns` in
7487
7644
ascending order.
7645
+ DataFrame.nsorted : Return the first `n` rows ordered by `columns` in
7646
+ the order given in `ascending`.
7488
7647
DataFrame.sort_values : Sort DataFrame by the values.
7489
7648
DataFrame.head : Return the first `n` rows without re-ordering.
7490
7649
@@ -7553,7 +7712,7 @@ def nlargest(
7553
7712
Italy 59000000 1937894 IT
7554
7713
Brunei 434000 12128 BN
7555
7714
7556
- When using ``keep='all'``, the number of element kept can go beyond ``n``
7715
+ When using ``keep='all'``, the number of elements kept can go beyond ``n``
7557
7716
if there are duplicate values for the smallest element. All the
7558
7717
ties are kept:
7559
7718
@@ -7584,7 +7743,7 @@ def nlargest(
7584
7743
Italy 59000000 1937894 IT
7585
7744
Brunei 434000 12128 BN
7586
7745
"""
7587
- return selectn . SelectNFrame ( self , n = n , keep = keep , columns = columns ). nlargest ( )
7746
+ return self . nsorted ( n = n , columns = columns , ascending = False , keep = keep )
7588
7747
7589
7748
def nsmallest (
7590
7749
self , n : int , columns : IndexLabel , keep : NsmallestNlargestKeep = "first"
@@ -7596,6 +7755,9 @@ def nsmallest(
7596
7755
ascending order. The columns that are not specified are returned as
7597
7756
well, but not used for ordering.
7598
7757
7758
+ This method is equivalent to
7759
+ ``df.nsorted(n, columns, ascending=True)``.
7760
+
7599
7761
It is also equivalent to
7600
7762
``df.sort_values(columns, ascending=True).head(n)``, but more
7601
7763
performant.
@@ -7623,6 +7785,8 @@ def nsmallest(
7623
7785
--------
7624
7786
DataFrame.nlargest : Return the first `n` rows ordered by `columns` in
7625
7787
descending order.
7788
+ DataFrame.nsorted : Return the first `n` rows ordered by `columns` in
7789
+ the order given in `ascending`.
7626
7790
DataFrame.sort_values : Sort DataFrame by the values.
7627
7791
DataFrame.head : Return the first `n` rows without re-ordering.
7628
7792
@@ -7715,7 +7879,7 @@ def nsmallest(
7715
7879
Anguilla 11300 311 AI
7716
7880
Nauru 337000 182 NR
7717
7881
"""
7718
- return selectn . SelectNFrame ( self , n = n , keep = keep , columns = columns ). nsmallest ( )
7882
+ return self . nsorted ( n = n , columns = columns , ascending = True , keep = keep )
7719
7883
7720
7884
def swaplevel (self , i : Axis = - 2 , j : Axis = - 1 , axis : Axis = 0 ) -> DataFrame :
7721
7885
"""
0 commit comments