1
1
"""
2
- Note: for naming purposes, most tests are title with as e.g. "test_nlargest_foo"
3
- but are implicitly also testing nsmallest_foo.
2
+ Note: for naming purposes, most test method titles include "nsorted"
3
+ (e.g., "test_nsorted_foo") but are implicitly also testing "nsmallest" and
4
+ "nlargest".
4
5
"""
5
6
6
7
from string import ascii_lowercase
@@ -41,11 +42,11 @@ def df_main_dtypes():
41
42
)
42
43
43
44
44
- class TestNLargestNSmallest :
45
+ class TestNSorted :
45
46
# ----------------------------------------------------------------------
46
47
# Top / bottom
47
48
@pytest .mark .parametrize (
48
- "order " ,
49
+ "columns " ,
49
50
[
50
51
["a" ],
51
52
["c" ],
@@ -63,7 +64,7 @@ class TestNLargestNSmallest:
63
64
],
64
65
)
65
66
@pytest .mark .parametrize ("n" , range (1 , 11 ))
66
- def test_nlargest_n (self , nselect_method , n , order ):
67
+ def test_nsorted_n (self , nselect_method , n : int , columns ):
67
68
# GH#10393
68
69
df = pd .DataFrame (
69
70
{
@@ -72,24 +73,24 @@ def test_nlargest_n(self, nselect_method, n, order):
72
73
"c" : np .random .default_rng (2 ).permutation (10 ).astype ("float64" ),
73
74
}
74
75
)
75
- if "b" in order :
76
+ if "b" in columns :
76
77
error_msg = (
77
78
f"Column 'b' has dtype (object|str), "
78
79
f"cannot use method '{ nselect_method } ' with this dtype"
79
80
)
80
81
with pytest .raises (TypeError , match = error_msg ):
81
- getattr (df , nselect_method )(n , order )
82
+ getattr (df , nselect_method )(n , columns )
82
83
else :
83
84
ascending = nselect_method == "nsmallest"
84
- result = getattr (df , nselect_method )(n , order )
85
+ result = getattr (df , nselect_method )(n , columns )
85
86
result .index = pd .Index (list (result .index ))
86
- expected = df .sort_values (order , ascending = ascending ).head (n )
87
+ expected = df .sort_values (columns , ascending = ascending ).head (n )
87
88
tm .assert_frame_equal (result , expected )
88
89
89
90
@pytest .mark .parametrize (
90
91
"columns" , [["group" , "category_string" ], ["group" , "string" ]]
91
92
)
92
- def test_nlargest_error (self , df_main_dtypes , nselect_method , columns ):
93
+ def test_nsorted_error (self , df_main_dtypes , nselect_method , columns ):
93
94
df = df_main_dtypes
94
95
col = columns [1 ]
95
96
error_msg = (
@@ -106,12 +107,12 @@ def test_nlargest_error(self, df_main_dtypes, nselect_method, columns):
106
107
with pytest .raises (TypeError , match = error_msg ):
107
108
getattr (df , nselect_method )(2 , columns )
108
109
109
- def test_nlargest_all_dtypes (self , df_main_dtypes ):
110
+ def test_nsorted_all_dtypes (self , df_main_dtypes ):
110
111
df = df_main_dtypes
111
112
df .nsmallest (2 , list (set (df ) - {"category_string" , "string" }))
112
113
df .nlargest (2 , list (set (df ) - {"category_string" , "string" }))
113
114
114
- def test_nlargest_duplicates_on_starter_columns (self ):
115
+ def test_nsorted_duplicates_on_starter_columns (self ):
115
116
# regression test for GH#22752
116
117
117
118
df = pd .DataFrame ({"a" : [2 , 2 , 2 , 1 , 1 , 1 ], "b" : [1 , 2 , 3 , 3 , 2 , 1 ]})
@@ -128,7 +129,7 @@ def test_nlargest_duplicates_on_starter_columns(self):
128
129
)
129
130
tm .assert_frame_equal (result , expected )
130
131
131
- def test_nlargest_n_identical_values (self ):
132
+ def test_nsorted_n_identical_values (self ):
132
133
# GH#15297
133
134
df = pd .DataFrame ({"a" : [1 ] * 5 , "b" : [1 , 2 , 3 , 4 , 5 ]})
134
135
@@ -141,25 +142,26 @@ def test_nlargest_n_identical_values(self):
141
142
tm .assert_frame_equal (result , expected )
142
143
143
144
@pytest .mark .parametrize (
144
- "order " ,
145
+ "columns " ,
145
146
[["a" , "b" , "c" ], ["c" , "b" , "a" ], ["a" ], ["b" ], ["a" , "b" ], ["c" , "b" ]],
146
147
)
147
148
@pytest .mark .parametrize ("n" , range (1 , 6 ))
148
- def test_nlargest_n_duplicate_index (self , n , order , request ):
149
+ def test_nsorted_n_duplicate_index (self , n : int , columns , request ):
149
150
# GH#13412
150
151
151
152
df = pd .DataFrame (
152
153
{"a" : [1 , 2 , 3 , 4 , 4 ], "b" : [1 , 1 , 1 , 1 , 1 ], "c" : [0 , 1 , 2 , 5 , 4 ]},
153
154
index = [0 , 0 , 1 , 1 , 1 ],
154
155
)
155
- result = df .nsmallest (n , order )
156
- expected = df .sort_values (order , kind = "stable" ).head (n )
156
+ result = df .nsmallest (n , columns )
157
+ expected = df .sort_values (columns , kind = "stable" ).head (n )
157
158
tm .assert_frame_equal (result , expected )
158
159
159
- result = df .nlargest (n , order )
160
- expected = df .sort_values (order , ascending = False , kind = "stable" ).head (n )
160
+ result = df .nlargest (n , columns )
161
+ expected = df .sort_values (columns , ascending = False , kind = "stable" ).head (n )
161
162
if Version (np .__version__ ) >= Version ("1.25" ) and (
162
- (order == ["a" ] and n in (1 , 2 , 3 , 4 )) or ((order == ["a" , "b" ]) and n == 5 )
163
+ (columns == ["a" ] and n in (1 , 2 , 3 , 4 ))
164
+ or ((columns == ["a" , "b" ]) and n == 5 )
163
165
):
164
166
request .applymarker (
165
167
pytest .mark .xfail (
@@ -172,7 +174,7 @@ def test_nlargest_n_duplicate_index(self, n, order, request):
172
174
)
173
175
tm .assert_frame_equal (result , expected )
174
176
175
- def test_nlargest_duplicate_keep_all_ties (self ):
177
+ def test_nsorted_duplicate_keep_all_ties (self ):
176
178
# GH#16818
177
179
df = pd .DataFrame (
178
180
{"a" : [5 , 4 , 4 , 2 , 3 , 3 , 3 , 3 ], "b" : [10 , 9 , 8 , 7 , 5 , 50 , 10 , 20 ]}
@@ -197,7 +199,7 @@ def test_nlargest_duplicate_keep_all_ties(self):
197
199
)
198
200
tm .assert_frame_equal (result , expected )
199
201
200
- def test_nlargest_multiindex_column_lookup (self ):
202
+ def test_nsorted_multiindex_column_lookup (self ):
201
203
# Check whether tuples are correctly treated as multi-level lookups.
202
204
# GH#23033
203
205
df = pd .DataFrame (
0 commit comments