@@ -227,6 +227,22 @@ def fast(group):
227227 tm .assert_frame_equal (fast_df , slow_df )
228228
229229
def test_apply_fast_slow_identical_index():
    """Check that ``groupby.apply`` gives equal frames for a group and its copy.

    A small frame indexed by ``name`` is grouped by ``age`` with
    ``group_keys=False``.  Returning each group unchanged and returning a
    copy of each group must produce frames that compare equal.
    """
    # GH#44803
    people = {
        "name": ["Alice", "Bob", "Carl"],
        "age": [20, 21, 20],
    }
    df = DataFrame(people).set_index("name")

    def apply_per_age(func):
        # Build a fresh groupby for every call so the two runs share no state.
        return df.groupby(["age"], group_keys=False).apply(func)

    def same_object(group):
        return group

    def fresh_copy(group):
        return group.copy()

    grp_by_same_value = apply_per_age(same_object)
    grp_by_copy = apply_per_age(fresh_copy)
    tm.assert_frame_equal(grp_by_same_value, grp_by_copy)
244+
245+
230246@pytest .mark .parametrize (
231247 "func" ,
232248 [
@@ -255,19 +271,19 @@ def test_apply_with_mixed_dtype():
255271 "foo2" : ["one" , "two" , "two" , "three" , "one" , "two" ],
256272 }
257273 )
258- result = df .apply (lambda x : x , axis = 1 ). dtypes
259- expected = df . dtypes
260- tm .assert_series_equal (result , expected )
274+ result = df .apply (lambda x : x , axis = 1 )
275+ expected = df
276+ tm .assert_frame_equal (result , expected )
261277
262278 # GH 3610 incorrect dtype conversion with as_index=False
263279 df = DataFrame ({"c1" : [1 , 2 , 6 , 6 , 8 ]})
264280 df ["c2" ] = df .c1 / 2.0
265- result1 = df .groupby ("c2" ).mean ().reset_index (). c2
266- result2 = df .groupby ("c2" , as_index = False ).mean (). c2
267- tm .assert_series_equal (result1 , result2 )
281+ result1 = df .groupby ("c2" ).mean ().reset_index ()
282+ result2 = df .groupby ("c2" , as_index = False ).mean ()
283+ tm .assert_frame_equal (result1 , result2 )
268284
269285
270- def test_groupby_as_index_apply ():
286+ def test_groupby_as_index_apply (as_index ):
271287 # GH #4648 and #3417
272288 df = DataFrame (
273289 {
@@ -276,27 +292,35 @@ def test_groupby_as_index_apply():
276292 "time" : range (6 ),
277293 }
278294 )
295+ gb = df .groupby ("user_id" , as_index = as_index )
279296
280- g_as = df .groupby ("user_id" , as_index = True )
281- g_not_as = df .groupby ("user_id" , as_index = False )
282-
283- res_as = g_as .head (2 ).index
284- res_not_as = g_not_as .head (2 ).index
285- exp = Index ([0 , 1 , 2 , 4 ])
286- tm .assert_index_equal (res_as , exp )
287- tm .assert_index_equal (res_not_as , exp )
288-
289- res_as_apply = g_as .apply (lambda x : x .head (2 )).index
290- res_not_as_apply = g_not_as .apply (lambda x : x .head (2 )).index
297+ expected = DataFrame (
298+ {
299+ "item_id" : ["b" , "b" , "a" , "a" ],
300+ "user_id" : [1 , 2 , 1 , 3 ],
301+ "time" : [0 , 1 , 2 , 4 ],
302+ },
303+ index = [0 , 1 , 2 , 4 ],
304+ )
305+ result = gb .head (2 )
306+ tm .assert_frame_equal (result , expected )
291307
292308 # apply doesn't maintain the original ordering
293309 # changed in GH5610 as the as_index=False returns a MI here
294- exp_not_as_apply = Index ([0 , 2 , 1 , 4 ])
295- tp = [(1 , 0 ), (1 , 2 ), (2 , 1 ), (3 , 4 )]
296- exp_as_apply = MultiIndex .from_tuples (tp , names = ["user_id" , None ])
297-
298- tm .assert_index_equal (res_as_apply , exp_as_apply )
299- tm .assert_index_equal (res_not_as_apply , exp_not_as_apply )
310+ if as_index :
311+ tp = [(1 , 0 ), (1 , 2 ), (2 , 1 ), (3 , 4 )]
312+ index = MultiIndex .from_tuples (tp , names = ["user_id" , None ])
313+ else :
314+ index = Index ([0 , 2 , 1 , 4 ])
315+ expected = DataFrame (
316+ {
317+ "item_id" : list ("baba" ),
318+ "time" : [0 , 2 , 1 , 4 ],
319+ },
320+ index = index ,
321+ )
322+ result = gb .apply (lambda x : x .head (2 ))
323+ tm .assert_frame_equal (result , expected )
300324
301325
302326def test_groupby_as_index_apply_str ():
@@ -1455,3 +1479,37 @@ def f_4(grp):
14551479 e .loc ["Pony" ] = np .nan
14561480 e .name = None
14571481 tm .assert_series_equal (result , e )
1482+
1483+
def test_nonreducer_nonstransform():
    """Exercise ``groupby.apply`` with a UDF that neither reduces nor transforms.

    Each ``cat1`` group is passed to a function that ranks ``val`` and returns
    the minimum rank per ``cat2``.  The combined result is compared against a
    ``rank`` Series indexed by ``(cat1, cat2)``.
    """
    # GH3380, GH60619
    # Was originally testing mutating in a UDF; now kept as an example
    # of using apply with a nonreducer and nontransformer.
    frame = DataFrame(
        {
            "cat1": list("aaaaaaaabbbbbb"),
            "cat2": list("ccddeeffccddee"),
            "val": np.random.default_rng(2).integers(100, size=14),
        }
    )

    def min_rank_per_cat2(x):
        # ``assign`` hands back a new frame, so the incoming group is not mutated.
        ranked = x.assign(rank=x["val"].rank(method="min"))
        return ranked.groupby("cat2")["rank"].min()

    expected_frame = DataFrame(
        {
            "cat1": list("aaaabbb"),
            "cat2": list("cdefcde"),
            "rank": [3.0, 2.0, 5.0, 1.0, 2.0, 4.0, 1.0],
        }
    )
    expected = expected_frame.set_index(["cat1", "cat2"])["rank"]

    result = frame.groupby("cat1").apply(min_rank_per_cat2)
    tm.assert_series_equal(result, expected)
0 commit comments