@@ -480,7 +480,7 @@ def test_frame_multi_key_function_list_partial_failure(using_infer_string):
480480 funcs = ["mean" , "std" ]
481481 msg = re .escape ("agg function failed [how->mean,dtype->" )
482482 if using_infer_string :
483- msg = "str dtype does not support mean operations "
483+ msg = "dtype 'str' does not support operation 'mean' "
484484 with pytest .raises (TypeError , match = msg ):
485485 grouped .agg (funcs )
486486
@@ -578,6 +578,7 @@ def test_ops_not_as_index(reduction_func):
578578
579579
580580def test_as_index_series_return_frame (df ):
581+ df = df .astype ({"A" : object , "B" : object })
581582 grouped = df .groupby ("A" , as_index = False )
582583 grouped2 = df .groupby (["A" , "B" ], as_index = False )
583584
@@ -671,7 +672,7 @@ def test_raises_on_nuisance(df, using_infer_string):
671672 grouped = df .groupby ("A" )
672673 msg = re .escape ("agg function failed [how->mean,dtype->" )
673674 if using_infer_string :
674- msg = "str dtype does not support mean operations "
675+ msg = "dtype 'str' does not support operation 'mean' "
675676 with pytest .raises (TypeError , match = msg ):
676677 grouped .agg ("mean" )
677678 with pytest .raises (TypeError , match = msg ):
@@ -717,7 +718,7 @@ def test_omit_nuisance_agg(df, agg_function, numeric_only, using_infer_string):
717718 # Added numeric_only as part of GH#46560; these do not drop nuisance
718719 # columns when numeric_only is False
719720 if using_infer_string :
720- msg = f"str dtype does not support { agg_function } operations "
721+ msg = f"dtype 'str' does not support operation ' { agg_function } ' "
721722 klass = TypeError
722723 elif agg_function in ("std" , "sem" ):
723724 klass = ValueError
@@ -740,18 +741,24 @@ def test_omit_nuisance_agg(df, agg_function, numeric_only, using_infer_string):
740741 tm .assert_frame_equal (result , expected )
741742
742743
743- def test_raise_on_nuisance_python_single (df ):
744+ def test_raise_on_nuisance_python_single (df , using_infer_string ):
744745 # GH 38815
745746 grouped = df .groupby ("A" )
746- with pytest .raises (ValueError , match = "could not convert" ):
747+
748+ err = ValueError
749+ msg = "could not convert"
750+ if using_infer_string :
751+ err = TypeError
752+ msg = "dtype 'str' does not support operation 'skew'"
753+ with pytest .raises (err , match = msg ):
747754 grouped .skew ()
748755
749756
750757def test_raise_on_nuisance_python_multiple (three_group , using_infer_string ):
751758 grouped = three_group .groupby (["A" , "B" ])
752759 msg = re .escape ("agg function failed [how->mean,dtype->" )
753760 if using_infer_string :
754- msg = "str dtype does not support mean operations "
761+ msg = "dtype 'str' does not support operation 'mean' "
755762 with pytest .raises (TypeError , match = msg ):
756763 grouped .agg ("mean" )
757764 with pytest .raises (TypeError , match = msg ):
@@ -798,7 +805,7 @@ def test_wrap_aggregated_output_multindex(
798805 keys = [np .array ([0 , 0 , 1 ]), np .array ([0 , 0 , 1 ])]
799806 msg = re .escape ("agg function failed [how->mean,dtype->" )
800807 if using_infer_string :
801- msg = "str dtype does not support mean operations "
808+ msg = "dtype 'str' does not support operation 'mean' "
802809 with pytest .raises (TypeError , match = msg ):
803810 df .groupby (keys ).agg ("mean" )
804811 agged = df .drop (columns = ("baz" , "two" )).groupby (keys ).agg ("mean" )
@@ -976,10 +983,20 @@ def test_groupby_with_hier_columns():
976983 tm .assert_index_equal (result .columns , df .columns [:- 1 ])
977984
978985
979- def test_grouping_ndarray (df ):
986+ def test_grouping_ndarray (df , using_infer_string ):
980987 grouped = df .groupby (df ["A" ].values )
988+ grouped2 = df .groupby (df ["A" ].rename (None ))
989+
990+ if using_infer_string :
991+ msg = "dtype 'str' does not support operation 'sum'"
992+ with pytest .raises (TypeError , match = msg ):
993+ grouped .sum ()
994+ with pytest .raises (TypeError , match = msg ):
995+ grouped2 .sum ()
996+ return
997+
981998 result = grouped .sum ()
982- expected = df . groupby ( df [ "A" ]. rename ( None )) .sum ()
999+ expected = grouped2 .sum ()
9831000 tm .assert_frame_equal (result , expected )
9841001
9851002
@@ -1478,13 +1495,23 @@ def f(group):
14781495 assert names == expected_names
14791496
14801497
1481- def test_no_dummy_key_names (df ):
1498+ def test_no_dummy_key_names (df , using_infer_string ):
14821499 # see gh-1291
1483- result = df .groupby (df ["A" ].values ).sum ()
1500+ gb = df .groupby (df ["A" ].values )
1501+ gb2 = df .groupby ([df ["A" ].values , df ["B" ].values ])
1502+ if using_infer_string :
1503+ msg = "dtype 'str' does not support operation 'sum'"
1504+ with pytest .raises (TypeError , match = msg ):
1505+ gb .sum ()
1506+ with pytest .raises (TypeError , match = msg ):
1507+ gb2 .sum ()
1508+ return
1509+
1510+ result = gb .sum ()
14841511 assert result .index .name is None
14851512
1486- result = df . groupby ([ df [ "A" ]. values , df [ "B" ]. values ]) .sum ()
1487- assert result .index .names == (None , None )
1513+ result2 = gb2 .sum ()
1514+ assert result2 .index .names == (None , None )
14881515
14891516
14901517def test_groupby_sort_multiindex_series ():
@@ -1820,7 +1847,7 @@ def get_categorical_invalid_expected():
18201847 elif is_per :
18211848 msg = "Period type does not support"
18221849 elif is_str :
1823- msg = "str dtype does not support"
1850+ msg = f" dtype 'str' does not support operation ' { op } ' "
18241851 else :
18251852 msg = "category type does not support"
18261853 if op == "skew" :
@@ -2750,7 +2777,7 @@ def test_obj_with_exclusions_duplicate_columns():
27502777def test_groupby_numeric_only_std_no_result (numeric_only ):
27512778 # GH 51080
27522779 dicts_non_numeric = [{"a" : "foo" , "b" : "bar" }, {"a" : "car" , "b" : "dar" }]
2753- df = DataFrame (dicts_non_numeric )
2780+ df = DataFrame (dicts_non_numeric , dtype = object )
27542781 dfgb = df .groupby ("a" , as_index = False , sort = False )
27552782
27562783 if numeric_only :
@@ -2809,10 +2836,14 @@ def test_grouping_with_categorical_interval_columns():
28092836def test_groupby_sum_on_nan_should_return_nan (bug_var ):
28102837 # GH 24196
28112838 df = DataFrame ({"A" : [bug_var , bug_var , bug_var , np .nan ]})
2839+ if isinstance (bug_var , str ):
2840+ df = df .astype (object )
28122841 dfgb = df .groupby (lambda x : x )
28132842 result = dfgb .sum (min_count = 1 )
28142843
2815- expected_df = DataFrame ([bug_var , bug_var , bug_var , None ], columns = ["A" ])
2844+ expected_df = DataFrame (
2845+ [bug_var , bug_var , bug_var , None ], columns = ["A" ], dtype = df ["A" ].dtype
2846+ )
28162847 tm .assert_frame_equal (result , expected_df )
28172848
28182849
0 commit comments