diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py
index 2b9df1b7079da..5e6b845c319c4 100644
--- a/pandas/tests/groupby/aggregate/test_aggregate.py
+++ b/pandas/tests/groupby/aggregate/test_aggregate.py
@@ -273,7 +273,7 @@ def test_wrap_agg_out(three_group):
     grouped = three_group.groupby(["A", "B"])
 
     def func(ser):
-        if ser.dtype == object:
+        if ser.dtype in [object, pd.StringDtype("pyarrow_numpy")]:
             raise TypeError("Test error message")
         return ser.sum()
 
@@ -1089,7 +1089,7 @@ def test_lambda_named_agg(func):
     tm.assert_frame_equal(result, expected)
 
 
-def test_aggregate_mixed_types():
+def test_aggregate_mixed_types(using_infer_string):
     # GH 16916
     df = DataFrame(
         data=np.array([0] * 9).reshape(3, 3), columns=list("XYZ"), index=list("abc")
@@ -1097,10 +1097,11 @@ def test_aggregate_mixed_types():
     df["grouping"] = ["group 1", "group 1", 2]
     result = df.groupby("grouping").aggregate(lambda x: x.tolist())
     expected_data = [[[0], [0], [0]], [[0, 0], [0, 0], [0, 0]]]
+    dtype = "string[pyarrow_numpy]" if using_infer_string else object
     expected = DataFrame(
         expected_data,
         index=Index([2, "group 1"], dtype="object", name="grouping"),
-        columns=Index(["X", "Y", "Z"], dtype="object"),
+        columns=Index(["X", "Y", "Z"], dtype=dtype),
     )
     tm.assert_frame_equal(result, expected)
 
diff --git a/pandas/tests/groupby/aggregate/test_cython.py b/pandas/tests/groupby/aggregate/test_cython.py
index aafd06e8f88cf..0e7e5a7a1e2a7 100644
--- a/pandas/tests/groupby/aggregate/test_cython.py
+++ b/pandas/tests/groupby/aggregate/test_cython.py
@@ -90,7 +90,7 @@ def test_cython_agg_boolean():
     tm.assert_series_equal(result, expected)
 
 
-def test_cython_agg_nothing_to_agg():
+def test_cython_agg_nothing_to_agg(using_infer_string):
     frame = DataFrame(
         {"a": np.random.default_rng(2).integers(0, 5, 50), "b": ["foo", "bar"] * 25}
     )
@@ -104,8 +104,12 @@ def test_cython_agg_nothing_to_agg():
     )
 
     result = frame[["b"]].groupby(frame["a"]).mean(numeric_only=True)
+    dtype = "string[pyarrow_numpy]" if using_infer_string else object
+
     expected = DataFrame(
-        [], index=frame["a"].sort_values().drop_duplicates(), columns=[]
+        [],
+        index=frame["a"].sort_values().drop_duplicates(),
+        columns=Index([], dtype=dtype),
     )
     tm.assert_frame_equal(result, expected)
 
diff --git a/pandas/tests/groupby/aggregate/test_other.py b/pandas/tests/groupby/aggregate/test_other.py
index 12f99e3cf7a63..2ba4b6487532d 100644
--- a/pandas/tests/groupby/aggregate/test_other.py
+++ b/pandas/tests/groupby/aggregate/test_other.py
@@ -355,7 +355,8 @@ def test_series_agg_multi_pure_python():
     )
 
     def bad(x):
-        assert len(x.values.base) > 0
+        if x.dtype == object:
+            assert len(x.values.base) > 0
         return "foo"
 
     result = data.groupby(["A", "B"]).agg(bad)
diff --git a/pandas/tests/groupby/methods/test_nth.py b/pandas/tests/groupby/methods/test_nth.py
index 1b852abad6c8e..939b9c346c6e4 100644
--- a/pandas/tests/groupby/methods/test_nth.py
+++ b/pandas/tests/groupby/methods/test_nth.py
@@ -1,3 +1,5 @@
+from decimal import Decimal
+
 import numpy as np
 import pytest
 
@@ -680,7 +682,15 @@ def test_first_multi_key_groupby_categorical():
 @pytest.mark.parametrize("method", ["first", "last", "nth"])
 def test_groupby_last_first_nth_with_none(method, nulls_fixture):
     # GH29645
-    expected = Series(["y"])
+    if nulls_fixture is not pd.NA and (
+        nulls_fixture is pd.NaT
+        or isinstance(nulls_fixture, Decimal)
+        and Decimal.is_nan(nulls_fixture)
+    ):
+        dtype = object
+    else:
+        dtype = None
+    expected = Series(["y"], dtype=dtype)
     data = Series(
         [nulls_fixture, nulls_fixture, nulls_fixture, "y", nulls_fixture],
         index=[0, 0, 0, 0, 0],
diff --git a/pandas/tests/groupby/methods/test_quantile.py b/pandas/tests/groupby/methods/test_quantile.py
index 9b825b73c26c0..243b48bf13149 100644
--- a/pandas/tests/groupby/methods/test_quantile.py
+++ b/pandas/tests/groupby/methods/test_quantile.py
@@ -159,7 +159,9 @@ def test_groupby_quantile_with_arraylike_q_and_int_columns(frame_size, groupby,
 def test_quantile_raises():
     df = DataFrame([["foo", "a"], ["foo", "b"], ["foo", "c"]], columns=["key", "val"])
 
-    with pytest.raises(TypeError, match="cannot be performed against 'object' dtypes"):
+    with pytest.raises(
+        TypeError, match="cannot be performed against 'object' dtypes|No matching"
+    ):
         df.groupby("key").quantile()
 
 
@@ -248,7 +250,8 @@ def test_groupby_quantile_raises_on_invalid_dtype(q, numeric_only):
         tm.assert_frame_equal(result, expected)
     else:
         with pytest.raises(
-            TypeError, match="'quantile' cannot be performed against 'object' dtypes!"
+            TypeError,
+            match="'quantile' cannot be performed against 'object' dtypes!|No matching",
         ):
             df.groupby("a").quantile(q, numeric_only=numeric_only)
 
diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py
index 9bd2c22788fac..20eb2e464b132 100644
--- a/pandas/tests/groupby/test_apply.py
+++ b/pandas/tests/groupby/test_apply.py
@@ -109,7 +109,7 @@ def test_apply_index_date_object(using_infer_string):
         1.40750,
         1.40649,
     ]
-    dtype = "string[pyarrow_numpy]" if using_infer_string else object
+    dtype = "string[pyarrow_numpy]" if using_infer_string else None
     exp_idx = Index(
         ["2011-05-16", "2011-05-17", "2011-05-18"], dtype=dtype, name="date"
     )
@@ -1243,9 +1243,7 @@ def test_apply_dropna_with_indexed_same(dropna):
     [
         [
             False,
-            DataFrame(
-                [[1, 1, 1], [2, 2, 1]], columns=Index(["a", "b", None], dtype=object)
-            ),
+            DataFrame([[1, 1, 1], [2, 2, 1]], columns=Index(["a", "b", None])),
         ],
         [
             True,
diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py
index 5a43a42aa936f..cedb1c701027b 100644
--- a/pandas/tests/groupby/test_categorical.py
+++ b/pandas/tests/groupby/test_categorical.py
@@ -312,7 +312,7 @@ def test_apply(ordered):
     tm.assert_series_equal(result, expected)
 
 
-def test_observed(observed):
+def test_observed(observed, using_infer_string, request):
     # multiple groupers, don't re-expand the output space
     # of the grouper
     # gh-14942 (implement)
@@ -320,6 +320,10 @@ def test_observed(observed):
     # gh-8138 (back-compat)
     # gh-8869
 
+    if not observed and using_infer_string:
+        mark = pytest.mark.xfail(reason="fill_value=0 invalid for string dtype")
+        request.applymarker(mark)
+
     cat1 = Categorical(["a", "a", "b", "b"], categories=["a", "b", "z"], ordered=True)
     cat2 = Categorical(["c", "d", "c", "d"], categories=["c", "d", "y"], ordered=True)
     df = DataFrame({"A": cat1, "B": cat2, "values": [1, 2, 3, 4]})
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index 00e781e6a7f07..5554e6e407415 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -2315,14 +2315,18 @@ def test_groupby_all_nan_groups_drop():
 
 
 @pytest.mark.parametrize("numeric_only", [True, False])
-def test_groupby_empty_multi_column(as_index, numeric_only):
+def test_groupby_empty_multi_column(as_index, numeric_only, using_infer_string):
     # GH 15106 & GH 41998
     df = DataFrame(data=[], columns=["A", "B", "C"])
     gb = df.groupby(["A", "B"], as_index=as_index)
     result = gb.sum(numeric_only=numeric_only)
     if as_index:
         index = MultiIndex([[], []], [[], []], names=["A", "B"])
-        columns = ["C"] if not numeric_only else []
+        if using_infer_string:
+            dtype = "string[pyarrow_numpy]"
+        else:
+            dtype = object
+        columns = ["C"] if not numeric_only else Index([], dtype=dtype)
     else:
         index = RangeIndex(0)
         columns = ["A", "B", "C"] if not numeric_only else ["A", "B"]
@@ -2340,7 +2344,7 @@ def test_groupby_aggregation_non_numeric_dtype():
         {
             "v": [[1, 1], [10, 20]],
         },
-        index=Index(["M", "W"], dtype="object", name="MW"),
+        index=Index(["M", "W"], name="MW"),
     )
 
     gb = df.groupby(by=["MW"])
@@ -2487,11 +2491,16 @@ def test_groupby_none_in_first_mi_level():
     tm.assert_series_equal(result, expected)
 
 
-def test_groupby_none_column_name():
+def test_groupby_none_column_name(using_infer_string):
     # GH#47348
     df = DataFrame({None: [1, 1, 2, 2], "b": [1, 1, 2, 3], "c": [4, 5, 6, 7]})
-    result = df.groupby(by=[None]).sum()
-    expected = DataFrame({"b": [2, 5], "c": [9, 13]}, index=Index([1, 2], name=None))
+    if using_infer_string:
+        result = df.groupby(by=[np.nan]).sum()
+        name = np.nan
+    else:
+        result = df.groupby(by=[None]).sum()
+        name = None
+    expected = DataFrame({"b": [2, 5], "c": [9, 13]}, index=Index([1, 2], name=name))
     tm.assert_frame_equal(result, expected)
 
 
diff --git a/pandas/tests/groupby/test_groupby_dropna.py b/pandas/tests/groupby/test_groupby_dropna.py
index d3b3c945e06de..eba82cd4cca5f 100644
--- a/pandas/tests/groupby/test_groupby_dropna.py
+++ b/pandas/tests/groupby/test_groupby_dropna.py
@@ -112,7 +112,9 @@ def test_groupby_dropna_multi_index_dataframe_nan_in_two_groups(
         ),
     ],
 )
-def test_groupby_dropna_normal_index_dataframe(dropna, idx, outputs):
+def test_groupby_dropna_normal_index_dataframe(
+    dropna, idx, outputs, using_infer_string
+):
     # GH 3729
     df_list = [
         ["B", 12, 12, 12],
@@ -123,7 +125,9 @@ def test_groupby_dropna_normal_index_dataframe(dropna, idx, outputs):
     df = pd.DataFrame(df_list, columns=["a", "b", "c", "d"])
     grouped = df.groupby("a", dropna=dropna).sum()
 
-    expected = pd.DataFrame(outputs, index=pd.Index(idx, dtype="object", name="a"))
+    dtype = "string[pyarrow_numpy]" if using_infer_string else object
+
+    expected = pd.DataFrame(outputs, index=pd.Index(idx, dtype=dtype, name="a"))
 
     tm.assert_frame_equal(grouped, expected)
 
diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py
index 699fffe5d0488..ee27464fd9b4d 100644
--- a/pandas/tests/groupby/test_grouping.py
+++ b/pandas/tests/groupby/test_grouping.py
@@ -789,7 +789,7 @@ def test_groupby_empty(self):
         expected = ["name"]
         assert result == expected
 
-    def test_groupby_level_index_value_all_na(self):
+    def test_groupby_level_index_value_all_na(self, using_infer_string):
         # issue 20519
         df = DataFrame(
             [["x", np.nan, 10], [None, np.nan, 20]], columns=["A", "B", "C"]
@@ -805,7 +805,7 @@ def test_groupby_level_index_value_all_na(self):
             columns=["C"],
             dtype="int64",
         )
-        tm.assert_frame_equal(result, expected)
+        tm.assert_frame_equal(result, expected, check_index_type=not using_infer_string)
 
     def test_groupby_multiindex_level_empty(self):
         # https://github.com/pandas-dev/pandas/issues/31670
@@ -933,11 +933,14 @@ def test_groupby_with_empty(self):
         grouped = series.groupby(grouper)
         assert next(iter(grouped), None) is None
 
-    def test_groupby_with_single_column(self):
+    def test_groupby_with_single_column(self, using_infer_string):
         df = DataFrame({"a": list("abssbab")})
         tm.assert_frame_equal(df.groupby("a").get_group("a"), df.iloc[[0, 5]])
         # GH 13530
-        exp = DataFrame(index=Index(["a", "b", "s"], name="a"), columns=[])
+        dtype = "string[pyarrow_numpy]" if using_infer_string else object
+        exp = DataFrame(
+            index=Index(["a", "b", "s"], name="a"), columns=Index([], dtype=dtype)
+        )
         tm.assert_frame_equal(df.groupby("a").count(), exp)
         tm.assert_frame_equal(df.groupby("a").sum(), exp)
 
diff --git a/pandas/tests/groupby/test_pipe.py b/pandas/tests/groupby/test_pipe.py
index 7d5c1625b8ab4..ee59a93695bcf 100644
--- a/pandas/tests/groupby/test_pipe.py
+++ b/pandas/tests/groupby/test_pipe.py
@@ -35,7 +35,7 @@ def square(srs):
     # NDFrame.pipe methods
     result = df.groupby("A").pipe(f).pipe(square)
 
-    index = Index(["bar", "foo"], dtype="object", name="A")
+    index = Index(["bar", "foo"], name="A")
     expected = pd.Series([3.749306591013693, 6.717707873081384], name="B", index=index)
 
     tm.assert_series_equal(expected, result)
diff --git a/pandas/tests/groupby/test_raises.py b/pandas/tests/groupby/test_raises.py
index f9d5de72eda1d..465be55e436f4 100644
--- a/pandas/tests/groupby/test_raises.py
+++ b/pandas/tests/groupby/test_raises.py
@@ -106,7 +106,7 @@ def _call_and_check(klass, msg, how, gb, groupby_func, args, warn_msg=""):
 
 @pytest.mark.parametrize("how", ["method", "agg", "transform"])
 def test_groupby_raises_string(
-    how, by, groupby_series, groupby_func, df_with_string_col
+    how, by, groupby_series, groupby_func, df_with_string_col, using_infer_string
 ):
     df = df_with_string_col
     args = get_groupby_method_args(groupby_func, df)
@@ -119,30 +119,41 @@ def test_groupby_raises_string(
             assert not hasattr(gb, "corrwith")
             return
 
+    if using_infer_string:
+        import pyarrow as pa
+
+        errs = (TypeError, pa.lib.ArrowNotImplementedError)
+    else:
+        errs = TypeError
+
     klass, msg = {
         "all": (None, ""),
         "any": (None, ""),
         "bfill": (None, ""),
-        "corrwith": (TypeError, "Could not convert"),
+        "corrwith": (errs, "Could not convert|has no kernel"),
         "count": (None, ""),
         "cumcount": (None, ""),
         "cummax": (
             (NotImplementedError, TypeError),
-            "(function|cummax) is not (implemented|supported) for (this|object) dtype",
+            "(function|cummax) is not (implemented|supported) "
+            "for (this|object|string) dtype",
         ),
         "cummin": (
             (NotImplementedError, TypeError),
-            "(function|cummin) is not (implemented|supported) for (this|object) dtype",
+            "(function|cummin) is not (implemented|supported) "
+            "for (this|object|string) dtype",
         ),
         "cumprod": (
             (NotImplementedError, TypeError),
-            "(function|cumprod) is not (implemented|supported) for (this|object) dtype",
+            "(function|cumprod) is not (implemented|supported) "
+            "for (this|object|string) dtype",
         ),
         "cumsum": (
             (NotImplementedError, TypeError),
-            "(function|cumsum) is not (implemented|supported) for (this|object) dtype",
+            "(function|cumsum) is not (implemented|supported) "
+            "for (this|object|string) dtype",
         ),
-        "diff": (TypeError, "unsupported operand type"),
+        "diff": (errs, "unsupported operand type|has no kernel"),
         "ffill": (None, ""),
         "fillna": (None, ""),
         "first": (None, ""),
@@ -152,21 +163,24 @@ def test_groupby_raises_string(
         "max": (None, ""),
         "mean": (
             TypeError,
-            re.escape("agg function failed [how->mean,dtype->object]"),
+            re.escape("agg function failed [how->mean,dtype->"),
         ),
         "median": (
             TypeError,
-            re.escape("agg function failed [how->median,dtype->object]"),
+            re.escape("agg function failed [how->median,dtype->"),
         ),
         "min": (None, ""),
         "ngroup": (None, ""),
         "nunique": (None, ""),
-        "pct_change": (TypeError, "unsupported operand type"),
+        "pct_change": (errs, "unsupported operand type|has no kernel"),
         "prod": (
             TypeError,
-            re.escape("agg function failed [how->prod,dtype->object]"),
+            re.escape("agg function failed [how->prod,dtype->"),
+        ),
+        "quantile": (
+            TypeError,
+            "cannot be performed against 'object' dtypes!|No matching signature",
         ),
-        "quantile": (TypeError, "cannot be performed against 'object' dtypes!"),
         "rank": (None, ""),
         "sem": (ValueError, "could not convert string to float"),
         "shift": (None, ""),
diff --git a/pandas/tests/groupby/test_reductions.py b/pandas/tests/groupby/test_reductions.py
index edc94b2beeec1..7fb8eee274244 100644
--- a/pandas/tests/groupby/test_reductions.py
+++ b/pandas/tests/groupby/test_reductions.py
@@ -468,7 +468,7 @@ def test_max_min_non_numeric():
     assert "ss" in result
 
 
-def test_max_min_object_multiple_columns():
+def test_max_min_object_multiple_columns(using_infer_string):
     # GH#41111 case where the aggregation is valid for some columns but not
     # others; we split object blocks column-wise, consistent with
     # DataFrame._reduce
@@ -481,7 +481,10 @@ def test_max_min_object_multiple_columns():
         }
     )
     df._consolidate_inplace()  # should already be consolidate, but double-check
-    assert len(df._mgr.blocks) == 2
+    if using_infer_string:
+        assert len(df._mgr.blocks) == 3
+    else:
+        assert len(df._mgr.blocks) == 2
 
     gb = df.groupby("A")
 
diff --git a/pandas/tests/groupby/test_timegrouper.py b/pandas/tests/groupby/test_timegrouper.py
index ea556d043be2d..d1993944d63ea 100644
--- a/pandas/tests/groupby/test_timegrouper.py
+++ b/pandas/tests/groupby/test_timegrouper.py
@@ -74,7 +74,7 @@ def groupby_with_truncated_bingrouper(frame_for_truncated_bingrouper):
 
 
 class TestGroupBy:
-    def test_groupby_with_timegrouper(self):
+    def test_groupby_with_timegrouper(self, using_infer_string):
         # GH 4161
         # TimeGrouper requires a sorted index
         # also verifies that the resultant index has the correct name
@@ -112,7 +112,9 @@ def test_groupby_with_timegrouper(self):
                 index=exp_dti,
             )
             # Cast to object to avoid implicit cast when setting entry to "CarlCarlCarl"
-            expected = expected.astype({"Buyer": object})
+            dtype = "string[pyarrow_numpy]" if using_infer_string else object
+
+            expected = expected.astype({"Buyer": dtype})
             expected.iloc[0, 0] = "CarlCarlCarl"
             expected.iloc[6, 0] = "CarlCarl"
             expected.iloc[18, 0] = "Joe"
diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py
index e91ca64bb8970..98e53edd4d10c 100644
--- a/pandas/tests/groupby/transform/test_transform.py
+++ b/pandas/tests/groupby/transform/test_transform.py
@@ -380,10 +380,10 @@ def test_transform_nuisance_raises(df):
     grouped = df.groupby("A")
 
     gbc = grouped["B"]
-    with pytest.raises(TypeError, match="Could not convert"):
+    with pytest.raises(TypeError, match="Could not convert|does not support"):
         gbc.transform(lambda x: np.mean(x))
 
-    with pytest.raises(TypeError, match="Could not convert"):
+    with pytest.raises(TypeError, match="Could not convert|does not support"):
         df.groupby("A").transform(lambda x: np.mean(x))
 
 
@@ -473,7 +473,7 @@ def test_groupby_transform_with_int():
         }
     )
     with np.errstate(all="ignore"):
-        with pytest.raises(TypeError, match="Could not convert"):
+        with pytest.raises(TypeError, match="Could not convert|does not support"):
             df.groupby("A").transform(lambda x: (x - x.mean()) / x.std())
         result = df.groupby("A")[["B", "C"]].transform(
             lambda x: (x - x.mean()) / x.std()
@@ -485,7 +485,7 @@ def test_groupby_transform_with_int():
     s = Series([2, 3, 4, 10, 5, -1])
     df = DataFrame({"A": [1, 1, 1, 2, 2, 2], "B": 1, "C": s, "D": "foo"})
     with np.errstate(all="ignore"):
-        with pytest.raises(TypeError, match="Could not convert"):
+        with pytest.raises(TypeError, match="Could not convert|does not support"):
             df.groupby("A").transform(lambda x: (x - x.mean()) / x.std())
         result = df.groupby("A")[["B", "C"]].transform(
             lambda x: (x - x.mean()) / x.std()
@@ -749,7 +749,7 @@ def test_cython_transform_frame_column(
         msg = "|".join(
             [
                 "does not support .* operations",
-                ".* is not supported for object dtype",
+                ".* is not supported for (object|string) dtype",
                 "is not implemented for this dtype",
             ]
         )
@@ -1024,19 +1024,20 @@ def test_groupby_transform_with_datetimes(func, values):
     tm.assert_series_equal(result, expected)
 
 
-def test_groupby_transform_dtype():
+def test_groupby_transform_dtype(using_infer_string):
     # GH 22243
     df = DataFrame({"a": [1], "val": [1.35]})
 
     result = df["val"].transform(lambda x: x.map(lambda y: f"+{y}"))
-    expected1 = Series(["+1.35"], name="val", dtype="object")
+    dtype = "string[pyarrow_numpy]" if using_infer_string else object
+    expected1 = Series(["+1.35"], name="val", dtype=dtype)
     tm.assert_series_equal(result, expected1)
 
     result = df.groupby("a")["val"].transform(lambda x: x.map(lambda y: f"+{y}"))
     tm.assert_series_equal(result, expected1)
 
     result = df.groupby("a")["val"].transform(lambda x: x.map(lambda y: f"+({y})"))
-    expected2 = Series(["+(1.35)"], name="val", dtype="object")
+    expected2 = Series(["+(1.35)"], name="val", dtype=dtype)
     tm.assert_series_equal(result, expected2)
 
     df["val"] = df["val"].astype(object)