remove xfails + update expected error msgs

jorisvandenbossche · jorisvandenbossche · commit 42b77dbfb2c8 · 2024-09-21T14:07:16.000+02:00
diff --git a/pandas/tests/apply/test_invalid_arg.py b/pandas/tests/apply/test_invalid_arg.py
@@ -12,9 +12,6 @@
 import numpy as np
 import pytest
 
-from pandas._config import using_string_dtype
-
-from pandas.compat import HAS_PYARROW
 from pandas.errors import SpecificationError
 
 from pandas import (
@@ -212,10 +209,6 @@ def transform(row):
         data.apply(transform, axis=1)
 
 
-# we should raise a proper TypeError instead of propagating the pyarrow error
-@pytest.mark.xfail(
-    using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)"
-)
 @pytest.mark.parametrize(
     "df, func, expected",
     tm.get_cython_table_params(
@@ -225,21 +218,25 @@ def transform(row):
 def test_agg_cython_table_raises_frame(df, func, expected, axis, using_infer_string):
     # GH 21224
     if using_infer_string:
-        import pyarrow as pa
+        if df.dtypes.iloc[0].storage == "pyarrow":
+            import pyarrow as pa
 
-        expected = (expected, pa.lib.ArrowNotImplementedError)
+            # TODO(infer_string)
+            # should raise a proper TypeError instead of propagating the pyarrow error
 
-    msg = "can't multiply sequence by non-int of type 'str'|has no kernel"
+            expected = (expected, pa.lib.ArrowNotImplementedError)
+        else:
+            expected = (expected, NotImplementedError)
+
+    msg = (
+        "can't multiply sequence by non-int of type 'str'|has no kernel|cannot perform"
+    )
     warn = None if isinstance(func, str) else FutureWarning
     with pytest.raises(expected, match=msg):
         with tm.assert_produces_warning(warn, match="using DataFrame.cumprod"):
             df.agg(func, axis=axis)
 
 
-# we should raise a proper TypeError instead of propagating the pyarrow error
-@pytest.mark.xfail(
-    using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)"
-)
 @pytest.mark.parametrize(
     "series, func, expected",
     chain(
@@ -263,11 +260,15 @@ def test_agg_cython_table_raises_series(series, func, expected, using_infer_stri
         msg = r"Cannot convert \['a' 'b' 'c'\] to numeric"
 
     if using_infer_string:
-        import pyarrow as pa
-
-        expected = (expected, pa.lib.ArrowNotImplementedError)
-
-    msg = msg + "|does not support|has no kernel"
+        if series.dtype.storage == "pyarrow":
+            import pyarrow as pa
+
+            # TODO(infer_string)
+            # should raise a proper TypeError instead of propagating the pyarrow error
+            expected = (expected, pa.lib.ArrowNotImplementedError)
+        else:
+            expected = (expected, NotImplementedError)
+    msg = msg + "|does not support|has no kernel|Cannot perform|cannot perform"
     warn = None if isinstance(func, str) else FutureWarning
 
     with pytest.raises(expected, match=msg):
diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py
@@ -276,10 +276,11 @@ def test_stat_op_api_float_string_frame(self, float_string_frame, axis, opname):
                 msg = "'[><]=' not supported between instances of 'float' and 'str'"
             elif opname == "median":
                 msg = re.compile(
-                    r"Cannot convert \[.*\] to numeric|does not support", flags=re.S
+                    r"Cannot convert \[.*\] to numeric|does not support|Cannot perform",
+                    flags=re.S,
                 )
             if not isinstance(msg, re.Pattern):
-                msg = msg + "|does not support"
+                msg = msg + "|does not support|Cannot perform reduction"
             with pytest.raises(TypeError, match=msg):
                 getattr(float_string_frame, opname)(axis=axis)
         if opname != "nunique":
@@ -441,20 +442,13 @@ def test_mixed_ops(self, op):
                 "could not convert",
                 "can't multiply sequence by non-int",
                 "does not support",
+                "Cannot perform",
             ]
         )
         with pytest.raises(TypeError, match=msg):
             getattr(df, op)()
 
         with pd.option_context("use_bottleneck", False):
-            msg = "|".join(
-                [
-                    "Could not convert",
-                    "could not convert",
-                    "can't multiply sequence by non-int",
-                    "does not support",
-                ]
-            )
             with pytest.raises(TypeError, match=msg):
                 getattr(df, op)()
 
diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py
@@ -6,7 +6,6 @@
 from pandas._config import using_string_dtype
 
 from pandas._libs import lib
-from pandas.compat import HAS_PYARROW
 
 from pandas.core.dtypes.common import ensure_platform_int
 
@@ -385,10 +384,7 @@ def test_transform_nuisance_raises(df, using_infer_string):
     gbc = grouped["B"]
     msg = "Could not convert"
     if using_infer_string:
-        if df.columns.dtype.storage == "pyarrow":
-            msg = "with dtype str does not support operation 'mean'"
-        else:
-            msg = "Cannot perform reduction 'mean' with string dtype"
+        msg = "Cannot perform reduction 'mean' with string dtype"
     with pytest.raises(TypeError, match=msg):
         gbc.transform(lambda x: np.mean(x))
 
@@ -483,10 +479,7 @@ def test_groupby_transform_with_int(using_infer_string):
     )
     msg = "Could not convert"
     if using_infer_string:
-        if HAS_PYARROW:
-            msg = "with dtype str does not support operation 'mean'"
-        else:
-            msg = "Cannot perform reduction 'mean' with string dtype"
+        msg = "Cannot perform reduction 'mean' with string dtype"
     with np.errstate(all="ignore"):
         with pytest.raises(TypeError, match=msg):
             df.groupby("A").transform(lambda x: (x - x.mean()) / x.std())
diff --git a/pandas/tests/series/test_reductions.py b/pandas/tests/series/test_reductions.py
@@ -1,10 +1,6 @@
 import numpy as np
 import pytest
 
-from pandas._config import using_string_dtype
-
-from pandas.compat import HAS_PYARROW
-
 import pandas as pd
 from pandas import Series
 import pandas._testing as tm
@@ -171,48 +167,44 @@ def test_mean_with_convertible_string_raises():
     ser = Series(["1", "2"])
     assert ser.sum() == "12"
 
-    msg = "Could not convert string '12' to numeric|does not support"
+    msg = "Could not convert string '12' to numeric|does not support|Cannot perform"
     with pytest.raises(TypeError, match=msg):
         ser.mean()
 
     df = ser.to_frame()
-    msg = r"Could not convert \['12'\] to numeric|does not support"
+    msg = r"Could not convert \['12'\] to numeric|does not support|Cannot perform"
     with pytest.raises(TypeError, match=msg):
         df.mean()
 
 
-@pytest.mark.xfail(
-    using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)"
-)
 def test_mean_dont_convert_j_to_complex():
     # GH#36703
     df = pd.DataFrame([{"db": "J", "numeric": 123}])
-    msg = r"Could not convert \['J'\] to numeric|does not support"
+    msg = r"Could not convert \['J'\] to numeric|does not support|Cannot perform"
     with pytest.raises(TypeError, match=msg):
         df.mean()
 
     with pytest.raises(TypeError, match=msg):
         df.agg("mean")
 
-    msg = "Could not convert string 'J' to numeric|does not support"
+    msg = "Could not convert string 'J' to numeric|does not support|Cannot perform"
     with pytest.raises(TypeError, match=msg):
         df["db"].mean()
-    msg = "Could not convert string 'J' to numeric|ufunc 'divide'"
+    msg = "Could not convert string 'J' to numeric|ufunc 'divide'|Cannot perform"
     with pytest.raises(TypeError, match=msg):
         np.mean(df["db"].astype("string").array)
 
 
-@pytest.mark.xfail(
-    using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)"
-)
 def test_median_with_convertible_string_raises():
     # GH#34671 this _could_ return a string "2", but definitely not float 2.0
-    msg = r"Cannot convert \['1' '2' '3'\] to numeric|does not support"
+    msg = r"Cannot convert \['1' '2' '3'\] to numeric|does not support|Cannot perform"
     ser = Series(["1", "2", "3"])
     with pytest.raises(TypeError, match=msg):
         ser.median()
 
-    msg = r"Cannot convert \[\['1' '2' '3'\]\] to numeric|does not support"
+    msg = (
+        r"Cannot convert \[\['1' '2' '3'\]\] to numeric|does not support|Cannot perform"
+    )
     df = ser.to_frame()
     with pytest.raises(TypeError, match=msg):
         df.median()