Merge branch 'main' into avoid_copy_categoricls

fjetter · web-flow · commit 673b24716cf8 · 2025-08-01T16:44:11.000+02:00
diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml
@@ -71,7 +71,7 @@ jobs:
             # It will be temporarily activated during tests with locale.setlocale
             extra_loc: "zh_CN"
             platform: ubuntu-24.04
-          - name: "Past no infer strings"
+          - name: "PANDAS_FUTURE_INFER_STRING=0"
             env_file: actions-312.yaml
             pandas_future_infer_string: "0"
             platform: ubuntu-24.04
diff --git a/pandas/errors/cow.py b/pandas/errors/cow.py
@@ -1,24 +1,28 @@
 _chained_assignment_msg = (
     "A value is trying to be set on a copy of a DataFrame or Series "
     "through chained assignment.\n"
-    "When using the Copy-on-Write mode, such chained assignment never works "
-    "to update the original DataFrame or Series, because the intermediate "
-    "object on which we are setting values always behaves as a copy.\n\n"
+    "Such chained assignment never works to update the original DataFrame or "
+    "Series, because the intermediate object on which we are setting values "
+    "always behaves as a copy (due to Copy-on-Write).\n\n"
     "Try using '.loc[row_indexer, col_indexer] = value' instead, to perform "
     "the assignment in a single step.\n\n"
-    "See the caveats in the documentation: "
+    "See the documentation for a more detailed explanation: "
     "https://pandas.pydata.org/pandas-docs/stable/user_guide/"
-    "copy_on_write.html"
+    "copy_on_write.html#chained-assignment"
 )
 
 
 _chained_assignment_method_msg = (
     "A value is trying to be set on a copy of a DataFrame or Series "
     "through chained assignment using an inplace method.\n"
-    "When using the Copy-on-Write mode, such inplace method never works "
-    "to update the original DataFrame or Series, because the intermediate "
-    "object on which we are setting values always behaves as a copy.\n\n"
+    "Such inplace method never works to update the original DataFrame or Series, "
+    "because the intermediate object on which we are setting values always "
+    "behaves as a copy (due to Copy-on-Write).\n\n"
     "For example, when doing 'df[col].method(value, inplace=True)', try "
     "using 'df.method({col: value}, inplace=True)' instead, to perform "
-    "the operation inplace on the original object.\n\n"
+    "the operation inplace on the original object, or try to avoid an inplace "
+    "operation using 'df[col] = df[col].method(value)'.\n\n"
+    "See the documentation for a more detailed explanation: "
+    "https://pandas.pydata.org/pandas-docs/stable/user_guide/"
+    "copy_on_write.html"
 )
diff --git a/pandas/tests/arrays/test_datetimes.py b/pandas/tests/arrays/test_datetimes.py
@@ -100,7 +100,7 @@ def test_simple_new_requires_match(self, unit):
         assert dta.dtype == dtype
 
         wrong = DatetimeTZDtype("ns", "UTC")
-        with pytest.raises(AssertionError, match=""):
+        with pytest.raises(AssertionError, match="^$"):
             DatetimeArray._simple_new(arr, dtype=wrong)
 
     def test_std_non_nano(self, unit):
diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py
@@ -249,10 +249,14 @@ def test_alias_to_unit_raises(self):
 
     def test_alias_to_unit_bad_alias_raises(self):
         # 23990
-        with pytest.raises(TypeError, match=""):
+        with pytest.raises(
+            TypeError, match="Cannot construct a 'DatetimeTZDtype' from"
+        ):
             DatetimeTZDtype("this is a bad string")
 
-        with pytest.raises(TypeError, match=""):
+        with pytest.raises(
+            TypeError, match="Cannot construct a 'DatetimeTZDtype' from"
+        ):
             DatetimeTZDtype("datetime64[ns, US/NotATZ]")
 
     def test_hash_vs_equality(self, dtype):
diff --git a/pandas/tests/frame/test_ufunc.py b/pandas/tests/frame/test_ufunc.py
@@ -191,7 +191,7 @@ def test_unary_accumulate_axis():
 
 def test_frame_outer_disallowed():
     df = pd.DataFrame({"A": [1, 2]})
-    with pytest.raises(NotImplementedError, match=""):
+    with pytest.raises(NotImplementedError, match="^$"):
         # deprecation enforced in 2.0
         np.subtract.outer(df, df)
 
diff --git a/pandas/tests/indexes/numeric/test_indexing.py b/pandas/tests/indexes/numeric/test_indexing.py
@@ -585,10 +585,10 @@ def test_slice_locs_na(self):
 
     def test_slice_locs_na_raises(self):
         index = Index([np.nan, 1, 2])
-        with pytest.raises(KeyError, match=""):
+        with pytest.raises(KeyError, match="1.5"):
             index.slice_locs(start=1.5)
 
-        with pytest.raises(KeyError, match=""):
+        with pytest.raises(KeyError, match="1.5"):
             index.slice_locs(end=1.5)
 
 
diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py
@@ -712,7 +712,7 @@ def test_drop_by_str_label(self, index):
     )
     @pytest.mark.parametrize("keys", [["foo", "bar"], ["1", "bar"]])
     def test_drop_by_str_label_raises_missing_keys(self, index, keys):
-        with pytest.raises(KeyError, match=""):
+        with pytest.raises(KeyError, match=".* not found in axis"):
             index.drop(keys)
 
     @pytest.mark.parametrize(
@@ -741,7 +741,7 @@ def test_drop_by_numeric_label_loc(self):
 
     def test_drop_by_numeric_label_raises_missing_keys(self):
         index = Index([1, 2, 3])
-        with pytest.raises(KeyError, match=""):
+        with pytest.raises(KeyError, match=re.escape("[4] not found in axis")):
             index.drop([3, 4])
 
     @pytest.mark.parametrize(
diff --git a/pandas/tests/indexing/multiindex/test_getitem.py b/pandas/tests/indexing/multiindex/test_getitem.py
@@ -89,8 +89,12 @@ def test_series_getitem_returns_scalar(
         (lambda s: s[(2000, 3, 4)], KeyError, r"^\(2000, 3, 4\)$"),
         (lambda s: s.loc[(2000, 3, 4)], KeyError, r"^\(2000, 3, 4\)$"),
         (lambda s: s.loc[(2000, 3, 4, 5)], IndexingError, "Too many indexers"),
-        (lambda s: s.__getitem__(len(s)), KeyError, ""),  # match should include len(s)
-        (lambda s: s[len(s)], KeyError, ""),  # match should include len(s)
+        (
+            lambda s: s.__getitem__(len(s)),
+            KeyError,
+            "100",
+        ),  # match should include len(s)
+        (lambda s: s[len(s)], KeyError, "100"),  # match should include len(s)
         (
             lambda s: s.iloc[len(s)],
             IndexError,
diff --git a/pandas/tests/io/json/test_normalize.py b/pandas/tests/io/json/test_normalize.py
@@ -172,7 +172,7 @@ def test_empty_array(self):
     )
     def test_accepted_input(self, data, record_path, exception_type):
         if exception_type is not None:
-            with pytest.raises(exception_type, match=""):
+            with pytest.raises(exception_type, match="^$"):
                 json_normalize(data, record_path=record_path)
         else:
             result = json_normalize(data, record_path=record_path)
diff --git a/pandas/tests/io/parser/common/test_chunksize.py b/pandas/tests/io/parser/common/test_chunksize.py
@@ -129,7 +129,7 @@ def test_read_chunksize_and_nrows_changing_size(all_parsers):
         tm.assert_frame_equal(reader.get_chunk(size=2), expected.iloc[:2])
         tm.assert_frame_equal(reader.get_chunk(size=4), expected.iloc[2:5])
 
-        with pytest.raises(StopIteration, match=""):
+        with pytest.raises(StopIteration, match="^$"):
             reader.get_chunk(size=3)
 
 
diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py
@@ -2226,7 +2226,7 @@ def test_api_chunksize_read(conn, request):
 
     # reading the query in chunks with read_sql_query
     if conn_name == "sqlite_buildin":
-        with pytest.raises(NotImplementedError, match=""):
+        with pytest.raises(NotImplementedError, match="^$"):
             sql.read_sql_table("test_chunksize", conn, chunksize=5)
     else:
         res3 = DataFrame()
diff --git a/pandas/tests/plotting/frame/test_frame.py b/pandas/tests/plotting/frame/test_frame.py
@@ -52,6 +52,15 @@
 mpl = pytest.importorskip("matplotlib")
 plt = pytest.importorskip("matplotlib.pyplot")
 
+pytestmark = [
+    pytest.mark.filterwarnings(
+        "ignore:divide by zero encountered in scalar divide:RuntimeWarning"
+    ),
+    pytest.mark.filterwarnings(
+        "ignore:invalid value encountered in scalar multiply:RuntimeWarning"
+    ),
+]
+
 
 class TestDataFramePlots:
     @pytest.mark.slow
diff --git a/pandas/tests/plotting/test_series.py b/pandas/tests/plotting/test_series.py
@@ -40,6 +40,15 @@
 from pandas.plotting._matplotlib.converter import DatetimeConverter
 from pandas.plotting._matplotlib.style import get_standard_colors
 
+pytestmark = [
+    pytest.mark.filterwarnings(
+        "ignore:divide by zero encountered in scalar divide:RuntimeWarning"
+    ),
+    pytest.mark.filterwarnings(
+        "ignore:invalid value encountered in scalar multiply:RuntimeWarning"
+    ),
+]
+
 
 @pytest.fixture
 def ts():
diff --git a/pandas/tests/series/test_ufunc.py b/pandas/tests/series/test_ufunc.py
@@ -414,7 +414,7 @@ def test_outer():
     ser = pd.Series([1, 2, 3])
     obj = np.array([1, 2, 3])
 
-    with pytest.raises(NotImplementedError, match=""):
+    with pytest.raises(NotImplementedError, match="^$"):
         np.subtract.outer(ser, obj)
 
 
diff --git a/pyproject.toml b/pyproject.toml
@@ -457,6 +457,10 @@ filterwarnings = [
   "error:::pandas",
   "error::ResourceWarning",
   "error::pytest.PytestUnraisableExceptionWarning",
+  "error::pytest.PytestWarning",
+  # e.g. Module already imported so cannot be rewritten; _hypothesis_globals
+  "ignore::pytest.PytestAssertRewriteWarning",
+  "ignore::pytest.PytestCacheWarning",
   # TODO(PY311-minimum): Specify EncodingWarning
   # Ignore 3rd party EncodingWarning but raise on pandas'
   "ignore:.*encoding.* argument not specified",