Merge branch 'main' into Test_issue_57930

MCRE-BE · web-flow · commit 067e16f5fbc4 · 2025-01-02T10:02:03.000+01:00
diff --git a/doc/source/development/maintaining.rst b/doc/source/development/maintaining.rst
@@ -488,7 +488,7 @@ Post-Release
    for reference):
 
     - The pandas-dev and pydata mailing lists
-    - Twitter, Mastodon, Telegram and LinkedIn
+    - X, Mastodon, Telegram and LinkedIn
 
 7. Update these release instructions to fix anything incorrect and to reflect any
    changes since the last release.
diff --git a/doc/source/user_guide/visualization.rst b/doc/source/user_guide/visualization.rst
@@ -1210,11 +1210,6 @@ You may set the ``xlabel`` and ``ylabel`` arguments to give the plot custom labe
 for x and y axis. By default, pandas will pick up index name as xlabel, while leaving
 it empty for ylabel.
 
-.. ipython:: python
-   :suppress:
-
-   plt.figure();
-
 .. ipython:: python
 
    df.plot();
diff --git a/pandas/_libs/tslibs/nattype.pyx b/pandas/_libs/tslibs/nattype.pyx
@@ -704,7 +704,7 @@ class NaTType(_NaT):
         difference between the current timezone and UTC.
 
         Returns
-        --------
+        -------
         timedelta
             The difference between UTC and the local time as a `timedelta` object.
 
diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx
@@ -2217,7 +2217,7 @@ class Timestamp(_Timestamp):
         difference between the current timezone and UTC.
 
         Returns
-        --------
+        -------
         timedelta
             The difference between UTC and the local time as a `timedelta` object.
 
diff --git a/pandas/core/computation/eval.py b/pandas/core/computation/eval.py
@@ -190,8 +190,8 @@ def eval(
 
     .. warning::
 
-        ``eval`` can run arbitrary code which can make you vulnerable to code
-         injection and untrusted data.
+        This function can run arbitrary code which can make you vulnerable to code
+        injection if you pass user input to this function.
 
     Parameters
     ----------
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -4476,8 +4476,10 @@ def query(self, expr: str, *, inplace: bool = False, **kwargs) -> DataFrame | No
         """
         Query the columns of a DataFrame with a boolean expression.
 
-        This method can run arbitrary code which can make you vulnerable to code
-        injection if you pass user input to this function.
+        .. warning::
+
+            This method can run arbitrary code which can make you vulnerable to code
+            injection if you pass user input to this function.
 
         Parameters
         ----------
@@ -4634,6 +4636,11 @@ def eval(self, expr: str, *, inplace: bool = False, **kwargs) -> Any | None:
         """
         Evaluate a string describing operations on DataFrame columns.
 
+        .. warning::
+
+            This method can run arbitrary code which can make you vulnerable to code
+            injection if you pass user input to this function.
+
         Operates on columns only, not specific rows or elements.  This allows
         `eval` to run arbitrary code, which can make you vulnerable to code
         injection if you pass user input to this function.
diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py
@@ -3700,7 +3700,7 @@ def casefold(self):
     Series.str.isupper : Check whether all characters are uppercase.
 
     Examples
-    ------------
+    --------
     The ``s5.str.istitle`` method checks for whether all words are in title
     case (whether only the first letter of each word is capitalized). Words are
     assumed to be as any sequence of non-numeric characters separated by
diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py
@@ -1644,7 +1644,7 @@ def _update_ctx_header(self, attrs: DataFrame, axis: AxisInt) -> None:
         for j in attrs.columns:
             ser = attrs[j]
             for i, c in ser.items():
-                if not c:
+                if not c or pd.isna(c):
                     continue
                 css_list = maybe_convert_css_to_tuples(c)
                 if axis == 0:
diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
@@ -5297,6 +5297,8 @@ def _dtype_to_kind(dtype_str: str) -> str:
         kind = "integer"
     elif dtype_str == "object":
         kind = "object"
+    elif dtype_str == "str":
+        kind = "str"
     else:
         raise ValueError(f"cannot interpret dtype of [{dtype_str}]")
 
diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py
@@ -227,6 +227,22 @@ def fast(group):
     tm.assert_frame_equal(fast_df, slow_df)
 
 
+def test_apply_fast_slow_identical_index():
+    # GH#44803
+    df = DataFrame(
+        {
+            "name": ["Alice", "Bob", "Carl"],
+            "age": [20, 21, 20],
+        }
+    ).set_index("name")
+
+    grp_by_same_value = df.groupby(["age"], group_keys=False).apply(lambda group: group)
+    grp_by_copy = df.groupby(["age"], group_keys=False).apply(
+        lambda group: group.copy()
+    )
+    tm.assert_frame_equal(grp_by_same_value, grp_by_copy)
+
+
 @pytest.mark.parametrize(
     "func",
     [
@@ -255,19 +271,19 @@ def test_apply_with_mixed_dtype():
             "foo2": ["one", "two", "two", "three", "one", "two"],
         }
     )
-    result = df.apply(lambda x: x, axis=1).dtypes
-    expected = df.dtypes
-    tm.assert_series_equal(result, expected)
+    result = df.apply(lambda x: x, axis=1)
+    expected = df
+    tm.assert_frame_equal(result, expected)
 
     # GH 3610 incorrect dtype conversion with as_index=False
     df = DataFrame({"c1": [1, 2, 6, 6, 8]})
     df["c2"] = df.c1 / 2.0
-    result1 = df.groupby("c2").mean().reset_index().c2
-    result2 = df.groupby("c2", as_index=False).mean().c2
-    tm.assert_series_equal(result1, result2)
+    result1 = df.groupby("c2").mean().reset_index()
+    result2 = df.groupby("c2", as_index=False).mean()
+    tm.assert_frame_equal(result1, result2)
 
 
-def test_groupby_as_index_apply():
+def test_groupby_as_index_apply(as_index):
     # GH #4648 and #3417
     df = DataFrame(
         {
@@ -276,27 +292,35 @@ def test_groupby_as_index_apply():
             "time": range(6),
         }
     )
+    gb = df.groupby("user_id", as_index=as_index)
 
-    g_as = df.groupby("user_id", as_index=True)
-    g_not_as = df.groupby("user_id", as_index=False)
-
-    res_as = g_as.head(2).index
-    res_not_as = g_not_as.head(2).index
-    exp = Index([0, 1, 2, 4])
-    tm.assert_index_equal(res_as, exp)
-    tm.assert_index_equal(res_not_as, exp)
-
-    res_as_apply = g_as.apply(lambda x: x.head(2)).index
-    res_not_as_apply = g_not_as.apply(lambda x: x.head(2)).index
+    expected = DataFrame(
+        {
+            "item_id": ["b", "b", "a", "a"],
+            "user_id": [1, 2, 1, 3],
+            "time": [0, 1, 2, 4],
+        },
+        index=[0, 1, 2, 4],
+    )
+    result = gb.head(2)
+    tm.assert_frame_equal(result, expected)
 
     # apply doesn't maintain the original ordering
     # changed in GH5610 as the as_index=False returns a MI here
-    exp_not_as_apply = Index([0, 2, 1, 4])
-    tp = [(1, 0), (1, 2), (2, 1), (3, 4)]
-    exp_as_apply = MultiIndex.from_tuples(tp, names=["user_id", None])
-
-    tm.assert_index_equal(res_as_apply, exp_as_apply)
-    tm.assert_index_equal(res_not_as_apply, exp_not_as_apply)
+    if as_index:
+        tp = [(1, 0), (1, 2), (2, 1), (3, 4)]
+        index = MultiIndex.from_tuples(tp, names=["user_id", None])
+    else:
+        index = Index([0, 2, 1, 4])
+    expected = DataFrame(
+        {
+            "item_id": list("baba"),
+            "time": [0, 2, 1, 4],
+        },
+        index=index,
+    )
+    result = gb.apply(lambda x: x.head(2))
+    tm.assert_frame_equal(result, expected)
 
 
 def test_groupby_as_index_apply_str():
@@ -1455,3 +1479,37 @@ def f_4(grp):
     e.loc["Pony"] = np.nan
     e.name = None
     tm.assert_series_equal(result, e)
+
+
+def test_nonreducer_nonstransform():
+    # GH3380, GH60619
+    # Was originally testing mutating in a UDF; now kept as an example
+    # of using apply with a nonreducer and nontransformer.
+    df = DataFrame(
+        {
+            "cat1": ["a"] * 8 + ["b"] * 6,
+            "cat2": ["c"] * 2
+            + ["d"] * 2
+            + ["e"] * 2
+            + ["f"] * 2
+            + ["c"] * 2
+            + ["d"] * 2
+            + ["e"] * 2,
+            "val": np.random.default_rng(2).integers(100, size=14),
+        }
+    )
+
+    def f(x):
+        x = x.copy()
+        x["rank"] = x.val.rank(method="min")
+        return x.groupby("cat2")["rank"].min()
+
+    expected = DataFrame(
+        {
+            "cat1": list("aaaabbb"),
+            "cat2": list("cdefcde"),
+            "rank": [3.0, 2.0, 5.0, 1.0, 2.0, 4.0, 1.0],
+        }
+    ).set_index(["cat1", "cat2"])["rank"]
+    result = df.groupby("cat1").apply(f)
+    tm.assert_series_equal(result, expected)
diff --git a/pandas/tests/groupby/test_apply_mutate.py b/pandas/tests/groupby/test_apply_mutate.py
diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py
@@ -3297,3 +3297,23 @@ def test_loc_reindexing_of_empty_index(self):
         df.loc[Series([False] * 4, index=df.index, name=0), 0] = df[0]
         expected = DataFrame(index=[1, 1, 2, 2], data=["1", "1", "2", "2"])
         tm.assert_frame_equal(df, expected)
+
+    def test_loc_setitem_matching_index(self):
+        # GH 25548
+        s = Series(0.0, index=list("abcd"))
+        s1 = Series(1.0, index=list("ab"))
+        s2 = Series(2.0, index=list("xy"))
+
+        # Test matching indices
+        s.loc[["a", "b"]] = s1
+
+        result = s[["a", "b"]]
+        expected = s1
+        tm.assert_series_equal(result, expected)
+
+        # Test unmatched indices
+        s.loc[["a", "b"]] = s2
+
+        result = s[["a", "b"]]
+        expected = Series([np.nan, np.nan], index=["a", "b"])
+        tm.assert_series_equal(result, expected)
diff --git a/pandas/tests/io/formats/style/test_to_latex.py b/pandas/tests/io/formats/style/test_to_latex.py
@@ -3,8 +3,6 @@
 import numpy as np
 import pytest
 
-from pandas._config import using_string_dtype
-
 from pandas import (
     DataFrame,
     MultiIndex,
@@ -731,7 +729,6 @@ def test_longtable_caption_label(styler, caption, cap_exp, label, lab_exp):
     )
 
 
-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
 @pytest.mark.parametrize("index", [True, False])
 @pytest.mark.parametrize(
     "columns, siunitx",
diff --git a/pandas/tests/io/pytables/test_file_handling.py b/pandas/tests/io/pytables/test_file_handling.py
diff --git a/pandas/tests/io/pytables/test_subclass.py b/pandas/tests/io/pytables/test_subclass.py
diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py