Skip to content

Commit 067e16f

Browse files
authored
Merge branch 'main' into Test_issue_57930
2 parents 271b06d + 8fbe6ac commit 067e16f

File tree

16 files changed

+154
-140
lines changed

16 files changed

+154
-140
lines changed

doc/source/development/maintaining.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -488,7 +488,7 @@ Post-Release
488488
for reference):
489489

490490
- The pandas-dev and pydata mailing lists
491-
- Twitter, Mastodon, Telegram and LinkedIn
491+
- X, Mastodon, Telegram and LinkedIn
492492

493493
7. Update these release instructions to fix anything incorrect and to reflect any
494494
change since the last release.

doc/source/user_guide/visualization.rst

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1210,11 +1210,6 @@ You may set the ``xlabel`` and ``ylabel`` arguments to give the plot custom labe
12101210
for x and y axis. By default, pandas will pick up index name as xlabel, while leaving
12111211
it empty for ylabel.
12121212

1213-
.. ipython:: python
1214-
:suppress:
1215-
1216-
plt.figure();
1217-
12181213
.. ipython:: python
12191214
12201215
df.plot();

pandas/_libs/tslibs/nattype.pyx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -704,7 +704,7 @@ class NaTType(_NaT):
704704
difference between the current timezone and UTC.
705705
706706
Returns
707-
--------
707+
-------
708708
timedelta
709709
The difference between UTC and the local time as a `timedelta` object.
710710

pandas/_libs/tslibs/timestamps.pyx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2217,7 +2217,7 @@ class Timestamp(_Timestamp):
22172217
difference between the current timezone and UTC.
22182218

22192219
Returns
2220-
--------
2220+
-------
22212221
timedelta
22222222
The difference between UTC and the local time as a `timedelta` object.
22232223

pandas/core/computation/eval.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -190,8 +190,8 @@ def eval(
190190
191191
.. warning::
192192
193-
``eval`` can run arbitrary code which can make you vulnerable to code
194-
injection and untrusted data.
193+
This function can run arbitrary code which can make you vulnerable to code
194+
injection if you pass user input to this function.
195195
196196
Parameters
197197
----------

pandas/core/frame.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4476,8 +4476,10 @@ def query(self, expr: str, *, inplace: bool = False, **kwargs) -> DataFrame | No
44764476
"""
44774477
Query the columns of a DataFrame with a boolean expression.
44784478
4479-
This method can run arbitrary code which can make you vulnerable to code
4480-
injection if you pass user input to this function.
4479+
.. warning::
4480+
4481+
This method can run arbitrary code which can make you vulnerable to code
4482+
injection if you pass user input to this function.
44814483
44824484
Parameters
44834485
----------
@@ -4634,6 +4636,11 @@ def eval(self, expr: str, *, inplace: bool = False, **kwargs) -> Any | None:
46344636
"""
46354637
Evaluate a string describing operations on DataFrame columns.
46364638
4639+
.. warning::
4640+
4641+
This method can run arbitrary code which can make you vulnerable to code
4642+
injection if you pass user input to this function.
4643+
46374644
Operates on columns only, not specific rows or elements. This allows
46384645
`eval` to run arbitrary code, which can make you vulnerable to code
46394646
injection if you pass user input to this function.

pandas/core/strings/accessor.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3700,7 +3700,7 @@ def casefold(self):
37003700
Series.str.isupper : Check whether all characters are uppercase.
37013701
37023702
Examples
3703-
------------
3703+
--------
37043704
The ``s5.str.istitle`` method checks for whether all words are in title
37053705
case (whether only the first letter of each word is capitalized). Words are
37063706
assumed to be as any sequence of non-numeric characters separated by

pandas/io/formats/style.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1644,7 +1644,7 @@ def _update_ctx_header(self, attrs: DataFrame, axis: AxisInt) -> None:
16441644
for j in attrs.columns:
16451645
ser = attrs[j]
16461646
for i, c in ser.items():
1647-
if not c:
1647+
if not c or pd.isna(c):
16481648
continue
16491649
css_list = maybe_convert_css_to_tuples(c)
16501650
if axis == 0:

pandas/io/pytables.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5297,6 +5297,8 @@ def _dtype_to_kind(dtype_str: str) -> str:
52975297
kind = "integer"
52985298
elif dtype_str == "object":
52995299
kind = "object"
5300+
elif dtype_str == "str":
5301+
kind = "str"
53005302
else:
53015303
raise ValueError(f"cannot interpret dtype of [{dtype_str}]")
53025304

pandas/tests/groupby/test_apply.py

Lines changed: 82 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -227,6 +227,22 @@ def fast(group):
227227
tm.assert_frame_equal(fast_df, slow_df)
228228

229229

230+
def test_apply_fast_slow_identical_index():
231+
# GH#44803
232+
df = DataFrame(
233+
{
234+
"name": ["Alice", "Bob", "Carl"],
235+
"age": [20, 21, 20],
236+
}
237+
).set_index("name")
238+
239+
grp_by_same_value = df.groupby(["age"], group_keys=False).apply(lambda group: group)
240+
grp_by_copy = df.groupby(["age"], group_keys=False).apply(
241+
lambda group: group.copy()
242+
)
243+
tm.assert_frame_equal(grp_by_same_value, grp_by_copy)
244+
245+
230246
@pytest.mark.parametrize(
231247
"func",
232248
[
@@ -255,19 +271,19 @@ def test_apply_with_mixed_dtype():
255271
"foo2": ["one", "two", "two", "three", "one", "two"],
256272
}
257273
)
258-
result = df.apply(lambda x: x, axis=1).dtypes
259-
expected = df.dtypes
260-
tm.assert_series_equal(result, expected)
274+
result = df.apply(lambda x: x, axis=1)
275+
expected = df
276+
tm.assert_frame_equal(result, expected)
261277

262278
# GH 3610 incorrect dtype conversion with as_index=False
263279
df = DataFrame({"c1": [1, 2, 6, 6, 8]})
264280
df["c2"] = df.c1 / 2.0
265-
result1 = df.groupby("c2").mean().reset_index().c2
266-
result2 = df.groupby("c2", as_index=False).mean().c2
267-
tm.assert_series_equal(result1, result2)
281+
result1 = df.groupby("c2").mean().reset_index()
282+
result2 = df.groupby("c2", as_index=False).mean()
283+
tm.assert_frame_equal(result1, result2)
268284

269285

270-
def test_groupby_as_index_apply():
286+
def test_groupby_as_index_apply(as_index):
271287
# GH #4648 and #3417
272288
df = DataFrame(
273289
{
@@ -276,27 +292,35 @@ def test_groupby_as_index_apply():
276292
"time": range(6),
277293
}
278294
)
295+
gb = df.groupby("user_id", as_index=as_index)
279296

280-
g_as = df.groupby("user_id", as_index=True)
281-
g_not_as = df.groupby("user_id", as_index=False)
282-
283-
res_as = g_as.head(2).index
284-
res_not_as = g_not_as.head(2).index
285-
exp = Index([0, 1, 2, 4])
286-
tm.assert_index_equal(res_as, exp)
287-
tm.assert_index_equal(res_not_as, exp)
288-
289-
res_as_apply = g_as.apply(lambda x: x.head(2)).index
290-
res_not_as_apply = g_not_as.apply(lambda x: x.head(2)).index
297+
expected = DataFrame(
298+
{
299+
"item_id": ["b", "b", "a", "a"],
300+
"user_id": [1, 2, 1, 3],
301+
"time": [0, 1, 2, 4],
302+
},
303+
index=[0, 1, 2, 4],
304+
)
305+
result = gb.head(2)
306+
tm.assert_frame_equal(result, expected)
291307

292308
# apply doesn't maintain the original ordering
293309
# changed in GH5610 as the as_index=False returns a MI here
294-
exp_not_as_apply = Index([0, 2, 1, 4])
295-
tp = [(1, 0), (1, 2), (2, 1), (3, 4)]
296-
exp_as_apply = MultiIndex.from_tuples(tp, names=["user_id", None])
297-
298-
tm.assert_index_equal(res_as_apply, exp_as_apply)
299-
tm.assert_index_equal(res_not_as_apply, exp_not_as_apply)
310+
if as_index:
311+
tp = [(1, 0), (1, 2), (2, 1), (3, 4)]
312+
index = MultiIndex.from_tuples(tp, names=["user_id", None])
313+
else:
314+
index = Index([0, 2, 1, 4])
315+
expected = DataFrame(
316+
{
317+
"item_id": list("baba"),
318+
"time": [0, 2, 1, 4],
319+
},
320+
index=index,
321+
)
322+
result = gb.apply(lambda x: x.head(2))
323+
tm.assert_frame_equal(result, expected)
300324

301325

302326
def test_groupby_as_index_apply_str():
@@ -1455,3 +1479,37 @@ def f_4(grp):
14551479
e.loc["Pony"] = np.nan
14561480
e.name = None
14571481
tm.assert_series_equal(result, e)
1482+
1483+
1484+
def test_nonreducer_nonstransform():
1485+
# GH3380, GH60619
1486+
# Was originally testing mutating in a UDF; now kept as an example
1487+
# of using apply with a nonreducer and nontransformer.
1488+
df = DataFrame(
1489+
{
1490+
"cat1": ["a"] * 8 + ["b"] * 6,
1491+
"cat2": ["c"] * 2
1492+
+ ["d"] * 2
1493+
+ ["e"] * 2
1494+
+ ["f"] * 2
1495+
+ ["c"] * 2
1496+
+ ["d"] * 2
1497+
+ ["e"] * 2,
1498+
"val": np.random.default_rng(2).integers(100, size=14),
1499+
}
1500+
)
1501+
1502+
def f(x):
1503+
x = x.copy()
1504+
x["rank"] = x.val.rank(method="min")
1505+
return x.groupby("cat2")["rank"].min()
1506+
1507+
expected = DataFrame(
1508+
{
1509+
"cat1": list("aaaabbb"),
1510+
"cat2": list("cdefcde"),
1511+
"rank": [3.0, 2.0, 5.0, 1.0, 2.0, 4.0, 1.0],
1512+
}
1513+
).set_index(["cat1", "cat2"])["rank"]
1514+
result = df.groupby("cat1").apply(f)
1515+
tm.assert_series_equal(result, expected)

0 commit comments

Comments
 (0)