Skip to content

Commit 7bbb2db

Browse files
authored
Merge branch 'pandas-dev:main' into Pandas_Cheat_Sheet_FA
2 parents 11a72a9 + a8a84c8 commit 7bbb2db

File tree

15 files changed

+113
-84
lines changed

15 files changed

+113
-84
lines changed

doc/source/user_guide/visualization.rst

Lines changed: 0 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1210,11 +1210,6 @@ You may set the ``xlabel`` and ``ylabel`` arguments to give the plot custom labe
12101210
for x and y axis. By default, pandas will pick up index name as xlabel, while leaving
12111211
it empty for ylabel.
12121212

1213-
.. ipython:: python
1214-
:suppress:
1215-
1216-
plt.figure();
1217-
12181213
.. ipython:: python
12191214
12201215
df.plot();

pandas/_libs/tslibs/nattype.pyx

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -704,7 +704,7 @@ class NaTType(_NaT):
704704
difference between the current timezone and UTC.
705705
706706
Returns
707-
--------
707+
-------
708708
timedelta
709709
The difference between UTC and the local time as a `timedelta` object.
710710

pandas/_libs/tslibs/timestamps.pyx

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2217,7 +2217,7 @@ class Timestamp(_Timestamp):
22172217
difference between the current timezone and UTC.
22182218

22192219
Returns
2220-
--------
2220+
-------
22212221
timedelta
22222222
The difference between UTC and the local time as a `timedelta` object.
22232223

pandas/core/computation/eval.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -190,8 +190,8 @@ def eval(
190190
191191
.. warning::
192192
193-
``eval`` can run arbitrary code which can make you vulnerable to code
194-
injection and untrusted data.
193+
This function can run arbitrary code which can make you vulnerable to code
194+
injection if you pass user input to this function.
195195
196196
Parameters
197197
----------

pandas/core/frame.py

Lines changed: 9 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -4476,8 +4476,10 @@ def query(self, expr: str, *, inplace: bool = False, **kwargs) -> DataFrame | No
44764476
"""
44774477
Query the columns of a DataFrame with a boolean expression.
44784478
4479-
This method can run arbitrary code which can make you vulnerable to code
4480-
injection if you pass user input to this function.
4479+
.. warning::
4480+
4481+
This method can run arbitrary code which can make you vulnerable to code
4482+
injection if you pass user input to this function.
44814483
44824484
Parameters
44834485
----------
@@ -4634,6 +4636,11 @@ def eval(self, expr: str, *, inplace: bool = False, **kwargs) -> Any | None:
46344636
"""
46354637
Evaluate a string describing operations on DataFrame columns.
46364638
4639+
.. warning::
4640+
4641+
This method can run arbitrary code which can make you vulnerable to code
4642+
injection if you pass user input to this function.
4643+
46374644
Operates on columns only, not specific rows or elements. This allows
46384645
`eval` to run arbitrary code, which can make you vulnerable to code
46394646
injection if you pass user input to this function.

pandas/core/strings/accessor.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -3700,7 +3700,7 @@ def casefold(self):
37003700
Series.str.isupper : Check whether all characters are uppercase.
37013701
37023702
Examples
3703-
------------
3703+
--------
37043704
The ``s5.str.istitle`` method checks for whether all words are in title
37053705
case (whether only the first letter of each word is capitalized). Words are
37063706
assumed to be as any sequence of non-numeric characters separated by

pandas/io/formats/style.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1644,7 +1644,7 @@ def _update_ctx_header(self, attrs: DataFrame, axis: AxisInt) -> None:
16441644
for j in attrs.columns:
16451645
ser = attrs[j]
16461646
for i, c in ser.items():
1647-
if not c:
1647+
if not c or pd.isna(c):
16481648
continue
16491649
css_list = maybe_convert_css_to_tuples(c)
16501650
if axis == 0:

pandas/io/pytables.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -5297,6 +5297,8 @@ def _dtype_to_kind(dtype_str: str) -> str:
52975297
kind = "integer"
52985298
elif dtype_str == "object":
52995299
kind = "object"
5300+
elif dtype_str == "str":
5301+
kind = "str"
53005302
else:
53015303
raise ValueError(f"cannot interpret dtype of [{dtype_str}]")
53025304

pandas/tests/groupby/test_apply.py

Lines changed: 32 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -255,19 +255,19 @@ def test_apply_with_mixed_dtype():
255255
"foo2": ["one", "two", "two", "three", "one", "two"],
256256
}
257257
)
258-
result = df.apply(lambda x: x, axis=1).dtypes
259-
expected = df.dtypes
260-
tm.assert_series_equal(result, expected)
258+
result = df.apply(lambda x: x, axis=1)
259+
expected = df
260+
tm.assert_frame_equal(result, expected)
261261

262262
# GH 3610 incorrect dtype conversion with as_index=False
263263
df = DataFrame({"c1": [1, 2, 6, 6, 8]})
264264
df["c2"] = df.c1 / 2.0
265-
result1 = df.groupby("c2").mean().reset_index().c2
266-
result2 = df.groupby("c2", as_index=False).mean().c2
267-
tm.assert_series_equal(result1, result2)
265+
result1 = df.groupby("c2").mean().reset_index()
266+
result2 = df.groupby("c2", as_index=False).mean()
267+
tm.assert_frame_equal(result1, result2)
268268

269269

270-
def test_groupby_as_index_apply():
270+
def test_groupby_as_index_apply(as_index):
271271
# GH #4648 and #3417
272272
df = DataFrame(
273273
{
@@ -276,27 +276,35 @@ def test_groupby_as_index_apply():
276276
"time": range(6),
277277
}
278278
)
279+
gb = df.groupby("user_id", as_index=as_index)
279280

280-
g_as = df.groupby("user_id", as_index=True)
281-
g_not_as = df.groupby("user_id", as_index=False)
282-
283-
res_as = g_as.head(2).index
284-
res_not_as = g_not_as.head(2).index
285-
exp = Index([0, 1, 2, 4])
286-
tm.assert_index_equal(res_as, exp)
287-
tm.assert_index_equal(res_not_as, exp)
288-
289-
res_as_apply = g_as.apply(lambda x: x.head(2)).index
290-
res_not_as_apply = g_not_as.apply(lambda x: x.head(2)).index
281+
expected = DataFrame(
282+
{
283+
"item_id": ["b", "b", "a", "a"],
284+
"user_id": [1, 2, 1, 3],
285+
"time": [0, 1, 2, 4],
286+
},
287+
index=[0, 1, 2, 4],
288+
)
289+
result = gb.head(2)
290+
tm.assert_frame_equal(result, expected)
291291

292292
# apply doesn't maintain the original ordering
293293
# changed in GH5610 as the as_index=False returns a MI here
294-
exp_not_as_apply = Index([0, 2, 1, 4])
295-
tp = [(1, 0), (1, 2), (2, 1), (3, 4)]
296-
exp_as_apply = MultiIndex.from_tuples(tp, names=["user_id", None])
297-
298-
tm.assert_index_equal(res_as_apply, exp_as_apply)
299-
tm.assert_index_equal(res_not_as_apply, exp_not_as_apply)
294+
if as_index:
295+
tp = [(1, 0), (1, 2), (2, 1), (3, 4)]
296+
index = MultiIndex.from_tuples(tp, names=["user_id", None])
297+
else:
298+
index = Index([0, 2, 1, 4])
299+
expected = DataFrame(
300+
{
301+
"item_id": list("baba"),
302+
"time": [0, 2, 1, 4],
303+
},
304+
index=index,
305+
)
306+
result = gb.apply(lambda x: x.head(2))
307+
tm.assert_frame_equal(result, expected)
300308

301309

302310
def test_groupby_as_index_apply_str():

pandas/tests/groupby/test_apply_mutate.py

Lines changed: 10 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -38,18 +38,20 @@ def test_mutate_groups():
3838
}
3939
)
4040

41-
def f_copy(x):
41+
def f(x):
4242
x = x.copy()
4343
x["rank"] = x.val.rank(method="min")
4444
return x.groupby("cat2")["rank"].min()
4545

46-
def f_no_copy(x):
47-
x["rank"] = x.val.rank(method="min")
48-
return x.groupby("cat2")["rank"].min()
49-
50-
grpby_copy = df.groupby("cat1").apply(f_copy)
51-
grpby_no_copy = df.groupby("cat1").apply(f_no_copy)
52-
tm.assert_series_equal(grpby_copy, grpby_no_copy)
46+
expected = pd.DataFrame(
47+
{
48+
"cat1": list("aaaabbb"),
49+
"cat2": list("cdefcde"),
50+
"rank": [3.0, 2.0, 5.0, 1.0, 2.0, 4.0, 1.0],
51+
}
52+
).set_index(["cat1", "cat2"])["rank"]
53+
result = df.groupby("cat1").apply(f)
54+
tm.assert_series_equal(result, expected)
5355

5456

5557
def test_no_mutate_but_looks_like():
@@ -61,22 +63,3 @@ def test_no_mutate_but_looks_like():
6163
result1 = df.groupby("key", group_keys=True).apply(lambda x: x[:].value)
6264
result2 = df.groupby("key", group_keys=True).apply(lambda x: x.value)
6365
tm.assert_series_equal(result1, result2)
64-
65-
66-
def test_apply_function_with_indexing():
67-
# GH: 33058
68-
df = pd.DataFrame(
69-
{"col1": ["A", "A", "A", "B", "B", "B"], "col2": [1, 2, 3, 4, 5, 6]}
70-
)
71-
72-
def fn(x):
73-
x.loc[x.index[-1], "col2"] = 0
74-
return x.col2
75-
76-
result = df.groupby(["col1"], as_index=False).apply(fn)
77-
expected = pd.Series(
78-
[1, 2, 0, 4, 5, 0],
79-
index=range(6),
80-
name="col2",
81-
)
82-
tm.assert_series_equal(result, expected)

0 commit comments

Comments
 (0)