
Commit d4ea527

Merge branch 'main' into bug-update-60228
2 parents: 8db4edc + e62fcb1

File tree: 13 files changed, +146, -15 lines

.github/workflows/comment-commands.yml

Lines changed: 1 addition & 1 deletion
@@ -11,7 +11,7 @@ permissions:
 jobs:
   issue_assign:
     runs-on: ubuntu-22.04
-    if: (!github.event.issue.pull_request) && github.event.comment.body == 'take'
+    if: (!github.event.issue.pull_request) && trim(github.event.comment.body) == 'take'
     concurrency:
       group: ${{ github.actor }}-issue-assign
     steps:

ci/code_checks.sh

Lines changed: 0 additions & 1 deletion
@@ -109,7 +109,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.core.resample.Resampler.std SA01" \
         -i "pandas.core.resample.Resampler.transform PR01,RT03,SA01" \
         -i "pandas.core.resample.Resampler.var SA01" \
-        -i "pandas.errors.ChainedAssignmentError SA01" \
         -i "pandas.errors.DuplicateLabelError SA01" \
         -i "pandas.errors.IntCastingNaNError SA01" \
         -i "pandas.errors.InvalidIndexError SA01" \

doc/source/whatsnew/v3.0.0.rst

Lines changed: 2 additions & 1 deletion
@@ -54,6 +54,7 @@ Other enhancements
 - :meth:`Series.cummin` and :meth:`Series.cummax` now supports :class:`CategoricalDtype` (:issue:`52335`)
 - :meth:`Series.plot` now correctly handle the ``ylabel`` parameter for pie charts, allowing for explicit control over the y-axis label (:issue:`58239`)
 - :meth:`DataFrame.plot.scatter` argument ``c`` now accepts a column of strings, where rows with the same string are colored identically (:issue:`16827` and :issue:`16485`)
+- :func:`read_parquet` accepts ``to_pandas_kwargs`` which are forwarded to :meth:`pyarrow.Table.to_pandas` which enables passing additional keywords to customize the conversion to pandas, such as ``maps_as_pydicts`` to read the Parquet map data type as python dictionaries (:issue:`56842`)
 - :meth:`DataFrameGroupBy.transform`, :meth:`SeriesGroupBy.transform`, :meth:`DataFrameGroupBy.agg`, :meth:`SeriesGroupBy.agg`, :meth:`RollingGroupby.apply`, :meth:`ExpandingGroupby.apply`, :meth:`Rolling.apply`, :meth:`Expanding.apply`, :meth:`DataFrame.apply` with ``engine="numba"`` now supports positional arguments passed as kwargs (:issue:`58995`)
 - :meth:`Series.map` can now accept kwargs to pass on to func (:issue:`59814`)
 - :meth:`pandas.concat` will raise a ``ValueError`` when ``ignore_index=True`` and ``keys`` is not ``None`` (:issue:`59274`)

@@ -763,7 +764,7 @@ ExtensionArray

 Styler
 ^^^^^^
--
+- Bug in :meth:`Styler.to_latex` where styling column headers when combined with a hidden index or hidden index-levels is fixed.

 Other
 ^^^^^

pandas/core/frame.py

Lines changed: 2 additions & 1 deletion
@@ -4742,7 +4742,8 @@ def eval(self, expr: str, *, inplace: bool = False, **kwargs) -> Any | None:
         3  4  4   7   8  0
         4  5  2   6   7  3

-        For columns with spaces in their name, you can use backtick quoting.
+        For columns with spaces or other disallowed characters in their name, you can
+        use backtick quoting.

         >>> df.eval("B * `C&C`")
         0    100
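For reference, a minimal sketch of the backtick quoting described in the updated docstring, assuming the pandas 3.0 behavior shown in the docstring context above; the column names and values are illustrative, not taken from the commit:

    import pandas as pd

    # "C&C" is not a valid Python identifier, so it must be backtick-quoted in eval().
    df = pd.DataFrame({"A": [1, 2], "B": [10, 10], "C&C": [10, 9]})

    result = df.eval("B * `C&C`")
    print(result)
    # expected roughly:
    # 0    100
    # 1     90
    # dtype: int64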

pandas/errors/__init__.py

Lines changed: 5 additions & 0 deletions
@@ -487,6 +487,11 @@ class ChainedAssignmentError(Warning):
     For more information on Copy-on-Write,
     see :ref:`the user guide<copy_on_write>`.

+    See Also
+    --------
+    options.mode.copy_on_write : Global setting for enabling or disabling
+        Copy-on-Write behavior.
+
     Examples
     --------
     >>> pd.options.mode.copy_on_write = True
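As a companion to the docstring above, a minimal sketch of the situation this warning class describes, assuming a Copy-on-Write-enabled pandas (CoW is the default in 3.0; on older versions it can be enabled via the option referenced in the See Also entry):

    import warnings

    import pandas as pd

    df = pd.DataFrame({"a": [1, 2, 3]})

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        # Chained assignment: df["a"] is a temporary object under Copy-on-Write,
        # so this setitem can never update df itself.
        df["a"][0] = 10

    print([w.category.__name__ for w in caught])
    # expected to include 'ChainedAssignmentError' when Copy-on-Write is active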

pandas/io/_util.py

Lines changed: 4 additions & 1 deletion
@@ -60,9 +60,12 @@ def arrow_table_to_pandas(
     table: pyarrow.Table,
     dtype_backend: DtypeBackend | Literal["numpy"] | lib.NoDefault = lib.no_default,
     null_to_int64: bool = False,
+    to_pandas_kwargs: dict | None = None,
 ) -> pd.DataFrame:
     pa = import_optional_dependency("pyarrow")

+    to_pandas_kwargs = {} if to_pandas_kwargs is None else to_pandas_kwargs
+
     types_mapper: type[pd.ArrowDtype] | None | Callable
     if dtype_backend == "numpy_nullable":
         mapping = _arrow_dtype_mapping()

@@ -80,5 +83,5 @@ def arrow_table_to_pandas(
     else:
         raise NotImplementedError

-    df = table.to_pandas(types_mapper=types_mapper)
+    df = table.to_pandas(types_mapper=types_mapper, **to_pandas_kwargs)
     return df
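To make the forwarding concrete, a small sketch at the pyarrow level of the kind of keyword that now passes through ``**to_pandas_kwargs``, assuming pyarrow >= 13.0 (which added ``maps_as_pydicts``); the table contents are illustrative:

    import pyarrow as pa

    # A one-column table using the Arrow map type (string keys -> int64 values).
    table = pa.table(
        {
            "foo": pa.array(
                [[("A", 1)], [("B", 2)]],
                type=pa.map_(pa.string(), pa.int64()),
            )
        }
    )

    # Default conversion: each map cell becomes a sequence of (key, value) pairs.
    default_df = table.to_pandas()

    # The keyword arrow_table_to_pandas now forwards via **to_pandas_kwargs:
    # each map cell becomes a Python dict instead.
    dict_df = table.to_pandas(maps_as_pydicts="strict")

    print(default_df["foo"].iloc[0])  # e.g. [('A', 1)]
    print(dict_df["foo"].iloc[0])     # {'A': 1}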

pandas/io/formats/style_render.py

Lines changed: 2 additions & 1 deletion
@@ -868,7 +868,8 @@ def _translate_latex(self, d: dict, clines: str | None) -> None:
         or multirow sparsification (so that \multirow and \multicol work correctly).
         """
         index_levels = self.index.nlevels
-        visible_index_level_n = index_levels - sum(self.hide_index_)
+        # GH 52218
+        visible_index_level_n = max(1, index_levels - sum(self.hide_index_))
         d["head"] = [
             [
                 {**col, "cellstyle": self.ctx_columns[r, c - visible_index_level_n]}

pandas/io/parquet.py

Lines changed: 20 additions & 2 deletions
@@ -242,6 +242,7 @@ def read(
         dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default,
         storage_options: StorageOptions | None = None,
         filesystem=None,
+        to_pandas_kwargs: dict[str, Any] | None = None,
         **kwargs,
     ) -> DataFrame:
         kwargs["use_pandas_metadata"] = True

@@ -266,7 +267,11 @@ def read(
                 "make_block is deprecated",
                 DeprecationWarning,
             )
-            result = arrow_table_to_pandas(pa_table, dtype_backend=dtype_backend)
+            result = arrow_table_to_pandas(
+                pa_table,
+                dtype_backend=dtype_backend,
+                to_pandas_kwargs=to_pandas_kwargs,
+            )

             if pa_table.schema.metadata:
                 if b"PANDAS_ATTRS" in pa_table.schema.metadata:

@@ -347,6 +352,7 @@ def read(
         filters=None,
         storage_options: StorageOptions | None = None,
         filesystem=None,
+        to_pandas_kwargs: dict | None = None,
         **kwargs,
     ) -> DataFrame:
         parquet_kwargs: dict[str, Any] = {}

@@ -362,6 +368,10 @@ def read(
             raise NotImplementedError(
                 "filesystem is not implemented for the fastparquet engine."
             )
+        if to_pandas_kwargs is not None:
+            raise NotImplementedError(
+                "to_pandas_kwargs is not implemented for the fastparquet engine."
+            )
         path = stringify_path(path)
         handles = None
         if is_fsspec_url(path):

@@ -452,7 +462,7 @@ def to_parquet(
         .. versionadded:: 2.1.0

     kwargs
-        Additional keyword arguments passed to the engine
+        Additional keyword arguments passed to the engine.

     Returns
     -------

@@ -491,6 +501,7 @@ def read_parquet(
     dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default,
     filesystem: Any = None,
     filters: list[tuple] | list[list[tuple]] | None = None,
+    to_pandas_kwargs: dict | None = None,
     **kwargs,
 ) -> DataFrame:
     """

@@ -564,6 +575,12 @@ read_parquet(

         .. versionadded:: 2.1.0

+    to_pandas_kwargs : dict | None, default None
+        Keyword arguments to pass through to :func:`pyarrow.Table.to_pandas`
+        when ``engine="pyarrow"``.
+
+        .. versionadded:: 3.0.0
+
     **kwargs
         Any additional kwargs are passed to the engine.

@@ -636,5 +653,6 @@ def read_parquet(
         storage_options=storage_options,
         dtype_backend=dtype_backend,
         filesystem=filesystem,
+        to_pandas_kwargs=to_pandas_kwargs,
         **kwargs,
     )
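A hypothetical end-to-end use of the new keyword, mirroring the test added below; the file name is illustrative, and it assumes the pyarrow engine with pyarrow >= 13.0:

    import pandas as pd
    import pyarrow as pa

    # Write the "foo" column as a Parquet map type (string keys -> int64 values).
    schema = pa.schema([("foo", pa.map_(pa.string(), pa.int64()))])
    df = pd.DataFrame([{"foo": {"A": 1}}, {"foo": {"B": 2}}])
    df.to_parquet("maps.parquet", engine="pyarrow", schema=schema)

    # Without to_pandas_kwargs, map cells round-trip as sequences of (key, value)
    # pairs; forwarding maps_as_pydicts to pyarrow.Table.to_pandas yields dicts.
    result = pd.read_parquet(
        "maps.parquet",
        engine="pyarrow",
        to_pandas_kwargs={"maps_as_pydicts": "strict"},
    )
    print(result["foo"].iloc[0])  # {'A': 1}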

pandas/tests/io/formats/test_to_latex.py

Lines changed: 85 additions & 0 deletions
@@ -1405,3 +1405,88 @@ def test_to_latex_multiindex_multirow(self):
             """
         )
         assert result == expected
+
+    def test_to_latex_multiindex_format_single_index_hidden(self):
+        # GH 52218
+        df = DataFrame(
+            {
+                "A": [1, 2],
+                "B": [4, 5],
+            }
+        )
+        result = (
+            df.style.hide(axis="index")
+            .map_index(lambda v: "textbf:--rwrap;", axis="columns")
+            .to_latex()
+        )
+        expected = _dedent(r"""
+            \begin{tabular}{rr}
+            \textbf{A} & \textbf{B} \\
+            1 & 4 \\
+            2 & 5 \\
+            \end{tabular}
+            """)
+        assert result == expected
+
+    def test_to_latex_multiindex_format_triple_index_two_hidden(self):
+        # GH 52218
+        arrays = [
+            ["A", "A", "B", "B"],
+            ["one", "two", "one", "two"],
+            ["x", "x", "y", "y"],
+        ]
+        index = pd.MultiIndex.from_arrays(
+            arrays, names=["Level 0", "Level 1", "Level 2"]
+        )
+        df = DataFrame(
+            [[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0]],
+            index=index,
+            columns=["C1", "C2", "C3"],
+        )
+        result = (
+            df.style.hide(axis="index", level=[0, 1])
+            .map_index(lambda v: "textbf:--rwrap;", axis="columns")
+            .to_latex()
+        )
+        expected = _dedent(r"""
+            \begin{tabular}{lrrr}
+            & \textbf{C1} & \textbf{C2} & \textbf{C3} \\
+            Level 2 & & & \\
+            x & 0 & 0 & 0 \\
+            x & 0 & 0 & 0 \\
+            y & 0 & 0 & 0 \\
+            y & 0 & 0 & 0 \\
+            \end{tabular}
+            """)
+        assert result == expected
+
+    def test_to_latex_multiindex_format_triple_index_all_hidden(self):
+        # GH 52218
+        arrays = [
+            ["A", "A", "B", "B"],
+            ["one", "two", "one", "two"],
+            ["x", "x", "y", "y"],
+        ]
+        index = pd.MultiIndex.from_arrays(
+            arrays, names=["Level 0", "Level 1", "Level 2"]
+        )
+        df = DataFrame(
+            [[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0]],
+            index=index,
+            columns=["C1", "C2", "C3"],
+        )
+        result = (
+            df.style.hide(axis="index", level=[0, 1, 2])
+            .map_index(lambda v: "textbf:--rwrap;", axis="columns")
+            .to_latex()
+        )
+        expected = _dedent(r"""
+            \begin{tabular}{rrr}
+            \textbf{C1} & \textbf{C2} & \textbf{C3} \\
+            0 & 0 & 0 \\
+            0 & 0 & 0 \\
+            0 & 0 & 0 \\
+            0 & 0 & 0 \\
+            \end{tabular}
+            """)
+        assert result == expected

pandas/tests/io/test_parquet.py

Lines changed: 14 additions & 0 deletions
@@ -1172,6 +1172,20 @@ def test_non_nanosecond_timestamps(self, temp_file):
         )
         tm.assert_frame_equal(result, expected)

+    def test_maps_as_pydicts(self, pa):
+        pyarrow = pytest.importorskip("pyarrow", "13.0.0")
+
+        schema = pyarrow.schema(
+            [("foo", pyarrow.map_(pyarrow.string(), pyarrow.int64()))]
+        )
+        df = pd.DataFrame([{"foo": {"A": 1}}, {"foo": {"B": 2}}])
+        check_round_trip(
+            df,
+            pa,
+            write_kwargs={"schema": schema},
+            read_kwargs={"to_pandas_kwargs": {"maps_as_pydicts": "strict"}},
+        )
+

 class TestParquetFastParquet(Base):
     def test_basic(self, fp, df_full, request):
