From 341108576cac99192750c76dcf4ac3fe45187df7 Mon Sep 17 00:00:00 2001 From: heoh Date: Mon, 25 Aug 2025 15:43:07 +0000 Subject: [PATCH] Remove the DataFrame.attrs saving to parquet metadata on Pandas' side --- doc/source/whatsnew/v3.0.0.rst | 1 + pandas/io/parquet.py | 12 ------------ 2 files changed, 1 insertion(+), 12 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 03ad8ed162c95..c40e4a9c15a65 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -205,6 +205,7 @@ Other enhancements - :meth:`DataFrame.apply` supports using third-party execution engines like the Bodo.ai JIT compiler (:issue:`60668`) - :meth:`DataFrame.iloc` and :meth:`Series.iloc` now support boolean masks in ``__getitem__`` for more consistent indexing behavior (:issue:`60994`) - :meth:`DataFrame.to_csv` and :meth:`Series.to_csv` now support Python's new-style format strings (e.g., ``"{:.6f}"``) for the ``float_format`` parameter, in addition to old-style ``%`` format strings and callables. This allows for more flexible and modern formatting of floating point numbers when exporting to CSV. 
(:issue:`49580`) +- :meth:`DataFrame.to_parquet` and :func:`read_parquet` no longer write and read :attr:`DataFrame.attrs` themselves through a separate ``PANDAS_ATTRS`` entry in the Parquet schema metadata; persisting ``attrs`` is now left to Apache Arrow's own metadata handling (:issue:`62098`) - :meth:`DataFrameGroupBy.transform`, :meth:`SeriesGroupBy.transform`, :meth:`DataFrameGroupBy.agg`, :meth:`SeriesGroupBy.agg`, :meth:`RollingGroupby.apply`, :meth:`ExpandingGroupby.apply`, :meth:`Rolling.apply`, :meth:`Expanding.apply`, :meth:`DataFrame.apply` with ``engine="numba"`` now supports positional arguments passed as kwargs (:issue:`58995`) - :meth:`Rolling.agg`, :meth:`Expanding.agg` and :meth:`ExponentialMovingWindow.agg` now accept :class:`NamedAgg` aggregations through ``**kwargs`` (:issue:`28333`) - :meth:`Series.map` can now accept kwargs to pass on to func (:issue:`59814`) diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py index 037d255d848ba..0b528a2d798a9 100644 --- a/pandas/io/parquet.py +++ b/pandas/io/parquet.py @@ -3,7 +3,6 @@ from __future__ import annotations import io -import json import os from typing import ( TYPE_CHECKING, @@ -190,12 +189,6 @@ def write( table = self.api.Table.from_pandas(df, **from_pandas_kwargs) - if df.attrs: - df_metadata = {"PANDAS_ATTRS": json.dumps(df.attrs)} - existing_metadata = table.schema.metadata - merged_metadata = {**existing_metadata, **df_metadata} - table = table.replace_schema_metadata(merged_metadata) - path_or_handle, handles, filesystem = _get_path_or_handle( path, filesystem, @@ -275,11 +268,6 @@ def read( dtype_backend=dtype_backend, to_pandas_kwargs=to_pandas_kwargs, ) - - if pa_table.schema.metadata: - if b"PANDAS_ATTRS" in pa_table.schema.metadata: - df_metadata = pa_table.schema.metadata[b"PANDAS_ATTRS"] - result.attrs = json.loads(df_metadata) return result finally: if handles is not None: