Skip to content

Commit e71c341

Browse files
ilan-gold, dcherian, and pre-commit-ci[bot]
authored
fix: pd.Series in pandas>=3 does not preserve object dtype metadata (#10564)
Co-authored-by: Deepak Cherian <[email protected]>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent 63b0742 commit e71c341

File tree

2 files changed

+18
-2
lines changed

2 files changed

+18
-2
lines changed

xarray/core/variable.py

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
import numpy as np
1414
import pandas as pd
1515
from numpy.typing import ArrayLike
16+
from packaging.version import Version
1617

1718
import xarray as xr # only for Dataset and DataArray
1819
from xarray.compat.array_api_compat import to_like_array
@@ -208,7 +209,10 @@ def _maybe_wrap_data(data):
208209

209210
def _possibly_convert_objects(values):
210211
"""Convert object arrays into datetime64 and timedelta64 according
211-
to the pandas convention.
212+
to the pandas convention. For backwards compat, as of 3.0.0 pandas,
213+
object dtype inputs are cast to strings by `pandas.Series`
214+
but we output them as object dtype with the input metadata preserved as well.
215+
212216
213217
* datetime.datetime
214218
* datetime.timedelta
@@ -223,6 +227,17 @@ def _possibly_convert_objects(values):
223227
result.flags.writeable = True
224228
except ValueError:
225229
result = result.copy()
230+
# For why we need this behavior: https://github.com/pandas-dev/pandas/issues/61938
231+
# Object datatype inputs that are strings
232+
# will be converted to strings by `pandas.Series`, and as of 3.0.0, lose
233+
# `dtype.metadata`. If the roundtrip back to numpy in this function yields an
234+
# object array again, the dtype.metadata will be preserved.
235+
if (
236+
result.dtype.kind == "O"
237+
and values.dtype.kind == "O"
238+
and Version(pd.__version__) >= Version("3.0.0dev0")
239+
):
240+
result.dtype = values.dtype
226241
return result
227242

228243

xarray/tests/test_dataset.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
import numpy as np
1414
import pandas as pd
1515
import pytest
16+
from packaging.version import Version
1617
from pandas.core.indexes.datetimes import DatetimeIndex
1718

1819
# remove once numpy 2.0 is the oldest supported version
@@ -299,7 +300,7 @@ def test_repr(self) -> None:
299300
var1 (dim1, dim2) float64 576B -0.9891 -0.3678 1.288 ... -0.2116 0.364
300301
var2 (dim1, dim2) float64 576B 0.953 1.52 1.704 ... 0.1347 -0.6423
301302
var3 (dim3, dim1) float64 640B 0.4107 0.9941 0.1665 ... 0.716 1.555
302-
var4 (dim1) category 32B b c b a c a c a{var5}
303+
var4 (dim1) category 3{6 if Version(pd.__version__) >= Version("3.0.0dev0") else 2}B b c b a c a c a{var5}
303304
Attributes:
304305
foo: bar"""
305306
)

0 commit comments

Comments
 (0)