Skip to content

Commit d4d095f

Browse files
Now complies with pre-commit requirements and added an entry in v3.0.0.rst
1 parent e8eabea commit d4d095f

File tree

3 files changed

+17
-13
lines changed

3 files changed

+17
-13
lines changed

doc/source/whatsnew/v3.0.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -712,6 +712,7 @@ MultiIndex
712712
- :func:`MultiIndex.get_level_values` accessing a :class:`DatetimeIndex` does not carry the frequency attribute along (:issue:`58327`, :issue:`57949`)
713713
- Bug in :class:`DataFrame` arithmetic operations in case of unaligned MultiIndex columns (:issue:`60498`)
714714
- Bug in :class:`DataFrame` arithmetic operations with :class:`Series` in case of unaligned MultiIndex (:issue:`61009`)
715+
- Bug in :func:`concat` where :class:`MultiIndex` levels with extension dtypes such as ``timestamp[s][pyarrow]`` were silently coerced to ``object`` instead of preserving their original dtype (:issue:`58421`)
715716
-
716717

717718
I/O

pandas/core/reshape/concat.py

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222

2323
from pandas.core.dtypes.common import (
2424
is_bool,
25+
is_extension_array_dtype,
2526
is_scalar,
2627
)
2728
from pandas.core.dtypes.concat import concat_compat
@@ -36,6 +37,7 @@
3637
factorize_from_iterables,
3738
)
3839
import pandas.core.common as com
40+
from pandas.core.construction import array as pd_array
3941
from pandas.core.indexes.api import (
4042
Index,
4143
MultiIndex,
@@ -47,10 +49,6 @@
4749
)
4850
from pandas.core.internals import concatenate_managers
4951

50-
from pandas.core.dtypes.common import is_extension_array_dtype
51-
52-
from pandas.core.construction import array
53-
5452
if TYPE_CHECKING:
5553
from collections.abc import (
5654
Callable,
@@ -832,9 +830,9 @@ def _concat_indexes(indexes) -> Index:
832830
sample = indexes[0]
833831
try:
834832
# this helps preserve extension types like timestamp[pyarrow]
835-
arr = array(values, dtype=sample.dtype)
833+
arr = pd_array(values, dtype=sample.dtype)
836834
except Exception:
837-
arr = array(values) # fallback
835+
arr = pd_array(values) # fallback
838836

839837
return Index(arr)
840838

@@ -906,7 +904,7 @@ def _make_concat_multiindex(indexes, keys, levels=None, names=None) -> MultiInde
906904
unzipped = list(zip(*concat_index))
907905
for i, level_values in enumerate(unzipped):
908906
# reconstruct each level using original dtype
909-
arr = array(level_values, dtype=original_dtypes[i])
907+
arr = pd_array(level_values, dtype=original_dtypes[i])
910908
level_codes, _ = factorize_from_iterable(arr)
911909
levels.append(ensure_index(arr))
912910
codes_list.append(level_codes)

pandas/tests/frame/methods/test_concat_arrow_index.py

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,14 @@
1-
import pandas as pd
21
import pytest
32

3+
import pandas as pd
44

55
schema = {
66
"id": "int64[pyarrow]",
77
"time": "timestamp[s][pyarrow]",
88
"value": "float[pyarrow]",
99
}
1010

11+
1112
@pytest.mark.parametrize("dtype", ["timestamp[s][pyarrow]"])
1213
def test_concat_preserves_pyarrow_timestamp(dtype):
1314
dfA = (
@@ -38,9 +39,13 @@ def test_concat_preserves_pyarrow_timestamp(dtype):
3839
)
3940

4041
df = pd.concat([dfA, dfB], keys=[0, 1], names=["run"])
41-
42-
# chech whether df.index is multiIndex
43-
assert isinstance(df.index, pd.MultiIndex), f"Expected MultiIndex, but received {type(df.index)}"
44-
42+
43+
# check whether df.index is a MultiIndex
44+
assert isinstance(df.index, pd.MultiIndex), (
45+
f"Expected MultiIndex, but received {type(df.index)}"
46+
)
47+
4548
# Verifying special dtype timestamp[s][pyarrow] stays intact after concat
46-
assert df.index.levels[2].dtype == dtype, f"Expected {dtype}, but received {df.index.levels[2].dtype}"
49+
assert df.index.levels[2].dtype == dtype, (
50+
f"Expected {dtype}, but received {df.index.levels[2].dtype}"
51+
)

0 commit comments

Comments
 (0)