Skip to content

Commit 3598fc4

Browse files
committed
BUG: setitem-with-expansion losing EADtype on obj.index
1 parent cc40732 commit 3598fc4

File tree

7 files changed

+38
-19
lines changed

7 files changed

+38
-19
lines changed

doc/source/whatsnew/v3.0.0.rst

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -603,6 +603,7 @@ Other API changes
603603
an empty ``RangeIndex`` or empty ``Index`` with object dtype when determining
604604
the dtype of the resulting Index (:issue:`60797`)
605605
- :class:`IncompatibleFrequency` now subclasses ``TypeError`` instead of ``ValueError``. As a result, joins with mismatched frequencies now cast to object like other non-comparable joins, and arithmetic with indexes with mismatched frequencies aligns (:issue:`55782`)
606+
- :meth:`CategoricalIndex.append` no longer attempts to cast different-dtype indexes to the caller's dtype (:issue:`41626`)
606607
- :meth:`ExtensionDtype.construct_array_type` is now a regular method instead of a ``classmethod`` (:issue:`58663`)
607608
- Comparison operations between :class:`Index` and :class:`Series` now consistently return :class:`Series` regardless of which object is on the left or right (:issue:`36759`)
608609
- NumPy functions like ``np.isinf`` that return a bool dtype when called on an :class:`Index` object now return a bool-dtype :class:`Index` instead of ``np.ndarray`` (:issue:`52676`)
@@ -974,8 +975,8 @@ Indexing
974975
- Bug in reindexing of :class:`DataFrame` with :class:`PeriodDtype` columns in case of consolidated block (:issue:`60980`, :issue:`60273`)
975976
- Bug in :meth:`DataFrame.loc.__getitem__` and :meth:`DataFrame.iloc.__getitem__` with a :class:`CategoricalDtype` column with integer categories raising when trying to index a row containing a ``NaN`` entry (:issue:`58954`)
976977
- Bug in :meth:`Index.__getitem__` incorrectly raising with a 0-dim ``np.ndarray`` key (:issue:`55601`)
978+
- Bug in adding new rows to a :class:`DataFrame` or :class:`Series` with ``.loc`` failing to retain the dtype of the object's index in some cases (:issue:`41626`)
977979
- Bug in indexing on a :class:`DatetimeIndex` with a ``timestamp[pyarrow]`` dtype or on a :class:`TimedeltaIndex` with a ``duration[pyarrow]`` dtype (:issue:`62277`)
978-
-
979980

980981
Missing
981982
^^^^^^^
@@ -1093,7 +1094,7 @@ Reshaping
10931094
- Bug in :func:`melt` where calling with duplicate column names in ``id_vars`` raised a misleading ``AttributeError`` (:issue:`61475`)
10941095
- Bug in :meth:`DataFrame.merge` where user-provided suffixes could result in duplicate column names if the resulting names matched existing columns. Now raises a :class:`MergeError` in such cases. (:issue:`61402`)
10951096
- Bug in :meth:`DataFrame.merge` with :class:`CategoricalDtype` columns incorrectly raising ``RecursionError`` (:issue:`56376`)
1096-
-
1097+
- Bug in :meth:`DataFrame.merge` with a ``float32`` index incorrectly casting the index to ``float64`` (:issue:`41626`)
10971098

10981099
Sparse
10991100
^^^^^^

pandas/core/frame.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10918,6 +10918,13 @@ def _append_internal(
1091810918
),
1091910919
)
1092010920
row_df = other.to_frame().T
10921+
if isinstance(self.index.dtype, ExtensionDtype):
10922+
# GH#41626 retain e.g. CategoricalDtype if reached via
10923+
# df.loc[key] = item
10924+
row_df.index = self.index.array._cast_pointwise_result(
10925+
row_df.index._values
10926+
)
10927+
1092110928
# infer_objects is needed for
1092210929
# test_append_empty_frame_to_series_with_dateutil_tz
1092310930
other = row_df.infer_objects().rename_axis(index.names)

pandas/core/indexes/category.py

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818
)
1919

2020
from pandas.core.dtypes.common import is_scalar
21-
from pandas.core.dtypes.concat import concat_compat
2221
from pandas.core.dtypes.dtypes import CategoricalDtype
2322
from pandas.core.dtypes.missing import (
2423
is_valid_na_for_dtype,
@@ -519,17 +518,3 @@ def map(self, mapper, na_action: Literal["ignore"] | None = None):
519518
"""
520519
mapped = self._values.map(mapper, na_action=na_action)
521520
return Index(mapped, name=self.name)
522-
523-
def _concat(self, to_concat: list[Index], name: Hashable) -> Index:
524-
# if calling index is category, don't check dtype of others
525-
try:
526-
cat = Categorical._concat_same_type(
527-
[self._is_dtype_compat(c) for c in to_concat]
528-
)
529-
except TypeError:
530-
# not all to_concat elements are among our categories (or NA)
531-
532-
res = concat_compat([x._values for x in to_concat])
533-
return Index(res, name=name)
534-
else:
535-
return type(self)._simple_new(cat, name=name)

pandas/core/reshape/merge.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1483,7 +1483,11 @@ def _create_join_index(
14831483
mask = indexer == -1
14841484
if np.any(mask):
14851485
fill_value = na_value_for_dtype(index.dtype, compat=False)
1486-
index = index.append(Index([fill_value]))
1486+
if not index._can_hold_na:
1487+
new_index = Index([fill_value])
1488+
else:
1489+
new_index = Index([fill_value], dtype=index.dtype)
1490+
index = index.append(new_index)
14871491
if indexer is None:
14881492
return index.copy()
14891493
return index.take(indexer)

pandas/tests/indexes/categorical/test_append.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,9 +36,11 @@ def test_append_mismatched_categories(self, ci):
3636
ci.append(ci.values.reorder_categories(list("abc")))
3737

3838
def test_append_category_objects(self, ci):
39+
# GH#41626 pre-3.0 this used to cast the object-dtype index to
40+
# ci.dtype
3941
# with objects
4042
result = ci.append(Index(["c", "a"]))
41-
expected = CategoricalIndex(list("aabbcaca"), categories=ci.categories)
43+
expected = Index(list("aabbcaca"))
4244
tm.assert_index_equal(result, expected, exact=True)
4345

4446
def test_append_non_categories(self, ci):

pandas/tests/indexing/test_loc.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616

1717
from pandas._libs import index as libindex
1818
from pandas.errors import IndexingError
19+
import pandas.util._test_decorators as td
1920

2021
import pandas as pd
2122
from pandas import (
@@ -1963,6 +1964,22 @@ def test_loc_drops_level(self):
19631964

19641965

19651966
class TestLocSetitemWithExpansion:
1967+
@td.skip_if_no("pyarrow")
1968+
def test_loc_setitem_with_expansion_preserves_ea_dtype(self):
1969+
# GH#41626 retain index.dtype in setitem-with-expansion
1970+
idx = Index([Timestamp(0).date()], dtype="date32[pyarrow]")
1971+
df = DataFrame({"A": range(1)}, index=idx)
1972+
item = Timestamp("1970-01-02").date()
1973+
1974+
df.loc[item] = 1
1975+
1976+
exp_index = Index([idx[0], item], dtype=idx.dtype)
1977+
tm.assert_index_equal(df.index, exp_index)
1978+
1979+
ser = df["A"].iloc[:-1]
1980+
ser.loc[item] = 1
1981+
tm.assert_index_equal(ser.index, exp_index)
1982+
19661983
def test_loc_setitem_with_expansion_large_dataframe(self, monkeypatch):
19671984
# GH#10692
19681985
size_cutoff = 50

pandas/tests/reshape/merge/test_merge.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1378,6 +1378,9 @@ def test_merge_on_index_with_more_values(self, how, index, expected_index):
13781378
# GH 24212
13791379
# pd.merge gets [0, 1, 2, -1, -1, -1] as left_indexer, ensure that
13801380
# -1 is interpreted as a missing value instead of the last element
1381+
if index.dtype == "float32" and expected_index.dtype == "float64":
1382+
# GH#41626
1383+
expected_index = expected_index.astype("float32")
13811384
df1 = DataFrame({"a": [0, 1, 2], "key": [0, 1, 2]}, index=index)
13821385
df2 = DataFrame({"b": [0, 1, 2, 3, 4, 5]})
13831386
result = df1.merge(df2, left_on="key", right_index=True, how=how)

0 commit comments

Comments
 (0)