1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
@@ -1031,6 +1031,7 @@ Indexing
- Bug in :meth:`Index.__getitem__` incorrectly raising with a 0-dim ``np.ndarray`` key (:issue:`55601`)
- Bug in adding new rows with :meth:`DataFrame.loc.__setitem__` or :meth:`Series.loc.__setitem__` which failed to retain dtype on the object's index in some cases (:issue:`41626`)
- Bug in indexing on a :class:`DatetimeIndex` with a ``timestamp[pyarrow]`` dtype or on a :class:`TimedeltaIndex` with a ``duration[pyarrow]`` dtype (:issue:`62277`)
- Bug in setitem-with-expansion failing to retain the original dtype when adding new rows in some cases (:issue:`32346`, :issue:`15231`, :issue:`47503`, :issue:`6485`, :issue:`25383`, :issue:`52235`, :issue:`17026`, :issue:`56010`)

Missing
^^^^^^^
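For orientation, a minimal reproduction of the upcast these entries describe (this is the scenario from ``test_15231`` below; before this change the expansion forced the int64 columns to float64):

```python
import pandas as pd

df = pd.DataFrame([[1, 2], [3, 4]], columns=["a", "b"])  # both columns int64
df.loc[2] = pd.Series({"a": 5, "b": 6})  # setitem-with-expansion adds a row

# With this change the original int64 dtypes are retained instead of
# being upcast to float64 by the expansion.
print(df.dtypes)
```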
6 changes: 6 additions & 0 deletions pandas/core/arrays/arrow/array.py
@@ -461,6 +461,12 @@ def _cast_pointwise_result(self, values) -> ArrayLike:
result = result.astype(dtype) # type: ignore[assignment]
return result

elif pa.types.is_timestamp(arr.type) and pa.types.is_timestamp(
self._pa_array.type
):
if arr.type.tz == self._pa_array.type.tz:
arr = arr.cast(self._pa_array.type)

elif pa.types.is_date(arr.type) and pa.types.is_date(self._pa_array.type):
arr = arr.cast(self._pa_array.type)
elif pa.types.is_time(arr.type) and pa.types.is_time(self._pa_array.type):
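A small illustration of the guard added above, assuming pyarrow is installed: a pointwise result is cast back to the original timestamp type only when the time zones agree, so a unit mismatch is reconciled while a zone mismatch leaves the inferred type alone.

```python
import pyarrow as pa

orig = pa.timestamp("ns", tz="UTC")        # stands in for self._pa_array.type
same_zone = pa.timestamp("us", tz="UTC")
other_zone = pa.timestamp("ns", tz="US/Eastern")

assert pa.types.is_timestamp(orig)
assert same_zone.tz == orig.tz    # zones match: safe to cast back to orig
assert other_zone.tz != orig.tz   # zones differ: keep the inferred type
```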
4 changes: 3 additions & 1 deletion pandas/core/arrays/base.py
@@ -37,6 +37,7 @@
validate_insert_loc,
)

from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike
from pandas.core.dtypes.common import (
is_list_like,
is_scalar,
@@ -383,7 +384,8 @@ def _cast_pointwise_result(self, values) -> ArrayLike:
Cast the result of a pointwise operation (e.g. Series.map) to an
array, preserve dtype_backend if possible.
"""
values = np.asarray(values, dtype=object)
if not (isinstance(values, np.ndarray) and values.dtype == object):
values = construct_1d_object_array_from_listlike(values)
return lib.maybe_convert_objects(values, convert_non_numeric=True)

# ------------------------------------------------------------------------
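A note on the helper swapped in above: for nested list-likes, ``np.asarray(..., dtype=object)`` can produce a 2-D array, while ``construct_1d_object_array_from_listlike`` always returns a 1-D object array with one element per entry (and the new guard also skips the conversion when ``values`` is already an object ndarray). A quick sketch:

```python
import numpy as np
from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike

vals = [[1, 2], [3, 4]]

np.asarray(vals, dtype=object).shape                 # (2, 2): nested lists
                                                     # become extra dimensions
construct_1d_object_array_from_listlike(vals).shape  # (2,): one list per slot
```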
6 changes: 6 additions & 0 deletions pandas/core/arrays/masked.py
@@ -163,6 +163,12 @@ def _cast_pointwise_result(self, values) -> ArrayLike:
result._data, dtype=self.dtype.numpy_dtype
)
result = type(result)(new_data, result._mask)
elif lkind == "f" and rkind == "i":
result = cast(BaseMaskedArray, result)
new_data = maybe_downcast_to_dtype(
result._data, dtype=self.dtype.numpy_dtype
)
result = type(self)(new_data, result._mask)
return result

@classmethod
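The new branch leans on ``maybe_downcast_to_dtype``, which only converts when the cast is lossless; a sketch of that contract (assuming the internal helper keeps its current behavior):

```python
import numpy as np
from pandas.core.dtypes.cast import maybe_downcast_to_dtype

# integral floats downcast cleanly to the requested integer dtype
maybe_downcast_to_dtype(np.array([1.0, 2.0]), np.dtype("int64"))  # int64 [1, 2]

# a lossy cast is refused and the original float64 array comes back
maybe_downcast_to_dtype(np.array([1.5, 2.0]), np.dtype("int64"))
```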
10 changes: 10 additions & 0 deletions pandas/core/dtypes/dtypes.py
@@ -1598,6 +1598,16 @@ def itemsize(self) -> int:
"""
return self._dtype.itemsize

def _get_common_dtype(self, dtypes: list[DtypeObj]) -> DtypeObj | None:
from pandas.core.dtypes.cast import find_common_type

dtypes = [x.numpy_dtype if isinstance(x, NumpyEADtype) else x for x in dtypes]
if not all(isinstance(x, np.dtype) for x in dtypes):
return None

common_dtype = find_common_type(dtypes)
return NumpyEADtype(common_dtype)


class BaseMaskedDtype(ExtensionDtype):
"""
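Assuming the method lands as written above, ``NumpyEADtype`` now participates in common-dtype resolution by unwrapping to the underlying numpy dtypes and applying the usual promotion rules; a hedged sketch:

```python
import numpy as np
from pandas.core.dtypes.dtypes import NumpyEADtype

left = NumpyEADtype(np.dtype("int64"))
right = NumpyEADtype(np.dtype("float32"))

# int64 and float32 promote to float64 under numpy's rules, re-wrapped as a
# NumpyEADtype; any non-numpy dtype in the list makes the method return None.
common = left._get_common_dtype([left, right])
print(common)
```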
10 changes: 10 additions & 0 deletions pandas/core/frame.py
@@ -170,6 +170,7 @@
from pandas.core.indexing import (
check_bool_indexer,
check_dict_or_set_indexers,
infer_and_maybe_downcast,
)
from pandas.core.internals import BlockManager
from pandas.core.internals.construction import (
@@ -10942,6 +10943,15 @@ def _append_internal(
# test_append_empty_frame_to_series_with_dateutil_tz
row_df = row_df.infer_objects().rename_axis(index.names)

if len(row_df.columns) == len(self.columns):
# Try to retain our original dtype when doing the concat, GH#...
for i in range(len(self.columns)):
arr = self.iloc[:, i].array

casted = infer_and_maybe_downcast(arr, row_df.iloc[:, i]._values)

row_df.isetitem(i, casted)

from pandas.core.reshape.concat import concat

result = concat(
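A sketch of the path this touches, assuming row-expansion on a DataFrame routes through ``_append_internal``: when the appended row has the same columns, each column of the row is re-inferred against the existing column's array before the concat, so an extension dtype can survive the append.

```python
import pandas as pd

df = pd.DataFrame({"a": pd.array([1, 2], dtype="Int64")})
df.loc[2] = [3]       # expansion appends a row via the concat path

print(df["a"].dtype)  # Int64 retained rather than upcast
```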
90 changes: 69 additions & 21 deletions pandas/core/indexing.py
@@ -14,7 +14,10 @@
import numpy as np

from pandas._libs.indexing import NDFrameIndexerBase
from pandas._libs.lib import item_from_zerodim
from pandas._libs.lib import (
is_np_dtype,
item_from_zerodim,
)
from pandas.compat import PYPY
from pandas.compat._constants import (
REF_COUNT,
@@ -35,7 +38,7 @@

from pandas.core.dtypes.cast import (
can_hold_element,
maybe_promote,
maybe_downcast_to_dtype,
)
from pandas.core.dtypes.common import (
is_array_like,
@@ -50,7 +53,10 @@
is_sequence,
)
from pandas.core.dtypes.concat import concat_compat
from pandas.core.dtypes.dtypes import ExtensionDtype
from pandas.core.dtypes.dtypes import (
ExtensionDtype,
NumpyEADtype,
)
from pandas.core.dtypes.generic import (
ABCDataFrame,
ABCSeries,
@@ -59,7 +65,6 @@
construct_1d_array_from_inferred_fill_value,
infer_fill_value,
is_valid_na_for_dtype,
isna,
na_value_for_dtype,
)

@@ -87,6 +92,7 @@
)

from pandas._typing import (
ArrayLike,
Axis,
AxisInt,
T,
@@ -97,6 +103,7 @@
DataFrame,
Series,
)
from pandas.core.arrays import ExtensionArray

# "null slice"
_NS = slice(None, None)
@@ -934,14 +941,55 @@ def __setitem__(self, key, value) -> None:
else:
maybe_callable = com.apply_if_callable(key, self.obj)
key = self._raise_callable_usage(key, maybe_callable)
indexer = self._get_setitem_indexer(key)
orig_obj = self.obj[:].iloc[:0].copy() # copy to avoid extra refs
indexer = self._get_setitem_indexer(key) # may alter self.obj
self._has_valid_setitem_indexer(key)

iloc: _iLocIndexer = (
cast("_iLocIndexer", self) if self.name == "iloc" else self.obj.iloc
)
iloc._setitem_with_indexer(indexer, value, self.name)

self._post_expansion_casting(orig_obj)

def _post_expansion_casting(self, orig_obj) -> None:
if orig_obj.shape[0] != self.obj.shape[0]:
# setitem-with-expansion added new rows. Try to retain
# original dtypes
if orig_obj.ndim == 1:
if orig_obj.dtype != self.obj.dtype:
new_arr = infer_and_maybe_downcast(orig_obj.array, self.obj._values)
new_ser = self.obj._constructor(
new_arr, index=self.obj.index, name=self.obj.name
)
self.obj._mgr = new_ser._mgr
elif orig_obj.shape[1] == self.obj.shape[1]:
# We added rows but not columns
for i in range(orig_obj.shape[1]):
new_dtype = self.obj.dtypes.iloc[i]
orig_dtype = orig_obj.dtypes.iloc[i]
if new_dtype != orig_dtype:
new_arr = infer_and_maybe_downcast(
orig_obj.iloc[:, i].array, self.obj.iloc[:, i]._values
)
self.obj.isetitem(i, new_arr)

elif orig_obj.columns.is_unique and self.obj.columns.is_unique:
for col in orig_obj.columns:
new_dtype = self.obj[col].dtype
orig_dtype = orig_obj[col].dtype
if new_dtype != orig_dtype:
new_arr = infer_and_maybe_downcast(
orig_obj[col].array, self.obj[col]._values
)
self.obj[col] = new_arr
else:
# In these cases there isn't a one-to-one correspondence between
# old columns and new columns, which makes casting hairy.
# Punt on these for now, as there are no tests that get here
# as of 2025-09-29
pass

def _validate_key(self, key, axis: AxisInt) -> None:
"""
Ensure that key is valid for current indexer.
@@ -2189,9 +2237,10 @@ def _setitem_single_column(self, loc: int, value, plane_indexer) -> None:
# Columns F and G will initially be set to np.void.
# Here, we replace those temporary `np.void` columns with
# columns of the appropriate dtype, based on `value`.
self.obj.iloc[:, loc] = construct_1d_array_from_inferred_fill_value(
new_arr = construct_1d_array_from_inferred_fill_value(
value, len(self.obj)
)
self.obj.isetitem(loc, new_arr)
self.obj._mgr.column_setitem(loc, plane_indexer, value)

def _setitem_single_block(self, indexer, value, name: str) -> None:
@@ -2260,27 +2309,14 @@ def _setitem_with_indexer_missing(self, indexer, value):

# this preserves dtype of the value and of the object
if not is_scalar(value):
new_dtype = None
pass

elif is_valid_na_for_dtype(value, self.obj.dtype):
if not is_object_dtype(self.obj.dtype):
# Every NA value is suitable for object, no conversion needed
value = na_value_for_dtype(self.obj.dtype, compat=False)

new_dtype = maybe_promote(self.obj.dtype, value)[0]

elif isna(value):
new_dtype = None
elif not self.obj.empty and not is_object_dtype(self.obj.dtype):
# We should not cast, if we have object dtype because we can
# set timedeltas into object series
curr_dtype = self.obj.dtype
curr_dtype = getattr(curr_dtype, "numpy_dtype", curr_dtype)
new_dtype = maybe_promote(curr_dtype, value)[0]
else:
new_dtype = None

new_values = Series([value], dtype=new_dtype)._values
new_values = infer_and_maybe_downcast(self.obj.array, [value])

if len(self.obj._values):
# GH#22717 handle casting compatibility that np.concatenate
@@ -2808,3 +2844,15 @@ def check_dict_or_set_indexers(key) -> None:
raise TypeError(
"Passing a dict as an indexer is not supported. Use a list instead."
)


def infer_and_maybe_downcast(orig: ExtensionArray, new_arr) -> ArrayLike:
new_arr = orig._cast_pointwise_result(new_arr)

dtype = orig.dtype
if isinstance(dtype, NumpyEADtype):
dtype = dtype.numpy_dtype

if is_np_dtype(new_arr.dtype, "f") and is_np_dtype(dtype, "iu"):
new_arr = maybe_downcast_to_dtype(new_arr, dtype)
return new_arr
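The new module-level helper is the common entry point for the casting above: it lets the original array's ``_cast_pointwise_result`` re-infer the expanded values, then downcasts a float result back to an integer original when that is lossless. A hedged usage sketch of this internal API (subject to change):

```python
import pandas as pd
from pandas.core.indexing import infer_and_maybe_downcast

orig = pd.array([1, 2, 3], dtype="Int64")           # existing column values
new = infer_and_maybe_downcast(orig, [1, 2, 3, 4])  # values after expansion

print(new.dtype)  # re-inferred as Int64 rather than falling back to object
```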
13 changes: 13 additions & 0 deletions pandas/tests/extension/base/setitem.py
@@ -472,3 +472,16 @@ def test_setitem_2d_values(self, data):
df.loc[[0, 1], :] = df.loc[[1, 0], :].values
assert (df.loc[0, :] == original[1]).all()
assert (df.loc[1, :] == original[0]).all()

def test_loc_setitem_with_expansion_retains_ea_dtype(self, data):
# GH#32346
data = data.dropna().unique()
ser = pd.Series(data[:-1])
ser.loc[len(ser)] = data[-1]
expected = pd.Series(data)
tm.assert_series_equal(ser, expected)

df = pd.DataFrame({"A": data[:-1]})
df.loc[len(df)] = [data[-1]]
expected = expected.to_frame("A")
tm.assert_frame_equal(df, expected)
1 change: 0 additions & 1 deletion pandas/tests/frame/indexing/test_coercion.py
@@ -98,7 +98,6 @@ def test_26395(indexer_al):
indexer_al(df)["C", "D"] = "hello"


@pytest.mark.xfail(reason="unwanted upcast")
def test_15231():
df = DataFrame([[1, 2], [3, 4]], columns=["a", "b"])
df.loc[2] = Series({"a": 5, "b": 6})
8 changes: 4 additions & 4 deletions pandas/tests/indexing/multiindex/test_loc.py
@@ -956,10 +956,10 @@ def test_mi_add_cell_missing_row_non_unique():
result.loc["d", (1, "A")] = 3
expected = DataFrame(
[
[1.0, 2.0, 5.0, 6.0],
[3.0, 4.0, 7.0, 8.0],
[3.0, -1.0, -1, -1],
[3.0, np.nan, np.nan, np.nan],
[1, 2.0, 5.0, 6.0],
[3, 4.0, 7.0, 8.0],
[3, -1.0, -1, -1],
[3, np.nan, np.nan, np.nan],
],
index=["a", "a", "c", "d"],
columns=MultiIndex.from_product([[1, 2], ["A", "B"]]),
12 changes: 9 additions & 3 deletions pandas/tests/indexing/test_categorical.py
@@ -79,7 +79,7 @@ def test_loc_scalar(self, df):
tm.assert_frame_equal(df2, expected)

def test_loc_setitem_with_expansion_non_category(self, df):
# Setting-with-expansion with a new key "d" that is not among caegories
# Setting-with-expansion with a new key "d" that is not among categories
df.loc["a"] = 20

# Setting a new row on an existing column
@@ -88,21 +88,27 @@ def test_loc_setitem_with_expansion_non_category(self, df):
bidx3 = Index(list("aabbcad"), name="B")
expected3 = DataFrame(
{
"A": [20, 20, 2, 3, 4, 20, 10.0],
"A": [20, 20, 2, 3, 4, 20, 10],
},
index=Index(bidx3),
)
tm.assert_frame_equal(df3, expected3)

def test_loc_setitem_with_expansion_non_category_new_column(self, df):
# Setting-with-expansion with a new key "d" that is not among categories
# Setting a new row _and_ new column
df.loc["a"] = 20

df4 = df.copy()
df4.loc["d", "C"] = 10

bidx3 = Index(list("aabbcad"), name="B")
expected3 = DataFrame(
{
"A": [20, 20, 2, 3, 4, 20, np.nan],
"C": [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, 10],
},
index=Index(bidx3),
index=bidx3,
)
tm.assert_frame_equal(df4, expected3)
