1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
@@ -1031,6 +1031,7 @@ Indexing
- Bug in :meth:`Index.__getitem__` incorrectly raising with a 0-dim ``np.ndarray`` key (:issue:`55601`)
- Bug in adding new rows with :meth:`DataFrame.loc.__setitem__` or :class:`Series.loc.__setitem__` which failed to retain dtype on the object's index in some cases (:issue:`41626`)
- Bug in indexing on a :class:`DatetimeIndex` with a ``timestamp[pyarrow]`` dtype or on a :class:`TimedeltaIndex` with a ``duration[pyarrow]`` dtype (:issue:`62277`)
- Bugs in setitem-with-expansion failing to retain the original dtype in some cases when adding new rows (:issue:`32346`, :issue:`15231`, :issue:`47503`, :issue:`6485`, :issue:`25383`, :issue:`52235`, :issue:`17026`, :issue:`56010`)

Missing
^^^^^^^
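For readers skimming the release note, a minimal sketch of the failure mode these issues share (the values here are illustrative; the issues above list the exact affected cases):

```python
import pandas as pd

ser = pd.Series([1, 2], dtype="int64")
ser.loc[2] = 3  # setitem-with-expansion: adds a new row
# Previously this could silently upcast to float64 or object;
# with this fix the original int64 dtype is retained.
print(ser.dtype)  # int64
```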
6 changes: 6 additions & 0 deletions pandas/core/arrays/arrow/array.py
@@ -461,6 +461,12 @@ def _cast_pointwise_result(self, values) -> ArrayLike:
result = result.astype(dtype) # type: ignore[assignment]
return result

elif pa.types.is_timestamp(arr.type) and pa.types.is_timestamp(
self._pa_array.type
):
if arr.type.tz == self._pa_array.type.tz:
arr = arr.cast(self._pa_array.type)

elif pa.types.is_date(arr.type) and pa.types.is_date(self._pa_array.type):
arr = arr.cast(self._pa_array.type)
elif pa.types.is_time(arr.type) and pa.types.is_time(self._pa_array.type):
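The new branch casts a pointwise timestamp result back to the source type only when the timezones agree. A standalone pyarrow sketch of that guard (the arrays here are illustrative):

```python
import pyarrow as pa

result = pa.array([1, 2], type=pa.timestamp("us", tz="UTC"))
original_type = pa.timestamp("ns", tz="UTC")

# Mirror the guard above: both sides must be timestamps with the same tz
if pa.types.is_timestamp(result.type) and result.type.tz == original_type.tz:
    result = result.cast(original_type)
print(result.type)  # timestamp[ns, tz=UTC]
```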
4 changes: 3 additions & 1 deletion pandas/core/arrays/base.py
@@ -37,6 +37,7 @@
validate_insert_loc,
)

from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike
from pandas.core.dtypes.common import (
is_list_like,
is_scalar,
@@ -383,7 +384,8 @@ def _cast_pointwise_result(self, values) -> ArrayLike:
Cast the result of a pointwise operation (e.g. Series.map) to an
array, preserve dtype_backend if possible.
"""
values = np.asarray(values, dtype=object)
if not (isinstance(values, np.ndarray) and values.dtype == object):
values = construct_1d_object_array_from_listlike(values)
return lib.maybe_convert_objects(values, convert_non_numeric=True)

# ------------------------------------------------------------------------
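Swapping `np.asarray` for `construct_1d_object_array_from_listlike` matters when the pointwise results are themselves list-likes: `np.asarray` folds nested sequences into extra dimensions, while the helper builds a genuine 1-D object array. A quick illustration of the difference:

```python
import numpy as np

values = [[1, 2], [3, 4]]

# np.asarray infers a second dimension from the nested lists
print(np.asarray(values, dtype=object).shape)  # (2, 2)

# Allocating the object array first keeps one list per element
result = np.empty(len(values), dtype=object)
result[:] = values
print(result.shape)  # (2,)
```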
6 changes: 6 additions & 0 deletions pandas/core/arrays/masked.py
@@ -163,6 +163,12 @@ def _cast_pointwise_result(self, values) -> ArrayLike:
result._data, dtype=self.dtype.numpy_dtype
)
result = type(result)(new_data, result._mask)
elif lkind == "f" and rkind == "i":
result = cast(BaseMaskedArray, result)
new_data = maybe_downcast_to_dtype(
result._data, dtype=self.dtype.numpy_dtype
)
result = type(self)(new_data, result._mask)
return result

@classmethod
11 changes: 11 additions & 0 deletions pandas/core/dtypes/dtypes.py
@@ -1598,6 +1598,17 @@ def itemsize(self) -> int:
"""
return self._dtype.itemsize

def _get_common_dtype(self, dtypes: list[DtypeObj]) -> DtypeObj | None:
from pandas.core.dtypes.cast import find_common_type

dtypes = [x.numpy_dtype if isinstance(x, NumpyEADtype) else x for x in dtypes]
if not all(isinstance(x, np.dtype) for x in dtypes):
return None

common_dtype = find_common_type(dtypes)
# error: Argument 1 to "NumpyEADtype" has incompatible type
return NumpyEADtype(common_dtype) # type: ignore[arg-type]


class BaseMaskedDtype(ExtensionDtype):
"""
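`_get_common_dtype` unwraps `NumpyEADtype` entries to their raw numpy dtypes, bails out if anything non-numpy remains, and rewraps the promoted result. For plain `np.dtype` inputs the promotion reduces to numpy's own rules, e.g.:

```python
import numpy as np

# Pairwise promotion as numpy applies it
print(np.promote_types(np.dtype("int64"), np.dtype("float32")))  # float64
print(np.promote_types(np.dtype("int8"), np.dtype("uint8")))     # int16
```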
10 changes: 10 additions & 0 deletions pandas/core/frame.py
@@ -170,6 +170,7 @@
from pandas.core.indexing import (
check_bool_indexer,
check_dict_or_set_indexers,
infer_and_maybe_downcast,
)
from pandas.core.internals import BlockManager
from pandas.core.internals.construction import (
@@ -10942,6 +10943,15 @@ def _append_internal(
# test_append_empty_frame_to_series_with_dateutil_tz
row_df = row_df.infer_objects().rename_axis(index.names)

if len(row_df.columns) == len(self.columns):
# Try to retain our original dtype when doing the concat, GH#...
for i in range(len(self.columns)):
arr = self.iloc[:, i].array

casted = infer_and_maybe_downcast(arr, row_df.iloc[:, i]._values)

row_df.isetitem(i, casted)

from pandas.core.reshape.concat import concat

result = concat(
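The per-column pre-cast in `_append_internal` exists because `concat` promotes to the widest common dtype, so a row arriving as float data would otherwise upcast an integer column. A minimal demonstration of the underlying promotion:

```python
import pandas as pd

df = pd.DataFrame({"a": [1, 2]})   # int64
row = pd.DataFrame({"a": [3.0]})   # row values often arrive as float64
# Without a pre-cast, concat promotes the whole column
print(pd.concat([df, row])["a"].dtype)  # float64
```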
93 changes: 72 additions & 21 deletions pandas/core/indexing.py
@@ -14,7 +14,10 @@
import numpy as np

from pandas._libs.indexing import NDFrameIndexerBase
from pandas._libs.lib import item_from_zerodim
from pandas._libs.lib import (
is_np_dtype,
item_from_zerodim,
)
from pandas.compat import PYPY
from pandas.compat._constants import (
REF_COUNT,
@@ -35,7 +38,7 @@

from pandas.core.dtypes.cast import (
can_hold_element,
maybe_promote,
maybe_downcast_to_dtype,
)
from pandas.core.dtypes.common import (
is_array_like,
@@ -50,7 +53,10 @@
is_sequence,
)
from pandas.core.dtypes.concat import concat_compat
from pandas.core.dtypes.dtypes import ExtensionDtype
from pandas.core.dtypes.dtypes import (
ExtensionDtype,
NumpyEADtype,
)
from pandas.core.dtypes.generic import (
ABCDataFrame,
ABCSeries,
@@ -59,7 +65,6 @@
construct_1d_array_from_inferred_fill_value,
infer_fill_value,
is_valid_na_for_dtype,
isna,
na_value_for_dtype,
)

@@ -87,6 +92,7 @@
)

from pandas._typing import (
ArrayLike,
Axis,
AxisInt,
T,
@@ -97,6 +103,7 @@
DataFrame,
Series,
)
from pandas.core.arrays import ExtensionArray

# "null slice"
_NS = slice(None, None)
@@ -934,14 +941,55 @@ def __setitem__(self, key, value) -> None:
else:
maybe_callable = com.apply_if_callable(key, self.obj)
key = self._raise_callable_usage(key, maybe_callable)
indexer = self._get_setitem_indexer(key)
orig_obj = self.obj[:].iloc[:0].copy() # copy to avoid extra refs
indexer = self._get_setitem_indexer(key) # may alter self.obj
self._has_valid_setitem_indexer(key)

iloc: _iLocIndexer = (
cast("_iLocIndexer", self) if self.name == "iloc" else self.obj.iloc
)
iloc._setitem_with_indexer(indexer, value, self.name)

self._post_expansion_casting(orig_obj)

def _post_expansion_casting(self, orig_obj) -> None:
if orig_obj.shape[0] != self.obj.shape[0]:
# setitem-with-expansion added new rows. Try to retain
# original dtypes
if orig_obj.ndim == 1:
if orig_obj.dtype != self.obj.dtype:
new_arr = infer_and_maybe_downcast(orig_obj.array, self.obj._values)
new_ser = self.obj._constructor(
new_arr, index=self.obj.index, name=self.obj.name
)
self.obj._mgr = new_ser._mgr
elif orig_obj.shape[1] == self.obj.shape[1]:
# We added rows but not columns
for i in range(orig_obj.shape[1]):
new_dtype = self.obj.dtypes.iloc[i]
orig_dtype = orig_obj.dtypes.iloc[i]
if new_dtype != orig_dtype:
new_arr = infer_and_maybe_downcast(
orig_obj.iloc[:, i].array, self.obj.iloc[:, i]._values
)
self.obj.isetitem(i, new_arr)

elif orig_obj.columns.is_unique and self.obj.columns.is_unique:
for col in orig_obj.columns:
new_dtype = self.obj[col].dtype
orig_dtype = orig_obj[col].dtype
if new_dtype != orig_dtype:
new_arr = infer_and_maybe_downcast(
orig_obj[col].array, self.obj[col]._values
)
self.obj[col] = new_arr
else:
# In these cases there isn't a one-to-one correspondence between
# old columns and new columns, which makes casting hairy.
# Punt on these for now, as there are no tests that get here
# as of 2025-09-29
pass

def _validate_key(self, key, axis: AxisInt) -> None:
"""
Ensure that key is valid for current indexer.
@@ -2189,9 +2237,10 @@ def _setitem_single_column(self, loc: int, value, plane_indexer) -> None:
# Columns F and G will initially be set to np.void.
# Here, we replace those temporary `np.void` columns with
# columns of the appropriate dtype, based on `value`.
self.obj.iloc[:, loc] = construct_1d_array_from_inferred_fill_value(
new_arr = construct_1d_array_from_inferred_fill_value(
value, len(self.obj)
)
self.obj.isetitem(loc, new_arr)
self.obj._mgr.column_setitem(loc, plane_indexer, value)

def _setitem_single_block(self, indexer, value, name: str) -> None:
@@ -2260,27 +2309,14 @@ def _setitem_with_indexer_missing(self, indexer, value):

# this preserves dtype of the value and of the object
if not is_scalar(value):
new_dtype = None
pass

elif is_valid_na_for_dtype(value, self.obj.dtype):
if not is_object_dtype(self.obj.dtype):
# Every NA value is suitable for object, no conversion needed
value = na_value_for_dtype(self.obj.dtype, compat=False)

new_dtype = maybe_promote(self.obj.dtype, value)[0]

elif isna(value):
new_dtype = None
elif not self.obj.empty and not is_object_dtype(self.obj.dtype):
# We should not cast, if we have object dtype because we can
# set timedeltas into object series
curr_dtype = self.obj.dtype
curr_dtype = getattr(curr_dtype, "numpy_dtype", curr_dtype)
new_dtype = maybe_promote(curr_dtype, value)[0]
else:
new_dtype = None

new_values = Series([value], dtype=new_dtype)._values
new_values = infer_and_maybe_downcast(self.obj.array, [value])

if len(self.obj._values):
# GH#22717 handle casting compatibility that np.concatenate
@@ -2808,3 +2844,18 @@ def check_dict_or_set_indexers(key) -> None:
raise TypeError(
"Passing a dict as an indexer is not supported. Use a list instead."
)


def infer_and_maybe_downcast(orig: ExtensionArray, new_arr) -> ArrayLike:
new_arr = orig._cast_pointwise_result(new_arr)

dtype = orig.dtype
if isinstance(dtype, NumpyEADtype):
# error: Incompatible types in assignment (expression has
# type "dtype[Any]", variable has type "ExtensionDtype")
# [assignment]
dtype = dtype.numpy_dtype # type: ignore[assignment]

if is_np_dtype(new_arr.dtype, "f") and is_np_dtype(dtype, "iu"):
new_arr = maybe_downcast_to_dtype(new_arr, dtype)
return new_arr
13 changes: 13 additions & 0 deletions pandas/tests/extension/base/setitem.py
@@ -472,3 +472,16 @@ def test_setitem_2d_values(self, data):
df.loc[[0, 1], :] = df.loc[[1, 0], :].values
assert (df.loc[0, :] == original[1]).all()
assert (df.loc[1, :] == original[0]).all()

def test_loc_setitem_with_expansion_retains_ea_dtype(self, data):
# GH#32346
data = data.dropna().unique()
ser = pd.Series(data[:-1])
ser.loc[len(ser)] = data[-1]
expected = pd.Series(data)
tm.assert_series_equal(ser, expected)

df = pd.DataFrame({"A": data[:-1]})
df.loc[len(df)] = [data[-1]]
expected = expected.to_frame("A")
tm.assert_frame_equal(df, expected)
8 changes: 8 additions & 0 deletions pandas/tests/extension/test_string.py
@@ -265,6 +265,14 @@ def test_loc_setitem_with_expansion_preserves_ea_index_dtype(
request.applymarker(mark)
super().test_loc_setitem_with_expansion_preserves_ea_index_dtype(data)

def test_loc_setitem_with_expansion_retains_ea_dtype(
self, data, using_infer_string, request
):
if not using_infer_string and data.dtype.storage == "python":
mark = pytest.mark.xfail(reason="Gives object")
request.applymarker(mark)
super().test_loc_setitem_with_expansion_retains_ea_dtype(data)


class Test2DCompat(base.Dim2CompatTests):
@pytest.fixture(autouse=True)
1 change: 0 additions & 1 deletion pandas/tests/frame/indexing/test_coercion.py
@@ -98,7 +98,6 @@ def test_26395(indexer_al):
indexer_al(df)["C", "D"] = "hello"


@pytest.mark.xfail(reason="unwanted upcast")
def test_15231():
df = DataFrame([[1, 2], [3, 4]], columns=["a", "b"])
df.loc[2] = Series({"a": 5, "b": 6})
8 changes: 4 additions & 4 deletions pandas/tests/indexing/multiindex/test_loc.py
@@ -956,10 +956,10 @@ def test_mi_add_cell_missing_row_non_unique():
result.loc["d", (1, "A")] = 3
expected = DataFrame(
[
[1.0, 2.0, 5.0, 6.0],
[3.0, 4.0, 7.0, 8.0],
[3.0, -1.0, -1, -1],
[3.0, np.nan, np.nan, np.nan],
[1, 2.0, 5.0, 6.0],
[3, 4.0, 7.0, 8.0],
[3, -1.0, -1, -1],
[3, np.nan, np.nan, np.nan],
],
index=["a", "a", "c", "d"],
columns=MultiIndex.from_product([[1, 2], ["A", "B"]]),
12 changes: 9 additions & 3 deletions pandas/tests/indexing/test_categorical.py
@@ -79,7 +79,7 @@ def test_loc_scalar(self, df):
tm.assert_frame_equal(df2, expected)

def test_loc_setitem_with_expansion_non_category(self, df):
# Setting-with-expansion with a new key "d" that is not among caegories
# Setting-with-expansion with a new key "d" that is not among categories
df.loc["a"] = 20

# Setting a new row on an existing column
@@ -88,21 +88,27 @@ def test_loc_setitem_with_expansion_non_category(self, df):
bidx3 = Index(list("aabbcad"), name="B")
expected3 = DataFrame(
{
"A": [20, 20, 2, 3, 4, 20, 10.0],
"A": [20, 20, 2, 3, 4, 20, 10],
},
index=Index(bidx3),
)
tm.assert_frame_equal(df3, expected3)

def test_loc_setitem_with_expansion_non_category_new_column(self, df):
# Setting-with-expansion with a new key "d" that is not among categories
# Setting a new row _and_ new column
df.loc["a"] = 20

df4 = df.copy()
df4.loc["d", "C"] = 10

bidx3 = Index(list("aabbcad"), name="B")
expected3 = DataFrame(
{
"A": [20, 20, 2, 3, 4, 20, np.nan],
"C": [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, 10],
},
index=Index(bidx3),
index=bidx3,
)
tm.assert_frame_equal(df4, expected3)
