From fd5309abc2415786ca5e984224614f64ddfd6d31 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 30 Sep 2025 12:52:48 -0700 Subject: [PATCH 1/8] BUG: setitem-with-expansion unwanted casting --- doc/source/whatsnew/v3.0.0.rst | 1 + pandas/core/arrays/arrow/array.py | 6 + pandas/core/arrays/base.py | 4 +- pandas/core/arrays/masked.py | 6 + pandas/core/dtypes/dtypes.py | 10 ++ pandas/core/frame.py | 10 ++ pandas/core/indexing.py | 90 ++++++++++---- pandas/tests/extension/base/setitem.py | 13 ++ pandas/tests/frame/indexing/test_coercion.py | 1 - pandas/tests/indexing/multiindex/test_loc.py | 8 +- pandas/tests/indexing/test_categorical.py | 12 +- pandas/tests/indexing/test_loc.py | 121 ++++++++++++++++++- pandas/tests/indexing/test_partial.py | 34 ++---- pandas/tests/series/indexing/test_setitem.py | 13 +- 14 files changed, 264 insertions(+), 65 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 91ce855f03b08..7edb722a0aeb7 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -1031,6 +1031,7 @@ Indexing - Bug in :meth:`Index.__getitem__` incorrectly raising with a 0-dim ``np.ndarray`` key (:issue:`55601`) - Bug in adding new rows with :meth:`DataFrame.loc.__setitem__` or :class:`Series.loc.__setitem__` which failed to retain dtype on the object's index in some cases (:issue:`41626`) - Bug in indexing on a :class:`DatetimeIndex` with a ``timestamp[pyarrow]`` dtype or on a :class:`TimedeltaIndex` with a ``duration[pyarrow]`` dtype (:issue:`62277`) +- Bugs in setitem-with-expansion when adding new rows failing to keep the original dtype in some cases (:issue:`32346`, :issue:`15231`, :issue:`47503`, :issue:`6485`, :issue:`25383`, :issue:`52235`, :issue:`17026`, :issue:`56010`) Missing ^^^^^^^ diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index b8dd44a58e8ec..8be0db8a2cac9 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -461,6 +461,12 @@ def _cast_pointwise_result(self, values) -> ArrayLike: result = result.astype(dtype) # type: ignore[assignment] return result + elif pa.types.is_timestamp(arr.type) and pa.types.is_timestamp( + self._pa_array.type + ): + if arr.type.tz == self._pa_array.type.tz: + arr = arr.cast(self._pa_array.type) + elif pa.types.is_date(arr.type) and pa.types.is_date(self._pa_array.type): arr = arr.cast(self._pa_array.type) elif pa.types.is_time(arr.type) and pa.types.is_time(self._pa_array.type): diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index fcd7611b3e6b5..a7391682f870b 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -37,6 +37,7 @@ validate_insert_loc, ) +from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike from pandas.core.dtypes.common import ( is_list_like, is_scalar, @@ -383,7 +384,8 @@ def _cast_pointwise_result(self, values) -> ArrayLike: Cast the result of a pointwise operation (e.g. Series.map) to an array, preserve dtype_backend if possible. """ - values = np.asarray(values, dtype=object) + if not (isinstance(values, np.ndarray) and values.dtype == object): + values = construct_1d_object_array_from_listlike(values) return lib.maybe_convert_objects(values, convert_non_numeric=True) # ------------------------------------------------------------------------ diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index d20dc87259a37..4ac62ef7e3898 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -163,6 +163,12 @@ def _cast_pointwise_result(self, values) -> ArrayLike: result._data, dtype=self.dtype.numpy_dtype ) result = type(result)(new_data, result._mask) + elif lkind == "f" and rkind == "i": + result = cast(BaseMaskedArray, result) + new_data = maybe_downcast_to_dtype( + result._data, dtype=self.dtype.numpy_dtype + ) + result = type(self)(new_data, result._mask) return result @classmethod diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 1e6761b2e1db0..20443eff32ef2 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -1598,6 +1598,16 @@ def itemsize(self) -> int: """ return self._dtype.itemsize + def _get_common_dtype(self, dtypes: list[DtypeObj]) -> DtypeObj | None: + from pandas.core.dtypes.cast import find_common_type + + dtypes = [x.numpy_dtype if isinstance(x, NumpyEADtype) else x for x in dtypes] + if not all(isinstance(x, np.dtype) for x in dtypes): + return None + + common_dtype = find_common_type(dtypes) + return NumpyEADtype(common_dtype) + class BaseMaskedDtype(ExtensionDtype): """ diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 91f5cd1679a61..e16bbcae735c2 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -170,6 +170,7 @@ from pandas.core.indexing import ( check_bool_indexer, check_dict_or_set_indexers, + infer_and_maybe_downcast, ) from pandas.core.internals import BlockManager from pandas.core.internals.construction import ( @@ -10942,6 +10943,15 @@ def _append_internal( # test_append_empty_frame_to_series_with_dateutil_tz row_df = row_df.infer_objects().rename_axis(index.names) + if len(row_df.columns) == len(self.columns): + # Try to retain our original dtype when doing the concat, GH#... + for i in range(len(self.columns)): + arr = self.iloc[:, i].array + + casted = infer_and_maybe_downcast(arr, row_df.iloc[:, i]._values) + + row_df.isetitem(i, casted) + from pandas.core.reshape.concat import concat result = concat( diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 98eb6034b6289..726b5b2ed32c5 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -14,7 +14,10 @@ import numpy as np from pandas._libs.indexing import NDFrameIndexerBase -from pandas._libs.lib import item_from_zerodim +from pandas._libs.lib import ( + is_np_dtype, + item_from_zerodim, +) from pandas.compat import PYPY from pandas.compat._constants import ( REF_COUNT, @@ -35,7 +38,7 @@ from pandas.core.dtypes.cast import ( can_hold_element, - maybe_promote, + maybe_downcast_to_dtype, ) from pandas.core.dtypes.common import ( is_array_like, @@ -50,7 +53,10 @@ is_sequence, ) from pandas.core.dtypes.concat import concat_compat -from pandas.core.dtypes.dtypes import ExtensionDtype +from pandas.core.dtypes.dtypes import ( + ExtensionDtype, + NumpyEADtype, +) from pandas.core.dtypes.generic import ( ABCDataFrame, ABCSeries, @@ -59,7 +65,6 @@ construct_1d_array_from_inferred_fill_value, infer_fill_value, is_valid_na_for_dtype, - isna, na_value_for_dtype, ) @@ -87,6 +92,7 @@ ) from pandas._typing import ( + ArrayLike, Axis, AxisInt, T, @@ -97,6 +103,7 @@ DataFrame, Series, ) + from pandas.core.arrays import ExtensionArray # "null slice" _NS = slice(None, None) @@ -934,7 +941,8 @@ def __setitem__(self, key, value) -> None: else: maybe_callable = com.apply_if_callable(key, self.obj) key = self._raise_callable_usage(key, maybe_callable) - indexer = self._get_setitem_indexer(key) + orig_obj = self.obj[:].iloc[:0].copy() # copy to avoid extra refs + indexer = self._get_setitem_indexer(key) # may alter self.obj self._has_valid_setitem_indexer(key) iloc: _iLocIndexer = ( @@ -942,6 +950,46 @@ def __setitem__(self, key, value) -> None: ) iloc._setitem_with_indexer(indexer, value, self.name) + self._post_expansion_casting(orig_obj) + + def _post_expansion_casting(self, orig_obj) -> None: + if orig_obj.shape[0] != self.obj.shape[0]: + # setitem-with-expansion added new rows. Try to retain + # original dtypes + if orig_obj.ndim == 1: + if orig_obj.dtype != self.obj.dtype: + new_arr = infer_and_maybe_downcast(orig_obj.array, self.obj._values) + new_ser = self.obj._constructor( + new_arr, index=self.obj.index, name=self.obj.name + ) + self.obj._mgr = new_ser._mgr + elif orig_obj.shape[1] == self.obj.shape[1]: + # We added rows but not columns + for i in range(orig_obj.shape[1]): + new_dtype = self.obj.dtypes.iloc[i] + orig_dtype = orig_obj.dtypes.iloc[i] + if new_dtype != orig_dtype: + new_arr = infer_and_maybe_downcast( + orig_obj.iloc[:, i].array, self.obj.iloc[:, i]._values + ) + self.obj.isetitem(i, new_arr) + + elif orig_obj.columns.is_unique and self.obj.columns.is_unique: + for col in orig_obj.columns: + new_dtype = self.obj[col].dtype + orig_dtype = orig_obj[col].dtype + if new_dtype != orig_dtype: + new_arr = infer_and_maybe_downcast( + orig_obj[col].array, self.obj[col]._values + ) + self.obj[col] = new_arr + else: + # In these cases there isn't a one-to-one correspondence between + # old columns and new columns, which makes casting hairy. + # Punt on these for now, as there are no tests that get here + # as of 2025-09-29 + pass + def _validate_key(self, key, axis: AxisInt) -> None: """ Ensure that key is valid for current indexer. @@ -2189,9 +2237,10 @@ def _setitem_single_column(self, loc: int, value, plane_indexer) -> None: # Columns F and G will initially be set to np.void. # Here, we replace those temporary `np.void` columns with # columns of the appropriate dtype, based on `value`. - self.obj.iloc[:, loc] = construct_1d_array_from_inferred_fill_value( + new_arr = construct_1d_array_from_inferred_fill_value( value, len(self.obj) ) + self.obj.isetitem(loc, new_arr) self.obj._mgr.column_setitem(loc, plane_indexer, value) def _setitem_single_block(self, indexer, value, name: str) -> None: @@ -2260,27 +2309,14 @@ def _setitem_with_indexer_missing(self, indexer, value): # this preserves dtype of the value and of the object if not is_scalar(value): - new_dtype = None + pass elif is_valid_na_for_dtype(value, self.obj.dtype): if not is_object_dtype(self.obj.dtype): # Every NA value is suitable for object, no conversion needed value = na_value_for_dtype(self.obj.dtype, compat=False) - new_dtype = maybe_promote(self.obj.dtype, value)[0] - - elif isna(value): - new_dtype = None - elif not self.obj.empty and not is_object_dtype(self.obj.dtype): - # We should not cast, if we have object dtype because we can - # set timedeltas into object series - curr_dtype = self.obj.dtype - curr_dtype = getattr(curr_dtype, "numpy_dtype", curr_dtype) - new_dtype = maybe_promote(curr_dtype, value)[0] - else: - new_dtype = None - - new_values = Series([value], dtype=new_dtype)._values + new_values = infer_and_maybe_downcast(self.obj.array, [value]) if len(self.obj._values): # GH#22717 handle casting compatibility that np.concatenate @@ -2808,3 +2844,15 @@ def check_dict_or_set_indexers(key) -> None: raise TypeError( "Passing a dict as an indexer is not supported. Use a list instead." ) + + +def infer_and_maybe_downcast(orig: ExtensionArray, new_arr) -> ArrayLike: + new_arr = orig._cast_pointwise_result(new_arr) + + dtype = orig.dtype + if isinstance(dtype, NumpyEADtype): + dtype = dtype.numpy_dtype + + if is_np_dtype(new_arr.dtype, "f") and is_np_dtype(dtype, "iu"): + new_arr = maybe_downcast_to_dtype(new_arr, dtype) + return new_arr diff --git a/pandas/tests/extension/base/setitem.py b/pandas/tests/extension/base/setitem.py index b273c9b9f092a..b571a86fd595c 100644 --- a/pandas/tests/extension/base/setitem.py +++ b/pandas/tests/extension/base/setitem.py @@ -472,3 +472,16 @@ def test_setitem_2d_values(self, data): df.loc[[0, 1], :] = df.loc[[1, 0], :].values assert (df.loc[0, :] == original[1]).all() assert (df.loc[1, :] == original[0]).all() + + def test_loc_setitem_with_expansion_retains_ea_dtype(self, data): + # GH#32346 + data = data.dropna().unique() + ser = pd.Series(data[:-1]) + ser.loc[len(ser)] = data[-1] + expected = pd.Series(data) + tm.assert_series_equal(ser, expected) + + df = pd.DataFrame({"A": data[:-1]}) + df.loc[len(df)] = [data[-1]] + expected = expected.to_frame("A") + tm.assert_frame_equal(df, expected) diff --git a/pandas/tests/frame/indexing/test_coercion.py b/pandas/tests/frame/indexing/test_coercion.py index 472bfb7772a80..07e64fbced22f 100644 --- a/pandas/tests/frame/indexing/test_coercion.py +++ b/pandas/tests/frame/indexing/test_coercion.py @@ -98,7 +98,6 @@ def test_26395(indexer_al): indexer_al(df)["C", "D"] = "hello" -@pytest.mark.xfail(reason="unwanted upcast") def test_15231(): df = DataFrame([[1, 2], [3, 4]], columns=["a", "b"]) df.loc[2] = Series({"a": 5, "b": 6}) diff --git a/pandas/tests/indexing/multiindex/test_loc.py b/pandas/tests/indexing/multiindex/test_loc.py index 51c8d5b3569f5..2605d65a289a5 100644 --- a/pandas/tests/indexing/multiindex/test_loc.py +++ b/pandas/tests/indexing/multiindex/test_loc.py @@ -956,10 +956,10 @@ def test_mi_add_cell_missing_row_non_unique(): result.loc["d", (1, "A")] = 3 expected = DataFrame( [ - [1.0, 2.0, 5.0, 6.0], - [3.0, 4.0, 7.0, 8.0], - [3.0, -1.0, -1, -1], - [3.0, np.nan, np.nan, np.nan], + [1, 2.0, 5.0, 6.0], + [3, 4.0, 7.0, 8.0], + [3, -1.0, -1, -1], + [3, np.nan, np.nan, np.nan], ], index=["a", "a", "c", "d"], columns=MultiIndex.from_product([[1, 2], ["A", "B"]]), diff --git a/pandas/tests/indexing/test_categorical.py b/pandas/tests/indexing/test_categorical.py index a31f463d0b17e..041533691ed89 100644 --- a/pandas/tests/indexing/test_categorical.py +++ b/pandas/tests/indexing/test_categorical.py @@ -79,7 +79,7 @@ def test_loc_scalar(self, df): tm.assert_frame_equal(df2, expected) def test_loc_setitem_with_expansion_non_category(self, df): - # Setting-with-expansion with a new key "d" that is not among caegories + # Setting-with-expansion with a new key "d" that is not among categories df.loc["a"] = 20 # Setting a new row on an existing column @@ -88,21 +88,27 @@ def test_loc_setitem_with_expansion_non_category(self, df): bidx3 = Index(list("aabbcad"), name="B") expected3 = DataFrame( { - "A": [20, 20, 2, 3, 4, 20, 10.0], + "A": [20, 20, 2, 3, 4, 20, 10], }, index=Index(bidx3), ) tm.assert_frame_equal(df3, expected3) + def test_loc_setitem_with_expansion_non_category_new_column(self, df): + # Setting-with-expansion with a new key "d" that is not among categories # Setting a new row _and_ new column + df.loc["a"] = 20 + df4 = df.copy() df4.loc["d", "C"] = 10 + + bidx3 = Index(list("aabbcad"), name="B") expected3 = DataFrame( { "A": [20, 20, 2, 3, 4, 20, np.nan], "C": [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, 10], }, - index=Index(bidx3), + index=bidx3, ) tm.assert_frame_equal(df4, expected3) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 8e4845a72ec35..6ff2d21b51a3f 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -2149,7 +2149,7 @@ def test_loc_setitem_with_expansion_nonunique_index(self, index, has_ref): assert exp_index[-1][0] == key else: assert exp_index[-1] == key - exp_data = np.arange(N + 1).astype(np.float64) + exp_data = np.arange(N + 1) expected = DataFrame(exp_data, index=exp_index) # Add new row, but no new columns @@ -2208,6 +2208,125 @@ def test_loc_setitem_ea_not_full_column(self): assert expected.dtypes["B"] == val.dtype tm.assert_frame_equal(df, expected) + def test_loc_setitem_expansion_both(self): + # GH#47503 + ct_arr = np.array([[70, 150], [66, 81]]) + df = DataFrame( + data=ct_arr, + columns=["Outstanding", "Not Outstanding"], + index=["Bank", "Credit Union"], + ) + ctot = df.copy() + + df.loc["Total", :] = ctot.sum(axis=0) + df.loc[:, "Total"] = ctot.sum(axis=1) + + expected = DataFrame( + { + "Outstanding": [70, 66, 136], + "Not Outstanding": [150, 81, 231], + "Total": [220, 147, np.nan], + }, + index=["Bank", "Credit Union", "Total"], + ) + tm.assert_frame_equal(df, expected) + + def test_loc_setitem_with_expansion_dtype_retention(self): + # GH#6485 + df = DataFrame({"a": range(10)}, dtype="i4") + df.loc[10] = 10 + + expected = DataFrame({"a": range(11)}, dtype="i4") + tm.assert_frame_equal(df, expected) + + ser = df["a"].iloc[:-1] + ser.loc[10] = 10 + tm.assert_series_equal(ser, expected["a"]) + + def test_loc_setitem_with_expansion_dtype_retention_empty(self): + # GH#6485 + df = DataFrame({"a": Series([], dtype="i8")}) + df.loc[0, "a"] = 3 + + expected = DataFrame({"a": [3]}, dtype="i8") + tm.assert_frame_equal(df, expected) + + ser = df["a"].iloc[:-1] + ser.loc[0] = 3 + tm.assert_series_equal(ser, expected["a"]) + + def test_loc_setitem_with_expansion_multi_column(self): + # GH#15231 + df = DataFrame([[1, 2], [3, 4]], columns=["a", "b"]) + df.loc[2] = Series({"a": 7}) + + expected = DataFrame({"a": [1, 3, 7], "b": [2, 4, np.nan]}) + tm.assert_frame_equal(df, expected) + + def test_loc_setitem_with_expansion_categorical(self): + # GH#25383 + df = DataFrame( + { + "reg": [0, 1, 2], + "cat": Categorical(["a", "b", "b"], categories=["a", "b", "c", "d"]), + } + ) + df.loc[3] = (3, "c") + + expected = DataFrame( + { + "reg": [0, 1, 2, 3], + "cat": Categorical( + ["a", "b", "b", "c"], categories=["a", "b", "c", "d"] + ), + } + ) + tm.assert_frame_equal(df, expected) + + @pytest.mark.xfail( + reason="Pending decision on whether to special-case empty cases." + ) + def test_loc_setitem_with_expansion_empty_stays_object(self): + # GH#31805 + df = DataFrame(columns=["A", "B", "C"]) + df.loc[0] = [2015, 1, 7.0] + + expected = DataFrame({"A": [2015], "B": [1], "C": [7.0]}, dtype=object) + tm.assert_frame_equal(df, expected) + + def test_loc_setitem_with_expansion_retains_ea_dtype(self): + # GH#32346 + ser = Series([1, 2, 3], dtype="Int64") + ser.loc[3] = 4 + expected = Series([1, 2, 3, 4], dtype="Int64") + tm.assert_series_equal(ser, expected) + + @td.skip_if_no("pyarrow") + def test_setitem_with_expansion_pyarrow_scalar_retains_dtype(self): + # GH#52235 + ts1 = Timestamp("2025-09-15") + ts2 = Timestamp("2025-09-16") + ser = Series([ts1], dtype="date32[pyarrow]") + + import pyarrow as pa + + item = pa.scalar(ts2, type="date32") + + ser[1] = item + + expected = Series([ts1, ts2], dtype="date32[pyarrow]") + tm.assert_series_equal(ser, expected) + + def test_loc_setitem_with_expansion_multiindex_retains_dtypes(self): + # GH#17026 + mi = MultiIndex.from_tuples([("a", "c"), ("b", "c"), ("c", "d")]) + expected = DataFrame([[1, 2], [3, 4], [5, 6]], index=mi) + + df = expected.iloc[:-1] + df.loc[("c", "d"), :] = [5, 6] + + tm.assert_frame_equal(df, expected) + class TestLocCallable: def test_frame_loc_getitem_callable(self): diff --git a/pandas/tests/indexing/test_partial.py b/pandas/tests/indexing/test_partial.py index 6f20d0e4e7cbf..76fc99e6ad57f 100644 --- a/pandas/tests/indexing/test_partial.py +++ b/pandas/tests/indexing/test_partial.py @@ -235,42 +235,30 @@ def test_partial_set_empty_frame_empty_consistencies(self, using_infer_string): class TestPartialSetting: - def test_partial_setting(self): + # Prior to 3.0, the 5.0 case was cast to float, which did not match the + # behavior when setting 5.0 in non-expansion cases + @pytest.mark.parametrize("item", [5, 5.0]) + def test_partial_setting(self, indexer_sl, item): # GH2578, allow ix and friends to partially set # series - s_orig = Series([1, 2, 3]) - - s = s_orig.copy() - s[5] = 5 - expected = Series([1, 2, 3, 5], index=[0, 1, 2, 5]) - tm.assert_series_equal(s, expected) - - s = s_orig.copy() - s.loc[5] = 5 + ser = Series([1, 2, 3]) expected = Series([1, 2, 3, 5], index=[0, 1, 2, 5]) - tm.assert_series_equal(s, expected) - - s = s_orig.copy() - s[5] = 5.0 - expected = Series([1, 2, 3, 5.0], index=[0, 1, 2, 5]) - tm.assert_series_equal(s, expected) - s = s_orig.copy() - s.loc[5] = 5.0 - expected = Series([1, 2, 3, 5.0], index=[0, 1, 2, 5]) - tm.assert_series_equal(s, expected) + indexer_sl(ser)[5] = item + tm.assert_series_equal(ser, expected) + def test_cannot_expand_with_iloc_iat(self): # iloc/iat raise - s = s_orig.copy() + ser = Series([1, 2, 3]) msg = "iloc cannot enlarge its target object" with pytest.raises(IndexError, match=msg): - s.iloc[3] = 5.0 + ser.iloc[3] = 5.0 msg = "index 3 is out of bounds for axis 0 with size 3" with pytest.raises(IndexError, match=msg): - s.iat[3] = 5.0 + ser.iat[3] = 5.0 @pytest.mark.filterwarnings("ignore:Setting a value on a view:FutureWarning") def test_partial_setting_frame(self): diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py index f894005296781..67b1994230af3 100644 --- a/pandas/tests/series/indexing/test_setitem.py +++ b/pandas/tests/series/indexing/test_setitem.py @@ -3,7 +3,6 @@ date, datetime, ) -from decimal import Decimal import numpy as np import pytest @@ -515,10 +514,6 @@ def test_setitem_empty_series_timestamp_preserves_dtype(self): ) def test_append_timedelta_does_not_cast(self, td, using_infer_string, request): # GH#22717 inserting a Timedelta should _not_ cast to int64 - if using_infer_string and not isinstance(td, Timedelta): - # TODO: GH#56010 - request.applymarker(pytest.mark.xfail(reason="inferred as string")) - expected = Series(["x", td], index=[0, "td"], dtype=object) ser = Series(["x"]) @@ -592,12 +587,8 @@ def test_setitem_enlargement_object_none(self, nulls_fixture, using_infer_string # GH#48665 ser = Series(["a", "b"]) ser[3] = nulls_fixture - dtype = ( - "str" - if using_infer_string and not isinstance(nulls_fixture, Decimal) - else object - ) - expected = Series(["a", "b", nulls_fixture], index=[0, 1, 3], dtype=dtype) + + expected = Series(["a", "b", nulls_fixture], index=[0, 1, 3], dtype="str") tm.assert_series_equal(ser, expected) if using_infer_string: ser[3] is np.nan From 98b4c7a320a6e419af3c24601e6d97e076b7d2a0 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 30 Sep 2025 14:18:05 -0700 Subject: [PATCH 2/8] 32bit build fixup --- pandas/tests/indexing/test_loc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 6ff2d21b51a3f..020d0261b65d2 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -2149,7 +2149,7 @@ def test_loc_setitem_with_expansion_nonunique_index(self, index, has_ref): assert exp_index[-1][0] == key else: assert exp_index[-1] == key - exp_data = np.arange(N + 1) + exp_data = np.arange(N + 1).astype(np.int64) expected = DataFrame(exp_data, index=exp_index) # Add new row, but no new columns From d35bce84a76d8b9fac433b17adf3ba64c62b0090 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 30 Sep 2025 14:20:30 -0700 Subject: [PATCH 3/8] no-pyarrow fixup --- pandas/tests/series/indexing/test_setitem.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py index 67b1994230af3..1457c349fecb0 100644 --- a/pandas/tests/series/indexing/test_setitem.py +++ b/pandas/tests/series/indexing/test_setitem.py @@ -3,10 +3,12 @@ date, datetime, ) +from decimal import Decimal import numpy as np import pytest +from pandas.compat import HAS_PYARROW from pandas.compat.numpy import np_version_gt2 from pandas.errors import IndexingError @@ -588,7 +590,14 @@ def test_setitem_enlargement_object_none(self, nulls_fixture, using_infer_string ser = Series(["a", "b"]) ser[3] = nulls_fixture - expected = Series(["a", "b", nulls_fixture], index=[0, 1, 3], dtype="str") + dtype = ( + "str" + if using_infer_string + and not (isinstance(nulls_fixture, Decimal) and not HAS_PYARROW) + else object + ) + + expected = Series(["a", "b", nulls_fixture], index=[0, 1, 3], dtype=dtype) tm.assert_series_equal(ser, expected) if using_infer_string: ser[3] is np.nan From 2cc49a00a2509e7e68f61fbcc914c03e14d535ce Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 30 Sep 2025 15:38:20 -0700 Subject: [PATCH 4/8] 32bit fixups --- pandas/tests/indexing/test_loc.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 020d0261b65d2..3c1d80c6b0409 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -2223,8 +2223,8 @@ def test_loc_setitem_expansion_both(self): expected = DataFrame( { - "Outstanding": [70, 66, 136], - "Not Outstanding": [150, 81, 231], + "Outstanding": np.array([70, 66, 136]), + "Not Outstanding": np.array([150, 81, 231]), "Total": [220, 147, np.nan], }, index=["Bank", "Credit Union", "Total"], From b434c32126c246a35c4479c7337b4c1ec93a66b1 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 30 Sep 2025 15:39:12 -0700 Subject: [PATCH 5/8] no-infer fixup --- pandas/tests/series/indexing/test_setitem.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py index 1457c349fecb0..29505e694ecd0 100644 --- a/pandas/tests/series/indexing/test_setitem.py +++ b/pandas/tests/series/indexing/test_setitem.py @@ -599,7 +599,7 @@ def test_setitem_enlargement_object_none(self, nulls_fixture, using_infer_string expected = Series(["a", "b", nulls_fixture], index=[0, 1, 3], dtype=dtype) tm.assert_series_equal(ser, expected) - if using_infer_string: + if dtype == "str": ser[3] is np.nan else: assert ser[3] is nulls_fixture From 79cab13095a32590fc8bf2eaeeb5b1d473d8072b Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 30 Sep 2025 15:58:47 -0700 Subject: [PATCH 6/8] mypy fixup --- pandas/core/indexing.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 726b5b2ed32c5..3f1baa7c4b9c1 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -2851,7 +2851,10 @@ def infer_and_maybe_downcast(orig: ExtensionArray, new_arr) -> ArrayLike: dtype = orig.dtype if isinstance(dtype, NumpyEADtype): - dtype = dtype.numpy_dtype + # error: Incompatible types in assignment (expression has + # type "dtype[Any]", variable has type "ExtensionDtype") + # [assignment] + dtype = dtype.numpy_dtype # type: ignore[assignment] if is_np_dtype(new_arr.dtype, "f") and is_np_dtype(dtype, "iu"): new_arr = maybe_downcast_to_dtype(new_arr, dtype) From a02cf707e49d4ecf3a8897a78d4ece904a98ef1b Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 30 Sep 2025 15:59:30 -0700 Subject: [PATCH 7/8] mypy ignore --- pandas/core/dtypes/dtypes.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 20443eff32ef2..6282da885ca26 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -1606,7 +1606,8 @@ def _get_common_dtype(self, dtypes: list[DtypeObj]) -> DtypeObj | None: return None common_dtype = find_common_type(dtypes) - return NumpyEADtype(common_dtype) + # error: Argument 1 to "NumpyEADtype" has incompatible type + return NumpyEADtype(common_dtype) # type: ignore[arg-type] class BaseMaskedDtype(ExtensionDtype): From 3bbac43957bf679bfdd7723f26070af92009eb01 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 30 Sep 2025 17:39:38 -0700 Subject: [PATCH 8/8] fix not using_infer_string build --- pandas/tests/extension/test_string.py | 8 ++++++++ pandas/tests/series/indexing/test_setitem.py | 4 +++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/pandas/tests/extension/test_string.py b/pandas/tests/extension/test_string.py index e373ff12c4086..e21db610496f4 100644 --- a/pandas/tests/extension/test_string.py +++ b/pandas/tests/extension/test_string.py @@ -265,6 +265,14 @@ def test_loc_setitem_with_expansion_preserves_ea_index_dtype( request.applymarker(mark) super().test_loc_setitem_with_expansion_preserves_ea_index_dtype(data) + def test_loc_setitem_with_expansion_retains_ea_dtype( + self, data, using_infer_string, request + ): + if not using_infer_string and data.dtype.storage == "python": + mark = pytest.mark.xfail(reason="Gives object") + request.applymarker(mark) + super().test_loc_setitem_with_expansion_retains_ea_dtype(data) + class Test2DCompat(base.Dim2CompatTests): @pytest.fixture(autouse=True) diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py index 29505e694ecd0..9c80073095ff8 100644 --- a/pandas/tests/series/indexing/test_setitem.py +++ b/pandas/tests/series/indexing/test_setitem.py @@ -601,8 +601,10 @@ def test_setitem_enlargement_object_none(self, nulls_fixture, using_infer_string tm.assert_series_equal(ser, expected) if dtype == "str": ser[3] is np.nan - else: + elif using_infer_string: assert ser[3] is nulls_fixture + else: + assert type(ser[3]) is type(nulls_fixture) def test_setitem_scalar_into_readonly_backing_data():