From 4264e1566997dc601d21459cb2d694a433738187 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 28 Jan 2020 20:13:12 -0800 Subject: [PATCH 1/4] CLN: remove IndexEngine.set_value --- pandas/_libs/index.pyx | 30 +++++++++++--------------- pandas/core/frame.py | 10 ++++++--- pandas/core/indexes/base.py | 6 +++--- pandas/core/series.py | 22 ++++++------------- pandas/tests/indexing/test_indexing.py | 2 +- 5 files changed, 30 insertions(+), 40 deletions(-) diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx index 2dfc14378baf6..2432a75561492 100644 --- a/pandas/_libs/index.pyx +++ b/pandas/_libs/index.pyx @@ -87,20 +87,6 @@ cdef class IndexEngine: else: return get_value_at(arr, loc, tz=tz) - cpdef set_value(self, ndarray arr, object key, object value): - """ - Parameters - ---------- - arr : 1-dimensional ndarray - """ - cdef: - object loc - - loc = self.get_loc(key) - value = convert_scalar(arr, value) - - arr[loc] = value - cpdef get_loc(self, object val): cdef: Py_ssize_t loc @@ -586,16 +572,24 @@ cpdef convert_scalar(ndarray arr, object value): raise ValueError("cannot set a Timedelta with a non-timedelta " f"{type(value).__name__}") - if (issubclass(arr.dtype.type, (np.integer, np.floating, np.complex)) and - not issubclass(arr.dtype.type, np.bool_)): + else: + validate_numeric_casting(arr.dtype, value) + + return value + + +cpdef validate_numeric_casting(dtype, object value): + # Note: we can't type dtype as cnp.dtype because that cases dtype.type + # to integer + if (issubclass(dtype.type, (np.integer, np.floating, np.complex)) and + not issubclass(dtype.type, np.bool_)): if util.is_bool_object(value): raise ValueError("Cannot assign bool to float/integer series") - if issubclass(arr.dtype.type, (np.integer, np.bool_)): + if issubclass(dtype.type, (np.integer, np.bool_)): if util.is_float_object(value) and value != value: raise ValueError("Cannot assign nan to integer series") - return value cdef class BaseMultiIndexCodesEngine: diff --git 
a/pandas/core/frame.py b/pandas/core/frame.py index f3a0cf3841b5b..399bd01fc0090 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -40,7 +40,7 @@ from pandas._config import get_option -from pandas._libs import algos as libalgos, lib, properties +from pandas._libs import algos as libalgos, lib, properties, index as libindex from pandas._typing import Axes, Axis, Dtype, FilePathOrBuffer, Label, Level, Renamer from pandas.compat import PY37 from pandas.compat._optional import import_optional_dependency @@ -3025,10 +3025,14 @@ def _set_value(self, index, col, value, takeable: bool = False): series = self._get_item_cache(col) engine = self.index._engine - engine.set_value(series._values, index, value) + loc = engine.get_loc(index) + libindex.validate_numeric_casting(series.dtype, value) + + series._values[loc] = value + # Note: trying to use series._set_value breaks tests in + # tests.frame.indexing.test_indexing and tests.indexing.test_partial return self except (KeyError, TypeError): - # set using a non-recursive method & reset the cache if takeable: self.iloc[index, col] = value diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 10d9552e6f5a7..0d6d4d5b875f2 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4695,9 +4695,9 @@ def set_value(self, arr, key, value): FutureWarning, stacklevel=2, ) - self._engine.set_value( - com.values_from_object(arr), com.values_from_object(key), value - ) + loc = self._engine.get_loc(key) + libindex.validate_numeric_casting(arr.dtype, value) + arr[loc] = value _index_shared_docs[ "get_indexer_non_unique" diff --git a/pandas/core/series.py b/pandas/core/series.py index 0aaa583885bc3..ed40a9797ecef 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1020,17 +1020,10 @@ def __setitem__(self, key, value): self._maybe_update_cacher() def _set_with_engine(self, key, value): - values = self._values - if is_extension_array_dtype(values.dtype): - # The cython 
indexing engine does not support ExtensionArrays. - values[self.index.get_loc(key)] = value - return - try: - self.index._engine.set_value(values, key, value) - return - except KeyError: - values[self.index.get_loc(key)] = value - return + # fails with AttributeError for IntervalIndex + loc = self.index._engine.get_loc(key) + libindex.validate_numeric_casting(self.dtype, value) + self._values[loc] = value def _set_with(self, key, value): # other: fancy integer or otherwise @@ -1110,11 +1103,10 @@ def _set_value(self, label, value, takeable: bool = False): try: if takeable: self._values[label] = value - elif isinstance(self._values, np.ndarray): - # i.e. not EA, so we can use _engine - self.index._engine.set_value(self._values, label, value) else: - self.loc[label] = value + loc = self.index.get_loc(label) + libindex.validate_numeric_casting(self.dtype, value) + self._values[loc] = value except KeyError: # set using a non-recursive method diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index 1913caae93932..ae32274c02dcd 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -137,7 +137,7 @@ def test_setitem_ndarray_3d(self, index, obj, idxr, idxr_id): r"Buffer has wrong number of dimensions \(expected 1, " r"got 3\)|" "'pandas._libs.interval.IntervalTree' object has no attribute " - "'set_value'|" # AttributeError + "'get_loc'|" # AttributeError "unhashable type: 'numpy.ndarray'|" # TypeError "No matching signature found|" # TypeError r"^\[\[\[|" # pandas.core.indexing.IndexingError From 2d3fe03b94bfaaef0f62d25c7fda522d8c8188b5 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 31 Jan 2020 10:56:09 -0800 Subject: [PATCH 2/4] whitespace fixup --- pandas/_libs/index.pyx | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx index de16e2adc3269..eb98dab64c9ef 100644 --- a/pandas/_libs/index.pyx +++ b/pandas/_libs/index.pyx @@ -590,7 
+590,6 @@ cpdef validate_numeric_casting(dtype, object value): raise ValueError("Cannot assign nan to integer series") - cdef class BaseMultiIndexCodesEngine: """ Base class for MultiIndexUIntEngine and MultiIndexPyIntEngine, which From 783ba6f87db2c5049ee501975d828a2478d2e9b7 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 31 Jan 2020 12:11:39 -0800 Subject: [PATCH 3/4] isort fixup --- pandas/core/frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index cee0fc3dbde6b..5f57b6abb9300 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -40,7 +40,7 @@ from pandas._config import get_option -from pandas._libs import algos as libalgos, lib, properties, index as libindex +from pandas._libs import algos as libalgos, index as libindex, lib, properties from pandas._typing import Axes, Axis, Dtype, FilePathOrBuffer, Label, Level, Renamer from pandas.compat import PY37 from pandas.compat._optional import import_optional_dependency From 88b7340bf34be55a74caa65e857a4a43c05cbd75 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 2 Feb 2020 11:39:45 -0800 Subject: [PATCH 4/4] rearrange checks --- pandas/_libs/index.pyx | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx index eb98dab64c9ef..b39afc57f34f6 100644 --- a/pandas/_libs/index.pyx +++ b/pandas/_libs/index.pyx @@ -578,17 +578,17 @@ cpdef convert_scalar(ndarray arr, object value): cpdef validate_numeric_casting(dtype, object value): - # Note: we can't type dtype as cnp.dtype because that cases dtype.type + # Note: we can't annotate dtype as cnp.dtype because that casts dtype.type # to integer + if issubclass(dtype.type, (np.integer, np.bool_)): + if util.is_float_object(value) and value != value: + raise ValueError("Cannot assign nan to integer series") + if (issubclass(dtype.type, (np.integer, np.floating, np.complex)) and not issubclass(dtype.type, np.bool_)):
if util.is_bool_object(value): raise ValueError("Cannot assign bool to float/integer series") - if issubclass(dtype.type, (np.integer, np.bool_)): - if util.is_float_object(value) and value != value: - raise ValueError("Cannot assign nan to integer series") - cdef class BaseMultiIndexCodesEngine: """