From cd93e9aa46fdefc0ded000a581b5c973f7e38d89 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 31 Dec 2021 11:58:52 -0800 Subject: [PATCH 1/4] BUG: Series(floatlike, dtype=intlike) inconsistent with non-ndarray data --- doc/source/whatsnew/v1.4.0.rst | 1 + pandas/core/construction.py | 18 ++++++++- pandas/tests/series/test_constructors.py | 50 +++++++++++++++++++++--- 3 files changed, 62 insertions(+), 7 deletions(-) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 06b11827bca4a..b9e3ce5b9d801 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -790,6 +790,7 @@ Conversion - Bug in :meth:`DataFrame.convert_dtypes` result losing ``columns.names`` (:issue:`41435`) - Bug in constructing a ``IntegerArray`` from pyarrow data failing to validate dtypes (:issue:`44891`) - Bug in :meth:`Series.astype` not allowing converting from a ``PeriodDtype`` to ``datetime64`` dtype, inconsistent with the :class:`PeriodIndex` behavior (:issue:`45038`) +- Bug in constructing a :class:`Series` from a float-containing list or a floating-dtype ndarray-like (e.g. ``dask.Array``) and an integer dtype raising instead of casting like we would with an ``np.ndarray`` (:issue:`40110`) - Strings diff --git a/pandas/core/construction.py b/pandas/core/construction.py index e496125683c09..a0db2c2157f8f 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -588,7 +588,23 @@ def sanitize_array( data = list(data) if dtype is not None or len(data) == 0: - subarr = _try_cast(data, dtype, copy, raise_cast_failure) + try: + subarr = _try_cast(data, dtype, copy, raise_cast_failure) + except ValueError: + casted = np.array(data, copy=False) + if casted.dtype.kind == "f" and is_integer_dtype(dtype): + # GH#40110 match the behavior we have if we passed + # a ndarray[float] to begin with + return sanitize_array( + casted, + index, + dtype, + copy=False, + raise_cast_failure=raise_cast_failure, + allow_2d=allow_2d, + ) + else: + raise else: subarr = maybe_convert_platform(data) if subarr.dtype == object: diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 7300b8f03ade6..3ab897f8bdbb6 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -754,11 +754,42 @@ def test_constructor_unsigned_dtype_overflow(self, any_unsigned_int_numpy_dtype) with pytest.raises(OverflowError, match=msg): Series([-1], dtype=any_unsigned_int_numpy_dtype) + @td.skip_if_no("dask") + def test_construct_dask_float_array_int_dtype_match_ndarray(self): + # GH#40110 make sure we treat a float-dtype dask array with the same + # rules we would for an ndarray + import dask.dataframe as dd + + arr = np.array([1, 2.5, 3]) + darr = dd.from_array(arr) + + res = Series(darr) + expected = Series(arr) + tm.assert_series_equal(res, expected) + + res = Series(darr, dtype="i8") + expected = Series(arr, dtype="i8") + tm.assert_series_equal(res, expected) + + msg = "In a future version, passing float-dtype values containing NaN" + arr[2] = np.nan + with tm.assert_produces_warning(FutureWarning, match=msg): + res = Series(darr, dtype="i8") + with tm.assert_produces_warning(FutureWarning, match=msg): + expected = Series(arr, dtype="i8") + tm.assert_series_equal(res, expected) + def test_constructor_coerce_float_fail(self, any_int_numpy_dtype): # see gh-15832 - msg = "Trying to coerce float values to integers" - with pytest.raises(ValueError, match=msg): - Series([1, 2, 3.5], dtype=any_int_numpy_dtype) + # Updated: make sure we treat this list the same as we would treat + # the equivalent ndarray + vals = [1, 2, 3.5] + + res = Series(vals, dtype=any_int_numpy_dtype) + expected = Series(np.array(vals), dtype=any_int_numpy_dtype) + tm.assert_series_equal(res, expected) + alt = Series(np.array(vals)) # i.e. we ignore the dtype kwd + tm.assert_series_equal(alt, expected) def test_constructor_coerce_float_valid(self, float_numpy_dtype): s = Series([1, 2, 3.5], dtype=float_numpy_dtype) @@ -767,10 +798,17 @@ def test_constructor_coerce_float_valid(self, float_numpy_dtype): def test_constructor_invalid_coerce_ints_with_float_nan(self, any_int_numpy_dtype): # GH 22585 + # Updated: make sure we treat this list the same as we would treat the + # equivalent ndarray + vals = [1, 2, np.nan] - msg = "cannot convert float NaN to integer" - with pytest.raises(ValueError, match=msg): - Series([1, 2, np.nan], dtype=any_int_numpy_dtype) + msg = "In a future version, passing float-dtype values containing NaN" + with tm.assert_produces_warning(FutureWarning, match=msg): + res = Series(vals, dtype=any_int_numpy_dtype) + with tm.assert_produces_warning(FutureWarning, match=msg): + expected = Series(np.array(vals), dtype=any_int_numpy_dtype) + tm.assert_series_equal(res, expected) + assert np.isnan(expected.iloc[-1]) def test_constructor_dtype_no_cast(self): # see gh-1572 From 315bcdf1b7a37b3d76e37e9df0c8a82fab6680b2 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 31 Dec 2021 15:22:13 -0800 Subject: [PATCH 2/4] suppress warning --- pandas/tests/series/test_constructors.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 3ab897f8bdbb6..85c180ce77fd9 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -796,6 +796,10 @@ def test_constructor_coerce_float_valid(self, float_numpy_dtype): expected = Series([1, 2, 3.5]).astype(float_numpy_dtype) tm.assert_series_equal(s, expected) + # RuntimeWarning issued for unsigned dtypes with numpy==1.18.5 + @pytest.mark.filterwarnings( + "ignore:invalid value encountered in less:RuntimeWarning" + ) def test_constructor_invalid_coerce_ints_with_float_nan(self, any_int_numpy_dtype): # GH 22585 # Updated: make sure we treat this list the same as we would treat the From fbbf0481e9ec30492f508f067070ecbc4d29fe1a Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 31 Dec 2021 18:18:59 -0800 Subject: [PATCH 3/4] xfail on old numpy --- pandas/tests/series/test_constructors.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 85c180ce77fd9..2411b47e9dd7e 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -796,14 +796,16 @@ def test_constructor_coerce_float_valid(self, float_numpy_dtype): expected = Series([1, 2, 3.5]).astype(float_numpy_dtype) tm.assert_series_equal(s, expected) - # RuntimeWarning issued for unsigned dtypes with numpy==1.18.5 - @pytest.mark.filterwarnings( - "ignore:invalid value encountered in less:RuntimeWarning" - ) - def test_constructor_invalid_coerce_ints_with_float_nan(self, any_int_numpy_dtype): + def test_constructor_invalid_coerce_ints_with_float_nan( + self, any_int_numpy_dtype, request + ): # GH 22585 # Updated: make sure we treat this list the same as we would treat the # equivalent ndarray + if np_version_under1p19 and np.dtype(any_int_numpy_dtype).kind == "u": + mark = pytest.mark.xfail(reason="Produces an extra RuntimeWarning") + request.node.add_marker(mark) + vals = [1, 2, np.nan] msg = "In a future version, passing float-dtype values containing NaN" From 6b71cef88f2bfe32b399d75425a24a33b920baeb Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 31 Dec 2021 19:47:40 -0800 Subject: [PATCH 4/4] update doctest --- pandas/core/dtypes/cast.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index f18f1c760ca28..144f785515417 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -2029,7 +2029,7 @@ def maybe_cast_to_integer_array( Also, if you try to coerce float values to integers, it raises: - >>> pd.Series([1, 2, 3.5], dtype="int64") + >>> maybe_cast_to_integer_array([1, 2, 3.5], dtype=np.dtype("int64")) Traceback (most recent call last): ... ValueError: Trying to coerce float values to integers