From 32ce2f3889857ea0a4ee37e0ddadae291aea15ed Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 21 Aug 2025 14:26:58 -0700 Subject: [PATCH 1/3] REF: remove unnecessary case from maybe_downcast_to_dtype --- pandas/core/dtypes/cast.py | 26 +------------ pandas/tests/dtypes/cast/test_downcast.py | 47 +---------------------- 2 files changed, 3 insertions(+), 70 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index afe359b3faede..5556f39ad03be 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -257,7 +257,7 @@ def maybe_downcast_to_dtype( ) -> ArrayLike: ... -def maybe_downcast_to_dtype(result: ArrayLike, dtype: str | np.dtype) -> ArrayLike: +def maybe_downcast_to_dtype(result: ArrayLike, dtype: np.dtype) -> ArrayLike: """ try to cast to the specified dtype (e.g. convert back to bool/int or could be an astype of float64->float32 @@ -266,30 +266,6 @@ def maybe_downcast_to_dtype(result: ArrayLike, dtype: str | np.dtype) -> ArrayLi result = result._values do_round = False - if isinstance(dtype, str): - if dtype == "infer": - inferred_type = lib.infer_dtype(result, skipna=False) - if inferred_type == "boolean": - dtype = "bool" - elif inferred_type == "integer": - dtype = "int64" - elif inferred_type == "datetime64": - dtype = "datetime64[ns]" - elif inferred_type in ["timedelta", "timedelta64"]: - dtype = "timedelta64[ns]" - - # try to upcast here - elif inferred_type == "floating": - dtype = "int64" - if issubclass(result.dtype.type, np.number): - do_round = True - - else: - # TODO: complex? what if result is already non-object? - dtype = "object" - - dtype = np.dtype(dtype) - if not isinstance(dtype, np.dtype): # enforce our signature annotation raise TypeError(dtype) # pragma: no cover diff --git a/pandas/tests/dtypes/cast/test_downcast.py b/pandas/tests/dtypes/cast/test_downcast.py index 69200b2e5fc96..763a6a54dc4e1 100644 --- a/pandas/tests/dtypes/cast/test_downcast.py +++ b/pandas/tests/dtypes/cast/test_downcast.py @@ -7,7 +7,6 @@ from pandas import ( Series, - Timedelta, ) import pandas._testing as tm @@ -15,35 +14,13 @@ @pytest.mark.parametrize( "arr,dtype,expected", [ - ( - np.array([8.5, 8.6, 8.7, 8.8, 8.9999999999995]), - "infer", - np.array([8.5, 8.6, 8.7, 8.8, 8.9999999999995]), - ), - ( - np.array([8.0, 8.0, 8.0, 8.0, 8.9999999999995]), - "infer", - np.array([8, 8, 8, 8, 9], dtype=np.int64), - ), - ( - np.array([8.0, 8.0, 8.0, 8.0, 9.0000000000005]), - "infer", - np.array([8, 8, 8, 8, 9], dtype=np.int64), - ), ( # This is a judgement call, but we do _not_ downcast Decimal # objects np.array([decimal.Decimal("0.0")]), - "int64", + np.dtype("int64"), np.array([decimal.Decimal("0.0")]), ), - ( - # GH#45837 - np.array([Timedelta(days=1), Timedelta(days=2)], dtype=object), - "infer", - np.array([1, 2], dtype="m8[D]").astype("m8[ns]"), - ), - # TODO: similar for dt64, dt64tz, Period, Interval? ], ) def test_downcast(arr, expected, dtype): @@ -60,26 +37,6 @@ def test_downcast_booleans(): tm.assert_numpy_array_equal(result, expected) -def test_downcast_conversion_no_nan(any_real_numpy_dtype): - dtype = any_real_numpy_dtype - expected = np.array([1, 2]) - arr = np.array([1.0, 2.0], dtype=dtype) - - result = maybe_downcast_to_dtype(arr, "infer") - tm.assert_almost_equal(result, expected, check_dtype=False) - - -def test_downcast_conversion_nan(float_numpy_dtype): - dtype = float_numpy_dtype - data = [1.0, 2.0, np.nan] - - expected = np.array(data, dtype=dtype) - arr = np.array(data, dtype=dtype) - - result = maybe_downcast_to_dtype(arr, "infer") - tm.assert_almost_equal(result, expected) - - def test_downcast_conversion_empty(any_real_numpy_dtype): dtype = any_real_numpy_dtype arr = np.array([], dtype=dtype) @@ -89,7 +46,7 @@ def test_downcast_conversion_empty(any_real_numpy_dtype): @pytest.mark.parametrize("klass", [np.datetime64, np.timedelta64]) def test_datetime_likes_nan(klass): - dtype = klass.__name__ + "[ns]" + dtype = np.dtype(klass.__name__ + "[ns]") arr = np.array([1, 2, np.nan]) exp = np.array([1, 2, klass("NaT")], dtype) From 264f64b1e15b0e7b24f3d5a96745aa9cf1a63834 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 21 Aug 2025 17:55:45 -0700 Subject: [PATCH 2/3] remove overloads --- pandas/core/dtypes/cast.py | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 5556f39ad03be..13e9ae8104aa3 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -245,18 +245,6 @@ def _disallow_mismatched_datetimelike(value, dtype: DtypeObj) -> None: raise TypeError(f"Cannot cast {value!r} to {dtype}") -@overload -def maybe_downcast_to_dtype( - result: np.ndarray, dtype: str | np.dtype -) -> np.ndarray: ... - - -@overload -def maybe_downcast_to_dtype( - result: ExtensionArray, dtype: str | np.dtype -) -> ArrayLike: ... - - def maybe_downcast_to_dtype(result: ArrayLike, dtype: np.dtype) -> ArrayLike: """ try to cast to the specified dtype (e.g. convert back to bool/int From 1fac0f4d3459e207047a259298248f6054aef7d5 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 22 Aug 2025 08:28:16 -0700 Subject: [PATCH 3/3] mypy fixup --- pandas/core/dtypes/cast.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 13e9ae8104aa3..4067980937fb3 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -245,6 +245,16 @@ def _disallow_mismatched_datetimelike(value, dtype: DtypeObj) -> None: raise TypeError(f"Cannot cast {value!r} to {dtype}") +@overload +def maybe_downcast_to_dtype(result: np.ndarray, dtype: np.dtype) -> np.ndarray: ... + + +@overload +def maybe_downcast_to_dtype( + result: ExtensionArray, dtype: np.dtype +) -> ExtensionArray: ... + + def maybe_downcast_to_dtype(result: ArrayLike, dtype: np.dtype) -> ArrayLike: """ try to cast to the specified dtype (e.g. convert back to bool/int