From 71f7c04d517e667d628bf6ff18259ee6d10547b6 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 20 Nov 2025 09:11:46 -0800 Subject: [PATCH 1/2] TYP: stronger typing for unit, as_unit --- pandas/_libs/tslibs/nattype.pyi | 3 ++- pandas/_libs/tslibs/timedeltas.pyi | 5 +++-- pandas/_libs/tslibs/timestamps.pyi | 9 ++++++--- pandas/core/dtypes/dtypes.py | 3 ++- pandas/core/window/rolling.py | 3 +++ 5 files changed, 16 insertions(+), 7 deletions(-) diff --git a/pandas/_libs/tslibs/nattype.pyi b/pandas/_libs/tslibs/nattype.pyi index ab236dafe1450..6d94fa6593b65 100644 --- a/pandas/_libs/tslibs/nattype.pyi +++ b/pandas/_libs/tslibs/nattype.pyi @@ -17,6 +17,7 @@ from pandas._libs.tslibs.period import Period from pandas._typing import ( Frequency, TimestampNonexistent, + TimeUnit, ) NaT: NaTType @@ -180,4 +181,4 @@ class NaTType: def __floordiv__(self, other: float, /) -> Self: ... # other def __hash__(self) -> int: ... - def as_unit(self, unit: str, round_ok: bool = ...) -> NaTType: ... + def as_unit(self, unit: TimeUnit, round_ok: bool = ...) -> NaTType: ... diff --git a/pandas/_libs/tslibs/timedeltas.pyi b/pandas/_libs/tslibs/timedeltas.pyi index 2200f9ebbbbb5..3348075e1b051 100644 --- a/pandas/_libs/tslibs/timedeltas.pyi +++ b/pandas/_libs/tslibs/timedeltas.pyi @@ -15,6 +15,7 @@ from pandas._libs.tslibs import ( ) from pandas._typing import ( Frequency, + TimeUnit, npt, ) @@ -162,5 +163,5 @@ class Timedelta(timedelta): ) -> np.timedelta64: ... def view(self, dtype: npt.DTypeLike) -> object: ... @property - def unit(self) -> str: ... - def as_unit(self, unit: str, round_ok: bool = ...) -> Timedelta: ... + def unit(self) -> TimeUnit: ... + def as_unit(self, unit: TimeUnit, round_ok: bool = ...) -> Timedelta: ... diff --git a/pandas/_libs/tslibs/timestamps.pyi b/pandas/_libs/tslibs/timestamps.pyi index 3195ce9641f2b..d06c78b22626a 100644 --- a/pandas/_libs/tslibs/timestamps.pyi +++ b/pandas/_libs/tslibs/timestamps.pyi @@ -23,7 +23,10 @@ from pandas._libs.tslibs import ( Tick, Timedelta, ) -from pandas._typing import TimestampNonexistent +from pandas._typing import ( + TimestampNonexistent, + TimeUnit, +) _TimeZones: TypeAlias = str | _tzinfo | None | int @@ -235,5 +238,5 @@ class Timestamp(datetime): @property def daysinmonth(self) -> int: ... @property - def unit(self) -> str: ... - def as_unit(self, unit: str, round_ok: bool = ...) -> Timestamp: ... + def unit(self) -> TimeUnit: ... + def as_unit(self, unit: TimeUnit, round_ok: bool = ...) -> Timestamp: ... diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 2e3d73edcdf4f..17bd183d9f87c 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -85,6 +85,7 @@ IntervalClosedType, Ordered, Scalar, + TimeUnit, npt, type_t, ) @@ -820,7 +821,7 @@ def _creso(self) -> int: return abbrev_to_npy_unit(self.unit) @property - def unit(self) -> str_type: + def unit(self) -> TimeUnit: """ The precision of the datetime data. diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 17b189e222299..a0bdc5e48b15f 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -15,6 +15,7 @@ Concatenate, Literal, Self, + cast, final, overload, ) @@ -95,6 +96,7 @@ NDFrameT, QuantileInterpolation, P, + TimeUnit, T, WindowingRankType, npt, @@ -2001,6 +2003,7 @@ def _validate(self) -> None: except TypeError: # if not a datetime dtype, eg for empty dataframes unit = "ns" + unit = cast("TimeUnit", unit) self._win_freq_i8 = Timedelta(freq.nanos).as_unit(unit)._value # min_periods must be an integer From 61ad913a7b4e529ada1b6686baed84bc63b68440 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 20 Nov 2025 09:39:39 -0800 Subject: [PATCH 2/2] cast to TimeUnit --- pandas/core/arrays/_ranges.py | 9 ++++++--- pandas/core/arrays/datetimes.py | 9 +++++---- pandas/core/dtypes/cast.py | 2 ++ pandas/core/dtypes/dtypes.py | 8 +++++--- pandas/core/reshape/tile.py | 5 ++++- pandas/core/tools/datetimes.py | 4 ++++ pandas/core/window/ewm.py | 7 ++++++- pandas/io/pytables.py | 4 ++++ 8 files changed, 36 insertions(+), 12 deletions(-) diff --git a/pandas/core/arrays/_ranges.py b/pandas/core/arrays/_ranges.py index 9d491220c9899..a00c36497aa99 100644 --- a/pandas/core/arrays/_ranges.py +++ b/pandas/core/arrays/_ranges.py @@ -22,7 +22,10 @@ from pandas.core.construction import range_to_ndarray if TYPE_CHECKING: - from pandas._typing import npt + from pandas._typing import ( + TimeUnit, + npt, + ) def generate_regular_range( @@ -30,7 +33,7 @@ def generate_regular_range( end: Timestamp | Timedelta | None, periods: int | None, freq: BaseOffset, - unit: str = "ns", + unit: TimeUnit = "ns", ) -> npt.NDArray[np.intp]: """ Generate a range of dates or timestamps with the spans between dates @@ -46,7 +49,7 @@ def generate_regular_range( Number of periods in produced date range. freq : Tick Describes space between dates in produced date range. - unit : str, default "ns" + unit : {'s', 'ms', 'us', 'ns'}, default "ns" The resolution the output is meant to represent. Returns diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 08ac11106e2df..a3a590f2ad2f8 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -106,15 +106,15 @@ @overload -def tz_to_dtype(tz: tzinfo, unit: str = ...) -> DatetimeTZDtype: ... +def tz_to_dtype(tz: tzinfo, unit: TimeUnit = ...) -> DatetimeTZDtype: ... @overload -def tz_to_dtype(tz: None, unit: str = ...) -> np.dtype[np.datetime64]: ... +def tz_to_dtype(tz: None, unit: TimeUnit = ...) -> np.dtype[np.datetime64]: ... def tz_to_dtype( - tz: tzinfo | None, unit: str = "ns" + tz: tzinfo | None, unit: TimeUnit = "ns" ) -> np.dtype[np.datetime64] | DatetimeTZDtype: """ Return a datetime64[ns] dtype appropriate for the given timezone. @@ -393,6 +393,7 @@ def _from_sequence_not_strict( ) data_unit = np.datetime_data(subarr.dtype)[0] + data_unit = cast("TimeUnit", data_unit) data_dtype = tz_to_dtype(tz, data_unit) result = cls._simple_new(subarr, freq=inferred_freq, dtype=data_dtype) if unit is not None and unit != result.unit: @@ -2935,7 +2936,7 @@ def _generate_range( periods: int | None, offset: BaseOffset, *, - unit: str, + unit: TimeUnit, ) -> Generator[Timestamp]: """ Generates a sequence of dates corresponding to the specified time diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 6ff80a0ffc790..d91edcf418c45 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -100,6 +100,7 @@ DtypeObj, NumpyIndexT, Scalar, + TimeUnit, ) from pandas import Index @@ -567,6 +568,7 @@ def _maybe_promote(dtype: np.dtype, fill_value=np.nan): # different unit, e.g. passed np.timedelta64(24, "h") with dtype=m8[ns] # see if we can losslessly cast it to our dtype unit = np.datetime_data(dtype)[0] + unit = cast("TimeUnit", unit) try: td = Timedelta(fill_value).as_unit(unit, round_ok=False) except OutOfBoundsTimedelta: diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 17bd183d9f87c..90cc88f165445 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -781,10 +781,10 @@ def base(self) -> DtypeObj: # type: ignore[override] def str(self) -> str: # type: ignore[override] return f"|M8[{self.unit}]" - def __init__(self, unit: str_type | DatetimeTZDtype = "ns", tz=None) -> None: + def __init__(self, unit: TimeUnit | DatetimeTZDtype = "ns", tz=None) -> None: if isinstance(unit, DatetimeTZDtype): # error: "str" has no attribute "tz" - unit, tz = unit.unit, unit.tz # type: ignore[attr-defined] + unit, tz = unit.unit, unit.tz # type: ignore[union-attr] if unit != "ns": if isinstance(unit, str) and tz is None: @@ -895,7 +895,8 @@ def construct_from_string(cls, string: str_type) -> DatetimeTZDtype: if match: d = match.groupdict() try: - return cls(unit=d["unit"], tz=d["tz"]) + unit = cast("TimeUnit", d["unit"]) + return cls(unit=unit, tz=d["tz"]) except (KeyError, TypeError, ValueError) as err: # KeyError if maybe_get_tz tries and fails to get a # zoneinfo timezone (actually zoneinfo.ZoneInfoNotFoundError). @@ -972,6 +973,7 @@ def _get_common_dtype(self, dtypes: list[DtypeObj]) -> DtypeObj | None: if all(isinstance(t, DatetimeTZDtype) and t.tz == self.tz for t in dtypes): np_dtype = np.max([cast(DatetimeTZDtype, t).base for t in [self, *dtypes]]) unit = np.datetime_data(np_dtype)[0] + unit = cast("TimeUnit", unit) return type(self)(unit=unit, tz=self.tz) return super()._get_common_dtype(dtypes) diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py index b13da83084e5c..2a45bfc5dc237 100644 --- a/pandas/core/reshape/tile.py +++ b/pandas/core/reshape/tile.py @@ -8,6 +8,7 @@ TYPE_CHECKING, Any, Literal, + cast, ) import numpy as np @@ -49,6 +50,7 @@ from pandas._typing import ( DtypeObj, IntervalLeftRight, + TimeUnit, ) @@ -412,7 +414,7 @@ def _nbins_to_bins(x_idx: Index, nbins: int, right: bool) -> Index: # error: Argument 1 to "dtype_to_unit" has incompatible type # "dtype[Any] | ExtensionDtype"; expected "DatetimeTZDtype | dtype[Any]" unit = dtype_to_unit(x_idx.dtype) # type: ignore[arg-type] - td = Timedelta(seconds=1).as_unit(unit) + td = Timedelta(seconds=1).as_unit(cast("TimeUnit", unit)) # Use DatetimeArray/TimedeltaArray method instead of linspace # error: Item "ExtensionArray" of "ExtensionArray | ndarray[Any, Any]" # has no attribute "_generate_range" @@ -595,6 +597,7 @@ def _format_labels( # error: Argument 1 to "dtype_to_unit" has incompatible type # "dtype[Any] | ExtensionDtype"; expected "DatetimeTZDtype | dtype[Any]" unit = dtype_to_unit(bins.dtype) # type: ignore[arg-type] + unit = cast("TimeUnit", unit) formatter = lambda x: x adjust = lambda x: x - Timedelta(1, unit=unit).as_unit(unit) else: diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index a2c18ccb59899..c5c0aa4d61187 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -87,6 +87,7 @@ from pandas._libs.tslibs.nattype import NaTType from pandas._libs.tslibs.timedeltas import UnitChoices + from pandas._typing import TimeUnit from pandas import ( DataFrame, @@ -447,6 +448,7 @@ def _convert_listlike_datetimes( # We can take a shortcut since the datetime64 numpy array # is in UTC out_unit = np.datetime_data(result.dtype)[0] + out_unit = cast("TimeUnit", out_unit) dtype = tz_to_dtype(tz_parsed, out_unit) dt64_values = result.view(f"M8[{dtype.unit}]") dta = DatetimeArray._simple_new(dt64_values, dtype=dtype) @@ -469,6 +471,7 @@ def _array_strptime_with_fallback( result, tz_out = array_strptime(arg, fmt, exact=exact, errors=errors, utc=utc) if tz_out is not None: unit = np.datetime_data(result.dtype)[0] + unit = cast("TimeUnit", unit) dtype = DatetimeTZDtype(tz=tz_out, unit=unit) dta = DatetimeArray._simple_new(result, dtype=dtype) if utc: @@ -476,6 +479,7 @@ def _array_strptime_with_fallback( return Index(dta, name=name) elif result.dtype != object and utc: unit = np.datetime_data(result.dtype)[0] + unit = cast("TimeUnit", unit) res = Index(result, dtype=f"M8[{unit}, UTC]", name=name) return res return Index(result, dtype=result.dtype, name=name) diff --git a/pandas/core/window/ewm.py b/pandas/core/window/ewm.py index 0d6eb230714c0..d3819249b74d8 100644 --- a/pandas/core/window/ewm.py +++ b/pandas/core/window/ewm.py @@ -3,7 +3,10 @@ import datetime from functools import partial from textwrap import dedent -from typing import TYPE_CHECKING +from typing import ( + TYPE_CHECKING, + cast, +) import numpy as np @@ -57,6 +60,7 @@ if TYPE_CHECKING: from pandas._typing import ( TimedeltaConvertibleTypes, + TimeUnit, npt, ) @@ -122,6 +126,7 @@ def _calculate_deltas( Diff of the times divided by the half-life """ unit = dtype_to_unit(times.dtype) + unit = cast("TimeUnit", unit) if isinstance(times, ABCSeries): times = times._values _times = np.asarray(times.view(np.int64), dtype=np.float64) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index ab10935a60918..6d6efdb6b5b03 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -134,6 +134,7 @@ AxisInt, DtypeArg, FilePath, + TimeUnit, npt, ) @@ -5093,6 +5094,9 @@ def _set_tz( # Argument "tz" to "tz_to_dtype" has incompatible type "str | tzinfo | None"; # expected "tzinfo" unit, _ = np.datetime_data(datetime64_dtype) # parsing dtype: unit, count + unit = cast("TimeUnit", unit) + # error: Argument "tz" to "tz_to_dtype" has incompatible type + # "str | tzinfo | None"; expected "tzinfo" dtype = tz_to_dtype(tz=tz, unit=unit) # type: ignore[arg-type] dta = DatetimeArray._from_sequence(values, dtype=dtype) return dta