From a998da890db696af5db732dfa85fe7ea36e6072f Mon Sep 17 00:00:00 2001 From: Irv Lustig Date: Tue, 19 Aug 2025 13:35:16 -0400 Subject: [PATCH 1/5] WIP: mypy support 1.17.1 --- pandas/core/_numba/executor.py | 6 +++++- pandas/core/util/hashing.py | 5 ++++- pandas/io/common.py | 18 +++++++++--------- 3 files changed, 18 insertions(+), 11 deletions(-) diff --git a/pandas/core/_numba/executor.py b/pandas/core/_numba/executor.py index 3f3ebe8dbe023..030edebccc5cd 100644 --- a/pandas/core/_numba/executor.py +++ b/pandas/core/_numba/executor.py @@ -87,7 +87,11 @@ def column_looper( else: @numba.jit(nopython=nopython, nogil=nogil, parallel=parallel) - def column_looper( + # error: Incompatible redefinition (redefinition with type + # "Callable[[ndarray[Any, Any], ndarray[Any, Any], ndarray[Any, Any], + # int, VarArg(Any)], Any]", original type "Callable[[ndarray[Any, Any], + # ndarray[Any, Any], int, int, VarArg(Any)], Any]") + def column_looper( # type: ignore[misc] values: np.ndarray, start: np.ndarray, end: np.ndarray, diff --git a/pandas/core/util/hashing.py b/pandas/core/util/hashing.py index e120e69dc27cf..895706f501c15 100644 --- a/pandas/core/util/hashing.py +++ b/pandas/core/util/hashing.py @@ -344,4 +344,7 @@ def _hash_ndarray( vals ^= vals >> 27 vals *= np.uint64(0x94D049BB133111EB) vals ^= vals >> 31 - return vals + # error: Incompatible return value type (got "Any | ndarray[tuple[int, ...], + # dtype[signedinteger[Any]]]", expected "ndarray[tuple[int, ...], + # dtype[unsignedinteger[_64Bit]]]") + return vals # type: ignore[return-value] diff --git a/pandas/io/common.py b/pandas/io/common.py index d7aca27aa7643..507a7d0ea83db 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -951,9 +951,7 @@ def get_handle( ) -# error: Definition of "__enter__" in base class "IOBase" is incompatible -# with definition in base class "BinaryIO" -class _BufferedWriter(BytesIO, ABC): # type: ignore[misc] +class _BufferedWriter(BytesIO, ABC): """ Some objects do not support multiple .write() calls (TarFile and ZipFile). This wrapper writes to the underlying buffer on close. 
@@ -990,9 +988,10 @@ def __init__( super().__init__() self.archive_name = archive_name self.name = name - # error: Incompatible types in assignment (expression has type "TarFile", - # base class "_BufferedWriter" defined the type as "BytesIO") - self.buffer: tarfile.TarFile = tarfile.TarFile.open( # type: ignore[assignment] + # error: No overload variant of "open" of "TarFile" matches argument + # types "str | None", "str", "ReadBuffer[bytes] | WriteBuffer[bytes] | None", + # "dict[str, Any]" + self.buffer: tarfile.TarFile = tarfile.TarFile.open( # type: ignore[call-overload] name=name, mode=self.extend_mode(mode), fileobj=fileobj, @@ -1045,9 +1044,10 @@ def __init__( self.archive_name = archive_name kwargs.setdefault("compression", zipfile.ZIP_DEFLATED) - # error: Incompatible types in assignment (expression has type "ZipFile", - # base class "_BufferedWriter" defined the type as "BytesIO") - self.buffer: zipfile.ZipFile = zipfile.ZipFile( # type: ignore[assignment] + # error: No overload variant of "ZipFile" matches argument types + # "str | PathLike[str] | ReadBuffer[bytes] | WriteBuffer[bytes]", + # "str", "dict[str, Any]" + self.buffer: zipfile.ZipFile = zipfile.ZipFile( # type: ignore[call-overload] file, mode, **kwargs ) From a01c099bb2f3a3e78baf4c51af53d96c2f57a6d3 Mon Sep 17 00:00:00 2001 From: Irv Lustig Date: Sat, 30 Aug 2025 18:37:44 -0400 Subject: [PATCH 2/5] upgrade mypy to 1.17.1 and pyright to 1.1.404 and make changes to support that --- .pre-commit-config.yaml | 2 +- environment.yml | 2 +- pandas/_libs/tslibs/dtypes.pyi | 77 ++++++++++---------- pandas/_testing/__init__.py | 3 +- pandas/_testing/_io.py | 12 ++- pandas/core/algorithms.py | 11 ++- pandas/core/apply.py | 5 +- pandas/core/array_algos/quantile.py | 2 +- pandas/core/arrays/_mixins.py | 4 +- pandas/core/arrays/boolean.py | 2 +- pandas/core/arrays/categorical.py | 8 +- pandas/core/arrays/datetimelike.py | 6 +- pandas/core/arrays/datetimes.py | 14 ++-- pandas/core/arrays/floating.py | 11 ++- pandas/core/arrays/integer.py | 11 ++- pandas/core/arrays/masked.py | 7 +- pandas/core/arrays/numpy_.py | 4 +- pandas/core/arrays/period.py | 11 +-- pandas/core/arrays/timedeltas.py | 6 +- pandas/core/common.py | 4 +- pandas/core/computation/expr.py | 12 ++- pandas/core/groupby/groupby.py | 26 +++---- pandas/core/indexes/base.py | 11 ++- pandas/core/indexes/datetimelike.py | 3 +- pandas/core/indexes/frozen.py | 10 +-- pandas/core/indexes/multi.py | 6 +- pandas/core/interchange/column.py | 2 +- pandas/core/internals/blocks.py | 34 ++++----- pandas/core/internals/construction.py | 20 +++-- pandas/core/nanops.py | 2 +- pandas/core/reshape/merge.py | 13 ++-- pandas/core/reshape/reshape.py | 5 +- pandas/core/series.py | 4 +- pandas/core/sorting.py | 10 ++- pandas/core/tools/datetimes.py | 2 +- pandas/core/window/common.py | 4 +- pandas/core/window/rolling.py | 2 +- pandas/io/common.py | 8 +- pandas/io/excel/_openpyxl.py | 4 +- pandas/io/formats/csvs.py | 4 +- pandas/io/parsers/c_parser_wrapper.py | 77 ++++++++------------ pandas/io/parsers/python_parser.py | 27 ++----- pandas/io/parsers/readers.py | 19 ++--- pandas/io/pytables.py | 2 +- pandas/io/sas/sas7bdat.py | 4 +- pandas/plotting/_matplotlib/core.py | 6 +- pandas/tests/frame/test_ufunc.py | 4 +- pandas/tests/tseries/holiday/test_holiday.py | 10 +-- requirements-dev.txt | 2 +- scripts/run_stubtest.py | 40 +++++++++- 50 files changed, 303 insertions(+), 272 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 51be4c3f77973..54ded38e77ef1 100644 --- 
a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -121,7 +121,7 @@ repos: types: [python] stages: [manual] additional_dependencies: &pyright_dependencies - - pyright@1.1.383 + - pyright@1.1.404 - id: pyright # note: assumes python env is setup and activated name: pyright reportGeneralTypeIssues diff --git a/environment.yml b/environment.yml index 29ce9e8a03446..5f73e4ccf6463 100644 --- a/environment.yml +++ b/environment.yml @@ -77,7 +77,7 @@ dependencies: # code checks - flake8=7.1.0 # run in subprocess over docstring examples - - mypy=1.13.0 # pre-commit uses locally installed mypy + - mypy=1.17.1 # pre-commit uses locally installed mypy - tokenize-rt # scripts/check_for_inconsistent_pandas_namespace.py - pre-commit>=4.2.0 diff --git a/pandas/_libs/tslibs/dtypes.pyi b/pandas/_libs/tslibs/dtypes.pyi index 1a87e753b2061..821c465986203 100644 --- a/pandas/_libs/tslibs/dtypes.pyi +++ b/pandas/_libs/tslibs/dtypes.pyi @@ -28,33 +28,35 @@ class PeriodDtypeBase: def _td64_unit(self) -> str: ... class FreqGroup(Enum): - FR_ANN: int - FR_QTR: int - FR_MTH: int - FR_WK: int - FR_BUS: int - FR_DAY: int - FR_HR: int - FR_MIN: int - FR_SEC: int - FR_MS: int - FR_US: int - FR_NS: int - FR_UND: int + _value_: int + FR_ANN = ... + FR_QTR = ... + FR_MTH = ... + FR_WK = ... + FR_BUS = ... + FR_DAY = ... + FR_HR = ... + FR_MIN = ... + FR_SEC = ... + FR_MS = ... + FR_US = ... + FR_NS = ... + FR_UND = ... @staticmethod def from_period_dtype_code(code: int) -> FreqGroup: ... class Resolution(Enum): - RESO_NS: int - RESO_US: int - RESO_MS: int - RESO_SEC: int - RESO_MIN: int - RESO_HR: int - RESO_DAY: int - RESO_MTH: int - RESO_QTR: int - RESO_YR: int + _value_: int + RESO_NS = ... + RESO_US = ... + RESO_MS = ... + RESO_SEC = ... + RESO_MIN = ... + RESO_HR = ... + RESO_DAY = ... + RESO_MTH = ... + RESO_QTR = ... + RESO_YR = ... def __lt__(self, other: Resolution) -> bool: ... def __ge__(self, other: Resolution) -> bool: ... @property @@ -67,17 +69,18 @@ class Resolution(Enum): def attr_abbrev(self) -> str: ... class NpyDatetimeUnit(Enum): - NPY_FR_Y: int - NPY_FR_M: int - NPY_FR_W: int - NPY_FR_D: int - NPY_FR_h: int - NPY_FR_m: int - NPY_FR_s: int - NPY_FR_ms: int - NPY_FR_us: int - NPY_FR_ns: int - NPY_FR_ps: int - NPY_FR_fs: int - NPY_FR_as: int - NPY_FR_GENERIC: int + _value_: int + NPY_FR_Y = ... + NPY_FR_M = ... + NPY_FR_W = ... + NPY_FR_D = ... + NPY_FR_h = ... + NPY_FR_m = ... + NPY_FR_s = ... + NPY_FR_ms = ... + NPY_FR_us = ... + NPY_FR_ns = ... + NPY_FR_ps = ... + NPY_FR_fs = ... + NPY_FR_as = ... + NPY_FR_GENERIC = ... 
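Note on the stub pattern above: newer mypy treats an annotated-but-unassigned name in an Enum body (`FR_ANN: int`) as a plain instance attribute rather than an enum member, so the stubs now assign `...` to each member and declare the value type once via `_value_: int`. A side effect is that a member's `.value` is no longer statically pinned to `int` at every use site, which is why later hunks narrow it explicitly. A minimal sketch of the resulting pattern (the runtime class and the `2000` code below are illustrative stand-ins, not the real Cython enum):

    from enum import Enum
    from typing import cast

    class FreqGroup(Enum):  # runtime stand-in; the .pyi stub declares `_value_: int`
        FR_QTR = 2000       # in the stub this member appears as `FR_QTR = ...`

    # Call sites narrow .value explicitly, mirroring the period.py hunk below:
    base = cast(int, FreqGroup.FR_QTR.value)
    assert base == 2000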
diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py
index fc447aaba37db..f19aa0bb487e4 100644
--- a/pandas/_testing/__init__.py
+++ b/pandas/_testing/__init__.py
@@ -348,8 +348,9 @@ class SubclassedDataFrame(DataFrame):
     def _constructor(self):
         return lambda *args, **kwargs: SubclassedDataFrame(*args, **kwargs)
 
+    # error: Cannot override writeable attribute with read-only property
     @property
-    def _constructor_sliced(self):
+    def _constructor_sliced(self):  # type: ignore[override]
         return lambda *args, **kwargs: SubclassedSeries(*args, **kwargs)
 
 
diff --git a/pandas/_testing/_io.py b/pandas/_testing/_io.py
index e1841c95dcdfe..aaa40541c3edb 100644
--- a/pandas/_testing/_io.py
+++ b/pandas/_testing/_io.py
@@ -81,12 +81,12 @@ def round_trip_pathlib(writer, reader, path: str | None = None):
     if path is None:
         path = "___pathlib___"
     with ensure_clean(path) as path:
-        writer(Path(path))  # type: ignore[arg-type]
-        obj = reader(Path(path))  # type: ignore[arg-type]
+        writer(Path(path))
+        obj = reader(Path(path))
     return obj
 
 
-def write_to_compressed(compression, path, data, dest: str = "test") -> None:
+def write_to_compressed(compression, path: str, data, dest: str = "test") -> None:
     """
     Write data to a compressed file.
 
@@ -138,5 +138,9 @@ def write_to_compressed(compression, path: str, data, dest: str = "test") -> None:
     else:
         raise ValueError(f"Unrecognized compression type: {compression}")
 
-    with compress_method(path, mode=mode) as f:
+    # error: No overload variant of "ZipFile" matches argument types "str", "str"
+    # error: No overload variant of "BZ2File" matches argument types "str", "str"
+    # error: Argument "mode" to "TarFile" has incompatible type "str";
+    # expected "Literal['r', 'a', 'w', 'x']"
+    with compress_method(path, mode=mode) as f:  # type: ignore[call-overload, arg-type]
         getattr(f, method)(*args)
diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
index 533b9b689af0b..170f2dbf8b1fc 100644
--- a/pandas/core/algorithms.py
+++ b/pandas/core/algorithms.py
@@ -216,7 +216,7 @@ def _reconstruct_data(
         values = cls._from_sequence(values, dtype=dtype)  # type: ignore[assignment]
 
     else:
-        values = values.astype(dtype, copy=False)  # type: ignore[assignment]
+        values = values.astype(dtype, copy=False)
 
     return values
 
@@ -904,7 +904,10 @@ def value_counts_internal(
                 .size()
             )
             result.index.names = values.names
-            counts = result._values
+            # error: Incompatible types in assignment (expression has type
+            # "ndarray[Any, Any] | DatetimeArray | TimedeltaArray | PeriodArray | Any",
+            # variable has type "ndarray[tuple[int, ...], dtype[Any]]")
+            counts = result._values  # type: ignore[assignment]
 
         else:
             values = _ensure_arraylike(values, func_name="value_counts")
@@ -1311,7 +1314,7 @@ def searchsorted(
 
 _diff_special = {"float64", "float32", "int64", "int32", "int16", "int8"}
 
 
-def diff(arr, n: int, axis: AxisInt = 0):
+def diff(arr, n: int | float | np.integer, axis: AxisInt = 0):
     """
     difference of n between self, analogous to s-s.shift(n)
 
@@ -1400,7 +1403,7 @@ def diff(arr, n: int | float | np.integer, axis: AxisInt = 0):
     if arr.dtype.name in _diff_special:
         # TODO: can diff_2d dtype specialization troubles be fixed by defining
         # out_arr inside diff_2d?
-        algos.diff_2d(arr, out_arr, n, axis, datetimelike=is_timedelta)
+        algos.diff_2d(arr, out_arr, int(n), axis, datetimelike=is_timedelta)
     else:
         # To keep mypy happy, _res_indexer is a list while res_indexer is
         # a tuple, ditto for lag_indexer.
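Aside on the `diff` change just above: the parameter is widened because callers may pass `periods` as an integral float, while the Cython kernel `diff_2d` expects a genuine `int`, hence the `int(n)` coercion at the call site. A rough self-contained sketch of that contract (simplified, positive `n` and float input only; `diff_like` is a hypothetical helper, not the pandas implementation):

    import numpy as np

    def diff_like(arr: np.ndarray, n: int | float | np.integer) -> np.ndarray:
        n = int(n)  # accept 2 or 2.0; the kernel needs a real int
        out = np.full(arr.shape, np.nan)
        if n > 0:
            out[n:] = arr[n:] - arr[:-n]
        return out

    print(diff_like(np.arange(5.0), 2.0))  # [nan nan  2.  2.  2.]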
diff --git a/pandas/core/apply.py b/pandas/core/apply.py
index e228d20b359c6..9e37239c287b5 100644
--- a/pandas/core/apply.py
+++ b/pandas/core/apply.py
@@ -1645,7 +1645,7 @@ def agg_or_apply_dict_like(
         assert op_name in ["agg", "apply"]
 
         obj = self.obj
-        kwargs = {}
+        kwargs: dict[str, Any] = {}
         if op_name == "apply":
             by_row = "_compat" if self.by_row else False
             kwargs.update({"by_row": by_row})
@@ -2012,7 +2012,8 @@ def _managle_lambda_list(aggfuncs: Sequence[Any]) -> Sequence[Any]:
     for aggfunc in aggfuncs:
         if com.get_callable_name(aggfunc) == "<lambda>":
             aggfunc = partial(aggfunc)
-            aggfunc.__name__ = f"<lambda_{i}>"
+            # error: "partial[Any]" has no attribute "__name__"; maybe "__new__"?
+            aggfunc.__name__ = f"<lambda_{i}>"  # type: ignore[attr-defined]
             i += 1
 
         mangled_aggfuncs.append(aggfunc)
diff --git a/pandas/core/array_algos/quantile.py b/pandas/core/array_algos/quantile.py
index eb5026454552c..e15a986950cce 100644
--- a/pandas/core/array_algos/quantile.py
+++ b/pandas/core/array_algos/quantile.py
@@ -196,7 +196,7 @@ def _nanquantile(
         # Caller is responsible for ensuring mask shape match
         assert mask.shape == values.shape
         result = [
-            _nanquantile_1d(val, m, qs, na_value, interpolation=interpolation)  # type: ignore[arg-type]
+            _nanquantile_1d(val, m, qs, na_value, interpolation=interpolation)
             for (val, m) in zip(list(values), list(mask))
         ]
         if values.dtype.kind == "f":
diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py
index c4ad941970bd4..5fb5874371120 100644
--- a/pandas/core/arrays/_mixins.py
+++ b/pandas/core/arrays/_mixins.py
@@ -88,9 +88,7 @@ def method(self, *args, **kwargs):
     return cast(F, method)
 
 
-# error: Definition of "delete/ravel/T/repeat/copy" in base class "NDArrayBacked"
-# is incompatible with definition in base class "ExtensionArray"
-class NDArrayBackedExtensionArray(NDArrayBacked, ExtensionArray):  # type: ignore[misc]
+class NDArrayBackedExtensionArray(NDArrayBacked, ExtensionArray):
     """
     ExtensionArray that is backed by a single NumPy ndarray.
     """
diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py
index 10e695c1e221d..298e5ddb049ec 100644
--- a/pandas/core/arrays/boolean.py
+++ b/pandas/core/arrays/boolean.py
@@ -368,7 +368,7 @@ def _coerce_to_array(
         assert dtype == "boolean"
         return coerce_to_array(value, copy=copy)
 
-    def _logical_method(self, other, op):  # type: ignore[override]
+    def _logical_method(self, other, op):
         assert op.__name__ in {"or_", "ror_", "and_", "rand_", "xor", "rxor"}
         other_is_scalar = lib.is_scalar(other)
         mask = None
diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index 78928713166f4..b7387f3200e38 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -241,9 +241,7 @@ def contains(cat, key, container) -> bool:
     return any(loc_ in container for loc_ in loc)
 
 
-# error: Definition of "delete/ravel/T/repeat/copy" in base class "NDArrayBacked"
-# is incompatible with definition in base class "ExtensionArray"
-class Categorical(NDArrayBackedExtensionArray, PandasObject, ObjectStringArrayMixin):  # type: ignore[misc]
+class Categorical(NDArrayBackedExtensionArray, PandasObject, ObjectStringArrayMixin):
     """
     Represent a categorical variable in classic R / S-plus fashion.
@@ -2942,9 +2940,7 @@ def _validate(data) -> None: def _delegate_property_get(self, name: str): return getattr(self._parent, name) - # error: Signature of "_delegate_property_set" incompatible with supertype - # "PandasDelegate" - def _delegate_property_set(self, name: str, new_values) -> None: # type: ignore[override] + def _delegate_property_set(self, name: str, new_values) -> None: setattr(self._parent, name, new_values) @property diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 7e57b40e42430..3a2a6c1dde7c8 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -197,11 +197,7 @@ def new_meth(self, *args, **kwargs): return cast(F, new_meth) -# error: Definition of "_concat_same_type" in base class "NDArrayBacked" is -# incompatible with definition in base class "ExtensionArray" -class DatetimeLikeArrayMixin( # type: ignore[misc] - OpsMixin, NDArrayBackedExtensionArray -): +class DatetimeLikeArrayMixin(OpsMixin, NDArrayBackedExtensionArray): """ Shared Base/Mixin class for DatetimeArray, TimedeltaArray, PeriodArray diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 67ee16fd3a34e..38be038efcaa5 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -76,6 +76,7 @@ if TYPE_CHECKING: from collections.abc import ( + Callable, Generator, Iterator, ) @@ -83,6 +84,7 @@ from pandas._typing import ( ArrayLike, DateTimeErrorChoices, + DtypeObj, IntervalClosedType, TimeAmbiguous, TimeNonexistent, @@ -168,9 +170,7 @@ def f(self): return property(f) -# error: Definition of "_concat_same_type" in base class "NDArrayBacked" is -# incompatible with definition in base class "ExtensionArray" -class DatetimeArray(dtl.TimelikeOps, dtl.DatelikeOps): # type: ignore[misc] +class DatetimeArray(dtl.TimelikeOps, dtl.DatelikeOps): """ Pandas ExtensionArray for tz-naive or tz-aware datetime data. @@ -225,9 +225,9 @@ class DatetimeArray(dtl.TimelikeOps, dtl.DatelikeOps): # type: ignore[misc] _typ = "datetimearray" _internal_fill_value = np.datetime64("NaT", "ns") _recognized_scalars = (datetime, np.datetime64) - _is_recognized_dtype = lambda x: lib.is_np_dtype(x, "M") or isinstance( - x, DatetimeTZDtype - ) + _is_recognized_dtype: Callable[[DtypeObj], bool] = lambda x: lib.is_np_dtype( + x, "M" + ) or isinstance(x, DatetimeTZDtype) _infer_matches = ("datetime", "datetime64", "date") @property @@ -322,7 +322,7 @@ def _simple_new( # type: ignore[override] else: # DatetimeTZDtype. If we have e.g. DatetimeTZDtype[us, UTC], # then values.dtype should be M8[us]. 
- assert dtype._creso == get_unit_from_dtype(values.dtype) # type: ignore[union-attr] + assert dtype._creso == get_unit_from_dtype(values.dtype) result = super()._simple_new(values, dtype) result._freq = freq diff --git a/pandas/core/arrays/floating.py b/pandas/core/arrays/floating.py index 2db95b4baee75..8a04fca42c082 100644 --- a/pandas/core/arrays/floating.py +++ b/pandas/core/arrays/floating.py @@ -1,6 +1,10 @@ from __future__ import annotations -from typing import ClassVar +from typing import ( + TYPE_CHECKING, + Any, + ClassVar, +) import numpy as np @@ -12,6 +16,9 @@ NumericDtype, ) +if TYPE_CHECKING: + from collections.abc import Callable + class FloatingDtype(NumericDtype): """ @@ -26,7 +33,7 @@ class FloatingDtype(NumericDtype): # The value used to fill '_data' to avoid upcasting _internal_fill_value = np.nan _default_np_dtype = np.dtype(np.float64) - _checker = is_float_dtype + _checker: Callable[[Any], bool] = is_float_dtype def construct_array_type(self) -> type[FloatingArray]: """ diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index b14044de287fa..9ed776317bd8e 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -1,6 +1,10 @@ from __future__ import annotations -from typing import ClassVar +from typing import ( + TYPE_CHECKING, + Any, + ClassVar, +) import numpy as np @@ -12,6 +16,9 @@ NumericDtype, ) +if TYPE_CHECKING: + from collections.abc import Callable + class IntegerDtype(NumericDtype): """ @@ -26,7 +33,7 @@ class IntegerDtype(NumericDtype): # The value used to fill '_data' to avoid upcasting _internal_fill_value = 1 _default_np_dtype = np.dtype(np.int64) - _checker = is_integer_dtype + _checker: Callable[[Any], bool] = is_integer_dtype def construct_array_type(self) -> type[IntegerArray]: """ diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index 0402452e484ea..6bc545d08828e 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -713,8 +713,7 @@ def _hasna(self) -> bool: # make this faster by having an optional mask, but not have to change # source code using it.. 
- # error: Incompatible return value type (got "bool_", expected "bool") - return self._mask.any() # type: ignore[return-value] + return cast(bool, self._mask.any()) def _propagate_mask( self, mask: npt.NDArray[np.bool_] | None, other @@ -728,9 +727,7 @@ def _propagate_mask( mask = mask | isna(other) else: mask = self._mask | mask - # Incompatible return value type (got "Optional[ndarray[Any, dtype[bool_]]]", - # expected "ndarray[Any, dtype[bool_]]") - return mask # type: ignore[return-value] + return mask def _arith_method(self, other, op): op_name = op.__name__ diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py index cef20da195f43..ef64bda3dc504 100644 --- a/pandas/core/arrays/numpy_.py +++ b/pandas/core/arrays/numpy_.py @@ -50,9 +50,7 @@ from pandas import Index -# error: Definition of "_concat_same_type" in base class "NDArrayBacked" is -# incompatible with definition in base class "ExtensionArray" -class NumpyExtensionArray( # type: ignore[misc] +class NumpyExtensionArray( OpsMixin, NDArrayBackedExtensionArray, ObjectStringArrayMixin, diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 1670c0f5da605..180080da4cd00 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -84,6 +84,7 @@ from pandas._typing import ( AnyArrayLike, Dtype, + DtypeObj, FillnaOptions, NpDtype, NumpySorter, @@ -181,7 +182,7 @@ class PeriodArray(dtl.DatelikeOps, libperiod.PeriodMixin): # type: ignore[misc] _typ = "periodarray" # ABCPeriodArray _internal_fill_value = np.int64(iNaT) _recognized_scalars = (Period,) - _is_recognized_dtype = lambda x: isinstance( + _is_recognized_dtype: Callable[[DtypeObj], bool] = lambda x: isinstance( x, PeriodDtype ) # check_compatible_with checks freq match _infer_matches = ("period",) @@ -376,8 +377,8 @@ def dtype(self) -> PeriodDtype: return self._dtype # error: Cannot override writeable attribute with read-only property - @property # type: ignore[override] - def freq(self) -> BaseOffset: + @property + def freq(self) -> BaseOffset: # type: ignore[override] """ Return the frequency object for this PeriodArray. 
""" @@ -1435,11 +1436,11 @@ def _range_from_fields( if quarter is not None: if freq is None: freq = to_offset("Q", is_period=True) - base = FreqGroup.FR_QTR.value + base = cast(int, FreqGroup.FR_QTR.value) else: freq = to_offset(freq, is_period=True) base = libperiod.freq_to_dtype_code(freq) - if base != FreqGroup.FR_QTR.value: + if base != cast(int, FreqGroup.FR_QTR.value): raise AssertionError("base must equal FR_QTR") freqstr = freq.freqstr diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index aa4a77e9eff16..79c3d74b39666 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -65,7 +65,7 @@ from pandas.core.ops.common import unpack_zerodim_and_defer if TYPE_CHECKING: - from collections.abc import Iterator + from collections.abc import Callable, Iterator from pandas._typing import ( AxisInt, @@ -149,7 +149,7 @@ class TimedeltaArray(dtl.TimelikeOps): _typ = "timedeltaarray" _internal_fill_value = np.timedelta64("NaT", "ns") _recognized_scalars = (timedelta, np.timedelta64, Tick) - _is_recognized_dtype = lambda x: lib.is_np_dtype(x, "m") + _is_recognized_dtype: Callable[[DtypeObj], bool] = lambda x: lib.is_np_dtype(x, "m") _infer_matches = ("timedelta", "timedelta64") @property @@ -553,7 +553,7 @@ def _scalar_divlike_op(self, other, op): freq = to_offset(Timedelta(days=self.freq.n)) / other else: freq = self.freq / other - if freq.nanos == 0 and self.freq.nanos != 0: # type: ignore[union-attr] + if freq.nanos == 0 and self.freq.nanos != 0: # e.g. if self.freq is Nano(1) then dividing by 2 # rounds down to zero freq = None diff --git a/pandas/core/common.py b/pandas/core/common.py index c25d910b73100..72b15b6e1bf4e 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -286,9 +286,9 @@ def index_labels_to_array( except TypeError: # non-iterable labels = [labels] - labels = asarray_tuplesafe(labels, dtype=dtype) + rlabels = asarray_tuplesafe(labels, dtype=dtype) - return labels + return rlabels def maybe_make_list(obj): diff --git a/pandas/core/computation/expr.py b/pandas/core/computation/expr.py index b53596fe28e70..71ff14167ae20 100644 --- a/pandas/core/computation/expr.py +++ b/pandas/core/computation/expr.py @@ -168,7 +168,10 @@ def _preparse( the ``tokenize`` module and ``tokval`` is a string. 
""" assert callable(f), "f must be callable" - return tokenize.untokenize(f(x) for x in tokenize_string(source)) # pyright: ignore[reportArgumentType] + return tokenize.untokenize( + f(x) + for x in tokenize_string(source) # pyright: ignore[reportArgumentType] + ) def _is_type(t): @@ -680,7 +683,7 @@ def visit_Call(self, node, side=None, **kwargs): if res is None: # error: "expr" has no attribute "id" raise ValueError( - f"Invalid function call {node.func.id}" # type: ignore[attr-defined] + f"Invalid function call {node.func.id}" # type: ignore[union-attr] ) if hasattr(res, "value"): res = res.value @@ -700,9 +703,10 @@ def visit_Call(self, node, side=None, **kwargs): for key in node.keywords: if not isinstance(key, ast.keyword): - # error: "expr" has no attribute "id" + # error: Item "Attribute" of "Attribute | Name" has no + # attribute "id" raise ValueError( - f"keyword error in function call '{node.func.id}'" # type: ignore[attr-defined] + f"keyword error in function call '{node.func.id}'" # type: ignore[union-attr] ) if key.arg: diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index ce347140edd04..c056c05b9b135 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -2922,9 +2922,7 @@ def size(self) -> DataFrame | Series: ) if not self.as_index: - # error: Incompatible types in assignment (expression has - # type "DataFrame", variable has type "Series") - result = result.rename("size").reset_index() # type: ignore[assignment] + result = result.rename("size").reset_index() return result @final @@ -5754,25 +5752,27 @@ def get_groupby( grouper: ops.BaseGrouper | None = None, group_keys: bool = True, ) -> GroupBy: - klass: type[GroupBy] if isinstance(obj, Series): from pandas.core.groupby.generic import SeriesGroupBy - klass = SeriesGroupBy + return SeriesGroupBy( + obj=obj, + keys=by, + grouper=grouper, + group_keys=group_keys, + ) elif isinstance(obj, DataFrame): from pandas.core.groupby.generic import DataFrameGroupBy - klass = DataFrameGroupBy + return DataFrameGroupBy( + obj=obj, + keys=by, + grouper=grouper, + group_keys=group_keys, + ) else: # pragma: no cover raise TypeError(f"invalid type: {obj}") - return klass( - obj=obj, - keys=by, - grouper=grouper, - group_keys=group_keys, - ) - def _insert_quantile_level(idx: Index, qs: npt.NDArray[np.float64]) -> MultiIndex: """ diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 2ec774af15934..55dfb41b8c894 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -6083,7 +6083,8 @@ def _get_indexer_strict(self, key, axis_name: str_t) -> tuple[Index, np.ndarray] # "Index" has no attribute "freq" and key.freq is None # type: ignore[attr-defined] ): - keyarr = keyarr._with_freq(None) + # error: "Index" has no attribute "_with_freq"; maybe "_with_infer"? 
+ keyarr = keyarr._with_freq(None) # type: ignore[attr-defined] return keyarr, indexer @@ -6424,9 +6425,11 @@ def _transform_index(self, func, *, level=None) -> Index: """ if isinstance(self, ABCMultiIndex): values = [ - self.get_level_values(i).map(func) - if i == level or level is None - else self.get_level_values(i) + ( + self.get_level_values(i).map(func) + if i == level or level is None + else self.get_level_values(i) + ) for i in range(self.nlevels) ] return type(self).from_arrays(values) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 62831f941b535..7e6461f0fab5e 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -599,7 +599,8 @@ def _intersection(self, other: Index, sort: bool = False) -> Index: # At this point we should have result.dtype == self.dtype # and type(result) is type(self._data) result = self._wrap_setop_result(other, result) - return result._with_freq(None)._with_freq("infer") + # error: "Index" has no attribute "_with_freq"; maybe "_with_infer"? + return result._with_freq(None)._with_freq("infer") # type: ignore[attr-defined] else: return self._fast_intersect(other, sort) diff --git a/pandas/core/indexes/frozen.py b/pandas/core/indexes/frozen.py index bdecd8e3fefe5..f75699a9b745f 100644 --- a/pandas/core/indexes/frozen.py +++ b/pandas/core/indexes/frozen.py @@ -66,10 +66,8 @@ def difference(self, other) -> FrozenList: return type(self)(temp) # TODO: Consider deprecating these in favor of `union` (xref gh-15506) - # error: Incompatible types in assignment (expression has type - # "Callable[[FrozenList, Any], FrozenList]", base class "list" defined the - # type as overloaded function) - __add__ = __iadd__ = union # type: ignore[assignment] + + __add__ = __iadd__ = union # pyright: ignore[reportAssignmentType] def __getitem__(self, n): if isinstance(n, slice): @@ -114,7 +112,7 @@ def __str__(self) -> str: def __repr__(self) -> str: return f"{type(self).__name__}({self!s})" - __setitem__ = __setslice__ = _disabled # type: ignore[assignment] + __setitem__ = __setslice__ = _disabled __delitem__ = __delslice__ = _disabled pop = append = extend = _disabled - remove = sort = insert = _disabled # type: ignore[assignment] + remove = sort = insert = _disabled # pyright: ignore[reportAssignmentType] diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 0290fae46c9a1..7d18c85c98bbe 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1798,8 +1798,7 @@ def _get_level_values(self, level: int, unique: bool = False) -> Index: filled = algos.take_nd(lev._values, level_codes, fill_value=lev._na_value) return lev._shallow_copy(filled, name=name) - # error: Signature of "get_level_values" incompatible with supertype "Index" - def get_level_values(self, level) -> Index: # type: ignore[override] + def get_level_values(self, level) -> Index: """ Return vector of label values for requested level. 
@@ -3937,8 +3936,7 @@ def _get_reconciled_name_object(self, other) -> MultiIndex: """ names = self._maybe_match_names(other) if self.names != names: - # error: Cannot determine type of "rename" - return self.rename(names) # type: ignore[has-type] + return self.rename(names) return self def _maybe_match_names(self, other): diff --git a/pandas/core/interchange/column.py b/pandas/core/interchange/column.py index c27a9d8141712..1fbffe09278fc 100644 --- a/pandas/core/interchange/column.py +++ b/pandas/core/interchange/column.py @@ -335,7 +335,7 @@ def _get_data_buffer( # this is already single-chunk by the time we get here. arr = arr._pa_array.chunks[0] # type: ignore[attr-defined] buffer = PandasBufferPyarrow( - arr.buffers()[1], # type: ignore[attr-defined] + arr.buffers()[1], length=len(arr), ) return buffer, dtype diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 5468a0de2a41d..5a11eb88160bb 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -24,18 +24,6 @@ BlockValuesRefs, ) from pandas._libs.missing import NA -from pandas._typing import ( - ArrayLike, - AxisInt, - DtypeBackend, - DtypeObj, - FillnaOptions, - IgnoreRaise, - InterpolateOptions, - QuantileInterpolation, - Shape, - npt, -) from pandas.errors import ( AbstractMethodError, OutOfBoundsDatetime, @@ -127,6 +115,19 @@ Sequence, ) + from pandas._typing import ( + ArrayLike, + AxisInt, + DtypeBackend, + DtypeObj, + FillnaOptions, + IgnoreRaise, + InterpolateOptions, + QuantileInterpolation, + Shape, + npt, + ) + from pandas.core.api import Index from pandas.core.arrays._mixins import NDArrayBackedExtensionArray @@ -816,12 +817,7 @@ def replace_list( na_mask = ~isna(values) masks: Iterable[npt.NDArray[np.bool_]] = ( extract_bool_array( - cast( - ArrayLike, - compare_or_regex_search( - values, s[0], regex=regex, mask=na_mask - ), - ) + compare_or_regex_search(values, s[0], regex=regex, mask=na_mask), ) for s in pairs ) @@ -2106,7 +2102,7 @@ def _unstack( self.values.take( indices, allow_fill=needs_masking[i], fill_value=fill_value ), - BlockPlacement(place), # type: ignore[arg-type] + BlockPlacement(place), ndim=2, ) for i, (indices, place) in enumerate(zip(new_values, new_placement)) diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index a3e2bbbacac4e..a8c143bda7190 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -422,14 +422,18 @@ def dict_to_mgr( # We only need to copy arrays that will not get consolidated, i.e. 
# only EA arrays arrays = [ - x.copy() - if isinstance(x, ExtensionArray) - else x.copy(deep=True) - if ( - isinstance(x, Index) - or (isinstance(x, ABCSeries) and is_1d_only_ea_dtype(x.dtype)) + ( + x.copy() + if isinstance(x, ExtensionArray) + else ( + x.copy(deep=True) + if ( + isinstance(x, Index) + or (isinstance(x, ABCSeries) and is_1d_only_ea_dtype(x.dtype)) + ) + else x + ) ) - else x for x in arrays ] @@ -644,7 +648,7 @@ def reorder_arrays( arr = np.empty(length, dtype=object) arr.fill(np.nan) else: - arr = arrays[k] # type: ignore[assignment] + arr = arrays[k] new_arrays.append(arr) arrays = new_arrays diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 666b108717837..bb0c3759fae34 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -603,7 +603,7 @@ def nansum( skipna: bool = True, min_count: int = 0, mask: npt.NDArray[np.bool_] | None = None, -) -> float: +) -> npt.NDArray[np.floating] | float | NaTType: """ Sum the elements along an axis ignoring NaNs diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 285256ac7b16a..8786ce361c900 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -1356,9 +1356,11 @@ def _maybe_add_join_keys( if isinstance(result.index, MultiIndex): key_col.name = name idx_list = [ - result.index.get_level_values(level_name) - if level_name != name - else key_col + ( + result.index.get_level_values(level_name) + if level_name != name + else key_col + ) for level_name in result.index.names ] @@ -2185,9 +2187,8 @@ def _convert_to_multiindex(index: Index) -> MultiIndex: else: restore_codes = algos.take_nd(codes, indexer, fill_value=-1) - # error: Cannot determine type of "__add__" - join_levels = join_levels + [restore_levels] # type: ignore[has-type] - join_codes = join_codes + [restore_codes] # type: ignore[has-type] + join_levels = join_levels + [restore_levels] + join_codes = join_codes + [restore_codes] join_names = join_names + [dropped_level_name] return join_levels, join_codes, join_names diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index d2a838b616426..dd22f900be926 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -351,10 +351,7 @@ def get_new_columns(self, value_columns: Index | None): new_levels: FrozenList | list[Index] if isinstance(value_columns, MultiIndex): - # error: Cannot determine type of "__add__" [has-type] - new_levels = value_columns.levels + ( # type: ignore[has-type] - self.removed_level_full, - ) + new_levels = value_columns.levels + (self.removed_level_full,) # pyright: ignore[reportOperatorIssue] new_names = value_columns.names + (self.removed_name,) new_codes = [lab.take(propagator) for lab in value_columns.codes] diff --git a/pandas/core/series.py b/pandas/core/series.py index 63c9963fb7eac..7190461adf431 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -351,9 +351,7 @@ class Series(base.IndexOpsMixin, NDFrame): # type: ignore[misc] __pandas_priority__ = 3000 # Override cache_readonly bc Series is mutable - # error: Incompatible types in assignment (expression has type "property", - # base class "IndexOpsMixin" defined the type as "Callable[[IndexOpsMixin], bool]") - hasnans = property( # type: ignore[assignment] + hasnans = property( # error: "Callable[[IndexOpsMixin], bool]" has no attribute "fget" base.IndexOpsMixin.hasnans.fget, # type: ignore[attr-defined] doc=base.IndexOpsMixin.hasnans.__doc__, diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index 
18983af12976c..a9827767455f7 100644 --- a/pandas/core/sorting.py +++ b/pandas/core/sorting.py @@ -476,7 +476,7 @@ def nargminmax(values: ExtensionArray, method: str, axis: AxisInt = 0): zipped = zip(arr_values, mask) else: zipped = zip(arr_values.T, mask.T) - return np.array([_nanargminmax(v, m, func) for v, m in zipped]) # type: ignore[arg-type] + return np.array([_nanargminmax(v, m, func) for v, m in zipped]) return func(arr_values, axis=axis) return _nanargminmax(arr_values, mask, func) @@ -533,9 +533,11 @@ def _ensure_key_mapped_multiindex( sort_levels = range(index.nlevels) mapped = [ - ensure_key_mapped(index._get_level_values(level), key) - if level in sort_levels - else index._get_level_values(level) + ( + ensure_key_mapped(index._get_level_values(level), key) + if level in sort_levels + else index._get_level_values(level) + ) for level in range(index.nlevels) ] diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 1b236deff330d..040fcd02ab211 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -528,7 +528,7 @@ def _to_datetime_with_unit(arg, unit, name, utc: bool, errors: str) -> Index: utc=utc, errors=errors, unit_for_numerics=unit, - creso=NpyDatetimeUnit.NPY_FR_ns.value, + creso=cast(int, NpyDatetimeUnit.NPY_FR_ns.value), ) result = DatetimeIndex(arr, name=name) diff --git a/pandas/core/window/common.py b/pandas/core/window/common.py index 004a3555f0212..32af6e8b96584 100644 --- a/pandas/core/window/common.py +++ b/pandas/core/window/common.py @@ -106,7 +106,9 @@ def dataframe_from_int_dict(data, frame_template) -> DataFrame: [*arg2_levels, result_level], names=result_names ) # GH 34440 - num_levels = len(result.index.levels) + num_levels = len( + result.index.levels # pyright: ignore[reportAttributeAccessIssue] + ) new_order = [num_levels - 1] + list(range(num_levels - 1)) result = result.reorder_levels(new_order).sort_index() else: diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index b2dd92c9c7d60..ed9181eb600d3 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -665,7 +665,7 @@ def aggregate(self, func=None, *args, **kwargs): result = ResamplerWindowApply(self, func, args=args, kwargs=kwargs).agg() if isinstance(result, ABCDataFrame) and relabeling: result = result.iloc[:, order] - result.columns = columns # type: ignore[union-attr] + result.columns = columns if result is None: return self.apply(func, raw=False, args=args, kwargs=kwargs) return result diff --git a/pandas/io/common.py b/pandas/io/common.py index 507a7d0ea83db..b192e3bcc8e68 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -991,7 +991,9 @@ def __init__( # error: No overload variant of "open" of "TarFile" matches argument # types "str | None", "str", "ReadBuffer[bytes] | WriteBuffer[bytes] | None", # "dict[str, Any]" - self.buffer: tarfile.TarFile = tarfile.TarFile.open( # type: ignore[call-overload] + # error: Incompatible types in assignment (expression has type "TarFile", + # base class "_BufferedWriter" defined the type as "BytesIO") + self.buffer: tarfile.TarFile = tarfile.TarFile.open( # type: ignore[call-overload, assignment] name=name, mode=self.extend_mode(mode), fileobj=fileobj, @@ -1047,7 +1049,9 @@ def __init__( # error: No overload variant of "ZipFile" matches argument types # "str | PathLike[str] | ReadBuffer[bytes] | WriteBuffer[bytes]", # "str", "dict[str, Any]" - self.buffer: zipfile.ZipFile = zipfile.ZipFile( # type: ignore[call-overload] + # error: Incompatible 
types in assignment (expression has type "ZipFile", + # base class "_BufferedWriter" defined the type as "BytesIO") + self.buffer: zipfile.ZipFile = zipfile.ZipFile( # type: ignore[call-overload, assignment] file, mode, **kwargs ) diff --git a/pandas/io/excel/_openpyxl.py b/pandas/io/excel/_openpyxl.py index 3055c68a93cbc..867d11583dcc0 100644 --- a/pandas/io/excel/_openpyxl.py +++ b/pandas/io/excel/_openpyxl.py @@ -114,7 +114,9 @@ def _save(self) -> None: self._handles.handle.truncate() @classmethod - def _convert_to_style_kwargs(cls, style_dict: dict) -> dict[str, Serialisable]: + def _convert_to_style_kwargs( + cls, style_dict: dict[str, Serialisable] + ) -> dict[str, Serialisable]: """ Convert a style_dict to a set of kwargs suitable for initializing or updating-on-copy an openpyxl v2 style object. diff --git a/pandas/io/formats/csvs.py b/pandas/io/formats/csvs.py index 1b9eb6303fe74..762fee465c008 100644 --- a/pandas/io/formats/csvs.py +++ b/pandas/io/formats/csvs.py @@ -257,11 +257,13 @@ def save(self) -> None: storage_options=self.storage_options, ) as handles: # Note: self.encoding is irrelevant here + # error: Argument "quoting" to "writer" has incompatible type "int"; + # expected "Literal[0, 1, 2, 3]" self.writer = csvlib.writer( handles.handle, lineterminator=self.lineterminator, delimiter=self.sep, - quoting=self.quoting, + quoting=self.quoting, # type: ignore[arg-type] doublequote=self.doublequote, escapechar=self.escapechar, quotechar=self.quotechar, diff --git a/pandas/io/parsers/c_parser_wrapper.py b/pandas/io/parsers/c_parser_wrapper.py index aa9f3556c8f62..4304d27d74ea9 100644 --- a/pandas/io/parsers/c_parser_wrapper.py +++ b/pandas/io/parsers/c_parser_wrapper.py @@ -70,10 +70,7 @@ def __init__(self, src: ReadCsvBuffer[str], **kwds) -> None: self.low_memory = kwds.pop("low_memory", False) # #2442 - # error: Cannot determine type of 'index_col' - kwds["allow_leading_cols"] = ( - self.index_col is not False # type: ignore[has-type] - ) + kwds["allow_leading_cols"] = self.index_col is not False # GH20529, validate usecol arg before TextReader kwds["usecols"] = self.usecols @@ -99,27 +96,23 @@ def __init__(self, src: ReadCsvBuffer[str], **kwds) -> None: self.unnamed_cols = self._reader.unnamed_cols - # error: Cannot determine type of 'names' - passed_names = self.names is None # type: ignore[has-type] + passed_names = self.names is None if self._reader.header is None: self.names = None else: - # error: Cannot determine type of 'names' - # error: Cannot determine type of 'index_names' ( - self.names, # type: ignore[has-type] + self.names, self.index_names, self.col_names, passed_names, ) = self._extract_multi_indexer_columns( self._reader.header, - self.index_names, # type: ignore[has-type] + self.index_names, passed_names, ) - # error: Cannot determine type of 'names' - if self.names is None: # type: ignore[has-type] + if self.names is None: self.names = list(range(self._reader.table_width)) # gh-9755 @@ -130,8 +123,7 @@ def __init__(self, src: ReadCsvBuffer[str], **kwds) -> None: # # once names has been filtered, we will # then set orig_names again to names - # error: Cannot determine type of 'names' - self.orig_names = self.names[:] # type: ignore[has-type] + self.orig_names = self.names[:] if self.usecols: usecols = evaluate_callable_usecols(self.usecols, self.orig_names) @@ -144,45 +136,32 @@ def __init__(self, src: ReadCsvBuffer[str], **kwds) -> None: ): self._validate_usecols_names(usecols, self.orig_names) - # error: Cannot determine type of 'names' - if 
len(self.names) > len(usecols): # type: ignore[has-type] - # error: Cannot determine type of 'names' - self.names = [ # type: ignore[has-type] + if len(self.names) > len(usecols): + self.names = [ n - # error: Cannot determine type of 'names' - for i, n in enumerate(self.names) # type: ignore[has-type] + for i, n in enumerate(self.names) if (i in usecols or n in usecols) ] - # error: Cannot determine type of 'names' - if len(self.names) < len(usecols): # type: ignore[has-type] - # error: Cannot determine type of 'names' + if len(self.names) < len(usecols): self._validate_usecols_names( usecols, - self.names, # type: ignore[has-type] + self.names, ) - # error: Cannot determine type of 'names' - validate_parse_dates_presence(self.parse_dates, self.names) # type: ignore[has-type] + validate_parse_dates_presence(self.parse_dates, self.names) self._set_noconvert_columns() - # error: Cannot determine type of 'names' - self.orig_names = self.names # type: ignore[has-type] + self.orig_names = self.names - # error: Cannot determine type of 'index_col' - if self._reader.leading_cols == 0 and is_index_col( - self.index_col # type: ignore[has-type] - ): + if self._reader.leading_cols == 0 and is_index_col(self.index_col): ( index_names, - # error: Cannot determine type of 'names' - self.names, # type: ignore[has-type] + self.names, self.index_col, ) = self._clean_index_names( - # error: Cannot determine type of 'names' - self.names, # type: ignore[has-type] - # error: Cannot determine type of 'index_col' - self.index_col, # type: ignore[has-type] + self.names, + self.index_col, ) if self.index_names is None: @@ -213,11 +192,10 @@ def _set_noconvert_columns(self) -> None: # much faster than using orig_names.index(x) xref GH#44106 names_dict = {x: i for i, x in enumerate(self.orig_names)} - col_indices = [names_dict[x] for x in self.names] # type: ignore[has-type] - # error: Cannot determine type of 'names' + col_indices = [names_dict[x] for x in self.names] noconvert_columns = self._set_noconvert_dtype_columns( col_indices, - self.names, # type: ignore[has-type] + self.names, ) for col in noconvert_columns: self._reader.set_noconvert(col) @@ -236,13 +214,15 @@ def read( if self.low_memory: chunks = self._reader.read_low_memory(nrows) # destructive to chunks - data = _concatenate_chunks(chunks, self.names) # type: ignore[has-type] - + data = _concatenate_chunks(chunks, self.names) else: data = self._reader.read(nrows) except StopIteration: if self._first_chunk: self._first_chunk = False + # assert for mypy, orig_names is List or None, None would error in + # list(...) 
in dedup_names + assert self.orig_names is not None names = dedup_names( self.orig_names, is_potential_multi_index(self.orig_names, self.index_col), @@ -271,8 +251,7 @@ def read( # Done with first read, next time raise StopIteration self._first_chunk = False - # error: Cannot determine type of 'names' - names = self.names # type: ignore[has-type] + names = self.names if self._reader.leading_cols: # implicit index, no index names @@ -294,9 +273,11 @@ def read( if self._should_parse_dates(i): values = date_converter( values, - col=self.index_names[i] - if self.index_names is not None - else None, + col=( + self.index_names[i] + if self.index_names is not None + else None + ), dayfirst=self.dayfirst, cache_dates=self.cache_dates, date_format=self.date_format, diff --git a/pandas/io/parsers/python_parser.py b/pandas/io/parsers/python_parser.py index 70f0eefc55fd9..2b538f5e3cef4 100644 --- a/pandas/io/parsers/python_parser.py +++ b/pandas/io/parsers/python_parser.py @@ -165,7 +165,7 @@ def __init__(self, f: ReadCsvBuffer[str] | list, **kwds) -> None: _, ) = self._extract_multi_indexer_columns( columns, - self.index_names, # type: ignore[has-type] + self.index_names, ) # get popped off for index @@ -288,7 +288,7 @@ def read( self.orig_names, is_potential_multi_index( self.orig_names, - self.index_col, # type: ignore[has-type] + self.index_col, ), ) index, columns, col_dict = self._get_empty_meta( @@ -325,14 +325,13 @@ def _exclude_implicit_index( self.orig_names, is_potential_multi_index( self.orig_names, - self.index_col, # type: ignore[has-type] + self.index_col, ), ) offset = 0 if self._implicit_index: - # error: Cannot determine type of 'index_col' - offset = len(self.index_col) # type: ignore[has-type] + offset = len(self.index_col) len_alldata = len(alldata) self._check_data_length(names, alldata) @@ -667,8 +666,7 @@ def _infer_columns( # line for the rest of the parsing code if hr == header[-1]: lc = len(this_columns) - # error: Cannot determine type of 'index_col' - sic = self.index_col # type: ignore[has-type] + sic = self.index_col ic = len(sic) if sic is not None else 0 unnamed_count = len(this_unnamed_cols) @@ -1131,8 +1129,7 @@ def _get_index_name( if line is not None: # leave it 0, #2442 # Case 1 - # error: Cannot determine type of 'index_col' - index_col = self.index_col # type: ignore[has-type] + index_col = self.index_col if index_col is not False: implicit_first_cols = len(line) - self.num_original_columns @@ -1182,13 +1179,7 @@ def _rows_to_cols(self, content: list[list[Scalar]]) -> list[np.ndarray]: # Check that there are no rows with too many # elements in their row (rows with too few # elements are padded with NaN). 
- # error: Non-overlapping identity check (left operand type: "List[int]", - # right operand type: "Literal[False]") - if ( - max_len > col_len - and self.index_col is not False # type: ignore[comparison-overlap] - and self.usecols is None - ): + if max_len > col_len and self.index_col is not False and self.usecols is None: footers = self.skipfooter if self.skipfooter else 0 bad_lines = [] @@ -1468,11 +1459,9 @@ def detect_colspecs( shifted[0] = 0 edges = np.where((mask ^ shifted) == 1)[0] edge_pairs = list(zip(edges[::2], edges[1::2])) - return edge_pairs # type: ignore[return-value] + return edge_pairs def __next__(self) -> list[str]: - # Argument 1 to "next" has incompatible type "Union[IO[str], - # ReadCsvBuffer[str]]"; expected "SupportsNext[str]" if self.buffer is not None: try: line = next(self.buffer) diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py index b872464147311..fa11bfefb9a6d 100644 --- a/pandas/io/parsers/readers.py +++ b/pandas/io/parsers/readers.py @@ -141,6 +141,7 @@ class _read_shared(TypedDict, Generic[HashableT], total=False): float_precision: Literal["high", "legacy", "round_trip"] | None storage_options: StorageOptions | None dtype_backend: DtypeBackend | lib.NoDefault + else: _read_shared = dict @@ -796,10 +797,9 @@ def read_csv( skipfooter: int = 0, nrows: int | None = None, # NA and Missing Data Handling - na_values: Hashable - | Iterable[Hashable] - | Mapping[Hashable, Iterable[Hashable]] - | None = None, + na_values: ( + Hashable | Iterable[Hashable] | Mapping[Hashable, Iterable[Hashable]] | None + ) = None, keep_default_na: bool = True, na_filter: bool = True, skip_blank_lines: bool = True, @@ -932,10 +932,9 @@ def read_table( skipfooter: int = 0, nrows: int | None = None, # NA and Missing Data Handling - na_values: Hashable - | Iterable[Hashable] - | Mapping[Hashable, Iterable[Hashable]] - | None = None, + na_values: ( + Hashable | Iterable[Hashable] | Mapping[Hashable, Iterable[Hashable]] | None + ) = None, keep_default_na: bool = True, na_filter: bool = True, skip_blank_lines: bool = True, @@ -1835,6 +1834,8 @@ def _extract_dialect(kwds: dict[str, Any]) -> csv.Dialect | None: dialect = csv.get_dialect(dialect) _validate_dialect(dialect) + # For pyright, _validate_dialect makes sure it is a dialect + assert isinstance(dialect, csv.Dialect) return dialect @@ -1849,7 +1850,7 @@ def _extract_dialect(kwds: dict[str, Any]) -> csv.Dialect | None: ) -def _validate_dialect(dialect: csv.Dialect) -> None: +def _validate_dialect(dialect: csv.Dialect | Any) -> None: """ Validate csv dialect instance. diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index cfd5b3ac1f33f..0ec235de05855 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -3384,7 +3384,7 @@ class BlockManagerFixed(GenericFixed): nblocks: int @property - def shape(self) -> Shape | None: + def shape(self) -> Shape | list[int] | None: try: ndim = self.ndim diff --git a/pandas/io/sas/sas7bdat.py b/pandas/io/sas/sas7bdat.py index 792af5ff713a3..089576e5680cb 100644 --- a/pandas/io/sas/sas7bdat.py +++ b/pandas/io/sas/sas7bdat.py @@ -293,9 +293,7 @@ def _get_properties(self) -> None: # Read the rest of the header into cached_page. 
buf = self._path_or_buf.read(self.header_length - 288) self._cached_page += buf - # error: Argument 1 to "len" has incompatible type "Optional[bytes]"; - # expected "Sized" - if len(self._cached_page) != self.header_length: # type: ignore[arg-type] + if len(self._cached_page) != self.header_length: raise ValueError("The SAS7BDAT file appears to be truncated.") self._page_length = self._read_uint( diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index a7944ba301e68..b0817a1b4a04e 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -930,7 +930,9 @@ def _get_ax_legend(ax: Axes): """ leg = ax.get_legend() - other_ax = getattr(ax, "left_ax", None) or getattr(ax, "right_ax", None) + other_ax = cast( + "Axes", getattr(ax, "left_ax", None) or getattr(ax, "right_ax", None) + ) other_leg = None if other_ax is not None: other_leg = other_ax.get_legend() @@ -1977,7 +1979,7 @@ def _make_plot(self, fig: Figure) -> None: if i in _stacked_subplots_ind: offset_index = _stacked_subplots_ind[i] - pos_prior, neg_prior = _stacked_subplots_offsets[offset_index] # type:ignore[assignment] + pos_prior, neg_prior = _stacked_subplots_offsets[offset_index] # type: ignore[assignment] mask = y >= 0 start = np.where(mask, pos_prior, neg_prior) + self._start_base w = self.bar_width / 2 diff --git a/pandas/tests/frame/test_ufunc.py b/pandas/tests/frame/test_ufunc.py index ce66d448a0d40..8d5a227652462 100644 --- a/pandas/tests/frame/test_ufunc.py +++ b/pandas/tests/frame/test_ufunc.py @@ -66,14 +66,14 @@ def test_binary_input_dispatch_binop(dtype): [ (np.add, 1, [2, 3, 4, 5]), ( - partial(np.add, where=[[False, True], [True, False]]), # type: ignore[misc] + partial(np.add, where=[[False, True], [True, False]]), np.array([[1, 1], [1, 1]]), [0, 3, 4, 0], ), (np.power, np.array([[1, 1], [2, 2]]), [1, 2, 9, 16]), (np.subtract, 2, [-1, 0, 1, 2]), ( - partial(np.negative, where=np.array([[False, True], [True, False]])), # type: ignore[misc] + partial(np.negative, where=np.array([[False, True], [True, False]])), None, [0, -2, -3, 0], ), diff --git a/pandas/tests/tseries/holiday/test_holiday.py b/pandas/tests/tseries/holiday/test_holiday.py index eeca75c4e1095..966d6bad46fbf 100644 --- a/pandas/tests/tseries/holiday/test_holiday.py +++ b/pandas/tests/tseries/holiday/test_holiday.py @@ -3,24 +3,24 @@ timezone, ) +from dateutil.relativedelta import MO import pytest from pandas import ( + DateOffset, DatetimeIndex, Series, + Timestamp, ) import pandas._testing as tm from pandas.tseries.holiday import ( - MO, SA, AbstractHolidayCalendar, - DateOffset, EasterMonday, GoodFriday, Holiday, HolidayCalendarFactory, - Timestamp, USColumbusDay, USFederalHolidayCalendar, USLaborDay, @@ -361,7 +361,7 @@ def test_holiday_with_exclusion(): end = Timestamp("2025-05-31") exclude = DatetimeIndex([Timestamp("2022-05-30")]) # Queen's platinum Jubilee - queens_jubilee_uk_spring_bank_holiday: Holiday = Holiday( + queens_jubilee_uk_spring_bank_holiday = Holiday( "Queen's Jubilee UK Spring Bank Holiday", month=5, day=31, @@ -394,7 +394,7 @@ def test_holiday_with_multiple_exclusions(): ] ) # Yakudoshi new year - yakudoshi_new_year: Holiday = Holiday( + yakudoshi_new_year = Holiday( "Yakudoshi New Year", month=1, day=1, exclude_dates=exclude ) diff --git a/requirements-dev.txt b/requirements-dev.txt index ce0ff91b2c8b3..e4e485ad31333 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -54,7 +54,7 @@ seaborn moto asv>=0.6.1 flake8==7.1.0 -mypy==1.13.0 +mypy==1.17.1 
tokenize-rt pre-commit>=4.2.0 gitpython diff --git a/scripts/run_stubtest.py b/scripts/run_stubtest.py index e87a7d53f4ff3..83ec2967b1bfd 100644 --- a/scripts/run_stubtest.py +++ b/scripts/run_stubtest.py @@ -48,8 +48,6 @@ # stubtest might be too sensitive "pandas._libs.lib.NoDefault", "pandas._libs.lib._NoDefault.no_default", - # stubtest/Cython is not recognizing the default value for the dtype parameter - "pandas._libs.lib.map_infer_mask", # internal type alias (should probably be private) "pandas._libs.lib.ndarray_obj_2d", # runtime argument "owner" has a default value but stub argument does not @@ -74,6 +72,44 @@ "pandas._libs.tslibs.offsets.BusinessHour.rollforward ", # type alias "pandas._libs.tslibs.timedeltas.UnitChoices", + # enum types in cython vs PYI + "pandas._libs.tslibs.dtypes.FreqGroup.FR_ANN", + "pandas._libs.tslibs.dtypes.FreqGroup.FR_BUS", + "pandas._libs.tslibs.dtypes.FreqGroup.FR_DAY", + "pandas._libs.tslibs.dtypes.FreqGroup.FR_HR", + "pandas._libs.tslibs.dtypes.FreqGroup.FR_MIN", + "pandas._libs.tslibs.dtypes.FreqGroup.FR_MS", + "pandas._libs.tslibs.dtypes.FreqGroup.FR_MTH", + "pandas._libs.tslibs.dtypes.FreqGroup.FR_NS", + "pandas._libs.tslibs.dtypes.FreqGroup.FR_QTR", + "pandas._libs.tslibs.dtypes.FreqGroup.FR_SEC", + "pandas._libs.tslibs.dtypes.FreqGroup.FR_UND", + "pandas._libs.tslibs.dtypes.FreqGroup.FR_US", + "pandas._libs.tslibs.dtypes.FreqGroup.FR_WK", + "pandas._libs.tslibs.dtypes.NpyDatetimeUnit.NPY_FR_D", + "pandas._libs.tslibs.dtypes.NpyDatetimeUnit.NPY_FR_GENERIC", + "pandas._libs.tslibs.dtypes.NpyDatetimeUnit.NPY_FR_M", + "pandas._libs.tslibs.dtypes.NpyDatetimeUnit.NPY_FR_W", + "pandas._libs.tslibs.dtypes.NpyDatetimeUnit.NPY_FR_Y", + "pandas._libs.tslibs.dtypes.NpyDatetimeUnit.NPY_FR_as", + "pandas._libs.tslibs.dtypes.NpyDatetimeUnit.NPY_FR_fs", + "pandas._libs.tslibs.dtypes.NpyDatetimeUnit.NPY_FR_h", + "pandas._libs.tslibs.dtypes.NpyDatetimeUnit.NPY_FR_m", + "pandas._libs.tslibs.dtypes.NpyDatetimeUnit.NPY_FR_ms", + "pandas._libs.tslibs.dtypes.NpyDatetimeUnit.NPY_FR_ns", + "pandas._libs.tslibs.dtypes.NpyDatetimeUnit.NPY_FR_ps", + "pandas._libs.tslibs.dtypes.NpyDatetimeUnit.NPY_FR_s", + "pandas._libs.tslibs.dtypes.NpyDatetimeUnit.NPY_FR_us", + "pandas._libs.tslibs.dtypes.Resolution.RESO_DAY", + "pandas._libs.tslibs.dtypes.Resolution.RESO_HR", + "pandas._libs.tslibs.dtypes.Resolution.RESO_MIN", + "pandas._libs.tslibs.dtypes.Resolution.RESO_MS", + "pandas._libs.tslibs.dtypes.Resolution.RESO_MTH", + "pandas._libs.tslibs.dtypes.Resolution.RESO_NS", + "pandas._libs.tslibs.dtypes.Resolution.RESO_QTR", + "pandas._libs.tslibs.dtypes.Resolution.RESO_SEC", + "pandas._libs.tslibs.dtypes.Resolution.RESO_US", + "pandas._libs.tslibs.dtypes.Resolution.RESO_YR", ] if __name__ == "__main__": From 69c0f764f5e82fa604a3c004d653cced56d6f356 Mon Sep 17 00:00:00 2001 From: Irv Lustig Date: Sat, 30 Aug 2025 19:28:01 -0400 Subject: [PATCH 3/5] fix dialect issue for pyright --- pandas/io/parsers/readers.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py index fa11bfefb9a6d..83a239caffc15 100644 --- a/pandas/io/parsers/readers.py +++ b/pandas/io/parsers/readers.py @@ -1834,10 +1834,8 @@ def _extract_dialect(kwds: dict[str, Any]) -> csv.Dialect | None: dialect = csv.get_dialect(dialect) _validate_dialect(dialect) - # For pyright, _validate_dialect makes sure it is a dialect - assert isinstance(dialect, csv.Dialect) - return dialect + return dialect # pyright: ignore[reportReturnType] 
MANDATORY_DIALECT_ATTRS = ( From 3a4473d2def913d268069aa6a1c4e9784fc51787 Mon Sep 17 00:00:00 2001 From: Irv Lustig Date: Sun, 31 Aug 2025 12:16:54 -0400 Subject: [PATCH 4/5] fixes based on PR review --- pandas/core/algorithms.py | 2 +- pandas/core/arrays/masked.py | 2 +- pandas/io/parsers/readers.py | 14 +++++++++----- pandas/io/pytables.py | 3 +-- 4 files changed, 12 insertions(+), 9 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 170f2dbf8b1fc..8126bd072a8dc 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -1314,7 +1314,7 @@ def searchsorted( _diff_special = {"float64", "float32", "int64", "int32", "int16", "int8"} -def diff(arr, n: int | float | np.integer, axis: AxisInt = 0): +def diff(arr, n: int | float | np.integer | np.floating, axis: AxisInt = 0): """ difference of n between self, analogous to s-s.shift(n) diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index 6bc545d08828e..ce7f288fc0238 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -713,7 +713,7 @@ def _hasna(self) -> bool: # make this faster by having an optional mask, but not have to change # source code using it.. - return cast(bool, self._mask.any()) + return bool(self._mask.any()) def _propagate_mask( self, mask: npt.NDArray[np.bool_] | None, other diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py index 83a239caffc15..532eead0faca2 100644 --- a/pandas/io/parsers/readers.py +++ b/pandas/io/parsers/readers.py @@ -22,6 +22,7 @@ Self, TypedDict, Unpack, + cast, overload, ) import warnings @@ -1818,7 +1819,7 @@ def _refine_defaults_read( return kwds -def _extract_dialect(kwds: dict[str, Any]) -> csv.Dialect | None: +def _extract_dialect(kwds: dict[str, str]) -> csv.Dialect | None: """ Extract concrete csv dialect instance. @@ -1831,11 +1832,14 @@ def _extract_dialect(kwds: dict[str, Any]) -> csv.Dialect | None: dialect = kwds["dialect"] if dialect in csv.list_dialects(): - dialect = csv.get_dialect(dialect) + # get_dialect is typed to return a `_csv.Dialect` for some reason in typeshed + tdialect = cast(csv.Dialect, csv.get_dialect(dialect)) - _validate_dialect(dialect) + _validate_dialect(tdialect) + else: + tdialect = None - return dialect # pyright: ignore[reportReturnType] + return tdialect MANDATORY_DIALECT_ATTRS = ( @@ -1848,7 +1852,7 @@ def _extract_dialect(kwds: dict[str, Any]) -> csv.Dialect | None: ) -def _validate_dialect(dialect: csv.Dialect | Any) -> None: +def _validate_dialect(dialect: csv.Dialect | str) -> None: """ Validate csv dialect instance. 
diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 0ec235de05855..b04f8e4137357 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -128,7 +128,6 @@ AxisInt, DtypeArg, FilePath, - Shape, npt, ) @@ -3384,7 +3383,7 @@ class BlockManagerFixed(GenericFixed): nblocks: int @property - def shape(self) -> Shape | list[int] | None: + def shape(self) -> list[int] | None: try: ndim = self.ndim From 99d6faed6832f9e6a4aec0e89dd08721c3c36678 Mon Sep 17 00:00:00 2001 From: Irv Lustig Date: Sun, 31 Aug 2025 12:42:18 -0400 Subject: [PATCH 5/5] fix dialect issue again --- pandas/io/parsers/readers.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py index 532eead0faca2..458c8dd201d0a 100644 --- a/pandas/io/parsers/readers.py +++ b/pandas/io/parsers/readers.py @@ -1819,7 +1819,7 @@ def _refine_defaults_read( return kwds -def _extract_dialect(kwds: dict[str, str]) -> csv.Dialect | None: +def _extract_dialect(kwds: dict[str, str | csv.Dialect]) -> csv.Dialect | None: """ Extract concrete csv dialect instance. @@ -1831,13 +1831,14 @@ def _extract_dialect(kwds: dict[str, str]) -> csv.Dialect | None: return None dialect = kwds["dialect"] - if dialect in csv.list_dialects(): + if isinstance(dialect, str) and dialect in csv.list_dialects(): # get_dialect is typed to return a `_csv.Dialect` for some reason in typeshed tdialect = cast(csv.Dialect, csv.get_dialect(dialect)) - _validate_dialect(tdialect) + else: - tdialect = None + _validate_dialect(dialect) + tdialect = cast(csv.Dialect, dialect) return tdialect
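For review context, a small sketch of the contract `_extract_dialect` settles on after this last patch: a registered dialect *name* goes through `csv.get_dialect` (behind a `cast`, since typeshed types that as returning `_csv.Dialect`), while a `csv.Dialect` *instance* falls to the else-branch, is validated, and is returned as-is; a missing "dialect" key yields None. The subclass below is hypothetical:

    import csv

    class PipeDialect(csv.excel):  # csv.excel supplies the mandatory attributes
        delimiter = "|"

    # _extract_dialect({"dialect": "excel"})       -> the registered excel dialect
    # _extract_dialect({"dialect": PipeDialect()}) -> that same instance, validated
    # _extract_dialect({})                         -> None

    d = PipeDialect()
    assert d.delimiter == "|" and d.doublequote  # among the attrs _validate_dialect checks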