From b114d47b536c975520efc053d1a3e19409734ef5 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Sun, 16 Mar 2025 20:34:28 +0000 Subject: [PATCH 1/4] chore(typing): Add constrained `TypeVar`(s) for `pandas` --- narwhals/_pandas_like/typing.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/narwhals/_pandas_like/typing.py b/narwhals/_pandas_like/typing.py index 9def250676..6e7d28cac4 100644 --- a/narwhals/_pandas_like/typing.py +++ b/narwhals/_pandas_like/typing.py @@ -5,13 +5,40 @@ if TYPE_CHECKING: import sys + from typing import Any + + if sys.version_info >= (3, 13): + from typing import TypeVar + else: + from typing_extensions import TypeVar if sys.version_info >= (3, 10): from typing import TypeAlias else: from typing_extensions import TypeAlias + import cudf + import modin.pandas as mpd + import pandas as pd + from narwhals._pandas_like.expr import PandasLikeExpr from narwhals._pandas_like.series import PandasLikeSeries IntoPandasLikeExpr: TypeAlias = Union[PandasLikeExpr, PandasLikeSeries] + + DataFrameT = TypeVar( + "DataFrameT", pd.DataFrame, mpd.DataFrame, cudf.DataFrame, default=pd.DataFrame + ) + SeriesT = TypeVar( + "SeriesT", pd.Series[Any], mpd.Series, cudf.Series[Any], default=pd.Series[Any] + ) + NDFrameT = TypeVar( + "NDFrameT", + pd.DataFrame, + mpd.DataFrame, + cudf.DataFrame, + pd.Series[Any], + mpd.Series, + cudf.Series[Any], + default=pd.DataFrame, + ) From 3dc7c032329d0e65060152866a91213ff0f740d0 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Sun, 16 Mar 2025 20:35:33 +0000 Subject: [PATCH 2/4] fix(typing): Resolve `mypy` errors Still have 7 for `pyright` --- narwhals/_pandas_like/utils.py | 46 ++++++++++++++++++---------------- 1 file changed, 24 insertions(+), 22 deletions(-) diff --git a/narwhals/_pandas_like/utils.py b/narwhals/_pandas_like/utils.py index 0fb7b352b3..162f77c9df 100644 --- 
a/narwhals/_pandas_like/utils.py +++ b/narwhals/_pandas_like/utils.py @@ -27,6 +27,8 @@ from narwhals._pandas_like.dataframe import PandasLikeDataFrame from narwhals._pandas_like.expr import PandasLikeExpr from narwhals._pandas_like.series import PandasLikeSeries + from narwhals._pandas_like.typing import DataFrameT + from narwhals._pandas_like.typing import NDFrameT from narwhals.dtypes import DType from narwhals.typing import DTypeBackend from narwhals.typing import TimeUnit @@ -257,19 +259,19 @@ def native_series_from_iterable( def set_index( - obj: T, + obj: NDFrameT, index: Any, *, implementation: Implementation, backend_version: tuple[int, ...], -) -> T: +) -> NDFrameT: """Wrapper around pandas' set_axis to set object index. We can set `copy` / `inplace` based on implementation/version. """ if implementation is Implementation.CUDF: # pragma: no cover - obj = obj.copy(deep=False) # type: ignore[attr-defined] - obj.index = index # type: ignore[attr-defined] + obj = obj.copy(deep=False) + obj.index = index return obj if implementation is Implementation.PANDAS and ( backend_version < (1,) @@ -283,23 +285,23 @@ def set_index( kwargs["copy"] = False else: # pragma: no cover pass - return obj.set_axis(index, axis=0, **kwargs) # type: ignore[attr-defined] + return obj.set_axis(index, axis=0, **kwargs) def set_columns( - obj: T, + obj: NDFrameT, columns: list[str], *, implementation: Implementation, backend_version: tuple[int, ...], -) -> T: +) -> NDFrameT: """Wrapper around pandas' set_axis to set object columns. We can set `copy` / `inplace` based on implementation/version. 
""" if implementation is Implementation.CUDF: # pragma: no cover - obj = obj.copy(deep=False) # type: ignore[attr-defined] - obj.columns = columns # type: ignore[attr-defined] + obj = obj.copy(deep=False) + obj.columns = cast("pd.Index[str]", columns) return obj if implementation is Implementation.PANDAS and ( backend_version < (1,) @@ -313,22 +315,22 @@ def set_columns( kwargs["copy"] = False else: # pragma: no cover pass - return obj.set_axis(columns, axis=1, **kwargs) # type: ignore[attr-defined] + return obj.set_axis(columns, axis=1, **kwargs) def rename( - obj: T, + obj: NDFrameT, *args: Any, implementation: Implementation, backend_version: tuple[int, ...], **kwargs: Any, -) -> T: +) -> NDFrameT: """Wrapper around pandas' rename so that we can set `copy` based on implementation/version.""" if implementation is Implementation.PANDAS and ( backend_version >= (3,) ): # pragma: no cover - return obj.rename(*args, **kwargs) # type: ignore[attr-defined] - return obj.rename(*args, **kwargs, copy=False) # type: ignore[attr-defined] + return obj.rename(*args, **kwargs) + return obj.rename(*args, **kwargs, copy=False) @functools.lru_cache(maxsize=16) @@ -740,34 +742,34 @@ def calculate_timestamp_date(s: pd.Series[int], time_unit: str) -> pd.Series[int def select_columns_by_name( - df: T, + df: DataFrameT, column_names: list[str] | _1DArray, # NOTE: Cannot be a tuple! backend_version: tuple[int, ...], implementation: Implementation, -) -> T: +) -> DataFrameT: """Select columns by name. Prefer this over `df.loc[:, column_names]` as it's generally more performant. 
""" - if len(column_names) == df.shape[1] and all(column_names == df.columns): # type: ignore[attr-defined] + if len(column_names) == df.shape[1] and (df.columns == column_names).all(): return df - if (df.columns.dtype.kind == "b") or ( # type: ignore[attr-defined] + if (df.columns.dtype.kind == "b") or ( implementation is Implementation.PANDAS and backend_version < (1, 5) ): # See https://github.com/narwhals-dev/narwhals/issues/1349#issuecomment-2470118122 # for why we need this - available_columns = df.columns.tolist() # type: ignore[attr-defined] + available_columns = df.columns.tolist() missing_columns = [x for x in column_names if x not in available_columns] if missing_columns: # pragma: no cover raise ColumnNotFoundError.from_missing_and_available_column_names( missing_columns, available_columns ) - return df.loc[:, column_names] # type: ignore[attr-defined] + return df.loc[:, column_names] try: - return df[column_names] # type: ignore[index] + return df[column_names] except KeyError as e: - available_columns = df.columns.tolist() # type: ignore[attr-defined] + available_columns = df.columns.tolist() missing_columns = [x for x in column_names if x not in available_columns] raise ColumnNotFoundError.from_missing_and_available_column_names( missing_columns, available_columns From dc62f1d310b5b3cb45b63fbe11e92cf84311e95a Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Sun, 16 Mar 2025 22:13:03 +0000 Subject: [PATCH 3/4] fix(typing): Resolve `pyright` errors --- narwhals/_pandas_like/utils.py | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/narwhals/_pandas_like/utils.py b/narwhals/_pandas_like/utils.py index 90344f03d7..b9ef8ec503 100644 --- a/narwhals/_pandas_like/utils.py +++ b/narwhals/_pandas_like/utils.py @@ -275,9 +275,9 @@ def set_index( We can set `copy` / `inplace` based on implementation/version. 
""" if implementation is Implementation.CUDF: # pragma: no cover - obj = obj.copy(deep=False) - obj.index = index - return obj + cudf_frame = obj.copy(deep=False) + cudf_frame.index = index + return cast("NDFrameT", cudf_frame) if implementation is Implementation.PANDAS and ( backend_version < (1,) ): # pragma: no cover @@ -290,7 +290,8 @@ def set_index( kwargs["copy"] = False else: # pragma: no cover pass - return obj.set_axis(index, axis=0, **kwargs) + nd_frame = obj.set_axis(index, axis=0, **kwargs) + return cast("NDFrameT", nd_frame) def set_columns( @@ -305,9 +306,9 @@ def set_columns( We can set `copy` / `inplace` based on implementation/version. """ if implementation is Implementation.CUDF: # pragma: no cover - obj = obj.copy(deep=False) - obj.columns = cast("pd.Index[str]", columns) - return obj + cudf_frame = obj.copy(deep=False) + cudf_frame.columns = cast("pd.Index[str]", columns) + return cast("NDFrameT", cudf_frame) if implementation is Implementation.PANDAS and ( backend_version < (1,) ): # pragma: no cover @@ -320,7 +321,8 @@ def set_columns( kwargs["copy"] = False else: # pragma: no cover pass - return obj.set_axis(columns, axis=1, **kwargs) + nd_frame = obj.set_axis(columns, axis=1, **kwargs) + return cast("NDFrameT", nd_frame) def rename( @@ -331,11 +333,12 @@ def rename( **kwargs: Any, ) -> NDFrameT: """Wrapper around pandas' rename so that we can set `copy` based on implementation/version.""" - if implementation is Implementation.PANDAS and ( - backend_version >= (3,) - ): # pragma: no cover - return obj.rename(*args, **kwargs) - return obj.rename(*args, **kwargs, copy=False) + nd_frame = ( + obj.rename(*args, **kwargs, inplace=False) + if implementation.is_pandas() and (backend_version >= (3,)) + else obj.rename(*args, **kwargs, copy=False, inplace=False) + ) + return cast("NDFrameT", nd_frame) @functools.lru_cache(maxsize=16) @@ -760,7 +763,7 @@ def select_columns_by_name( if len(column_names) == df.shape[1] and (df.columns == 
column_names).all(): return df if (df.columns.dtype.kind == "b") or ( - implementation is Implementation.PANDAS and backend_version < (1, 5) + implementation.is_pandas() and backend_version < (1, 5) ): # See https://github.com/narwhals-dev/narwhals/issues/1349#issuecomment-2470118122 # for why we need this @@ -770,7 +773,7 @@ def select_columns_by_name( raise ColumnNotFoundError.from_missing_and_available_column_names( missing_columns, available_columns ) - return df.loc[:, column_names] + return cast("DataFrameT", df.loc[:, column_names]) try: return df[column_names] except KeyError as e: From 3648814fe9ea3d9f580c5d2d875da79bd32a8ab5 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Sun, 16 Mar 2025 22:14:54 +0000 Subject: [PATCH 4/4] ignore `mypy` getting this wrong The stubs don't return `Self` for all types, so `pyright` is right --- narwhals/_pandas_like/utils.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/narwhals/_pandas_like/utils.py b/narwhals/_pandas_like/utils.py index b9ef8ec503..62623937cc 100644 --- a/narwhals/_pandas_like/utils.py +++ b/narwhals/_pandas_like/utils.py @@ -277,7 +277,7 @@ def set_index( if implementation is Implementation.CUDF: # pragma: no cover cudf_frame = obj.copy(deep=False) cudf_frame.index = index - return cast("NDFrameT", cudf_frame) + return cast("NDFrameT", cudf_frame) # type: ignore[redundant-cast] if implementation is Implementation.PANDAS and ( backend_version < (1,) ): # pragma: no cover @@ -291,7 +291,7 @@ def set_index( else: # pragma: no cover pass nd_frame = obj.set_axis(index, axis=0, **kwargs) - return cast("NDFrameT", nd_frame) + return cast("NDFrameT", nd_frame) # type: ignore[redundant-cast] def set_columns( @@ -308,7 +308,7 @@ def set_columns( if implementation is Implementation.CUDF: # pragma: no cover cudf_frame = obj.copy(deep=False) cudf_frame.columns = cast("pd.Index[str]", columns) - return cast("NDFrameT", cudf_frame) + 
return cast("NDFrameT", cudf_frame) # type: ignore[redundant-cast] if implementation is Implementation.PANDAS and ( backend_version < (1,) ): # pragma: no cover @@ -322,7 +322,7 @@ def set_columns( else: # pragma: no cover pass nd_frame = obj.set_axis(columns, axis=1, **kwargs) - return cast("NDFrameT", nd_frame) + return cast("NDFrameT", nd_frame) # type: ignore[redundant-cast] def rename( @@ -338,7 +338,7 @@ def rename( if implementation.is_pandas() and (backend_version >= (3,)) else obj.rename(*args, **kwargs, copy=False, inplace=False) ) - return cast("NDFrameT", nd_frame) + return cast("NDFrameT", nd_frame) # type: ignore[redundant-cast] @functools.lru_cache(maxsize=16) @@ -773,7 +773,7 @@ def select_columns_by_name( raise ColumnNotFoundError.from_missing_and_available_column_names( missing_columns, available_columns ) - return cast("DataFrameT", df.loc[:, column_names]) + return cast("DataFrameT", df.loc[:, column_names]) # type: ignore[redundant-cast] try: return df[column_names] except KeyError as e: