From aa6578ef89ec6b399837f674e6b59ef0d8e1bd4a Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Mon, 7 Apr 2025 13:31:26 +0100 Subject: [PATCH 01/19] fix(typing): Narrow `IntoDataFrame` Will close #2344 --- narwhals/dependencies.py | 5 ++--- narwhals/translate.py | 5 +++++ narwhals/typing.py | 2 +- tests/translate/from_native_test.py | 21 +++++++++++++++++++++ 4 files changed, 29 insertions(+), 4 deletions(-) diff --git a/narwhals/dependencies.py b/narwhals/dependencies.py index e32e544abc..1f49ef9642 100644 --- a/narwhals/dependencies.py +++ b/narwhals/dependencies.py @@ -26,7 +26,6 @@ from narwhals.dataframe import DataFrame from narwhals.dataframe import LazyFrame from narwhals.series import Series - from narwhals.typing import DataFrameT from narwhals.typing import FrameT from narwhals.typing import IntoDataFrameT from narwhals.typing import IntoSeriesT @@ -364,8 +363,8 @@ def is_into_dataframe(native_dataframe: Any | IntoDataFrameT) -> TypeIs[IntoData def is_narwhals_dataframe( - df: Any | DataFrame[DataFrameT], -) -> TypeIs[DataFrame[DataFrameT]]: + df: DataFrame[IntoDataFrameT] | Any, +) -> TypeIs[DataFrame[IntoDataFrameT]]: """Check whether `df` is a Narwhals DataFrame. This is useful if you expect a user to pass in a Narwhals diff --git a/narwhals/translate.py b/narwhals/translate.py index 4c7ca1193e..34509e3d7c 100644 --- a/narwhals/translate.py +++ b/narwhals/translate.py @@ -44,6 +44,7 @@ from narwhals.dataframe import DataFrame from narwhals.dataframe import LazyFrame from narwhals.series import Series + from narwhals.typing import DataFrameT from narwhals.typing import IntoDataFrameT from narwhals.typing import IntoFrame from narwhals.typing import IntoFrameT @@ -133,6 +134,10 @@ def to_native( def from_native(native_object: SeriesT, **kwds: Any) -> SeriesT: ... +@overload +def from_native(native_object: DataFrameT, **kwds: Any) -> DataFrameT: ... + + @overload def from_native( native_object: IntoDataFrameT | IntoSeries, diff --git a/narwhals/typing.py b/narwhals/typing.py index 6f481768a5..e0803e0c56 100644 --- a/narwhals/typing.py +++ b/narwhals/typing.py @@ -57,7 +57,7 @@ def __native_namespace__(self) -> ModuleType: ... which will be interpreted as a `nw.Expr`, e.g. `df.select('a')`. """ -IntoDataFrame: TypeAlias = Union["NativeFrame", "DataFrame[Any]", "DataFrameLike"] +IntoDataFrame: TypeAlias = Union["NativeFrame", "DataFrameLike"] """Anything which can be converted to a Narwhals DataFrame. Use this if your function accepts a narwhalifiable object but doesn't care about its backend. diff --git a/tests/translate/from_native_test.py b/tests/translate/from_native_test.py index 9a67c10bc1..f3953a33b4 100644 --- a/tests/translate/from_native_test.py +++ b/tests/translate/from_native_test.py @@ -369,6 +369,27 @@ def test_from_native_lazyframe() -> None: assert isinstance(unstable_lazy, unstable_nw.LazyFrame) +def test_dataframe_recursive() -> None: + pytest.importorskip("polars") + import polars as pl + + pl_frame = pl.DataFrame({"a": [1, 2, 3]}) + nw_frame = unstable_nw.from_native(pl_frame) + with pytest.raises(AssertionError): + unstable_nw.DataFrame(nw_frame, level="full") + + nw_frame_early_return = unstable_nw.from_native(nw_frame) + + if TYPE_CHECKING: + assert_type(pl_frame, pl.DataFrame) + assert_type(nw_frame, unstable_nw.DataFrame[pl.DataFrame]) + + nw_frame_depth_2 = unstable_nw.DataFrame(nw_frame, level="full") # type: ignore[var-annotated] + # NOTE: Checking that the type is `DataFrame[Unknown]` + assert_type(nw_frame_depth_2, unstable_nw.DataFrame) # type: ignore[type-arg] + assert_type(nw_frame_early_return, unstable_nw.DataFrame[pl.DataFrame]) + + def test_series_recursive() -> None: """https://github.com/narwhals-dev/narwhals/issues/2239.""" pytest.importorskip("polars") From 51c5b63ad37ed7b45d234492802616dbc4e5a29d Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Mon, 7 Apr 2025 13:34:26 +0100 Subject: [PATCH 02/19] fix(typing): Remove `DataFrame` from `IntoFrame` --- narwhals/typing.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/narwhals/typing.py b/narwhals/typing.py index e0803e0c56..06a57fddb0 100644 --- a/narwhals/typing.py +++ b/narwhals/typing.py @@ -72,9 +72,7 @@ def __native_namespace__(self) -> ModuleType: ... IntoLazyFrame: TypeAlias = "NativeLazyFrame | LazyFrame[Any]" -IntoFrame: TypeAlias = Union[ - "NativeFrame", "DataFrame[Any]", "LazyFrame[Any]", "DataFrameLike" -] +IntoFrame: TypeAlias = Union["NativeFrame", "LazyFrame[Any]", "DataFrameLike"] """Anything which can be converted to a Narwhals DataFrame or LazyFrame. Use this if your function can accept an object which can be converted to either From 6cbbc74ff5f60de526915ecae45943f8ed9d9457 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Mon, 7 Apr 2025 14:05:39 +0100 Subject: [PATCH 03/19] fix(typing): Narrow `IntoLazyFrame`, `IntoFrame` - Revealed two new `[overload-cannot-match]` from `mypy` - I agreed with that and removed the conflict sources Will close #2345 --- narwhals/translate.py | 22 ++-------------------- narwhals/typing.py | 4 ++-- tests/translate/from_native_test.py | 21 +++++++++++++++++++++ 3 files changed, 25 insertions(+), 22 deletions(-) diff --git a/narwhals/translate.py b/narwhals/translate.py index 34509e3d7c..89544cbf32 100644 --- a/narwhals/translate.py +++ b/narwhals/translate.py @@ -51,6 +51,7 @@ from narwhals.typing import IntoLazyFrameT from narwhals.typing import IntoSeries from narwhals.typing import IntoSeriesT + from narwhals.typing import LazyFrameT from narwhals.typing import SeriesT T = TypeVar("T") @@ -139,14 +140,7 @@ def from_native(native_object: DataFrameT, **kwds: Any) -> DataFrameT: ... @overload -def from_native( - native_object: IntoDataFrameT | IntoSeries, - *, - pass_through: Literal[True], - eager_only: Literal[False] = ..., - series_only: Literal[False] = ..., - allow_series: Literal[True], -) -> DataFrame[IntoDataFrameT]: ... +def from_native(native_object: LazyFrameT, **kwds: Any) -> LazyFrameT: ... @overload @@ -240,7 +234,6 @@ def from_native( # type: ignore[overload-overlap] ) -> LazyFrame[IntoLazyFrameT]: ... -# NOTE: `pl.LazyFrame` originally matched here @overload def from_native( native_object: IntoDataFrameT, @@ -285,17 +278,6 @@ def from_native( ) -> Series[IntoSeriesT]: ... -@overload -def from_native( - native_object: IntoFrameT | IntoLazyFrameT, - *, - pass_through: Literal[False] = ..., - eager_only: Literal[False] = ..., - series_only: Literal[False] = ..., - allow_series: None = ..., -) -> DataFrame[IntoFrameT] | LazyFrame[IntoLazyFrameT]: ... - - # All params passed in as variables @overload def from_native( diff --git a/narwhals/typing.py b/narwhals/typing.py index 06a57fddb0..ada85333b6 100644 --- a/narwhals/typing.py +++ b/narwhals/typing.py @@ -70,9 +70,9 @@ def __native_namespace__(self) -> ModuleType: ... ... return df.shape """ -IntoLazyFrame: TypeAlias = "NativeLazyFrame | LazyFrame[Any]" +IntoLazyFrame: TypeAlias = "NativeLazyFrame" -IntoFrame: TypeAlias = Union["NativeFrame", "LazyFrame[Any]", "DataFrameLike"] +IntoFrame: TypeAlias = "IntoDataFrame | IntoLazyFrame" """Anything which can be converted to a Narwhals DataFrame or LazyFrame. Use this if your function can accept an object which can be converted to either diff --git a/tests/translate/from_native_test.py b/tests/translate/from_native_test.py index f3953a33b4..d1e1c8dd98 100644 --- a/tests/translate/from_native_test.py +++ b/tests/translate/from_native_test.py @@ -390,6 +390,27 @@ def test_dataframe_recursive() -> None: assert_type(nw_frame_early_return, unstable_nw.DataFrame[pl.DataFrame]) +def test_lazyframe_recursive() -> None: + pytest.importorskip("polars") + import polars as pl + + pl_frame = pl.DataFrame({"a": [1, 2, 3]}).lazy() + nw_frame = unstable_nw.from_native(pl_frame) + with pytest.raises(AssertionError): + unstable_nw.LazyFrame(nw_frame, level="lazy") + + nw_frame_early_return = unstable_nw.from_native(nw_frame) + + if TYPE_CHECKING: + assert_type(pl_frame, pl.LazyFrame) + assert_type(nw_frame, unstable_nw.LazyFrame[pl.LazyFrame]) + + nw_frame_depth_2 = unstable_nw.LazyFrame(nw_frame, level="lazy") # type: ignore[var-annotated] + # NOTE: Checking that the type is `LazyFrame[Unknown]` + assert_type(nw_frame_depth_2, unstable_nw.LazyFrame) # type: ignore[type-arg] + assert_type(nw_frame_early_return, unstable_nw.LazyFrame[pl.LazyFrame]) + + def test_series_recursive() -> None: """https://github.com/narwhals-dev/narwhals/issues/2239.""" pytest.importorskip("polars") From 255ab274c6cbf03e9985c64e9937c90ec93b56ea Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Mon, 7 Apr 2025 14:49:49 +0100 Subject: [PATCH 04/19] fix(typing): Annotate `DataFrame._compliant_frame` Revealed quite a few other issues --- narwhals/dataframe.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/narwhals/dataframe.py b/narwhals/dataframe.py index f7574a95f6..371b77a27e 100644 --- a/narwhals/dataframe.py +++ b/narwhals/dataframe.py @@ -33,10 +33,12 @@ from narwhals.utils import find_stacklevel from narwhals.utils import flatten from narwhals.utils import generate_repr +from narwhals.utils import is_compliant_dataframe from narwhals.utils import is_compliant_lazyframe from narwhals.utils import is_sequence_but_not_str from narwhals.utils import issue_deprecation_warning from narwhals.utils import parse_version +from narwhals.utils import supports_arrow_c_stream if TYPE_CHECKING: from io import BytesIO @@ -50,6 +52,7 @@ from typing_extensions import ParamSpec from typing_extensions import Self + from narwhals._compliant import CompliantDataFrame from narwhals._compliant import IntoCompliantExpr from narwhals._compliant.typing import EagerNamespaceAny from narwhals.group_by import GroupBy @@ -447,8 +450,10 @@ def __init__( level: Literal["full", "lazy", "interchange"], ) -> None: self._level: Literal["full", "lazy", "interchange"] = level - if hasattr(df, "__narwhals_dataframe__"): - self._compliant_frame: Any = df.__narwhals_dataframe__() + if is_compliant_dataframe(df): + self._compliant_frame: CompliantDataFrame[Any, Any, DataFrameT] = ( + df.__narwhals_dataframe__() + ) else: # pragma: no cover msg = f"Expected an object which implements `__narwhals_dataframe__`, got: {type(df)}" raise AssertionError(msg) @@ -498,7 +503,7 @@ def __arrow_c_stream__(self: Self, requested_schema: object | None = None) -> ob for more. """ native_frame = self._compliant_frame._native_frame - if hasattr(native_frame, "__arrow_c_stream__"): + if supports_arrow_c_stream(native_frame): return native_frame.__arrow_c_stream__(requested_schema=requested_schema) try: import pyarrow as pa # ignore-banned-import From ba2f6e1a3ed88c179e376feff70c473cf2129932 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Mon, 7 Apr 2025 14:51:33 +0100 Subject: [PATCH 05/19] chore: Add missing `CompliantDataFrame.pivot` + fix related quirks --- narwhals/_arrow/dataframe.py | 2 ++ narwhals/_compliant/dataframe.py | 12 ++++++++++++ narwhals/_pandas_like/dataframe.py | 6 +++--- narwhals/_pandas_like/utils.py | 7 ++++--- narwhals/_polars/dataframe.py | 13 ++++++------- 5 files changed, 27 insertions(+), 13 deletions(-) diff --git a/narwhals/_arrow/dataframe.py b/narwhals/_arrow/dataframe.py index 0f1ec8e31d..131c2e04a1 100644 --- a/narwhals/_arrow/dataframe.py +++ b/narwhals/_arrow/dataframe.py @@ -847,3 +847,5 @@ def unpivot( ) # TODO(Unassigned): Even with promote_options="permissive", pyarrow does not # upcast numeric to non-numeric (e.g. string) datatypes + + pivot = not_implemented() diff --git a/narwhals/_compliant/dataframe.py b/narwhals/_compliant/dataframe.py index 60d5397ed9..87658fdb87 100644 --- a/narwhals/_compliant/dataframe.py +++ b/narwhals/_compliant/dataframe.py @@ -161,6 +161,18 @@ def join_asof( suffix: str, ) -> Self: ... def lazy(self, *, backend: Implementation | None) -> CompliantLazyFrame[Any, Any]: ... + def pivot( + self, + on: str | Sequence[str], + *, + index: str | Sequence[str] | None, + values: str | Sequence[str] | None, + aggregate_function: Literal[ + "min", "max", "first", "last", "sum", "mean", "median", "len", None + ], + sort_columns: bool, + separator: str, + ) -> Self: ... def rename(self, mapping: Mapping[str, str]) -> Self: ... def row(self, index: int) -> tuple[Any, ...]: ... def rows( diff --git a/narwhals/_pandas_like/dataframe.py b/narwhals/_pandas_like/dataframe.py index 82314ceb98..f1f8974de9 100644 --- a/narwhals/_pandas_like/dataframe.py +++ b/narwhals/_pandas_like/dataframe.py @@ -1028,10 +1028,10 @@ def gather_every(self: Self, n: int, offset: int) -> Self: def pivot( self: Self, - on: list[str], + on: Sequence[str], *, - index: list[str] | None, - values: list[str] | None, + index: Sequence[str] | None, + values: Sequence[str] | None, aggregate_function: Any | None, sort_columns: bool, separator: str, diff --git a/narwhals/_pandas_like/utils.py b/narwhals/_pandas_like/utils.py index 6c7c0d1db5..ba0366ebd1 100644 --- a/narwhals/_pandas_like/utils.py +++ b/narwhals/_pandas_like/utils.py @@ -6,6 +6,7 @@ from contextlib import suppress from typing import TYPE_CHECKING from typing import Any +from typing import Sequence from typing import Sized from typing import TypeVar from typing import cast @@ -742,9 +743,9 @@ def select_columns_by_name( def pivot_table( df: PandasLikeDataFrame, - values: list[str], - index: list[str], - columns: list[str], + values: Sequence[str], + index: Sequence[str], + columns: Sequence[str], aggregate_function: str | None, ) -> Any: dtypes = import_dtypes_module(df._version) diff --git a/narwhals/_polars/dataframe.py b/narwhals/_polars/dataframe.py index f1a70fe0e6..ada365b5eb 100644 --- a/narwhals/_polars/dataframe.py +++ b/narwhals/_polars/dataframe.py @@ -411,15 +411,14 @@ def unpivot( ) def pivot( - self: Self, - on: list[str], + self, + on: str | Sequence[str], *, - index: list[str] | None, - values: list[str] | None, + index: str | Sequence[str] | None, + values: str | Sequence[str] | None, aggregate_function: Literal[ - "min", "max", "first", "last", "sum", "mean", "median", "len" - ] - | None, + "min", "max", "first", "last", "sum", "mean", "median", "len", None + ], sort_columns: bool, separator: str, ) -> Self: From 18157520c9752470f617068150662de5199ac130 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Mon, 7 Apr 2025 14:52:57 +0100 Subject: [PATCH 06/19] fix(typing): Ensure `__iter__` is available on group_by --- narwhals/_compliant/dataframe.py | 3 ++- narwhals/_compliant/group_by.py | 11 +++++++++-- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/narwhals/_compliant/dataframe.py b/narwhals/_compliant/dataframe.py index 87658fdb87..2bd0faefaf 100644 --- a/narwhals/_compliant/dataframe.py +++ b/narwhals/_compliant/dataframe.py @@ -37,6 +37,7 @@ from typing_extensions import TypeAlias from narwhals._compliant.group_by import CompliantGroupBy + from narwhals._compliant.group_by import DataFrameGroupBy from narwhals._translate import IntoArrowTable from narwhals.dtypes import DType from narwhals.schema import Schema @@ -132,7 +133,7 @@ def gather_every(self, n: int, offset: int) -> Self: ... def get_column(self, name: str) -> CompliantSeriesT: ... def group_by( self, *keys: str, drop_null_keys: bool - ) -> CompliantGroupBy[Self, Any]: ... + ) -> DataFrameGroupBy[Self, Any]: ... def head(self, n: int) -> Self: ... def item(self, row: int | None, column: int | str | None) -> Any: ... def iter_columns(self) -> Iterator[CompliantSeriesT]: ... diff --git a/narwhals/_compliant/group_by.py b/narwhals/_compliant/group_by.py index 477f0d0904..37d37fb259 100644 --- a/narwhals/_compliant/group_by.py +++ b/narwhals/_compliant/group_by.py @@ -76,6 +76,13 @@ def __init__( def agg(self, *exprs: CompliantExprT_contra) -> CompliantFrameT_co: ... +class DataFrameGroupBy( + CompliantGroupBy[CompliantDataFrameT_co, CompliantExprT_contra], + Protocol38[CompliantDataFrameT_co, CompliantExprT_contra], +): + def __iter__(self) -> Iterator[tuple[Any, CompliantDataFrameT_co]]: ... + + class DepthTrackingGroupBy( CompliantGroupBy[CompliantFrameT_co, DepthTrackingExprT_contra], Protocol38[CompliantFrameT_co, DepthTrackingExprT_contra, NativeAggregationT_co], @@ -132,9 +139,9 @@ def _leaf_name(cls, expr: DepthTrackingExprAny, /) -> NarwhalsAggregation | Any: class EagerGroupBy( DepthTrackingGroupBy[CompliantDataFrameT_co, EagerExprT_contra, str], + DataFrameGroupBy[CompliantDataFrameT_co, EagerExprT_contra], Protocol38[CompliantDataFrameT_co, EagerExprT_contra], -): - def __iter__(self) -> Iterator[tuple[Any, CompliantDataFrameT_co]]: ... +): ... class LazyGroupBy( From 07deea2d2b8b1750c3367c2f84fec4d9b08a03a7 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Mon, 7 Apr 2025 15:04:36 +0100 Subject: [PATCH 07/19] chore(typing): Fix most of `DataFrame` --- narwhals/dataframe.py | 57 +++++++++++++++++-------------------------- 1 file changed, 22 insertions(+), 35 deletions(-) diff --git a/narwhals/dataframe.py b/narwhals/dataframe.py index 371b77a27e..396742fa19 100644 --- a/narwhals/dataframe.py +++ b/narwhals/dataframe.py @@ -450,10 +450,9 @@ def __init__( level: Literal["full", "lazy", "interchange"], ) -> None: self._level: Literal["full", "lazy", "interchange"] = level + self._compliant_frame: CompliantDataFrame[Any, Any, DataFrameT] if is_compliant_dataframe(df): - self._compliant_frame: CompliantDataFrame[Any, Any, DataFrameT] = ( - df.__narwhals_dataframe__() - ) + self._compliant_frame = df.__narwhals_dataframe__() else: # pragma: no cover msg = f"Expected an object which implements `__narwhals_dataframe__`, got: {type(df)}" raise AssertionError(msg) @@ -482,13 +481,13 @@ def implementation(self: Self) -> Implementation: >>> df.implementation.is_polars() False """ - return self._compliant_frame._implementation # type: ignore[no-any-return] + return self._compliant_frame._implementation def __len__(self: Self) -> int: - return self._compliant_frame.__len__() # type: ignore[no-any-return] + return self._compliant_frame.__len__() def __array__(self: Self, dtype: Any = None, copy: bool | None = None) -> _2DArray: # noqa: FBT001 - return self._compliant_frame.__array__(dtype, copy=copy) # type: ignore[no-any-return] + return self._compliant_frame.__array__(dtype, copy=copy) def __repr__(self: Self) -> str: # pragma: no cover return generate_repr("Narwhals DataFrame", self.to_native().__repr__()) @@ -592,8 +591,7 @@ def lazy( ) raise ValueError(msg) return self._lazyframe( - self._compliant_frame.lazy(backend=lazy_backend), - level="lazy", + self._compliant_frame.lazy(backend=lazy_backend), level="lazy" ) def to_native(self: Self) -> DataFrameT: @@ -617,7 +615,7 @@ def to_native(self: Self) -> DataFrameT: 1 2 7.0 b 2 3 8.0 c """ - return self._compliant_frame._native_frame # type: ignore[no-any-return] + return self._compliant_frame._native_frame def to_pandas(self: Self) -> pd.DataFrame: """Convert this DataFrame to a pandas DataFrame. @@ -638,7 +636,7 @@ def to_pandas(self: Self) -> pd.DataFrame: 1 2 7.0 b 2 3 8.0 c """ - return self._compliant_frame.to_pandas() # type: ignore[no-any-return] + return self._compliant_frame.to_pandas() def to_polars(self: Self) -> pl.DataFrame: """Convert this DataFrame to a polars DataFrame. @@ -662,7 +660,7 @@ def to_polars(self: Self) -> pl.DataFrame: │ 2 ┆ 7.0 │ └─────┴─────┘ """ - return self._compliant_frame.to_polars() # type: ignore[no-any-return] + return self._compliant_frame.to_polars() @overload def write_csv(self: Self, file: None = None) -> str: ... @@ -693,7 +691,7 @@ def write_csv(self: Self, file: str | Path | BytesIO | None = None) -> str | Non If we had passed a file name to `write_csv`, it would have been written to that file. """ - return self._compliant_frame.write_csv(file) # type: ignore[no-any-return] + return self._compliant_frame.write_csv(file) def write_parquet(self: Self, file: str | Path | BytesIO) -> None: """Write dataframe to parquet file. @@ -729,7 +727,7 @@ def to_numpy(self: Self) -> _2DArray: array([[1. , 6.5], [2. , 7. ]]) """ - return self._compliant_frame.to_numpy() # type: ignore[no-any-return] + return self._compliant_frame.to_numpy(None, copy=None) @property def shape(self: Self) -> tuple[int, int]: @@ -746,7 +744,7 @@ def shape(self: Self) -> tuple[int, int]: >>> df.shape (2, 1) """ - return self._compliant_frame.shape # type: ignore[no-any-return] + return self._compliant_frame.shape def get_column(self: Self, name: str) -> Series[Any]: """Get a single column by name. @@ -774,10 +772,7 @@ def get_column(self: Self, name: str) -> Series[Any]: 1 2 Name: a, dtype: int64 """ - return self._series( - self._compliant_frame.get_column(name), - level=self._level, - ) + return self._series(self._compliant_frame.get_column(name), level=self._level) def estimated_size(self: Self, unit: SizeUnit = "b") -> int | float: """Return an estimation of the total (heap) allocated size of the `DataFrame`. @@ -799,7 +794,7 @@ def estimated_size(self: Self, unit: SizeUnit = "b") -> int | float: >>> df.estimated_size() 32 """ - return self._compliant_frame.estimated_size(unit=unit) # type: ignore[no-any-return] + return self._compliant_frame.estimated_size(unit=unit) @overload def __getitem__( # type: ignore[overload-overlap] @@ -955,15 +950,12 @@ def to_dict( """ if as_series: return { - key: self._series( - value, - level=self._level, - ) + key: self._series(value, level=self._level) for key, value in self._compliant_frame.to_dict( as_series=as_series ).items() } - return self._compliant_frame.to_dict(as_series=as_series) # type: ignore[no-any-return] + return self._compliant_frame.to_dict(as_series=as_series) def row(self: Self, index: int) -> tuple[Any, ...]: """Get values at given row. @@ -989,7 +981,7 @@ def row(self: Self, index: int) -> tuple[Any, ...]: >>> nw.from_native(df_native).row(1) (, ) """ - return self._compliant_frame.row(index) # type: ignore[no-any-return] + return self._compliant_frame.row(index) # inherited def pipe( @@ -1155,7 +1147,7 @@ def rows( >>> nw.from_native(df_native).rows() [(1, 6.0), (2, 7.0)] """ - return self._compliant_frame.rows(named=named) # type: ignore[no-any-return] + return self._compliant_frame.rows(named=named) # type: ignore[return-value] def iter_columns(self: Self) -> Iterator[Series[Any]]: """Returns an iterator over the columns of this DataFrame. @@ -1232,7 +1224,7 @@ def iter_rows( >>> next(iter_rows) (2, 7.0) """ - return self._compliant_frame.iter_rows(named=named, buffer_size=buffer_size) # type: ignore[no-any-return] + return self._compliant_frame.iter_rows(named=named, buffer_size=buffer_size) # type: ignore[return-value] def with_columns( self: Self, *exprs: IntoExpr | Iterable[IntoExpr], **named_exprs: IntoExpr @@ -1439,9 +1431,7 @@ def unique( if isinstance(subset, str): subset = [subset] return self._with_compliant( - self._compliant_frame.unique( - subset=subset, keep=keep, maintain_order=maintain_order - ) + self._compliant_frame.unique(subset, keep=keep, maintain_order=maintain_order) ) def filter( @@ -1797,10 +1787,7 @@ def is_unique(self: Self) -> Series[Any]: | dtype: bool | └───────────────┘ """ - return self._series( - self._compliant_frame.is_unique(), - level=self._level, - ) + return self._series(self._compliant_frame.is_unique(), level=self._level) def null_count(self: Self) -> Self: r"""Create a new DataFrame that shows the null counts per column. @@ -1993,7 +1980,7 @@ def to_arrow(self: Self) -> pa.Table: foo: [[1,null]] bar: [[2,3]] """ - return self._compliant_frame.to_arrow() # type: ignore[no-any-return] + return self._compliant_frame.to_arrow() def sample( self: Self, From 38818227050e024c483312a3c0353e7f4ff2f3fa Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Mon, 7 Apr 2025 15:09:29 +0100 Subject: [PATCH 08/19] chore(typing): Ignore interchange `[type-var]` --- narwhals/dataframe.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/narwhals/dataframe.py b/narwhals/dataframe.py index 396742fa19..51a3340bec 100644 --- a/narwhals/dataframe.py +++ b/narwhals/dataframe.py @@ -444,13 +444,11 @@ def _lazyframe(self: Self) -> type[LazyFrame[Any]]: return LazyFrame def __init__( - self: Self, - df: Any, - *, - level: Literal["full", "lazy", "interchange"], + self: Self, df: Any, *, level: Literal["full", "lazy", "interchange"] ) -> None: self._level: Literal["full", "lazy", "interchange"] = level - self._compliant_frame: CompliantDataFrame[Any, Any, DataFrameT] + # NOTE: Interchange support (`DataFrameLike`) is the source of the error + self._compliant_frame: CompliantDataFrame[Any, Any, DataFrameT] # type: ignore[type-var] if is_compliant_dataframe(df): self._compliant_frame = df.__narwhals_dataframe__() else: # pragma: no cover From 375fabc1c43e9a1687439df102bcb2d6735f8845 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Mon, 7 Apr 2025 15:17:32 +0100 Subject: [PATCH 09/19] test(typing): Barely fix dodgy spark typing - I think this whole test needs rewriting - We shouldn't be depending on the internals like this --- tests/expr_and_series/cast_test.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/tests/expr_and_series/cast_test.py b/tests/expr_and_series/cast_test.py index 560f3529b1..1868f5f49e 100644 --- a/tests/expr_and_series/cast_test.py +++ b/tests/expr_and_series/cast_test.py @@ -4,6 +4,8 @@ from datetime import time from datetime import timedelta from datetime import timezone +from typing import TYPE_CHECKING +from typing import cast import pandas as pd import pytest @@ -16,6 +18,9 @@ from tests.utils import assert_equal_data from tests.utils import is_windows +if TYPE_CHECKING: + from narwhals.typing import NativeLazyFrame + DATA = { "a": [1], "b": [1], @@ -286,24 +291,26 @@ def test_cast_struct(request: pytest.FixtureRequest, constructor: Constructor) - if "spark" in str(constructor): # pragma: no cover # Special handling for pyspark as it natively maps the input to # a column of type MAP - _tmp_nw_compliant_frame = nw.from_native(native_df)._compliant_frame + native_ldf = cast("NativeLazyFrame", native_df) + _tmp_nw_compliant_frame = nw.from_native(native_ldf)._compliant_frame F = _tmp_nw_compliant_frame._F # noqa: N806 T = _tmp_nw_compliant_frame._native_dtypes # noqa: N806 - native_df = native_df.withColumn( # type: ignore[union-attr] + native_ldf = native_ldf.withColumn( # type: ignore[attr-defined] "a", F.struct( F.col("a.movie ").cast(T.StringType()).alias("movie "), F.col("a.rating").cast(T.DoubleType()).alias("rating"), ), ) - assert nw.from_native(native_df).schema == nw.Schema( + assert nw.from_native(native_ldf).schema == nw.Schema( { "a": nw.Struct( [nw.Field("movie ", nw.String()), nw.Field("rating", nw.Float64())] ) } ) + native_df = native_ldf dtype = nw.Struct([nw.Field("movie ", nw.String()), nw.Field("rating", nw.Float32())]) result = nw.from_native(native_df).select(nw.col("a").cast(dtype)).lazy().collect() From 21e80efd5e90f428c828219c31142c7326d3fbc9 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Mon, 7 Apr 2025 15:23:40 +0100 Subject: [PATCH 10/19] fix: Implement `to_numpy` to catch args https://docs.pola.rs/api/python/stable/reference/dataframe/api/polars.DataFrame.to_numpy.html --- narwhals/_polars/dataframe.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/narwhals/_polars/dataframe.py b/narwhals/_polars/dataframe.py index ada365b5eb..303bfb2609 100644 --- a/narwhals/_polars/dataframe.py +++ b/narwhals/_polars/dataframe.py @@ -76,7 +76,6 @@ class PolarsDataFrame: select: Method[Self] sort: Method[Self] to_arrow: Method[pa.Table] - to_numpy: Method[_2DArray] to_pandas: Method[pd.DataFrame] unique: Method[Self] with_columns: Method[Self] @@ -231,6 +230,9 @@ def __array__( return self.native.__array__(dtype) return self.native.__array__(dtype) + def to_numpy(self, dtype: Any = None, *, copy: bool | None = None) -> _2DArray: + return self.native.to_numpy() + def collect_schema(self: Self) -> dict[str, DType]: if self._backend_version < (1,): return { From c12498504f04f4b1f24a47a74dfcc6ae83887406 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Mon, 7 Apr 2025 16:58:23 +0100 Subject: [PATCH 11/19] fix(typing): Annotate `LazyFrame._compliant_frame` --- narwhals/dataframe.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/narwhals/dataframe.py b/narwhals/dataframe.py index 51a3340bec..370b543257 100644 --- a/narwhals/dataframe.py +++ b/narwhals/dataframe.py @@ -53,6 +53,7 @@ from typing_extensions import Self from narwhals._compliant import CompliantDataFrame + from narwhals._compliant import CompliantLazyFrame from narwhals._compliant import IntoCompliantExpr from narwhals._compliant.typing import EagerNamespaceAny from narwhals.group_by import GroupBy @@ -2182,9 +2183,8 @@ def __init__( level: Literal["full", "lazy", "interchange"], ) -> None: self._level = level + self._compliant_frame: CompliantLazyFrame[Any, FrameT] # type: ignore[type-var] if is_compliant_lazyframe(df): - # NOTE: Blocked by (#2239) - # self._compliant_frame: CompliantLazyFrame[Any, FrameT] = df.__narwhals_lazyframe__() # noqa: ERA001 self._compliant_frame = df.__narwhals_lazyframe__() else: # pragma: no cover msg = f"Expected Polars LazyFrame or an object that implements `__narwhals_lazyframe__`, got: {type(df)}" @@ -2210,7 +2210,7 @@ def implementation(self: Self) -> Implementation: >>> nw.from_native(lf_native).implementation """ - return self._compliant_frame._implementation # type: ignore[no-any-return] + return self._compliant_frame._implementation def __getitem__(self: Self, item: str | slice) -> NoReturn: msg = "Slicing is not supported on LazyFrame" From 831a6be21860f24e93c8f75c01b389f5f1ace7d1 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Mon, 7 Apr 2025 17:04:55 +0100 Subject: [PATCH 12/19] chore(typing): Ignore and add note for `spark_like` cast --- tests/expr_and_series/cast_test.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/expr_and_series/cast_test.py b/tests/expr_and_series/cast_test.py index 1868f5f49e..e13050a8b5 100644 --- a/tests/expr_and_series/cast_test.py +++ b/tests/expr_and_series/cast_test.py @@ -288,13 +288,14 @@ def test_cast_struct(request: pytest.FixtureRequest, constructor: Constructor) - native_df = constructor(data) + # NOTE: This branch needs to be rewritten to **not depend** on private `SparkLikeLazyFrame` properties if "spark" in str(constructor): # pragma: no cover # Special handling for pyspark as it natively maps the input to # a column of type MAP native_ldf = cast("NativeLazyFrame", native_df) _tmp_nw_compliant_frame = nw.from_native(native_ldf)._compliant_frame - F = _tmp_nw_compliant_frame._F # noqa: N806 - T = _tmp_nw_compliant_frame._native_dtypes # noqa: N806 + F = _tmp_nw_compliant_frame._F # type: ignore[attr-defined] # noqa: N806 + T = _tmp_nw_compliant_frame._native_dtypes # type: ignore[attr-defined] # noqa: N806 native_ldf = native_ldf.withColumn( # type: ignore[attr-defined] "a", From 1725f36c26541b3f3147f186a070a566f308a2c9 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Mon, 7 Apr 2025 21:22:56 +0100 Subject: [PATCH 13/19] chore(typing): Partial `v1` backport Spent waaaaaay too long trying to get this working --- narwhals/stable/v1/__init__.py | 10 +++++ tests/translate/from_native_test.py | 57 ++++++++++++++++++++++++++--- 2 files changed, 61 insertions(+), 6 deletions(-) diff --git a/narwhals/stable/v1/__init__.py b/narwhals/stable/v1/__init__.py index fa4f937ca9..bff6ed1157 100644 --- a/narwhals/stable/v1/__init__.py +++ b/narwhals/stable/v1/__init__.py @@ -100,6 +100,8 @@ from narwhals.typing import _2DArray FrameT = TypeVar("FrameT", "DataFrame[Any]", "LazyFrame[Any]") + DataFrameT = TypeVar("DataFrameT", bound="DataFrame[Any]") + LazyFrameT = TypeVar("LazyFrameT", bound="LazyFrame[Any]") SeriesT = TypeVar("SeriesT", bound="Series[Any]") IntoSeriesT = TypeVar("IntoSeriesT", bound="IntoSeries", default=Any) T = TypeVar("T", default=Any) @@ -1159,6 +1161,14 @@ def _stableify( def from_native(native_object: SeriesT, **kwds: Any) -> SeriesT: ... +@overload +def from_native(native_object: DataFrameT, **kwds: Any) -> DataFrameT: ... + + +@overload +def from_native(native_object: LazyFrameT, **kwds: Any) -> LazyFrameT: ... + + @overload def from_native( native_object: IntoDataFrameT | IntoSeriesT, diff --git a/tests/translate/from_native_test.py b/tests/translate/from_native_test.py index d1e1c8dd98..50689fa25e 100644 --- a/tests/translate/from_native_test.py +++ b/tests/translate/from_native_test.py @@ -1,5 +1,6 @@ from __future__ import annotations +# mypy: disallow-any-generics=false, disable-error-code="var-annotated" import sys from contextlib import nullcontext as does_not_raise from importlib.util import find_spec @@ -384,9 +385,9 @@ def test_dataframe_recursive() -> None: assert_type(pl_frame, pl.DataFrame) assert_type(nw_frame, unstable_nw.DataFrame[pl.DataFrame]) - nw_frame_depth_2 = unstable_nw.DataFrame(nw_frame, level="full") # type: ignore[var-annotated] + nw_frame_depth_2 = unstable_nw.DataFrame(nw_frame, level="full") # NOTE: Checking that the type is `DataFrame[Unknown]` - assert_type(nw_frame_depth_2, unstable_nw.DataFrame) # type: ignore[type-arg] + assert_type(nw_frame_depth_2, unstable_nw.DataFrame) assert_type(nw_frame_early_return, unstable_nw.DataFrame[pl.DataFrame]) @@ -405,12 +406,56 @@ def test_lazyframe_recursive() -> None: assert_type(pl_frame, pl.LazyFrame) assert_type(nw_frame, unstable_nw.LazyFrame[pl.LazyFrame]) - nw_frame_depth_2 = unstable_nw.LazyFrame(nw_frame, level="lazy") # type: ignore[var-annotated] + nw_frame_depth_2 = unstable_nw.LazyFrame(nw_frame, level="lazy") # NOTE: Checking that the type is `LazyFrame[Unknown]` - assert_type(nw_frame_depth_2, unstable_nw.LazyFrame) # type: ignore[type-arg] + assert_type(nw_frame_depth_2, unstable_nw.LazyFrame) assert_type(nw_frame_early_return, unstable_nw.LazyFrame[pl.LazyFrame]) +def test_dataframe_recursive_v1() -> None: + pytest.importorskip("polars") + import polars as pl + + pl_frame = pl.DataFrame({"a": [1, 2, 3]}) + nw_frame = nw.from_native(pl_frame) + with pytest.raises(AssertionError): + nw.DataFrame(nw_frame, level="full") + + nw_frame_early_return = nw.from_native(nw_frame) + + if TYPE_CHECKING: + assert_type(pl_frame, pl.DataFrame) + # TODO @dangotbanned: Fix without breaking something else (1) + assert_type(nw_frame, nw.DataFrame[pl.DataFrame]) + + nw_frame_depth_2 = nw.DataFrame(nw_frame, level="full") + # NOTE: Checking that the type is `DataFrame[Unknown]` + assert_type(nw_frame_depth_2, nw.DataFrame) + # TODO @dangotbanned: Fix without breaking something else (2) + assert_type(nw_frame_early_return, nw.DataFrame[pl.DataFrame]) + + +def test_lazyframe_recursive_v1() -> None: + pytest.importorskip("polars") + import polars as pl + + pl_frame = pl.DataFrame({"a": [1, 2, 3]}).lazy() + nw_frame = nw.from_native(pl_frame) + with pytest.raises(AssertionError): + nw.LazyFrame(nw_frame, level="lazy") + + nw_frame_early_return = nw.from_native(nw_frame) + + if TYPE_CHECKING: + assert_type(pl_frame, pl.LazyFrame) + assert_type(nw_frame, nw.LazyFrame[pl.LazyFrame]) + + nw_frame_depth_2 = nw.LazyFrame(nw_frame, level="lazy") + # NOTE: Checking that the type is `LazyFrame[Unknown]` + assert_type(nw_frame_depth_2, nw.LazyFrame) + assert_type(nw_frame_early_return, nw.LazyFrame[pl.LazyFrame]) + + def test_series_recursive() -> None: """https://github.com/narwhals-dev/narwhals/issues/2239.""" pytest.importorskip("polars") @@ -427,9 +472,9 @@ def test_series_recursive() -> None: assert_type(pl_series, pl.Series) assert_type(nw_series, unstable_nw.Series[pl.Series]) - nw_series_depth_2 = unstable_nw.Series(nw_series, level="full") # type: ignore[var-annotated] + nw_series_depth_2 = unstable_nw.Series(nw_series, level="full") # NOTE: Checking that the type is `Series[Unknown]` - assert_type(nw_series_depth_2, unstable_nw.Series) # type: ignore[type-arg] + assert_type(nw_series_depth_2, unstable_nw.Series) assert_type(nw_series_early_return, unstable_nw.Series[pl.Series]) From c4bed590041026163b1968cff0f29383221243f5 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Mon, 7 Apr 2025 21:53:53 +0100 Subject: [PATCH 14/19] fix(typing): Just preserve `v1` behavior https://github.com/narwhals-dev/narwhals/pull/2356#discussion_r2031969263 --- narwhals/stable/v1/__init__.py | 6 ++++++ tests/translate/from_native_test.py | 8 ++++++-- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/narwhals/stable/v1/__init__.py b/narwhals/stable/v1/__init__.py index bff6ed1157..5a68e337d8 100644 --- a/narwhals/stable/v1/__init__.py +++ b/narwhals/stable/v1/__init__.py @@ -1169,6 +1169,12 @@ def from_native(native_object: DataFrameT, **kwds: Any) -> DataFrameT: ... def from_native(native_object: LazyFrameT, **kwds: Any) -> LazyFrameT: ... +@overload +def from_native( + native_object: DataFrameT | LazyFrameT, **kwds: Any +) -> DataFrameT | LazyFrameT: ... + + @overload def from_native( native_object: IntoDataFrameT | IntoSeriesT, diff --git a/tests/translate/from_native_test.py b/tests/translate/from_native_test.py index 50689fa25e..496c6bbbd0 100644 --- a/tests/translate/from_native_test.py +++ b/tests/translate/from_native_test.py @@ -413,6 +413,7 @@ def test_lazyframe_recursive() -> None: def test_dataframe_recursive_v1() -> None: + """`v1` always returns a Union.""" pytest.importorskip("polars") import polars as pl @@ -426,13 +427,16 @@ def test_dataframe_recursive_v1() -> None: if TYPE_CHECKING: assert_type(pl_frame, pl.DataFrame) # TODO @dangotbanned: Fix without breaking something else (1) - assert_type(nw_frame, nw.DataFrame[pl.DataFrame]) + assert_type(nw_frame, "nw.DataFrame[pl.DataFrame] | nw.LazyFrame[pl.DataFrame]") nw_frame_depth_2 = nw.DataFrame(nw_frame, level="full") # NOTE: Checking that the type is `DataFrame[Unknown]` assert_type(nw_frame_depth_2, nw.DataFrame) # TODO @dangotbanned: Fix without breaking something else (2) - assert_type(nw_frame_early_return, nw.DataFrame[pl.DataFrame]) + assert_type( + nw_frame_early_return, + "nw.DataFrame[pl.DataFrame] | nw.LazyFrame[pl.DataFrame]", + ) def test_lazyframe_recursive_v1() -> None: From 6a9fd91bdb528b0f4c492154331f40788ede8e24 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Mon, 7 Apr 2025 22:03:58 +0100 Subject: [PATCH 15/19] simplify --- narwhals/dataframe.py | 5 +---- narwhals/typing.py | 7 +++---- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/narwhals/dataframe.py b/narwhals/dataframe.py index 370b543257..0f2bbce5bd 100644 --- a/narwhals/dataframe.py +++ b/narwhals/dataframe.py @@ -2177,10 +2177,7 @@ def _dataframe(self: Self) -> type[DataFrame[Any]]: return DataFrame def __init__( - self: Self, - df: Any, - *, - level: Literal["full", "lazy", "interchange"], + self: Self, df: Any, *, level: Literal["full", "lazy", "interchange"] ) -> None: self._level = level self._compliant_frame: CompliantLazyFrame[Any, FrameT] # type: ignore[type-var] diff --git a/narwhals/typing.py b/narwhals/typing.py index ada85333b6..9388565575 100644 --- a/narwhals/typing.py +++ b/narwhals/typing.py @@ -5,7 +5,6 @@ from typing import Literal from typing import Protocol from typing import TypeVar -from typing import Union from narwhals._compliant import CompliantDataFrame from narwhals._compliant import CompliantLazyFrame @@ -48,7 +47,7 @@ class SupportsNativeNamespace(Protocol): def __native_namespace__(self) -> ModuleType: ... -IntoExpr: TypeAlias = Union["Expr", str, "Series[Any]"] +IntoExpr: TypeAlias = "Expr | str | Series[Any]" """Anything which can be converted to an expression. Use this to mean "either a Narwhals expression, or something which can be converted @@ -57,7 +56,7 @@ def __native_namespace__(self) -> ModuleType: ... which will be interpreted as a `nw.Expr`, e.g. `df.select('a')`. """ -IntoDataFrame: TypeAlias = Union["NativeFrame", "DataFrameLike"] +IntoDataFrame: TypeAlias = "NativeFrame | DataFrameLike" """Anything which can be converted to a Narwhals DataFrame. Use this if your function accepts a narwhalifiable object but doesn't care about its backend. @@ -86,7 +85,7 @@ def __native_namespace__(self) -> ModuleType: ... ... return df.collect_schema().names() """ -Frame: TypeAlias = Union["DataFrame[Any]", "LazyFrame[Any]"] +Frame: TypeAlias = "DataFrame[Any] | LazyFrame[Any]" """Narwhals DataFrame or Narwhals LazyFrame. Use this if your function can work with either and your function doesn't care From ed65ad246a150145f545837983cbb796a1b72d0a Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Mon, 7 Apr 2025 22:35:04 +0100 Subject: [PATCH 16/19] try old `Union` https://github.com/narwhals-dev/narwhals/pull/2356#discussion_r2032036716 --- narwhals/typing.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/narwhals/typing.py b/narwhals/typing.py index 9388565575..9dcf473419 100644 --- a/narwhals/typing.py +++ b/narwhals/typing.py @@ -5,6 +5,7 @@ from typing import Literal from typing import Protocol from typing import TypeVar +from typing import Union from narwhals._compliant import CompliantDataFrame from narwhals._compliant import CompliantLazyFrame @@ -47,7 +48,7 @@ class SupportsNativeNamespace(Protocol): def __native_namespace__(self) -> ModuleType: ... -IntoExpr: TypeAlias = "Expr | str | Series[Any]" +IntoExpr: TypeAlias = Union["Expr", str, "Series[Any]"] """Anything which can be converted to an expression. Use this to mean "either a Narwhals expression, or something which can be converted @@ -56,7 +57,7 @@ def __native_namespace__(self) -> ModuleType: ... which will be interpreted as a `nw.Expr`, e.g. `df.select('a')`. """ -IntoDataFrame: TypeAlias = "NativeFrame | DataFrameLike" +IntoDataFrame: TypeAlias = Union["NativeFrame", "DataFrameLike"] """Anything which can be converted to a Narwhals DataFrame. Use this if your function accepts a narwhalifiable object but doesn't care about its backend. @@ -71,7 +72,7 @@ def __native_namespace__(self) -> ModuleType: ... IntoLazyFrame: TypeAlias = "NativeLazyFrame" -IntoFrame: TypeAlias = "IntoDataFrame | IntoLazyFrame" +IntoFrame: TypeAlias = Union["IntoDataFrame", "IntoLazyFrame"] """Anything which can be converted to a Narwhals DataFrame or LazyFrame. Use this if your function can accept an object which can be converted to either @@ -85,7 +86,7 @@ def __native_namespace__(self) -> ModuleType: ... ... return df.collect_schema().names() """ -Frame: TypeAlias = "DataFrame[Any] | LazyFrame[Any]" +Frame: TypeAlias = Union["DataFrame[Any]", "LazyFrame[Any]"] """Narwhals DataFrame or Narwhals LazyFrame. Use this if your function can work with either and your function doesn't care From 6a6677968b58af40da13640b724c2d905f1097d8 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Tue, 8 Apr 2025 12:08:37 +0100 Subject: [PATCH 17/19] docs(typing): Provide more context on what and why Expanded on (https://github.com/narwhals-dev/narwhals/pull/2356#discussion_r2032011267) --- tests/translate/from_native_test.py | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/tests/translate/from_native_test.py b/tests/translate/from_native_test.py index 496c6bbbd0..5dc0eed7b0 100644 --- a/tests/translate/from_native_test.py +++ b/tests/translate/from_native_test.py @@ -1,3 +1,22 @@ +"""`from_native` runtime and static typing tests. + +# Static Typing +The style of the tests is *intentionally* repetitive, aiming to provide an individual scope +for each attempted `@overload` match. + +## `mypy` ignores +[inline config] is used to prevent [mypy specific errors] from hiding `pyright` diagnostics. + +[`--disallow-any-generics`] and [`var-annotated`] are ignored to verify we don't regress to +**prior false positive** behaviors identified in [#2239]. + +[inline config]: https://mypy.readthedocs.io/en/stable/inline_config.html +[mypy specific errors]: https://discuss.python.org/t/ignore-mypy-specific-type-errors/58535 +[`--disallow-any-generics`]: https://mypy.readthedocs.io/en/stable/error_code_list2.html#check-that-type-arguments-exist-type-arg +[`var-annotated`]: https://mypy.readthedocs.io/en/stable/error_code_list.html#require-annotation-if-variable-type-is-unclear-var-annotated +[#2239]: https://github.com/narwhals-dev/narwhals/issues/2239 +""" + from __future__ import annotations # mypy: disallow-any-generics=false, disable-error-code="var-annotated" @@ -413,7 +432,7 @@ def test_lazyframe_recursive() -> None: def test_dataframe_recursive_v1() -> None: - """`v1` always returns a Union.""" + """`v1` always returns a `Union` for `DataFrame`.""" pytest.importorskip("polars") import polars as pl @@ -426,13 +445,11 @@ def test_dataframe_recursive_v1() -> None: if TYPE_CHECKING: assert_type(pl_frame, pl.DataFrame) - # TODO @dangotbanned: Fix without breaking something else (1) assert_type(nw_frame, "nw.DataFrame[pl.DataFrame] | nw.LazyFrame[pl.DataFrame]") nw_frame_depth_2 = nw.DataFrame(nw_frame, level="full") # NOTE: Checking that the type is `DataFrame[Unknown]` assert_type(nw_frame_depth_2, nw.DataFrame) - # TODO @dangotbanned: Fix without breaking something else (2) assert_type( nw_frame_early_return, "nw.DataFrame[pl.DataFrame] | nw.LazyFrame[pl.DataFrame]", From d6cb16bb8b0da400cde4b8e0847f7570f0cb1223 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Sat, 12 Apr 2025 19:43:11 +0100 Subject: [PATCH 18/19] chore(typing): Use `Sequence[str]` in `pivot` https://github.com/narwhals-dev/narwhals/pull/2356/files/5dd782522f23ed2aef3554a2aa89fc9903abd094#r2040702116 --- narwhals/_compliant/dataframe.py | 6 +++--- narwhals/_pandas_like/dataframe.py | 2 +- narwhals/_polars/dataframe.py | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/narwhals/_compliant/dataframe.py b/narwhals/_compliant/dataframe.py index e155bf1bf2..b34861dee2 100644 --- a/narwhals/_compliant/dataframe.py +++ b/narwhals/_compliant/dataframe.py @@ -168,10 +168,10 @@ def join_asof( def lazy(self, *, backend: Implementation | None) -> CompliantLazyFrame[Any, Any]: ... def pivot( self, - on: str | Sequence[str], + on: Sequence[str], *, - index: str | Sequence[str] | None, - values: str | Sequence[str] | None, + index: Sequence[str] | None, + values: Sequence[str] | None, aggregate_function: Literal[ "min", "max", "first", "last", "sum", "mean", "median", "len", None ], diff --git a/narwhals/_pandas_like/dataframe.py b/narwhals/_pandas_like/dataframe.py index a28a6197fc..55f0f59dac 100644 --- a/narwhals/_pandas_like/dataframe.py +++ b/narwhals/_pandas_like/dataframe.py @@ -1017,7 +1017,7 @@ def gather_every(self: Self, n: int, offset: int) -> Self: return self._with_native(self.native.iloc[offset::n], validate_column_names=False) def pivot( - self: Self, + self, on: Sequence[str], *, index: Sequence[str] | None, diff --git a/narwhals/_polars/dataframe.py b/narwhals/_polars/dataframe.py index aa26840307..d3588961af 100644 --- a/narwhals/_polars/dataframe.py +++ b/narwhals/_polars/dataframe.py @@ -415,10 +415,10 @@ def unpivot( def pivot( self, - on: str | Sequence[str], + on: Sequence[str], *, - index: str | Sequence[str] | None, - values: str | Sequence[str] | None, + index: Sequence[str] | None, + values: Sequence[str] | None, aggregate_function: Literal[ "min", "max", "first", "last", "sum", "mean", "median", "len", None ], From cbd60d966180c0055530a00f4dddd0f7c714e948 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Sat, 12 Apr 2025 19:45:42 +0100 Subject: [PATCH 19/19] refactor(typing): Use `PivotAgg` #2352 --- narwhals/_compliant/dataframe.py | 5 ++--- narwhals/_pandas_like/dataframe.py | 3 ++- narwhals/_polars/dataframe.py | 5 ++--- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/narwhals/_compliant/dataframe.py b/narwhals/_compliant/dataframe.py index b34861dee2..08ac415380 100644 --- a/narwhals/_compliant/dataframe.py +++ b/narwhals/_compliant/dataframe.py @@ -44,6 +44,7 @@ from narwhals.typing import AsofJoinStrategy from narwhals.typing import JoinStrategy from narwhals.typing import LazyUniqueKeepStrategy + from narwhals.typing import PivotAgg from narwhals.typing import SizeUnit from narwhals.typing import UniqueKeepStrategy from narwhals.typing import _2DArray @@ -172,9 +173,7 @@ def pivot( *, index: Sequence[str] | None, values: Sequence[str] | None, - aggregate_function: Literal[ - "min", "max", "first", "last", "sum", "mean", "median", "len", None - ], + aggregate_function: PivotAgg | None, sort_columns: bool, separator: str, ) -> Self: ... diff --git a/narwhals/_pandas_like/dataframe.py b/narwhals/_pandas_like/dataframe.py index 55f0f59dac..72c90c6595 100644 --- a/narwhals/_pandas_like/dataframe.py +++ b/narwhals/_pandas_like/dataframe.py @@ -65,6 +65,7 @@ from narwhals.typing import CompliantLazyFrame from narwhals.typing import DTypeBackend from narwhals.typing import JoinStrategy + from narwhals.typing import PivotAgg from narwhals.typing import SizeUnit from narwhals.typing import UniqueKeepStrategy from narwhals.typing import _1DArray @@ -1022,7 +1023,7 @@ def pivot( *, index: Sequence[str] | None, values: Sequence[str] | None, - aggregate_function: Any | None, + aggregate_function: PivotAgg | None, sort_columns: bool, separator: str, ) -> Self: diff --git a/narwhals/_polars/dataframe.py b/narwhals/_polars/dataframe.py index d3588961af..6258e4218d 100644 --- a/narwhals/_polars/dataframe.py +++ b/narwhals/_polars/dataframe.py @@ -44,6 +44,7 @@ from narwhals.typing import CompliantDataFrame from narwhals.typing import CompliantLazyFrame from narwhals.typing import JoinStrategy + from narwhals.typing import PivotAgg from narwhals.typing import _2DArray from narwhals.utils import Version from narwhals.utils import _FullContext @@ -419,9 +420,7 @@ def pivot( *, index: Sequence[str] | None, values: Sequence[str] | None, - aggregate_function: Literal[ - "min", "max", "first", "last", "sum", "mean", "median", "len", None - ], + aggregate_function: PivotAgg | None, sort_columns: bool, separator: str, ) -> Self: