From 42850e38950663cd8982646efcbd5f9d3497a880 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Thu, 4 Sep 2025 11:32:50 +0000 Subject: [PATCH 01/18] refactor(typing): Add `_native.py` w/ protocols, aliases Follow-up to (https://github.com/narwhals-dev/narwhals/pull/3016#discussion_r2296651319) --- narwhals/_native.py | 141 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 141 insertions(+) create mode 100644 narwhals/_native.py diff --git a/narwhals/_native.py b/narwhals/_native.py new file mode 100644 index 0000000000..3a26cbea18 --- /dev/null +++ b/narwhals/_native.py @@ -0,0 +1,141 @@ +from __future__ import annotations + +from collections.abc import Collection, Iterable, Sized +from typing import TYPE_CHECKING, Any, Protocol + +if TYPE_CHECKING: + import duckdb + import pandas as pd + import polars as pl + import pyarrow as pa + from sqlframe.base.dataframe import BaseDataFrame as _BaseDataFrame + from typing_extensions import Self, TypeAlias + + SQLFrameDataFrame = _BaseDataFrame[Any, Any, Any, Any, Any] + +__all__ = [ + "NativeAny", + "NativeArrow", + "NativeCuDF", + "NativeDask", + "NativeDataFrame", + "NativeDuckDB", + "NativeFrame", + "NativeIbis", + "NativeKnown", + "NativeLazyFrame", + "NativeModin", + "NativePandas", + "NativePandasLike", + "NativePandasLikeDataFrame", + "NativePandasLikeSeries", + "NativePolars", + "NativePySpark", + "NativePySparkConnect", + "NativeSQLFrame", + "NativeSeries", + "NativeSparkLike", + "NativeUnknown", +] + + +Incomplete: TypeAlias = Any + + +# All dataframes supported by Narwhals have a +# `columns` property. Their similarities don't extend +# _that_ much further unfortunately... +class NativeFrame(Protocol): + @property + def columns(self) -> Any: ... + def join(self, *args: Any, **kwargs: Any) -> Any: ... + + +class NativeDataFrame(Sized, NativeFrame, Protocol): ... 
+ + +class NativeLazyFrame(NativeFrame, Protocol): + def explain(self, *args: Any, **kwargs: Any) -> Any: ... + + +class NativeSeries(Sized, Iterable[Any], Protocol): + def filter(self, *args: Any, **kwargs: Any) -> Any: ... + + +class _BasePandasLike(Sized, Protocol): + index: Any + """`mypy` doesn't like the asymmetric `property` setter in `pandas`.""" + + def __getitem__(self, key: Any, /) -> Any: ... + def __mul__(self, other: float | Collection[float] | Self, /) -> Self: ... + def __floordiv__(self, other: float | Collection[float] | Self, /) -> Self: ... + @property + def loc(self) -> Any: ... + @property + def shape(self) -> tuple[int, ...]: ... + def set_axis(self, labels: Any, *, axis: Any = ..., copy: bool = ...) -> Self: ... + def copy(self, deep: bool = ...) -> Self: ... # noqa: FBT001 + def rename(self, *args: Any, **kwds: Any) -> Self | Incomplete: + """`mypy` & `pyright` disagree on overloads. + + `Incomplete` used to fix [more important issue](https://github.com/narwhals-dev/narwhals/pull/3016#discussion_r2296139744). + """ + + +class _BasePandasLikeFrame(NativeDataFrame, _BasePandasLike, Protocol): ... + + +class _BasePandasLikeSeries(NativeSeries, _BasePandasLike, Protocol): + def where(self, cond: Any, other: Any = ..., /) -> Self | Incomplete: ... + + +class NativeDask(NativeLazyFrame, Protocol): + _partition_type: type[pd.DataFrame] + + +class _CuDFDataFrame(_BasePandasLikeFrame, Protocol): + def to_pylibcudf(self, *args: Any, **kwds: Any) -> Any: ... + + +class _CuDFSeries(_BasePandasLikeSeries, Protocol): + def to_pylibcudf(self, *args: Any, **kwds: Any) -> Any: ... + + +class NativeIbis(Protocol): + def sql(self, *args: Any, **kwds: Any) -> Any: ... + def __pyarrow_result__(self, *args: Any, **kwds: Any) -> Any: ... + def __pandas_result__(self, *args: Any, **kwds: Any) -> Any: ... + def __polars_result__(self, *args: Any, **kwds: Any) -> Any: ... 
+ + +class _ModinDataFrame(_BasePandasLikeFrame, Protocol): + _pandas_class: type[pd.DataFrame] + + +class _ModinSeries(_BasePandasLikeSeries, Protocol): + _pandas_class: type[pd.Series[Any]] + + +# NOTE: Using `pyspark.sql.DataFrame` creates false positives in overloads when not installed +class _PySparkDataFrame(NativeLazyFrame, Protocol): + # Arbitrary method that `sqlframe` doesn't have and unlikely to appear anywhere else + # https://github.com/apache/spark/blob/8530444e25b83971da4314c608aa7d763adeceb3/python/pyspark/sql/dataframe.py#L4875 + def dropDuplicatesWithinWatermark(self, *arg: Any, **kwargs: Any) -> Any: ... # noqa: N802 + + +NativePolars: TypeAlias = "pl.DataFrame | pl.LazyFrame | pl.Series" +NativeArrow: TypeAlias = "pa.Table | pa.ChunkedArray[Any]" +NativeDuckDB: TypeAlias = "duckdb.DuckDBPyRelation" +NativePandas: TypeAlias = "pd.DataFrame | pd.Series[Any]" +NativeModin: TypeAlias = "_ModinDataFrame | _ModinSeries" +NativeCuDF: TypeAlias = "_CuDFDataFrame | _CuDFSeries" +NativePandasLikeSeries: TypeAlias = "pd.Series[Any] | _CuDFSeries | _ModinSeries" +NativePandasLikeDataFrame: TypeAlias = "pd.DataFrame | _CuDFDataFrame | _ModinDataFrame" +NativePandasLike: TypeAlias = "NativePandasLikeDataFrame | NativePandasLikeSeries" +NativeSQLFrame: TypeAlias = "_BaseDataFrame[Any, Any, Any, Any, Any]" +NativePySpark: TypeAlias = _PySparkDataFrame +NativePySparkConnect: TypeAlias = _PySparkDataFrame +NativeSparkLike: TypeAlias = "NativeSQLFrame | NativePySpark | NativePySparkConnect" +NativeKnown: TypeAlias = "NativePolars | NativeArrow | NativePandasLike | NativeSparkLike | NativeDuckDB | NativeDask | NativeIbis" +NativeUnknown: TypeAlias = "NativeDataFrame | NativeSeries | NativeLazyFrame" +NativeAny: TypeAlias = "NativeKnown | NativeUnknown" From 38bcb0ab60e19203b1d6b9f6a40fc51b34545548 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Thu, 4 Sep 2025 11:50:04 +0000 Subject: [PATCH 02/18] refactor: Add 
guards --- narwhals/_native.py | 93 +++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 90 insertions(+), 3 deletions(-) diff --git a/narwhals/_native.py b/narwhals/_native.py index 3a26cbea18..f84ea950a8 100644 --- a/narwhals/_native.py +++ b/narwhals/_native.py @@ -1,7 +1,21 @@ from __future__ import annotations -from collections.abc import Collection, Iterable, Sized -from typing import TYPE_CHECKING, Any, Protocol +from collections.abc import Callable, Collection, Iterable, Sized +from typing import TYPE_CHECKING, Any, Protocol, TypeVar, cast + +from narwhals.dependencies import ( + get_cudf, + get_modin, + get_pandas, + get_polars, + get_pyarrow, + is_dask_dataframe, + is_duckdb_relation, + is_ibis_table, + is_pyspark_connect_dataframe, + is_pyspark_dataframe, + is_sqlframe_dataframe, +) if TYPE_CHECKING: import duckdb @@ -9,9 +23,11 @@ import polars as pl import pyarrow as pa from sqlframe.base.dataframe import BaseDataFrame as _BaseDataFrame - from typing_extensions import Self, TypeAlias + from typing_extensions import Self, TypeAlias, TypeIs SQLFrameDataFrame = _BaseDataFrame[Any, Any, Any, Any, Any] + T = TypeVar("T") + _Guard: TypeAlias = "Callable[[Any], TypeIs[T]]" __all__ = [ "NativeAny", @@ -36,6 +52,19 @@ "NativeSeries", "NativeSparkLike", "NativeUnknown", + "is_native_arrow", + "is_native_cudf", + "is_native_dask", + "is_native_duckdb", + "is_native_ibis", + "is_native_modin", + "is_native_pandas", + "is_native_pandas_like", + "is_native_polars", + "is_native_pyspark", + "is_native_pyspark_connect", + "is_native_spark_like", + "is_native_sqlframe", ] @@ -139,3 +168,61 @@ def dropDuplicatesWithinWatermark(self, *arg: Any, **kwargs: Any) -> Any: ... 
# NativeKnown: TypeAlias = "NativePolars | NativeArrow | NativePandasLike | NativeSparkLike | NativeDuckDB | NativeDask | NativeIbis" NativeUnknown: TypeAlias = "NativeDataFrame | NativeSeries | NativeLazyFrame" NativeAny: TypeAlias = "NativeKnown | NativeUnknown" + + +def is_native_polars(obj: Any) -> TypeIs[NativePolars]: + return (pl := get_polars()) is not None and isinstance( + obj, (pl.DataFrame, pl.Series, pl.LazyFrame) + ) + + +def is_native_arrow(obj: Any) -> TypeIs[NativeArrow]: + return (pa := get_pyarrow()) is not None and isinstance( + obj, (pa.Table, pa.ChunkedArray) + ) + + +def is_native_dask(obj: Any) -> TypeIs[NativeDask]: + return is_dask_dataframe(obj) + + +is_native_duckdb: _Guard[NativeDuckDB] = is_duckdb_relation +is_native_sqlframe: _Guard[NativeSQLFrame] = is_sqlframe_dataframe +is_native_pyspark = cast("_Guard[NativePySpark]", is_pyspark_dataframe) +is_native_pyspark_connect = cast( + "_Guard[NativePySparkConnect]", is_pyspark_connect_dataframe +) + + +def is_native_pandas(obj: Any) -> TypeIs[NativePandas]: + return (pd := get_pandas()) is not None and isinstance(obj, (pd.DataFrame, pd.Series)) + + +def is_native_modin(obj: Any) -> TypeIs[NativeModin]: + return (mpd := get_modin()) is not None and isinstance( + obj, (mpd.DataFrame, mpd.Series) + ) # pragma: no cover + + +def is_native_cudf(obj: Any) -> TypeIs[NativeCuDF]: + return (cudf := get_cudf()) is not None and isinstance( + obj, (cudf.DataFrame, cudf.Series) + ) # pragma: no cover + + +def is_native_pandas_like(obj: Any) -> TypeIs[NativePandasLike]: + return ( + is_native_pandas(obj) or is_native_cudf(obj) or is_native_modin(obj) + ) # pragma: no cover + + +def is_native_spark_like(obj: Any) -> TypeIs[NativeSparkLike]: + return ( + is_native_sqlframe(obj) + or is_native_pyspark(obj) + or is_native_pyspark_connect(obj) + ) + + +def is_native_ibis(obj: Any) -> TypeIs[NativeIbis]: + return is_ibis_table(obj) From 88907a0ec40f50d38512e7f7bc16cbd5b5dfffb3 Mon Sep 17 00:00:00 2001 From: 
dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Thu, 4 Sep 2025 11:56:56 +0000 Subject: [PATCH 03/18] refactor: Replace `_namespace` definitions --- narwhals/_namespace.py | 215 ++++++++--------------------------------- narwhals/typing.py | 4 +- 2 files changed, 43 insertions(+), 176 deletions(-) diff --git a/narwhals/_namespace.py b/narwhals/_namespace.py index ffd7a79390..5b1f4a299f 100644 --- a/narwhals/_namespace.py +++ b/narwhals/_namespace.py @@ -2,42 +2,45 @@ from __future__ import annotations -from typing import ( - TYPE_CHECKING, - Any, - Callable, - Generic, - Protocol, - TypeVar, - cast, - overload, -) +from typing import TYPE_CHECKING, Any, Generic, TypeVar, overload from narwhals._compliant.typing import CompliantNamespaceAny, CompliantNamespaceT_co -from narwhals._utils import Implementation, Version -from narwhals.dependencies import ( - get_cudf, - get_modin, - get_pandas, - get_polars, - get_pyarrow, - is_dask_dataframe, - is_duckdb_relation, - is_ibis_table, - is_pyspark_connect_dataframe, - is_pyspark_dataframe, - is_sqlframe_dataframe, +from narwhals._native import ( + NativeAny, + NativeArrow, + NativeCuDF, + NativeDask, + NativeDuckDB, + NativeIbis, + NativeModin, + NativePandas, + NativePandasLike, + NativePolars, + NativeSparkLike, + NativeUnknown, + _CuDFDataFrame, + _CuDFSeries, + _ModinDataFrame, + _ModinSeries, + is_native_arrow, + is_native_cudf, + is_native_dask, + is_native_duckdb, + is_native_ibis, + is_native_modin, + is_native_pandas, + is_native_polars, + is_native_pyspark_connect, + is_native_spark_like, + is_native_sqlframe, ) +from narwhals._utils import Implementation, Version if TYPE_CHECKING: - from collections.abc import Collection, Sized from typing import ClassVar - import duckdb import pandas as pd - import polars as pl - import pyarrow as pa - from typing_extensions import Self, TypeAlias, TypeIs + from typing_extensions import TypeAlias from narwhals._arrow.namespace import ArrowNamespace from 
narwhals._dask.namespace import DaskNamespace @@ -45,7 +48,6 @@ from narwhals._ibis.namespace import IbisNamespace from narwhals._pandas_like.namespace import PandasLikeNamespace from narwhals._polars.namespace import PolarsNamespace - from narwhals._spark_like.dataframe import SQLFrameDataFrame from narwhals._spark_like.namespace import SparkLikeNamespace from narwhals._typing import ( Arrow, @@ -59,87 +61,10 @@ Polars, SparkLike, ) - from narwhals.typing import NativeDataFrame, NativeLazyFrame, NativeSeries T = TypeVar("T") - _Guard: TypeAlias = "Callable[[Any], TypeIs[T]]" - EagerAllowedNamespace: TypeAlias = "Namespace[PandasLikeNamespace] | Namespace[ArrowNamespace] | Namespace[PolarsNamespace]" - Incomplete: TypeAlias = Any - - class _BasePandasLike(Sized, Protocol): - index: Any - """`mypy` doesn't like the asymmetric `property` setter in `pandas`.""" - - def __getitem__(self, key: Any, /) -> Any: ... - def __mul__(self, other: float | Collection[float] | Self, /) -> Self: ... - def __floordiv__(self, other: float | Collection[float] | Self, /) -> Self: ... - @property - def loc(self) -> Any: ... - @property - def shape(self) -> tuple[int, ...]: ... - def set_axis(self, labels: Any, *, axis: Any = ..., copy: bool = ...) -> Self: ... - def copy(self, deep: bool = ...) -> Self: ... # noqa: FBT001 - def rename(self, *args: Any, **kwds: Any) -> Self | Incomplete: - """`mypy` & `pyright` disagree on overloads. - - `Incomplete` used to fix [more important issue](https://github.com/narwhals-dev/narwhals/pull/3016#discussion_r2296139744). - """ - - class _BasePandasLikeFrame(NativeDataFrame, _BasePandasLike, Protocol): ... - - class _BasePandasLikeSeries(NativeSeries, _BasePandasLike, Protocol): - def where(self, cond: Any, other: Any = ..., /) -> Self | Incomplete: ... 
- - class _NativeDask(NativeLazyFrame, Protocol): - _partition_type: type[pd.DataFrame] - - class _CuDFDataFrame(_BasePandasLikeFrame, Protocol): - def to_pylibcudf(self, *args: Any, **kwds: Any) -> Any: ... - - class _CuDFSeries(_BasePandasLikeSeries, Protocol): - def to_pylibcudf(self, *args: Any, **kwds: Any) -> Any: ... - - class _NativeIbis(Protocol): - def sql(self, *args: Any, **kwds: Any) -> Any: ... - def __pyarrow_result__(self, *args: Any, **kwds: Any) -> Any: ... - def __pandas_result__(self, *args: Any, **kwds: Any) -> Any: ... - def __polars_result__(self, *args: Any, **kwds: Any) -> Any: ... - - class _ModinDataFrame(_BasePandasLikeFrame, Protocol): - _pandas_class: type[pd.DataFrame] - - class _ModinSeries(_BasePandasLikeSeries, Protocol): - _pandas_class: type[pd.Series[Any]] - - # NOTE: Using `pyspark.sql.DataFrame` creates false positives in overloads when not installed - class _PySparkDataFrame(NativeLazyFrame, Protocol): - # Arbitrary method that `sqlframe` doesn't have and unlikely to appear anywhere else - # https://github.com/apache/spark/blob/8530444e25b83971da4314c608aa7d763adeceb3/python/pyspark/sql/dataframe.py#L4875 - def dropDuplicatesWithinWatermark(self, *arg: Any, **kwargs: Any) -> Any: ... 
# noqa: N802 - - _NativePolars: TypeAlias = "pl.DataFrame | pl.LazyFrame | pl.Series" - _NativeArrow: TypeAlias = "pa.Table | pa.ChunkedArray[Any]" - _NativeDuckDB: TypeAlias = "duckdb.DuckDBPyRelation" - _NativePandas: TypeAlias = "pd.DataFrame | pd.Series[Any]" - _NativeModin: TypeAlias = "_ModinDataFrame | _ModinSeries" - _NativeCuDF: TypeAlias = "_CuDFDataFrame | _CuDFSeries" - _NativePandasLikeSeries: TypeAlias = "pd.Series[Any] | _CuDFSeries | _ModinSeries" - _NativePandasLikeDataFrame: TypeAlias = ( - "pd.DataFrame | _CuDFDataFrame | _ModinDataFrame" - ) - _NativePandasLike: TypeAlias = "_NativePandasLikeDataFrame |_NativePandasLikeSeries" - _NativeSQLFrame: TypeAlias = "SQLFrameDataFrame" - _NativePySpark: TypeAlias = _PySparkDataFrame - _NativePySparkConnect: TypeAlias = _PySparkDataFrame - _NativeSparkLike: TypeAlias = ( - "_NativeSQLFrame | _NativePySpark | _NativePySparkConnect" - ) - - NativeKnown: TypeAlias = "_NativePolars | _NativeArrow | _NativePandasLike | _NativeSparkLike | _NativeDuckDB | _NativeDask | _NativeIbis" - NativeUnknown: TypeAlias = "NativeDataFrame | NativeSeries | NativeLazyFrame" - NativeAny: TypeAlias = "NativeKnown | NativeUnknown" __all__ = ["Namespace"] @@ -268,55 +193,55 @@ def from_backend( @overload @classmethod def from_native_object( - cls, native: _NativePolars, / + cls, native: NativePolars, / ) -> Namespace[PolarsNamespace]: ... @overload @classmethod def from_native_object( - cls, native: _NativePandas, / + cls, native: NativePandas, / ) -> Namespace[PandasLikeNamespace[pd.DataFrame, pd.Series[Any]]]: ... @overload @classmethod - def from_native_object(cls, native: _NativeArrow, /) -> Namespace[ArrowNamespace]: ... + def from_native_object(cls, native: NativeArrow, /) -> Namespace[ArrowNamespace]: ... @overload @classmethod def from_native_object( - cls, native: _NativeSparkLike, / + cls, native: NativeSparkLike, / ) -> Namespace[SparkLikeNamespace]: ... 
@overload @classmethod def from_native_object( - cls, native: _NativeDuckDB, / + cls, native: NativeDuckDB, / ) -> Namespace[DuckDBNamespace]: ... @overload @classmethod - def from_native_object(cls, native: _NativeDask, /) -> Namespace[DaskNamespace]: ... + def from_native_object(cls, native: NativeDask, /) -> Namespace[DaskNamespace]: ... @overload @classmethod - def from_native_object(cls, native: _NativeIbis, /) -> Namespace[IbisNamespace]: ... + def from_native_object(cls, native: NativeIbis, /) -> Namespace[IbisNamespace]: ... @overload @classmethod def from_native_object( - cls, native: _NativeModin, / + cls, native: NativeModin, / ) -> Namespace[PandasLikeNamespace[_ModinDataFrame, _ModinSeries]]: ... @overload @classmethod def from_native_object( - cls, native: _NativeCuDF, / + cls, native: NativeCuDF, / ) -> Namespace[PandasLikeNamespace[_CuDFDataFrame, _CuDFSeries]]: ... @overload @classmethod def from_native_object( - cls, native: _NativePandasLike, / + cls, native: NativePandasLike, / ) -> Namespace[PandasLikeNamespace[Any, Any]]: ... 
@overload @@ -358,61 +283,3 @@ def from_native_object( msg = f"Unsupported type: {type(native).__qualname__!r}" raise TypeError(msg) return cls.from_backend(impl) - - -def is_native_polars(obj: Any) -> TypeIs[_NativePolars]: - return (pl := get_polars()) is not None and isinstance( - obj, (pl.DataFrame, pl.Series, pl.LazyFrame) - ) - - -def is_native_arrow(obj: Any) -> TypeIs[_NativeArrow]: - return (pa := get_pyarrow()) is not None and isinstance( - obj, (pa.Table, pa.ChunkedArray) - ) - - -def is_native_dask(obj: Any) -> TypeIs[_NativeDask]: - return is_dask_dataframe(obj) - - -is_native_duckdb: _Guard[_NativeDuckDB] = is_duckdb_relation -is_native_sqlframe: _Guard[_NativeSQLFrame] = is_sqlframe_dataframe -is_native_pyspark = cast("_Guard[_NativePySpark]", is_pyspark_dataframe) -is_native_pyspark_connect = cast( - "_Guard[_NativePySparkConnect]", is_pyspark_connect_dataframe -) - - -def is_native_pandas(obj: Any) -> TypeIs[_NativePandas]: - return (pd := get_pandas()) is not None and isinstance(obj, (pd.DataFrame, pd.Series)) - - -def is_native_modin(obj: Any) -> TypeIs[_NativeModin]: - return (mpd := get_modin()) is not None and isinstance( - obj, (mpd.DataFrame, mpd.Series) - ) # pragma: no cover - - -def is_native_cudf(obj: Any) -> TypeIs[_NativeCuDF]: - return (cudf := get_cudf()) is not None and isinstance( - obj, (cudf.DataFrame, cudf.Series) - ) # pragma: no cover - - -def is_native_pandas_like(obj: Any) -> TypeIs[_NativePandasLike]: - return ( - is_native_pandas(obj) or is_native_cudf(obj) or is_native_modin(obj) - ) # pragma: no cover - - -def is_native_spark_like(obj: Any) -> TypeIs[_NativeSparkLike]: - return ( - is_native_sqlframe(obj) - or is_native_pyspark(obj) - or is_native_pyspark_connect(obj) - ) - - -def is_native_ibis(obj: Any) -> TypeIs[_NativeIbis]: - return is_ibis_table(obj) diff --git a/narwhals/typing.py b/narwhals/typing.py index 7b3cf43778..b4fec67b72 100644 --- a/narwhals/typing.py +++ b/narwhals/typing.py @@ -19,7 +19,7 @@ from 
typing_extensions import TypeAlias from narwhals import dtypes - from narwhals._namespace import _NativeIbis + from narwhals._native import NativeIbis from narwhals.dataframe import DataFrame, LazyFrame from narwhals.expr import Expr from narwhals.schema import Schema @@ -127,7 +127,7 @@ def Binary(self) -> type[dtypes.Binary]: ... ... return df.shape """ -IntoLazyFrame: TypeAlias = Union["NativeLazyFrame", "_NativeIbis"] +IntoLazyFrame: TypeAlias = Union["NativeLazyFrame", "NativeIbis"] IntoFrame: TypeAlias = Union["IntoDataFrame", "IntoLazyFrame"] """Anything which can be converted to a Narwhals DataFrame or LazyFrame. From 9584778d1b8aaa550ffc31f1476e53a450f9daae Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Thu, 4 Sep 2025 12:00:01 +0000 Subject: [PATCH 04/18] refactor: Replace imports from `_namespace` --- narwhals/_pandas_like/typing.py | 6 ++-- narwhals/_spark_like/dataframe.py | 2 +- narwhals/_utils.py | 52 +++++++++++++++---------------- narwhals/translate.py | 2 +- 4 files changed, 31 insertions(+), 31 deletions(-) diff --git a/narwhals/_pandas_like/typing.py b/narwhals/_pandas_like/typing.py index 054b011eac..55c644bb5e 100644 --- a/narwhals/_pandas_like/typing.py +++ b/narwhals/_pandas_like/typing.py @@ -10,12 +10,12 @@ import pandas as pd from typing_extensions import TypeAlias - from narwhals._namespace import ( + from narwhals._native import ( + NativePandasLikeDataFrame, _CuDFDataFrame, _CuDFSeries, _ModinDataFrame, _ModinSeries, - _NativePandasLikeDataFrame, ) from narwhals._pandas_like.expr import PandasLikeExpr from narwhals._pandas_like.series import PandasLikeSeries @@ -30,7 +30,7 @@ default="pd.Series[Any]", ) NativeDataFrameT = TypeVar( - "NativeDataFrameT", bound="_NativePandasLikeDataFrame", default="pd.DataFrame" + "NativeDataFrameT", bound="NativePandasLikeDataFrame", default="pd.DataFrame" ) NativeNDFrameT = TypeVar( "NativeNDFrameT", diff --git a/narwhals/_spark_like/dataframe.py 
b/narwhals/_spark_like/dataframe.py index eba3001e7b..c8c02f3a79 100644 --- a/narwhals/_spark_like/dataframe.py +++ b/narwhals/_spark_like/dataframe.py @@ -5,7 +5,7 @@ from typing import TYPE_CHECKING, Any from narwhals._exceptions import issue_warning -from narwhals._namespace import is_native_spark_like +from narwhals._native import is_native_spark_like from narwhals._spark_like.utils import ( catch_pyspark_connect_exception, catch_pyspark_sql_exception, diff --git a/narwhals/_utils.py b/narwhals/_utils.py index 4d26dd5673..a4d0bd8be9 100644 --- a/narwhals/_utils.py +++ b/narwhals/_utils.py @@ -73,20 +73,20 @@ NativeSeriesT_co, ) from narwhals._compliant.typing import EvalNames, NativeDataFrameT, NativeLazyFrameT - from narwhals._namespace import ( - Namespace, - _NativeArrow, - _NativeCuDF, - _NativeDask, - _NativeDuckDB, - _NativeIbis, - _NativeModin, - _NativePandas, - _NativePandasLike, - _NativePolars, - _NativePySpark, - _NativePySparkConnect, - _NativeSQLFrame, + from narwhals._namespace import Namespace + from narwhals._native import ( + NativeArrow, + NativeCuDF, + NativeDask, + NativeDuckDB, + NativeIbis, + NativeModin, + NativePandas, + NativePandasLike, + NativePolars, + NativePySpark, + NativePySparkConnect, + NativeSQLFrame, ) from narwhals._translate import ArrowStreamExportable, IntoArrowTable, ToNarwhalsT_co from narwhals._typing import ( @@ -2106,36 +2106,36 @@ def __set_name__(self, owner: type[Any], name: str) -> None: self.__name__: str = name @overload - def __get__(self, instance: Narwhals[_NativePolars], owner: Any) -> _PolarsImpl: ... + def __get__(self, instance: Narwhals[NativePolars], owner: Any) -> _PolarsImpl: ... @overload - def __get__(self, instance: Narwhals[_NativePandas], owner: Any) -> _PandasImpl: ... + def __get__(self, instance: Narwhals[NativePandas], owner: Any) -> _PandasImpl: ... @overload - def __get__(self, instance: Narwhals[_NativeModin], owner: Any) -> _ModinImpl: ... 
+ def __get__(self, instance: Narwhals[NativeModin], owner: Any) -> _ModinImpl: ... @overload # TODO @dangotbanned: Rename `_typing` `*Cudf*` aliases to `*CuDF*` - def __get__(self, instance: Narwhals[_NativeCuDF], owner: Any) -> _CudfImpl: ... + def __get__(self, instance: Narwhals[NativeCuDF], owner: Any) -> _CudfImpl: ... @overload def __get__( - self, instance: Narwhals[_NativePandasLike], owner: Any + self, instance: Narwhals[NativePandasLike], owner: Any ) -> _PandasLikeImpl: ... @overload - def __get__(self, instance: Narwhals[_NativeArrow], owner: Any) -> _ArrowImpl: ... + def __get__(self, instance: Narwhals[NativeArrow], owner: Any) -> _ArrowImpl: ... @overload def __get__( - self, instance: Narwhals[_NativePolars | _NativeArrow | _NativePandas], owner: Any + self, instance: Narwhals[NativePolars | NativeArrow | NativePandas], owner: Any ) -> _PolarsImpl | _PandasImpl | _ArrowImpl: ... @overload - def __get__(self, instance: Narwhals[_NativeDuckDB], owner: Any) -> _DuckDBImpl: ... + def __get__(self, instance: Narwhals[NativeDuckDB], owner: Any) -> _DuckDBImpl: ... @overload def __get__( - self, instance: Narwhals[_NativeSQLFrame], owner: Any + self, instance: Narwhals[NativeSQLFrame], owner: Any ) -> _SQLFrameImpl: ... @overload - def __get__(self, instance: Narwhals[_NativeDask], owner: Any) -> _DaskImpl: ... + def __get__(self, instance: Narwhals[NativeDask], owner: Any) -> _DaskImpl: ... @overload - def __get__(self, instance: Narwhals[_NativeIbis], owner: Any) -> _IbisImpl: ... + def __get__(self, instance: Narwhals[NativeIbis], owner: Any) -> _IbisImpl: ... @overload def __get__( - self, instance: Narwhals[_NativePySpark | _NativePySparkConnect], owner: Any + self, instance: Narwhals[NativePySpark | NativePySparkConnect], owner: Any ) -> _PySparkImpl | _PySparkConnectImpl: ... 
# NOTE: https://docs.python.org/3/howto/descriptor.html#invocation-from-a-class @overload diff --git a/narwhals/translate.py b/narwhals/translate.py index fdace19827..ecfc3fd8f6 100644 --- a/narwhals/translate.py +++ b/narwhals/translate.py @@ -6,7 +6,7 @@ from typing import TYPE_CHECKING, Any, Callable, Literal, TypeVar, overload from narwhals._constants import EPOCH, MS_PER_SECOND -from narwhals._namespace import ( +from narwhals._native import ( is_native_arrow, is_native_pandas_like, is_native_polars, From 9acfd4e98e56348c8d62b40dad88bd2abaa7c0e3 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Thu, 4 Sep 2025 12:35:43 +0000 Subject: [PATCH 05/18] refactor: Replace imports from `typing` --- narwhals/_compliant/typing.py | 4 +--- narwhals/functions.py | 4 +--- narwhals/typing.py | 26 ++++++-------------- tests/conftest.py | 38 ++++++++++++++---------------- tests/expr_and_series/cast_test.py | 6 ++--- tests/frame/join_test.py | 4 ++-- tests/utils.py | 7 +++--- 7 files changed, 36 insertions(+), 53 deletions(-) diff --git a/narwhals/_compliant/typing.py b/narwhals/_compliant/typing.py index 6f7f45d548..ce64cc9c28 100644 --- a/narwhals/_compliant/typing.py +++ b/narwhals/_compliant/typing.py @@ -23,13 +23,11 @@ from narwhals._compliant.namespace import CompliantNamespace, EagerNamespace from narwhals._compliant.series import CompliantSeries, EagerSeries from narwhals._compliant.window import WindowInputs + from narwhals._native import NativeDataFrame, NativeFrame, NativeSeries from narwhals.typing import ( FillNullStrategy, IntoLazyFrame, ModeKeepStrategy, - NativeDataFrame, - NativeFrame, - NativeSeries, RankMethod, RollingInterpolationMethod, ) diff --git a/narwhals/functions.py b/narwhals/functions.py index f4fad9ed67..a139c17cfe 100644 --- a/narwhals/functions.py +++ b/narwhals/functions.py @@ -41,6 +41,7 @@ from typing_extensions import TypeAlias, TypeIs from narwhals._compliant import CompliantExpr, 
CompliantNamespace + from narwhals._native import NativeDataFrame, NativeLazyFrame, NativeSeries from narwhals._translate import IntoArrowTable from narwhals._typing import Backend, EagerAllowed, IntoBackend from narwhals.dataframe import DataFrame, LazyFrame @@ -50,9 +51,6 @@ IntoDType, IntoExpr, IntoSchema, - NativeDataFrame, - NativeLazyFrame, - NativeSeries, NonNestedLiteral, _1DArray, _2DArray, diff --git a/narwhals/typing.py b/narwhals/typing.py index b4fec67b72..ef1ed67cd4 100644 --- a/narwhals/typing.py +++ b/narwhals/typing.py @@ -8,7 +8,7 @@ if TYPE_CHECKING: import datetime as dt - from collections.abc import Iterable, Sequence, Sized + from collections.abc import Sequence from decimal import Decimal from types import ModuleType @@ -19,29 +19,17 @@ from typing_extensions import TypeAlias from narwhals import dtypes - from narwhals._native import NativeIbis + from narwhals._native import ( + NativeDataFrame, + NativeIbis, + NativeLazyFrame, + NativeSeries, + ) from narwhals.dataframe import DataFrame, LazyFrame from narwhals.expr import Expr from narwhals.schema import Schema from narwhals.series import Series - # All dataframes supported by Narwhals have a - # `columns` property. Their similarities don't extend - # _that_ much further unfortunately... - class NativeFrame(Protocol): - @property - def columns(self) -> Any: ... - - def join(self, *args: Any, **kwargs: Any) -> Any: ... - - class NativeDataFrame(Sized, NativeFrame, Protocol): ... - - class NativeLazyFrame(NativeFrame, Protocol): - def explain(self, *args: Any, **kwargs: Any) -> Any: ... - - class NativeSeries(Sized, Iterable[Any], Protocol): - def filter(self, *args: Any, **kwargs: Any) -> Any: ... - class SupportsNativeNamespace(Protocol): def __native_namespace__(self) -> ModuleType: ... 
diff --git a/tests/conftest.py b/tests/conftest.py index c33099bb76..2850cf9de5 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -15,18 +15,16 @@ if TYPE_CHECKING: from collections.abc import Sequence - import duckdb import ibis import pandas as pd import polars as pl import pyarrow as pa from ibis.backends.duckdb import Backend as IbisDuckDBBackend - from pyspark.sql import DataFrame as PySparkDataFrame from typing_extensions import TypeAlias - from narwhals._spark_like.dataframe import SQLFrameDataFrame + from narwhals._native import NativeDask, NativeDuckDB, NativePySpark, NativeSQLFrame from narwhals._typing import EagerAllowed - from narwhals.typing import NativeDataFrame, NativeLazyFrame + from narwhals.typing import IntoDataFrame from tests.utils import Constructor, ConstructorEager, ConstructorLazy Data: TypeAlias = "dict[str, list[Any]]" @@ -100,27 +98,27 @@ def pandas_pyarrow_constructor(obj: Data) -> pd.DataFrame: return pd.DataFrame(obj).convert_dtypes(dtype_backend="pyarrow") -def modin_constructor(obj: Data) -> NativeDataFrame: # pragma: no cover +def modin_constructor(obj: Data) -> IntoDataFrame: # pragma: no cover import modin.pandas as mpd import pandas as pd df = mpd.DataFrame(pd.DataFrame(obj)) - return cast("NativeDataFrame", df) + return cast("IntoDataFrame", df) -def modin_pyarrow_constructor(obj: Data) -> NativeDataFrame: # pragma: no cover +def modin_pyarrow_constructor(obj: Data) -> IntoDataFrame: # pragma: no cover import modin.pandas as mpd import pandas as pd df = mpd.DataFrame(pd.DataFrame(obj)).convert_dtypes(dtype_backend="pyarrow") - return cast("NativeDataFrame", df) + return cast("IntoDataFrame", df) -def cudf_constructor(obj: Data) -> NativeDataFrame: # pragma: no cover +def cudf_constructor(obj: Data) -> IntoDataFrame: # pragma: no cover import cudf df = cudf.DataFrame(obj) - return cast("NativeDataFrame", df) + return cast("IntoDataFrame", df) def polars_eager_constructor(obj: Data) -> pl.DataFrame: @@ -135,7 +133,7 @@ 
def polars_lazy_constructor(obj: Data) -> pl.LazyFrame: return pl.LazyFrame(obj) -def duckdb_lazy_constructor(obj: Data) -> duckdb.DuckDBPyRelation: +def duckdb_lazy_constructor(obj: Data) -> NativeDuckDB: import duckdb import polars as pl @@ -145,16 +143,16 @@ def duckdb_lazy_constructor(obj: Data) -> duckdb.DuckDBPyRelation: return duckdb.table("_df") -def dask_lazy_p1_constructor(obj: Data) -> NativeLazyFrame: # pragma: no cover +def dask_lazy_p1_constructor(obj: Data) -> NativeDask: # pragma: no cover import dask.dataframe as dd - return cast("NativeLazyFrame", dd.from_dict(obj, npartitions=1)) + return cast("NativeDask", dd.from_dict(obj, npartitions=1)) -def dask_lazy_p2_constructor(obj: Data) -> NativeLazyFrame: # pragma: no cover +def dask_lazy_p2_constructor(obj: Data) -> NativeDask: # pragma: no cover import dask.dataframe as dd - return cast("NativeLazyFrame", dd.from_dict(obj, npartitions=2)) + return cast("NativeDask", dd.from_dict(obj, npartitions=2)) def pyarrow_table_constructor(obj: dict[str, Any]) -> pa.Table: @@ -163,7 +161,7 @@ def pyarrow_table_constructor(obj: dict[str, Any]) -> pa.Table: return pa.table(obj) -def pyspark_lazy_constructor() -> Callable[[Data], PySparkDataFrame]: # pragma: no cover +def pyspark_lazy_constructor() -> Callable[[Data], NativePySpark]: # pragma: no cover pytest.importorskip("pyspark") import warnings from atexit import register @@ -178,22 +176,22 @@ def pyspark_lazy_constructor() -> Callable[[Data], PySparkDataFrame]: # pragma: register(session.stop) - def _constructor(obj: Data) -> PySparkDataFrame: + def _constructor(obj: Data) -> NativePySpark: _obj = deepcopy(obj) index_col_name = generate_temporary_column_name(n_bytes=8, columns=list(_obj)) _obj[index_col_name] = list(range(len(_obj[next(iter(_obj))]))) - - return ( + result = ( session.createDataFrame([*zip(*_obj.values())], schema=[*_obj.keys()]) .repartition(2) .orderBy(index_col_name) .drop(index_col_name) ) + return cast("NativePySpark", result) return 
_constructor -def sqlframe_pyspark_lazy_constructor(obj: Data) -> SQLFrameDataFrame: # pragma: no cover +def sqlframe_pyspark_lazy_constructor(obj: Data) -> NativeSQLFrame: # pragma: no cover session = sqlframe_session() return session.createDataFrame([*zip(*obj.values())], schema=[*obj.keys()]) diff --git a/tests/expr_and_series/cast_test.py b/tests/expr_and_series/cast_test.py index 0dd00ae5cb..ef1f8a14c2 100644 --- a/tests/expr_and_series/cast_test.py +++ b/tests/expr_and_series/cast_test.py @@ -17,7 +17,7 @@ ) if TYPE_CHECKING: - from narwhals.typing import NativeLazyFrame + from narwhals._native import NativeSQLFrame DATA = { "a": [1], @@ -282,12 +282,12 @@ def test_cast_struct(request: pytest.FixtureRequest, constructor: Constructor) - if "spark" in str(constructor): # pragma: no cover # Special handling for pyspark as it natively maps the input to # a column of type MAP - native_ldf = cast("NativeLazyFrame", native_df) + native_ldf = cast("NativeSQLFrame", native_df) _tmp_nw_compliant_frame = nw.from_native(native_ldf)._compliant_frame F = _tmp_nw_compliant_frame._F # type: ignore[attr-defined] T = _tmp_nw_compliant_frame._native_dtypes # type: ignore[attr-defined] # noqa: N806 - native_ldf = native_ldf.withColumn( # type: ignore[attr-defined] + native_ldf = native_ldf.withColumn( "a", F.struct( F.col("a.movie ").cast(T.StringType()).alias("movie "), diff --git a/tests/frame/join_test.py b/tests/frame/join_test.py index ed1e405f86..137866701e 100644 --- a/tests/frame/join_test.py +++ b/tests/frame/join_test.py @@ -17,11 +17,11 @@ ) if TYPE_CHECKING: - from narwhals.typing import IntoLazyFrameT, JoinStrategy, NativeDataFrame + from narwhals.typing import IntoDataFrame, IntoLazyFrameT, JoinStrategy def from_native_lazy( - native: IntoLazyFrameT | NativeDataFrame, + native: IntoLazyFrameT | IntoDataFrame, ) -> nw.LazyFrame[IntoLazyFrameT] | nw.LazyFrame[Any]: """Every join test [needs to use `.lazy()` for typing]*. 
diff --git a/tests/utils.py b/tests/utils.py index 1af14f98e0..a7efbd0d9e 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -22,7 +22,8 @@ from sqlframe.duckdb import DuckDBSession from typing_extensions import TypeAlias - from narwhals.typing import Frame, NativeDataFrame, NativeLazyFrame + from narwhals._native import NativeLazyFrame + from narwhals.typing import Frame, IntoDataFrame def get_module_version_as_tuple(module_name: str) -> tuple[int, ...]: @@ -42,8 +43,8 @@ def get_module_version_as_tuple(module_name: str) -> tuple[int, ...]: PYSPARK_VERSION: tuple[int, ...] = get_module_version_as_tuple("pyspark") CUDF_VERSION: tuple[int, ...] = get_module_version_as_tuple("cudf") -Constructor: TypeAlias = Callable[[Any], "NativeLazyFrame | NativeDataFrame"] -ConstructorEager: TypeAlias = Callable[[Any], "NativeDataFrame"] +Constructor: TypeAlias = Callable[[Any], "NativeLazyFrame | IntoDataFrame"] +ConstructorEager: TypeAlias = Callable[[Any], "IntoDataFrame"] ConstructorLazy: TypeAlias = Callable[[Any], "NativeLazyFrame"] ConstructorPandasLike: TypeAlias = Callable[[Any], "pd.DataFrame"] From d066eb8499dd6d34745816196feb0fb4fd919b66 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Thu, 4 Sep 2025 12:41:39 +0000 Subject: [PATCH 06/18] refactor: Replace duplicated stable typing https://github.com/narwhals-dev/narwhals/pull/3016#discussion_r2296651319 --- narwhals/stable/v1/typing.py | 30 +++--------------------------- narwhals/stable/v2/typing.py | 32 ++++---------------------------- 2 files changed, 7 insertions(+), 55 deletions(-) diff --git a/narwhals/stable/v1/typing.py b/narwhals/stable/v1/typing.py index 7581910607..634c9e9951 100644 --- a/narwhals/stable/v1/typing.py +++ b/narwhals/stable/v1/typing.py @@ -3,34 +3,10 @@ from typing import TYPE_CHECKING, Any, Protocol, TypeVar, Union if TYPE_CHECKING: - import sys - from collections.abc import Iterable, Sized + from typing_extensions import TypeAlias - from 
narwhals.stable.v1 import DataFrame, LazyFrame - - if sys.version_info >= (3, 10): - from typing import TypeAlias - else: - from typing_extensions import TypeAlias - - from narwhals.stable.v1 import Expr, Series - - # All dataframes supported by Narwhals have a - # `columns` property. Their similarities don't extend - # _that_ much further unfortunately... - class NativeFrame(Protocol): - @property - def columns(self) -> Any: ... - - def join(self, *args: Any, **kwargs: Any) -> Any: ... - - class NativeDataFrame(Sized, NativeFrame, Protocol): ... - - class NativeLazyFrame(NativeFrame, Protocol): - def explain(self, *args: Any, **kwargs: Any) -> Any: ... - - class NativeSeries(Sized, Iterable[Any], Protocol): - def filter(self, *args: Any, **kwargs: Any) -> Any: ... + from narwhals._native import NativeDataFrame, NativeLazyFrame, NativeSeries + from narwhals.stable.v1 import DataFrame, Expr, LazyFrame, Series class DataFrameLike(Protocol): def __dataframe__(self, *args: Any, **kwargs: Any) -> Any: ... diff --git a/narwhals/stable/v2/typing.py b/narwhals/stable/v2/typing.py index ac45701fdd..e5eadcce7d 100644 --- a/narwhals/stable/v2/typing.py +++ b/narwhals/stable/v2/typing.py @@ -1,36 +1,12 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Any, Protocol, TypeVar, Union +from typing import TYPE_CHECKING, Any, TypeVar, Union if TYPE_CHECKING: - import sys - from collections.abc import Iterable, Sized + from typing_extensions import TypeAlias - from narwhals.stable.v2 import DataFrame, LazyFrame - - if sys.version_info >= (3, 10): - from typing import TypeAlias - else: - from typing_extensions import TypeAlias - - from narwhals.stable.v2 import Expr, Series - - # All dataframes supported by Narwhals have a - # `columns` property. Their similarities don't extend - # _that_ much further unfortunately... - class NativeFrame(Protocol): - @property - def columns(self) -> Any: ... - - def join(self, *args: Any, **kwargs: Any) -> Any: ... 
- - class NativeDataFrame(Sized, NativeFrame, Protocol): ... - - class NativeLazyFrame(NativeFrame, Protocol): - def explain(self, *args: Any, **kwargs: Any) -> Any: ... - - class NativeSeries(Sized, Iterable[Any], Protocol): - def filter(self, *args: Any, **kwargs: Any) -> Any: ... + from narwhals._native import NativeDataFrame, NativeLazyFrame, NativeSeries + from narwhals.stable.v2 import DataFrame, Expr, LazyFrame, Series IntoExpr: TypeAlias = Union["Expr", str, "Series[Any]"] From d1f45bb76670e7dbddec2455b2775fa36eb50b19 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Thu, 4 Sep 2025 12:46:59 +0000 Subject: [PATCH 07/18] refactor: remove unused `TypeVar` --- narwhals/_namespace.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/narwhals/_namespace.py b/narwhals/_namespace.py index 5b1f4a299f..0d58963825 100644 --- a/narwhals/_namespace.py +++ b/narwhals/_namespace.py @@ -2,7 +2,7 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Any, Generic, TypeVar, overload +from typing import TYPE_CHECKING, Any, ClassVar, Generic, overload from narwhals._compliant.typing import CompliantNamespaceAny, CompliantNamespaceT_co from narwhals._native import ( @@ -37,8 +37,6 @@ from narwhals._utils import Implementation, Version if TYPE_CHECKING: - from typing import ClassVar - import pandas as pd from typing_extensions import TypeAlias @@ -62,8 +60,6 @@ SparkLike, ) - T = TypeVar("T") - EagerAllowedNamespace: TypeAlias = "Namespace[PandasLikeNamespace] | Namespace[ArrowNamespace] | Namespace[PolarsNamespace]" __all__ = ["Namespace"] From c5ada4a1474532493700f8f991e63d95e23518e6 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Thu, 4 Sep 2025 13:03:21 +0000 Subject: [PATCH 08/18] refactor: Deduplicate most `Into*` typing `v1` is different for the frame cases https://github.com/narwhals-dev/narwhals/pull/3016#discussion_r2296651319 
--- narwhals/_native.py | 98 +++++++++++++++++++++++++++++++- narwhals/stable/v1/typing.py | 36 +----------- narwhals/stable/v2/typing.py | 100 +++++--------------------------- narwhals/typing.py | 107 ++++------------------------------- 4 files changed, 125 insertions(+), 216 deletions(-) diff --git a/narwhals/_native.py b/narwhals/_native.py index f84ea950a8..3df0003b6b 100644 --- a/narwhals/_native.py +++ b/narwhals/_native.py @@ -1,7 +1,7 @@ from __future__ import annotations from collections.abc import Callable, Collection, Iterable, Sized -from typing import TYPE_CHECKING, Any, Protocol, TypeVar, cast +from typing import TYPE_CHECKING, Any, Protocol, TypeVar, Union, cast from narwhals.dependencies import ( get_cudf, @@ -30,6 +30,14 @@ _Guard: TypeAlias = "Callable[[Any], TypeIs[T]]" __all__ = [ + "IntoDataFrame", + "IntoDataFrameT", + "IntoFrame", + "IntoFrameT", + "IntoLazyFrame", + "IntoLazyFrameT", + "IntoSeries", + "IntoSeriesT", "NativeAny", "NativeArrow", "NativeCuDF", @@ -169,6 +177,94 @@ def dropDuplicatesWithinWatermark(self, *arg: Any, **kwargs: Any) -> Any: ... # NativeUnknown: TypeAlias = "NativeDataFrame | NativeSeries | NativeLazyFrame" NativeAny: TypeAlias = "NativeKnown | NativeUnknown" +IntoDataFrame: TypeAlias = NativeDataFrame +"""Anything which can be converted to a Narwhals DataFrame. + +Use this if your function accepts a narwhalifiable object but doesn't care about its backend. + +Examples: + >>> import narwhals as nw + >>> from narwhals.typing import IntoDataFrame + >>> def agnostic_shape(df_native: IntoDataFrame) -> tuple[int, int]: + ... df = nw.from_native(df_native, eager_only=True) + ... return df.shape +""" + +IntoLazyFrame: TypeAlias = Union[NativeLazyFrame, NativeIbis] + +IntoFrame: TypeAlias = Union[IntoDataFrame, IntoLazyFrame] +"""Anything which can be converted to a Narwhals DataFrame or LazyFrame. 
+ +Use this if your function can accept an object which can be converted to either +`nw.DataFrame` or `nw.LazyFrame` and it doesn't care about its backend. + +Examples: + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrame + >>> def agnostic_columns(df_native: IntoFrame) -> list[str]: + ... df = nw.from_native(df_native) + ... return df.collect_schema().names() +""" + +IntoSeries: TypeAlias = NativeSeries +"""Anything which can be converted to a Narwhals Series. + +Use this if your function can accept an object which can be converted to `nw.Series` +and it doesn't care about its backend. + +Examples: + >>> from typing import Any + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeries + >>> def agnostic_to_list(s_native: IntoSeries) -> list[Any]: + ... s = nw.from_native(s_native) + ... return s.to_list() +""" + +IntoFrameT = TypeVar("IntoFrameT", bound=IntoFrame) +"""TypeVar bound to object convertible to Narwhals DataFrame or Narwhals LazyFrame. + +Use this if your function accepts an object which is convertible to `nw.DataFrame` +or `nw.LazyFrame` and returns an object of the same type. + +Examples: + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> def agnostic_func(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.with_columns(c=nw.col("a") + 1).to_native() +""" + +IntoDataFrameT = TypeVar("IntoDataFrameT", bound=IntoDataFrame) +"""TypeVar bound to object convertible to Narwhals DataFrame. + +Use this if your function accepts an object which can be converted to `nw.DataFrame` +and returns an object of the same class. + +Examples: + >>> import narwhals as nw + >>> from narwhals.typing import IntoDataFrameT + >>> def agnostic_func(df_native: IntoDataFrameT) -> IntoDataFrameT: + ... df = nw.from_native(df_native, eager_only=True) + ... 
return df.with_columns(c=df["a"] + 1).to_native() +""" + +IntoLazyFrameT = TypeVar("IntoLazyFrameT", bound=IntoLazyFrame) + +IntoSeriesT = TypeVar("IntoSeriesT", bound=IntoSeries) +"""TypeVar bound to object convertible to Narwhals Series. + +Use this if your function accepts an object which can be converted to `nw.Series` +and returns an object of the same class. + +Examples: + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + >>> def agnostic_abs(s_native: IntoSeriesT) -> IntoSeriesT: + ... s = nw.from_native(s_native, series_only=True) + ... return s.abs().to_native() +""" + def is_native_polars(obj: Any) -> TypeIs[NativePolars]: return (pl := get_polars()) is not None and isinstance( diff --git a/narwhals/stable/v1/typing.py b/narwhals/stable/v1/typing.py index 634c9e9951..ab5cd17552 100644 --- a/narwhals/stable/v1/typing.py +++ b/narwhals/stable/v1/typing.py @@ -2,10 +2,12 @@ from typing import TYPE_CHECKING, Any, Protocol, TypeVar, Union +from narwhals._native import IntoSeries, IntoSeriesT + if TYPE_CHECKING: from typing_extensions import TypeAlias - from narwhals._native import NativeDataFrame, NativeLazyFrame, NativeSeries + from narwhals._native import NativeDataFrame, NativeLazyFrame from narwhals.stable.v1 import DataFrame, Expr, LazyFrame, Series class DataFrameLike(Protocol): @@ -22,7 +24,6 @@ def __dataframe__(self, *args: Any, **kwargs: Any) -> Any: ... `nw.Expr`, e.g. `df.select('a')`. """ - IntoDataFrame: TypeAlias = Union["NativeDataFrame", "DataFrameLike"] """Anything which can be converted to a Narwhals DataFrame. @@ -37,7 +38,6 @@ def __dataframe__(self, *args: Any, **kwargs: Any) -> Any: ... """ IntoLazyFrame: TypeAlias = "NativeLazyFrame" - IntoFrame: TypeAlias = Union["IntoDataFrame", "IntoLazyFrame"] """Anything which can be converted to a Narwhals DataFrame or LazyFrame. @@ -66,21 +66,6 @@ def __dataframe__(self, *args: Any, **kwargs: Any) -> Any: ... ... 
return df.columns """ -IntoSeries: TypeAlias = "NativeSeries" -"""Anything which can be converted to a Narwhals Series. - -Use this if your function can accept an object which can be converted to `nw.Series` -and it doesn't care about its backend. - -Examples: - >>> from typing import Any - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeries - >>> def agnostic_to_list(s_native: IntoSeries) -> list[Any]: - ... s = nw.from_native(s_native) - ... return s.to_list() -""" - IntoFrameT = TypeVar("IntoFrameT", bound="IntoFrame") """TypeVar bound to object convertible to Narwhals DataFrame or Narwhals LazyFrame. @@ -110,7 +95,6 @@ def __dataframe__(self, *args: Any, **kwargs: Any) -> Any: ... """ IntoLazyFrameT = TypeVar("IntoLazyFrameT", bound="IntoLazyFrame") - FrameT = TypeVar("FrameT", "DataFrame[Any]", "LazyFrame[Any]") """TypeVar bound to Narwhals DataFrame or Narwhals LazyFrame. @@ -139,20 +123,6 @@ def __dataframe__(self, *args: Any, **kwargs: Any) -> Any: ... ... return df.with_columns(c=df["a"] + 1) """ -IntoSeriesT = TypeVar("IntoSeriesT", bound="IntoSeries") -"""TypeVar bound to object convertible to Narwhals Series. - -Use this if your function accepts an object which can be converted to `nw.Series` -and returns an object of the same class. - -Examples: - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT - >>> def agnostic_abs(s_native: IntoSeriesT) -> IntoSeriesT: - ... s = nw.from_native(s_native, series_only=True) - ... 
return s.abs().to_native() -""" - __all__ = [ "DataFrameT", diff --git a/narwhals/stable/v2/typing.py b/narwhals/stable/v2/typing.py index e5eadcce7d..ca27111410 100644 --- a/narwhals/stable/v2/typing.py +++ b/narwhals/stable/v2/typing.py @@ -2,10 +2,20 @@ from typing import TYPE_CHECKING, Any, TypeVar, Union +from narwhals._native import ( + IntoDataFrame, + IntoDataFrameT, + IntoFrame, + IntoFrameT, + IntoLazyFrame, + IntoLazyFrameT, + IntoSeries, + IntoSeriesT, +) + if TYPE_CHECKING: from typing_extensions import TypeAlias - from narwhals._native import NativeDataFrame, NativeLazyFrame, NativeSeries from narwhals.stable.v2 import DataFrame, Expr, LazyFrame, Series @@ -19,35 +29,6 @@ `nw.Expr`, e.g. `df.select('a')`. """ -IntoDataFrame: TypeAlias = "NativeDataFrame" -"""Anything which can be converted to a Narwhals DataFrame. - -Use this if your function accepts a narwhalifiable object but doesn't care about its backend. - -Examples: - >>> import narwhals as nw - >>> from narwhals.typing import IntoDataFrame - >>> def agnostic_shape(df_native: IntoDataFrame) -> tuple[int, int]: - ... df = nw.from_native(df_native, eager_only=True) - ... return df.shape -""" - -IntoLazyFrame: TypeAlias = "NativeLazyFrame" - -IntoFrame: TypeAlias = Union["IntoDataFrame", "IntoLazyFrame"] -"""Anything which can be converted to a Narwhals DataFrame or LazyFrame. - -Use this if your function can accept an object which can be converted to either -`nw.DataFrame` or `nw.LazyFrame` and it doesn't care about its backend. - -Examples: - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrame - >>> def agnostic_columns(df_native: IntoFrame) -> list[str]: - ... df = nw.from_native(df_native) - ... return df.collect_schema().names() -""" - Frame: TypeAlias = Union["DataFrame[Any]", "LazyFrame[Any]"] """Narwhals DataFrame or Narwhals LazyFrame. @@ -62,49 +43,6 @@ ... 
return df.columns """ -IntoSeries: TypeAlias = "NativeSeries" -"""Anything which can be converted to a Narwhals Series. - -Use this if your function can accept an object which can be converted to `nw.Series` -and it doesn't care about its backend. - -Examples: - >>> from typing import Any - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeries - >>> def agnostic_to_list(s_native: IntoSeries) -> list[Any]: - ... s = nw.from_native(s_native) - ... return s.to_list() -""" - -IntoFrameT = TypeVar("IntoFrameT", bound="IntoFrame") -"""TypeVar bound to object convertible to Narwhals DataFrame or Narwhals LazyFrame. - -Use this if your function accepts an object which is convertible to `nw.DataFrame` -or `nw.LazyFrame` and returns an object of the same type. - -Examples: - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> def agnostic_func(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.with_columns(c=nw.col("a") + 1).to_native() -""" - -IntoDataFrameT = TypeVar("IntoDataFrameT", bound="IntoDataFrame") -"""TypeVar bound to object convertible to Narwhals DataFrame. - -Use this if your function accepts an object which can be converted to `nw.DataFrame` -and returns an object of the same class. - -Examples: - >>> import narwhals as nw - >>> from narwhals.typing import IntoDataFrameT - >>> def agnostic_func(df_native: IntoDataFrameT) -> IntoDataFrameT: - ... df = nw.from_native(df_native, eager_only=True) - ... return df.with_columns(c=df["a"] + 1).to_native() -""" - FrameT = TypeVar("FrameT", "DataFrame[Any]", "LazyFrame[Any]") """TypeVar bound to Narwhals DataFrame or Narwhals LazyFrame. @@ -133,20 +71,6 @@ ... return df.with_columns(c=df["a"] + 1) """ -IntoSeriesT = TypeVar("IntoSeriesT", bound="IntoSeries") -"""TypeVar bound to object convertible to Narwhals Series. 
- -Use this if your function accepts an object which can be converted to `nw.Series` -and returns an object of the same class. - -Examples: - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT - >>> def agnostic_abs(s_native: IntoSeriesT) -> IntoSeriesT: - ... s = nw.from_native(s_native, series_only=True) - ... return s.abs().to_native() -""" - __all__ = [ "DataFrameT", @@ -157,6 +81,8 @@ "IntoExpr", "IntoFrame", "IntoFrameT", + "IntoLazyFrame", + "IntoLazyFrameT", "IntoSeries", "IntoSeriesT", ] diff --git a/narwhals/typing.py b/narwhals/typing.py index ef1ed67cd4..4c6e9b9788 100644 --- a/narwhals/typing.py +++ b/narwhals/typing.py @@ -4,6 +4,16 @@ from typing import TYPE_CHECKING, Any, Literal, Protocol, TypeVar, Union from narwhals._compliant import CompliantDataFrame, CompliantLazyFrame, CompliantSeries +from narwhals._native import ( + IntoDataFrame, + IntoDataFrameT, + IntoFrame, + IntoFrameT, + IntoLazyFrame, + IntoLazyFrameT, + IntoSeries, + IntoSeriesT, +) from narwhals._typing import Backend, EagerAllowed, IntoBackend, LazyAllowed if TYPE_CHECKING: @@ -19,12 +29,6 @@ from typing_extensions import TypeAlias from narwhals import dtypes - from narwhals._native import ( - NativeDataFrame, - NativeIbis, - NativeLazyFrame, - NativeSeries, - ) from narwhals.dataframe import DataFrame, LazyFrame from narwhals.expr import Expr from narwhals.schema import Schema @@ -102,35 +106,6 @@ def Binary(self) -> type[dtypes.Binary]: ... which will be interpreted as a `nw.Expr`, e.g. `df.select('a')`. """ -IntoDataFrame: TypeAlias = "NativeDataFrame" -"""Anything which can be converted to a Narwhals DataFrame. - -Use this if your function accepts a narwhalifiable object but doesn't care about its backend. - -Examples: - >>> import narwhals as nw - >>> from narwhals.typing import IntoDataFrame - >>> def agnostic_shape(df_native: IntoDataFrame) -> tuple[int, int]: - ... df = nw.from_native(df_native, eager_only=True) - ... 
return df.shape -""" - -IntoLazyFrame: TypeAlias = Union["NativeLazyFrame", "NativeIbis"] - -IntoFrame: TypeAlias = Union["IntoDataFrame", "IntoLazyFrame"] -"""Anything which can be converted to a Narwhals DataFrame or LazyFrame. - -Use this if your function can accept an object which can be converted to either -`nw.DataFrame` or `nw.LazyFrame` and it doesn't care about its backend. - -Examples: - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrame - >>> def agnostic_columns(df_native: IntoFrame) -> list[str]: - ... df = nw.from_native(df_native) - ... return df.collect_schema().names() -""" - Frame: TypeAlias = Union["DataFrame[Any]", "LazyFrame[Any]"] """Narwhals DataFrame or Narwhals LazyFrame. @@ -145,51 +120,6 @@ def Binary(self) -> type[dtypes.Binary]: ... ... return df.columns """ -IntoSeries: TypeAlias = "NativeSeries" -"""Anything which can be converted to a Narwhals Series. - -Use this if your function can accept an object which can be converted to `nw.Series` -and it doesn't care about its backend. - -Examples: - >>> from typing import Any - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeries - >>> def agnostic_to_list(s_native: IntoSeries) -> list[Any]: - ... s = nw.from_native(s_native) - ... return s.to_list() -""" - -IntoFrameT = TypeVar("IntoFrameT", bound="IntoFrame") -"""TypeVar bound to object convertible to Narwhals DataFrame or Narwhals LazyFrame. - -Use this if your function accepts an object which is convertible to `nw.DataFrame` -or `nw.LazyFrame` and returns an object of the same type. - -Examples: - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> def agnostic_func(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.with_columns(c=nw.col("a") + 1).to_native() -""" - -IntoDataFrameT = TypeVar("IntoDataFrameT", bound="IntoDataFrame") -"""TypeVar bound to object convertible to Narwhals DataFrame. 
- -Use this if your function accepts an object which can be converted to `nw.DataFrame` -and returns an object of the same class. - -Examples: - >>> import narwhals as nw - >>> from narwhals.typing import IntoDataFrameT - >>> def agnostic_func(df_native: IntoDataFrameT) -> IntoDataFrameT: - ... df = nw.from_native(df_native, eager_only=True) - ... return df.with_columns(c=df["a"] + 1).to_native() -""" - -IntoLazyFrameT = TypeVar("IntoLazyFrameT", bound="IntoLazyFrame") - FrameT = TypeVar("FrameT", "DataFrame[Any]", "LazyFrame[Any]") """TypeVar bound to Narwhals DataFrame or Narwhals LazyFrame. @@ -220,21 +150,6 @@ def Binary(self) -> type[dtypes.Binary]: ... LazyFrameT = TypeVar("LazyFrameT", bound="LazyFrame[Any]") SeriesT = TypeVar("SeriesT", bound="Series[Any]") - -IntoSeriesT = TypeVar("IntoSeriesT", bound="IntoSeries") -"""TypeVar bound to object convertible to Narwhals Series. - -Use this if your function accepts an object which can be converted to `nw.Series` -and returns an object of the same class. - -Examples: - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT - >>> def agnostic_abs(s_native: IntoSeriesT) -> IntoSeriesT: - ... s = nw.from_native(s_native, series_only=True) - ... return s.abs().to_native() -""" - DTypeBackend: TypeAlias = 'Literal["pyarrow", "numpy_nullable"] | None' SizeUnit: TypeAlias = Literal[ "b", @@ -463,6 +378,8 @@ def Binary(self) -> type[dtypes.Binary]: ... 
"IntoExpr", "IntoFrame", "IntoFrameT", + "IntoLazyFrame", + "IntoLazyFrameT", "IntoSeries", "IntoSeriesT", "LazyAllowed", From 9508dd7f71fccde8d3a704ba9b3ff339d4fbd9ed Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Thu, 4 Sep 2025 13:12:58 +0000 Subject: [PATCH 09/18] cov --- narwhals/_native.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/narwhals/_native.py b/narwhals/_native.py index 3df0003b6b..4d393757c2 100644 --- a/narwhals/_native.py +++ b/narwhals/_native.py @@ -297,7 +297,7 @@ def is_native_pandas(obj: Any) -> TypeIs[NativePandas]: def is_native_modin(obj: Any) -> TypeIs[NativeModin]: return (mpd := get_modin()) is not None and isinstance( obj, (mpd.DataFrame, mpd.Series) - ) # pragma: no cover + ) def is_native_cudf(obj: Any) -> TypeIs[NativeCuDF]: @@ -307,9 +307,7 @@ def is_native_cudf(obj: Any) -> TypeIs[NativeCuDF]: def is_native_pandas_like(obj: Any) -> TypeIs[NativePandasLike]: - return ( - is_native_pandas(obj) or is_native_cudf(obj) or is_native_modin(obj) - ) # pragma: no cover + return is_native_pandas(obj) or is_native_cudf(obj) or is_native_modin(obj) def is_native_spark_like(obj: Any) -> TypeIs[NativeSparkLike]: From 57b86c4ec57dbcc17a09dcd9b87fd09b173af8da Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Thu, 4 Sep 2025 13:20:53 +0000 Subject: [PATCH 10/18] refactor: okay but smaller --- narwhals/_native.py | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/narwhals/_native.py b/narwhals/_native.py index 4d393757c2..507581332d 100644 --- a/narwhals/_native.py +++ b/narwhals/_native.py @@ -28,6 +28,7 @@ SQLFrameDataFrame = _BaseDataFrame[Any, Any, Any, Any, Any] T = TypeVar("T") _Guard: TypeAlias = "Callable[[Any], TypeIs[T]]" + Incomplete: TypeAlias = Any __all__ = [ "IntoDataFrame", @@ -76,9 +77,6 @@ ] -Incomplete: TypeAlias = Any - - # All dataframes supported by Narwhals have a 
# `columns` property. Their similarities don't extend # _that_ much further unfortunately... @@ -191,7 +189,6 @@ def dropDuplicatesWithinWatermark(self, *arg: Any, **kwargs: Any) -> Any: ... # """ IntoLazyFrame: TypeAlias = Union[NativeLazyFrame, NativeIbis] - IntoFrame: TypeAlias = Union[IntoDataFrame, IntoLazyFrame] """Anything which can be converted to a Narwhals DataFrame or LazyFrame. @@ -250,7 +247,6 @@ def dropDuplicatesWithinWatermark(self, *arg: Any, **kwargs: Any) -> Any: ... # """ IntoLazyFrameT = TypeVar("IntoLazyFrameT", bound=IntoLazyFrame) - IntoSeriesT = TypeVar("IntoSeriesT", bound=IntoSeries) """TypeVar bound to object convertible to Narwhals Series. @@ -278,16 +274,14 @@ def is_native_arrow(obj: Any) -> TypeIs[NativeArrow]: ) -def is_native_dask(obj: Any) -> TypeIs[NativeDask]: - return is_dask_dataframe(obj) - - +is_native_dask = cast("_Guard[NativeDask]", is_dask_dataframe) is_native_duckdb: _Guard[NativeDuckDB] = is_duckdb_relation is_native_sqlframe: _Guard[NativeSQLFrame] = is_sqlframe_dataframe is_native_pyspark = cast("_Guard[NativePySpark]", is_pyspark_dataframe) is_native_pyspark_connect = cast( "_Guard[NativePySparkConnect]", is_pyspark_connect_dataframe ) +is_native_ibis = cast("_Guard[NativeIbis]", is_ibis_table) def is_native_pandas(obj: Any) -> TypeIs[NativePandas]: @@ -316,7 +310,3 @@ def is_native_spark_like(obj: Any) -> TypeIs[NativeSparkLike]: or is_native_pyspark(obj) or is_native_pyspark_connect(obj) ) - - -def is_native_ibis(obj: Any) -> TypeIs[NativeIbis]: - return is_ibis_table(obj) From 227ab27c66ee75594914ba47dd2b3d8e0f073b3e Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Thu, 4 Sep 2025 14:40:25 +0000 Subject: [PATCH 11/18] fix(typing): `NativeIbis` is a `NativeFrame` --- narwhals/_native.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/narwhals/_native.py b/narwhals/_native.py index 507581332d..aaa802db65 100644 --- a/narwhals/_native.py +++ 
b/narwhals/_native.py @@ -136,7 +136,7 @@ class _CuDFSeries(_BasePandasLikeSeries, Protocol): def to_pylibcudf(self, *args: Any, **kwds: Any) -> Any: ... -class NativeIbis(Protocol): +class NativeIbis(NativeFrame, Protocol): def sql(self, *args: Any, **kwds: Any) -> Any: ... def __pyarrow_result__(self, *args: Any, **kwds: Any) -> Any: ... def __pandas_result__(self, *args: Any, **kwds: Any) -> Any: ... From c6e163e7661f46e6ff8d3cc4413319f465630ed9 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Thu, 4 Sep 2025 16:28:14 +0000 Subject: [PATCH 12/18] docs(typing): wip module doc --- narwhals/_native.py | 68 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) diff --git a/narwhals/_native.py b/narwhals/_native.py index aaa802db65..fd44c2296f 100644 --- a/narwhals/_native.py +++ b/narwhals/_native.py @@ -1,3 +1,71 @@ +"""The home for *mostly* [structural] counterparts to [nominal] native types. + +If you find yourself being yelled at by a typechecker and ended up here - **do not fear!** + +### (1) `Native(*Frame|Series)` +Minimal `Protocol`(s) for matching *almost any* supported native type of that group: + + class NativeThing(Protocol): + def something_common(self, *args: Any, **kwargs: Any) -> Any: ... + +Note: + This group is primarily a building block for more useful types. + +### (2) `Into(*Frame|Series)` +*Publicly* exported `TypeAlias`(s) of **(1)**: + + IntoThing: TypeAlias = NativeThing + +**But**, occasionally, there'll be an edge-case which we can spell like: + + IntoThing: TypeAlias = Union[<type that does not fit the protocol>, NativeThing] + +Tip: + Reach for these when there **isn't a need to preserve** the original native type. + +### (3) `Into(*Frame|Series)T` +*Publicly* exported `TypeVar`(s), bound to **(2)**: + + IntoThingT = TypeVar("IntoThingT", bound=IntoThing) + +Important: + In most situations, you'll want to use these as they **do preserve** the original native type.
+ +Putting it all together, we can now add a *narwhals-level* wrapper: + + class Thing(Generic[IntoThingT]): + def to_native(self) -> IntoThingT: ... + +### (4) The funky ones (WIP) +Everything so far has been focused on the idea of matching an *unknown* native object to +a protocol used by a generic class: + + DataFrame[IntoDataFrameT] + LazyFrame[IntoLazyFrameT] + Series[IntoSeriesT] + +A unique problem arises when we want to describe different behaviors, depending on what +`Into*T` actually is. + +TODO @dangotbanned: finish/shorten/scrap this section + +#### Notes +- Previous comments + - NOTE: Using `pyspark.sql.DataFrame` creates false positives in overloads when not installed + - Arbitrary method that `sqlframe` doesn't have and unlikely to appear anywhere else +- Related `@overload` content + - https://github.com/pola-rs/polars/pull/8011#discussion_r1158657862 + + +### (5) Type guards (WIP) +Using the types from **(4)**, these guards are *mostly* the same as those found in `nw.dependencies`. + +They differ in *how many* native types are checked per-call. 
+ +[structural]: https://typing.python.org/en/latest/spec/glossary.html#term-structural +[nominal]: https://typing.python.org/en/latest/spec/glossary.html#term-nominal +""" + from __future__ import annotations from collections.abc import Callable, Collection, Iterable, Sized From 153c8eed040ff531edad1e8da09ee662fb980126 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Fri, 5 Sep 2025 09:36:30 +0000 Subject: [PATCH 13/18] docs: Cherry-pick some links --- narwhals/_native.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/narwhals/_native.py b/narwhals/_native.py index fd44c2296f..d61debd7dc 100644 --- a/narwhals/_native.py +++ b/narwhals/_native.py @@ -3,7 +3,7 @@ If you find yourself being yelled at by a typechecker and ended up here - **do not fear!** ### (1) `Native(*Frame|Series)` -Minimal `Protocol`(s) for matching *almost any* supported native type of that group: +Minimal [`Protocol`]s for matching *almost any* supported native type of that group: class NativeThing(Protocol): def something_common(self, *args: Any, **kwargs: Any) -> Any: ... @@ -12,7 +12,7 @@ def something_common(self, *args: Any, **kwargs: Any) -> Any: ... This group is primarily a building block for more useful types. ### (2) `Into(*Frame|Series)` -*Publicly* exported `TypeAlias`(s) of **(1)**: +*Publicly* exported [`TypeAlias`]s of **(1)**: IntoThing: TypeAlias = NativeThing @@ -24,7 +24,7 @@ def something_common(self, *args: Any, **kwargs: Any) -> Any: ... Reach for these when there **isn't a need to preserve** the original native type. ### (3) `Into(*Frame|Series)T` -*Publicly* exported `TypeVar`(s), bound to **(2)**: +*Publicly* exported [`TypeVar`]s, bound to **(2)**: IntoThingT = TypeVar("IntoThingT", bound=IntoThing) @@ -38,7 +38,7 @@ def to_native(self) -> IntoThingT: ... 
### (4) The funky ones (WIP) Everything so far has been focused on the idea of matching an *unknown* native object to -a protocol used by a generic class: +a [`Protocol`] used by a [generic class]: DataFrame[IntoDataFrameT] LazyFrame[IntoLazyFrameT] @@ -64,6 +64,10 @@ def to_native(self) -> IntoThingT: ... [structural]: https://typing.python.org/en/latest/spec/glossary.html#term-structural [nominal]: https://typing.python.org/en/latest/spec/glossary.html#term-nominal +[`Protocol`]: https://typing.python.org/en/latest/spec/protocol.html +[`TypeAlias`]: https://mypy.readthedocs.io/en/stable/kinds_of_types.html#type-aliases +[`TypeVar`]: https://mypy.readthedocs.io/en/stable/generics.html#type-variables-with-upper-bounds +[generic class]: https://docs.python.org/3/library/typing.html#user-defined-generic-types """ from __future__ import annotations From b2cc37a53726b6d6c45b316817282b030e3c5cab Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Tue, 9 Sep 2025 17:14:00 +0000 Subject: [PATCH 14/18] docs(typing): Finish filling out module doc https://github.com/narwhals-dev/narwhals/pull/3086#discussion_r2328357102 --- narwhals/_native.py | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/narwhals/_native.py b/narwhals/_native.py index d61debd7dc..f0c8f04b35 100644 --- a/narwhals/_native.py +++ b/narwhals/_native.py @@ -36,7 +36,7 @@ def something_common(self, *args: Any, **kwargs: Any) -> Any: ... class Thing(Generic[IntoThingT]): def to_native(self) -> IntoThingT: ... -### (4) The funky ones (WIP) +### (4) `Native` Everything so far has been focused on the idea of matching an *unknown* native object to a [`Protocol`] used by a [generic class]: @@ -44,23 +44,24 @@ def to_native(self) -> IntoThingT: ... LazyFrame[IntoLazyFrameT] Series[IntoSeriesT] -A unique problem arises when we want to describe different behaviors, depending on what -`Into*T` actually is. 
+If we want to describe a set of more specific types (e.g. in [`@overload`s]), then these protocols/aliases are the right tool. -TODO @dangotbanned: finish/shorten/scrap this section +For common and easily-installed backends, aliases are composed of the native type(s): -#### Notes -- Previous comments - - NOTE: Using `pyspark.sql.DataFrame` creates false positives in overloads when not installed - - Arbitrary method that `sqlframe` doesn't have and unlikely to appear anywhere else -- Related `@overload` content - - https://github.com/pola-rs/polars/pull/8011#discussion_r1158657862 + NativePolars: TypeAlias = pl.DataFrame | pl.LazyFrame | pl.Series +Otherwise, we need to define a [`Protocol`] which the native type(s) can match against *when* installed: -### (5) Type guards (WIP) -Using the types from **(4)**, these guards are *mostly* the same as those found in `nw.dependencies`. + class NativeDask(NativeLazyFrame, Protocol): + _partition_type: type[pd.DataFrame] -They differ in *how many* native types are checked per-call. +Important: + The goal is to be as minimal as possible, while still being *specific-enough* to **not** match something else. + +### (5) `is_native_` +[Type guards] for **(4)**, *similar* to those found in `nw.dependencies`. + +They differ by checking **all** native types/protocols in a single-call and using ``Native`` aliases. [structural]: https://typing.python.org/en/latest/spec/glossary.html#term-structural [nominal]: https://typing.python.org/en/latest/spec/glossary.html#term-nominal @@ -68,6 +69,8 @@ def to_native(self) -> IntoThingT: ... 
[`TypeAlias`]: https://mypy.readthedocs.io/en/stable/kinds_of_types.html#type-aliases [`TypeVar`]: https://mypy.readthedocs.io/en/stable/generics.html#type-variables-with-upper-bounds [generic class]: https://docs.python.org/3/library/typing.html#user-defined-generic-types +[`@overload`s]: https://typing.python.org/en/latest/spec/overload.html +[Type guards]: https://typing.python.org/en/latest/spec/narrowing.html """ from __future__ import annotations @@ -223,10 +226,7 @@ class _ModinSeries(_BasePandasLikeSeries, Protocol): _pandas_class: type[pd.Series[Any]] -# NOTE: Using `pyspark.sql.DataFrame` creates false positives in overloads when not installed class _PySparkDataFrame(NativeLazyFrame, Protocol): - # Arbitrary method that `sqlframe` doesn't have and unlikely to appear anywhere else - # https://github.com/apache/spark/blob/8530444e25b83971da4314c608aa7d763adeceb3/python/pyspark/sql/dataframe.py#L4875 def dropDuplicatesWithinWatermark(self, *arg: Any, **kwargs: Any) -> Any: ... # noqa: N802 From 16b0074b88f010645ea95036abf99d97e447c573 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Thu, 18 Sep 2025 17:17:20 +0000 Subject: [PATCH 15/18] docs: Add a link to `ibis` issue walkthrough https://github.com/ibis-project/ibis/issues/9276#issuecomment-3292016818 --- narwhals/_native.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/narwhals/_native.py b/narwhals/_native.py index f0c8f04b35..53a62b3505 100644 --- a/narwhals/_native.py +++ b/narwhals/_native.py @@ -58,6 +58,8 @@ class NativeDask(NativeLazyFrame, Protocol): Important: The goal is to be as minimal as possible, while still being *specific-enough* to **not** match something else. +For a more complete example, see [ibis#9276 comment]. + ### (5) `is_native_` [Type guards] for **(4)**, *similar* to those found in `nw.dependencies`. 
@@ -70,6 +72,7 @@ class NativeDask(NativeLazyFrame, Protocol): [`TypeVar`]: https://mypy.readthedocs.io/en/stable/generics.html#type-variables-with-upper-bounds [generic class]: https://docs.python.org/3/library/typing.html#user-defined-generic-types [`@overload`s]: https://typing.python.org/en/latest/spec/overload.html +[ibis#9276 comment]: https://github.com/ibis-project/ibis/issues/9276#issuecomment-3292016818 [Type guards]: https://typing.python.org/en/latest/spec/narrowing.html """ From 3683b0696efeff537e32eb206b146b698351669f Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Wed, 15 Oct 2025 12:51:59 +0000 Subject: [PATCH 16/18] docs: Start rewriting, introduce some feedback - Use similar syntax to #2455 - Introduce things like #2455 - https://github.com/narwhals-dev/narwhals/pull/3086#discussion_r2336173196 - https://github.com/narwhals-dev/narwhals/pull/3086#discussion_r2365743977 - Sub-divide the 5 cases - Avoid using the terms *wrap* and *match* outside of the two groups - TODOs on https://github.com/narwhals-dev/narwhals/pull/3086#discussion_r2410520445 --- narwhals/_native.py | 42 +++++++++++++++++++++++++++++++----------- 1 file changed, 31 insertions(+), 11 deletions(-) diff --git a/narwhals/_native.py b/narwhals/_native.py index 53a62b3505..20dfd9d230 100644 --- a/narwhals/_native.py +++ b/narwhals/_native.py @@ -2,8 +2,28 @@ If you find yourself being yelled at by a typechecker and ended up here - **do not fear!** -### (1) `Native(*Frame|Series)` -Minimal [`Protocol`]s for matching *almost any* supported native type of that group: +We have 5 funky flavors, which tackle two different problem spaces. + +How do we describe [Native types] when ... +- ... **wrapping in** a [Narwhals type]? +- ... **matching to** an [`Implementation`]? 
+ +## Wrapping in a Narwhals type +[//]: # (TODO @dangotbanned: Replace `Thing` with a better name) + +The following examples use the placeholder type `Thing` which represents one of: +- `DataFrame`: (Eager) 2D data structure representing data as a table with rows and columns. +- `LazyFrame`: (Lazy) Computation graph/query against a DataFrame/database. +- `Series`: 1D data structure representing a single column. + +Our goal is to **wrap** a *partially-unknown* native object **in** a [generic class]: + + DataFrame[IntoDataFrameT] + LazyFrame[IntoLazyFrameT] + Series[IntoSeriesT] + +### (1) `Native` +Minimal [`Protocol`]s that are [assignable to] *almost any* supported native type of that group: class NativeThing(Protocol): def something_common(self, *args: Any, **kwargs: Any) -> Any: ... @@ -11,7 +31,7 @@ def something_common(self, *args: Any, **kwargs: Any) -> Any: ... Note: This group is primarily a building block for more useful types. -### (2) `Into(*Frame|Series)` +### (2) `Into` *Publicly* exported [`TypeAlias`]s of **(1)**: IntoThing: TypeAlias = NativeThing @@ -23,7 +43,7 @@ def something_common(self, *args: Any, **kwargs: Any) -> Any: ... Tip: Reach for these when there **isn't a need to preserve** the original native type. -### (3) `Into(*Frame|Series)T` +### (3) `IntoT` *Publicly* exported [`TypeVar`]s, bound to **(2)**: IntoThingT = TypeVar("IntoThingT", bound=IntoThing) @@ -36,14 +56,10 @@ def something_common(self, *args: Any, **kwargs: Any) -> Any: ... class Thing(Generic[IntoThingT]): def to_native(self) -> IntoThingT: ... -### (4) `Native` -Everything so far has been focused on the idea of matching an *unknown* native object to -a [`Protocol`] used by a [generic class]: - - DataFrame[IntoDataFrameT] - LazyFrame[IntoLazyFrameT] - Series[IntoSeriesT] +## Matching to an `Implementation` +[//]: # (TODO @dangotbanned: Introduce this section?) +### (4) `Native` If we want to describe a set of more specific types (e.g. 
in [`@overload`s]), then these protocols/aliases are the right tool. For common and easily-installed backends, aliases are composed of the native type(s): @@ -67,7 +83,11 @@ class NativeDask(NativeLazyFrame, Protocol): [structural]: https://typing.python.org/en/latest/spec/glossary.html#term-structural [nominal]: https://typing.python.org/en/latest/spec/glossary.html#term-nominal +[Native types]: https://narwhals-dev.github.io/narwhals/how_it_works/#polars-and-other-implementations +[Narwhals type]: https://narwhals-dev.github.io/narwhals/api-reference/dataframe/ +[`Implementation`]: https://narwhals-dev.github.io/narwhals/api-reference/implementation/ [`Protocol`]: https://typing.python.org/en/latest/spec/protocol.html +[assignable to]: https://typing.python.org/en/latest/spec/glossary.html#term-assignable [`TypeAlias`]: https://mypy.readthedocs.io/en/stable/kinds_of_types.html#type-aliases [`TypeVar`]: https://mypy.readthedocs.io/en/stable/generics.html#type-variables-with-upper-bounds [generic class]: https://docs.python.org/3/library/typing.html#user-defined-generic-types From d060577cf099c7f7d079293691f5276d6445cee1 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Wed, 15 Oct 2025 16:15:43 +0000 Subject: [PATCH 17/18] docs: Fluff up "Matching to an `Implementation`" --- narwhals/_native.py | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/narwhals/_native.py b/narwhals/_native.py index 20dfd9d230..2c9694ebdf 100644 --- a/narwhals/_native.py +++ b/narwhals/_native.py @@ -18,9 +18,9 @@ Our goal is to **wrap** a *partially-unknown* native object **in** a [generic class]: - DataFrame[IntoDataFrameT] - LazyFrame[IntoLazyFrameT] - Series[IntoSeriesT] + def wrapping_in_df(native: IntoDataFrameT) -> DataFrame[IntoDataFrameT]: ... + def wrapping_in_ldf(native: IntoLazyFrameT) -> LazyFrame[IntoLazyFrameT]: ... 
+ def wrapping_in_ser(native: IntoSeriesT) -> Series[IntoSeriesT]: ... ### (1) `Native` Minimal [`Protocol`]s that are [assignable to] *almost any* supported native type of that group: @@ -57,24 +57,30 @@ class Thing(Generic[IntoThingT]): def to_native(self) -> IntoThingT: ... ## Matching to an `Implementation` -[//]: # (TODO @dangotbanned: Introduce this section?) +This problem differs as we need to *create* a relationship between *otherwise-unrelated* types. + +Comparing the problems side-by-side, we can more clearly see this difference: + + def wrapping_in_df(native: IntoDataFrameT) -> DataFrame[IntoDataFrameT]: ... + def matching_to_polars(native: pl.DataFrame) -> Literal[Implementation.POLARS]: ... ### (4) `Native` -If we want to describe a set of more specific types (e.g. in [`@overload`s]), then these protocols/aliases are the right tool. +If we want to describe a set of specific types and **match** them in [`@overload`s], then these are the tools we need. -For common and easily-installed backends, aliases are composed of the native type(s): +For common and easily-installed backends, [`TypeAlias`]s are composed of the native type(s): NativePolars: TypeAlias = pl.DataFrame | pl.LazyFrame | pl.Series -Otherwise, we need to define a [`Protocol`] which the native type(s) can match against *when* installed: +Otherwise, we need to define a [`Protocol`] which the native type(s) can **match** against *when* installed: class NativeDask(NativeLazyFrame, Protocol): _partition_type: type[pd.DataFrame] -Important: - The goal is to be as minimal as possible, while still being *specific-enough* to **not** match something else. +Tip: + The goal is to be as minimal as possible, while still being *specific-enough* to **not match** something else. -For a more complete example, see [ibis#9276 comment]. 
+Important: + See [ibis#9276 comment] for a more *in-depth* example that doesn't fit here 😄 ### (5) `is_native_` [Type guards] for **(4)**, *similar* to those found in `nw.dependencies`. From 3a004e836eee1ddd946f18c9ea8636bbd08aef1e Mon Sep 17 00:00:00 2001 From: Dan Redding <125183946+dangotbanned@users.noreply.github.com> Date: Fri, 17 Oct 2025 16:11:46 +0100 Subject: [PATCH 18/18] Update narwhals/_native.py Co-authored-by: Francesco Bruzzesi <42817048+FBruzzesi@users.noreply.github.com> --- narwhals/_native.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/narwhals/_native.py b/narwhals/_native.py index 2c9694ebdf..594370515e 100644 --- a/narwhals/_native.py +++ b/narwhals/_native.py @@ -19,7 +19,7 @@ Our goal is to **wrap** a *partially-unknown* native object **in** a [generic class]: def wrapping_in_df(native: IntoDataFrameT) -> DataFrame[IntoDataFrameT]: ... - def wrapping_in_ldf(native: IntoLazyFrameT) -> LazyFrame[IntoLazyFrameT]: ... + def wrapping_in_lf(native: IntoLazyFrameT) -> LazyFrame[IntoLazyFrameT]: ... def wrapping_in_ser(native: IntoSeriesT) -> Series[IntoSeriesT]: ... ### (1) `Native`