diff --git a/narwhals/_compliant/typing.py b/narwhals/_compliant/typing.py index 2fc679289a..91ede3b2f0 100644 --- a/narwhals/_compliant/typing.py +++ b/narwhals/_compliant/typing.py @@ -23,13 +23,11 @@ from narwhals._compliant.namespace import CompliantNamespace, EagerNamespace from narwhals._compliant.series import CompliantSeries, EagerSeries from narwhals._compliant.window import WindowInputs + from narwhals._native import NativeDataFrame, NativeFrame, NativeSeries from narwhals.typing import ( FillNullStrategy, IntoLazyFrame, ModeKeepStrategy, - NativeDataFrame, - NativeFrame, - NativeSeries, RankMethod, RollingInterpolationMethod, ) diff --git a/narwhals/_namespace.py b/narwhals/_namespace.py index ffd7a79390..0d58963825 100644 --- a/narwhals/_namespace.py +++ b/narwhals/_namespace.py @@ -2,42 +2,43 @@ from __future__ import annotations -from typing import ( - TYPE_CHECKING, - Any, - Callable, - Generic, - Protocol, - TypeVar, - cast, - overload, -) +from typing import TYPE_CHECKING, Any, ClassVar, Generic, overload from narwhals._compliant.typing import CompliantNamespaceAny, CompliantNamespaceT_co -from narwhals._utils import Implementation, Version -from narwhals.dependencies import ( - get_cudf, - get_modin, - get_pandas, - get_polars, - get_pyarrow, - is_dask_dataframe, - is_duckdb_relation, - is_ibis_table, - is_pyspark_connect_dataframe, - is_pyspark_dataframe, - is_sqlframe_dataframe, +from narwhals._native import ( + NativeAny, + NativeArrow, + NativeCuDF, + NativeDask, + NativeDuckDB, + NativeIbis, + NativeModin, + NativePandas, + NativePandasLike, + NativePolars, + NativeSparkLike, + NativeUnknown, + _CuDFDataFrame, + _CuDFSeries, + _ModinDataFrame, + _ModinSeries, + is_native_arrow, + is_native_cudf, + is_native_dask, + is_native_duckdb, + is_native_ibis, + is_native_modin, + is_native_pandas, + is_native_polars, + is_native_pyspark_connect, + is_native_spark_like, + is_native_sqlframe, ) +from narwhals._utils import Implementation, Version if TYPE_CHECKING: - from collections.abc import Collection, Sized - from typing import ClassVar - - import duckdb import pandas as pd - import polars as pl - import pyarrow as pa - from typing_extensions import Self, TypeAlias, TypeIs + from typing_extensions import TypeAlias from narwhals._arrow.namespace import ArrowNamespace from narwhals._dask.namespace import DaskNamespace @@ -45,7 +46,6 @@ from narwhals._ibis.namespace import IbisNamespace from narwhals._pandas_like.namespace import PandasLikeNamespace from narwhals._polars.namespace import PolarsNamespace - from narwhals._spark_like.dataframe import SQLFrameDataFrame from narwhals._spark_like.namespace import SparkLikeNamespace from narwhals._typing import ( Arrow, @@ -59,87 +59,8 @@ Polars, SparkLike, ) - from narwhals.typing import NativeDataFrame, NativeLazyFrame, NativeSeries - - T = TypeVar("T") - - _Guard: TypeAlias = "Callable[[Any], TypeIs[T]]" EagerAllowedNamespace: TypeAlias = "Namespace[PandasLikeNamespace] | Namespace[ArrowNamespace] | Namespace[PolarsNamespace]" - Incomplete: TypeAlias = Any - - class _BasePandasLike(Sized, Protocol): - index: Any - """`mypy` doesn't like the asymmetric `property` setter in `pandas`.""" - - def __getitem__(self, key: Any, /) -> Any: ... - def __mul__(self, other: float | Collection[float] | Self, /) -> Self: ... - def __floordiv__(self, other: float | Collection[float] | Self, /) -> Self: ... - @property - def loc(self) -> Any: ... - @property - def shape(self) -> tuple[int, ...]: ... 
- def set_axis(self, labels: Any, *, axis: Any = ..., copy: bool = ...) -> Self: ... - def copy(self, deep: bool = ...) -> Self: ... # noqa: FBT001 - def rename(self, *args: Any, **kwds: Any) -> Self | Incomplete: - """`mypy` & `pyright` disagree on overloads. - - `Incomplete` used to fix [more important issue](https://github.com/narwhals-dev/narwhals/pull/3016#discussion_r2296139744). - """ - - class _BasePandasLikeFrame(NativeDataFrame, _BasePandasLike, Protocol): ... - - class _BasePandasLikeSeries(NativeSeries, _BasePandasLike, Protocol): - def where(self, cond: Any, other: Any = ..., /) -> Self | Incomplete: ... - - class _NativeDask(NativeLazyFrame, Protocol): - _partition_type: type[pd.DataFrame] - - class _CuDFDataFrame(_BasePandasLikeFrame, Protocol): - def to_pylibcudf(self, *args: Any, **kwds: Any) -> Any: ... - - class _CuDFSeries(_BasePandasLikeSeries, Protocol): - def to_pylibcudf(self, *args: Any, **kwds: Any) -> Any: ... - - class _NativeIbis(Protocol): - def sql(self, *args: Any, **kwds: Any) -> Any: ... - def __pyarrow_result__(self, *args: Any, **kwds: Any) -> Any: ... - def __pandas_result__(self, *args: Any, **kwds: Any) -> Any: ... - def __polars_result__(self, *args: Any, **kwds: Any) -> Any: ... - - class _ModinDataFrame(_BasePandasLikeFrame, Protocol): - _pandas_class: type[pd.DataFrame] - - class _ModinSeries(_BasePandasLikeSeries, Protocol): - _pandas_class: type[pd.Series[Any]] - - # NOTE: Using `pyspark.sql.DataFrame` creates false positives in overloads when not installed - class _PySparkDataFrame(NativeLazyFrame, Protocol): - # Arbitrary method that `sqlframe` doesn't have and unlikely to appear anywhere else - # https://github.com/apache/spark/blob/8530444e25b83971da4314c608aa7d763adeceb3/python/pyspark/sql/dataframe.py#L4875 - def dropDuplicatesWithinWatermark(self, *arg: Any, **kwargs: Any) -> Any: ... # noqa: N802 - - _NativePolars: TypeAlias = "pl.DataFrame | pl.LazyFrame | pl.Series" - _NativeArrow: TypeAlias = "pa.Table | pa.ChunkedArray[Any]" - _NativeDuckDB: TypeAlias = "duckdb.DuckDBPyRelation" - _NativePandas: TypeAlias = "pd.DataFrame | pd.Series[Any]" - _NativeModin: TypeAlias = "_ModinDataFrame | _ModinSeries" - _NativeCuDF: TypeAlias = "_CuDFDataFrame | _CuDFSeries" - _NativePandasLikeSeries: TypeAlias = "pd.Series[Any] | _CuDFSeries | _ModinSeries" - _NativePandasLikeDataFrame: TypeAlias = ( - "pd.DataFrame | _CuDFDataFrame | _ModinDataFrame" - ) - _NativePandasLike: TypeAlias = "_NativePandasLikeDataFrame |_NativePandasLikeSeries" - _NativeSQLFrame: TypeAlias = "SQLFrameDataFrame" - _NativePySpark: TypeAlias = _PySparkDataFrame - _NativePySparkConnect: TypeAlias = _PySparkDataFrame - _NativeSparkLike: TypeAlias = ( - "_NativeSQLFrame | _NativePySpark | _NativePySparkConnect" - ) - - NativeKnown: TypeAlias = "_NativePolars | _NativeArrow | _NativePandasLike | _NativeSparkLike | _NativeDuckDB | _NativeDask | _NativeIbis" - NativeUnknown: TypeAlias = "NativeDataFrame | NativeSeries | NativeLazyFrame" - NativeAny: TypeAlias = "NativeKnown | NativeUnknown" __all__ = ["Namespace"] @@ -268,55 +189,55 @@ def from_backend( @overload @classmethod def from_native_object( - cls, native: _NativePolars, / + cls, native: NativePolars, / ) -> Namespace[PolarsNamespace]: ... @overload @classmethod def from_native_object( - cls, native: _NativePandas, / + cls, native: NativePandas, / ) -> Namespace[PandasLikeNamespace[pd.DataFrame, pd.Series[Any]]]: ... 
@overload @classmethod - def from_native_object(cls, native: _NativeArrow, /) -> Namespace[ArrowNamespace]: ... + def from_native_object(cls, native: NativeArrow, /) -> Namespace[ArrowNamespace]: ... @overload @classmethod def from_native_object( - cls, native: _NativeSparkLike, / + cls, native: NativeSparkLike, / ) -> Namespace[SparkLikeNamespace]: ... @overload @classmethod def from_native_object( - cls, native: _NativeDuckDB, / + cls, native: NativeDuckDB, / ) -> Namespace[DuckDBNamespace]: ... @overload @classmethod - def from_native_object(cls, native: _NativeDask, /) -> Namespace[DaskNamespace]: ... + def from_native_object(cls, native: NativeDask, /) -> Namespace[DaskNamespace]: ... @overload @classmethod - def from_native_object(cls, native: _NativeIbis, /) -> Namespace[IbisNamespace]: ... + def from_native_object(cls, native: NativeIbis, /) -> Namespace[IbisNamespace]: ... @overload @classmethod def from_native_object( - cls, native: _NativeModin, / + cls, native: NativeModin, / ) -> Namespace[PandasLikeNamespace[_ModinDataFrame, _ModinSeries]]: ... @overload @classmethod def from_native_object( - cls, native: _NativeCuDF, / + cls, native: NativeCuDF, / ) -> Namespace[PandasLikeNamespace[_CuDFDataFrame, _CuDFSeries]]: ... @overload @classmethod def from_native_object( - cls, native: _NativePandasLike, / + cls, native: NativePandasLike, / ) -> Namespace[PandasLikeNamespace[Any, Any]]: ... @overload @@ -358,61 +279,3 @@ def from_native_object( msg = f"Unsupported type: {type(native).__qualname__!r}" raise TypeError(msg) return cls.from_backend(impl) - - -def is_native_polars(obj: Any) -> TypeIs[_NativePolars]: - return (pl := get_polars()) is not None and isinstance( - obj, (pl.DataFrame, pl.Series, pl.LazyFrame) - ) - - -def is_native_arrow(obj: Any) -> TypeIs[_NativeArrow]: - return (pa := get_pyarrow()) is not None and isinstance( - obj, (pa.Table, pa.ChunkedArray) - ) - - -def is_native_dask(obj: Any) -> TypeIs[_NativeDask]: - return is_dask_dataframe(obj) - - -is_native_duckdb: _Guard[_NativeDuckDB] = is_duckdb_relation -is_native_sqlframe: _Guard[_NativeSQLFrame] = is_sqlframe_dataframe -is_native_pyspark = cast("_Guard[_NativePySpark]", is_pyspark_dataframe) -is_native_pyspark_connect = cast( - "_Guard[_NativePySparkConnect]", is_pyspark_connect_dataframe -) - - -def is_native_pandas(obj: Any) -> TypeIs[_NativePandas]: - return (pd := get_pandas()) is not None and isinstance(obj, (pd.DataFrame, pd.Series)) - - -def is_native_modin(obj: Any) -> TypeIs[_NativeModin]: - return (mpd := get_modin()) is not None and isinstance( - obj, (mpd.DataFrame, mpd.Series) - ) # pragma: no cover - - -def is_native_cudf(obj: Any) -> TypeIs[_NativeCuDF]: - return (cudf := get_cudf()) is not None and isinstance( - obj, (cudf.DataFrame, cudf.Series) - ) # pragma: no cover - - -def is_native_pandas_like(obj: Any) -> TypeIs[_NativePandasLike]: - return ( - is_native_pandas(obj) or is_native_cudf(obj) or is_native_modin(obj) - ) # pragma: no cover - - -def is_native_spark_like(obj: Any) -> TypeIs[_NativeSparkLike]: - return ( - is_native_sqlframe(obj) - or is_native_pyspark(obj) - or is_native_pyspark_connect(obj) - ) - - -def is_native_ibis(obj: Any) -> TypeIs[_NativeIbis]: - return is_ibis_table(obj) diff --git a/narwhals/_native.py b/narwhals/_native.py new file mode 100644 index 0000000000..594370515e --- /dev/null +++ b/narwhals/_native.py @@ -0,0 +1,413 @@ +"""The home for *mostly* [structural] counterparts to [nominal] native types. 
+
+If you find yourself being yelled at by a typechecker and have ended up here - **do not fear!**
+
+We have 5 funky flavors, which tackle two different problem spaces.
+
+How do we describe [Native types] when ...
+- ... **wrapping in** a [Narwhals type]?
+- ... **matching to** an [`Implementation`]?
+
+## Wrapping in a Narwhals type
+[//]: # (TODO @dangotbanned: Replace `Thing` with a better name)
+
+The following examples use the placeholder type `Thing` which represents one of:
+- `DataFrame`: (Eager) 2D data structure representing data as a table with rows and columns.
+- `LazyFrame`: (Lazy) Computation graph/query against a DataFrame/database.
+- `Series`: 1D data structure representing a single column.
+
+Our goal is to **wrap** a *partially-unknown* native object **in** a [generic class]:
+
+    def wrapping_in_df(native: IntoDataFrameT) -> DataFrame[IntoDataFrameT]: ...
+    def wrapping_in_lf(native: IntoLazyFrameT) -> LazyFrame[IntoLazyFrameT]: ...
+    def wrapping_in_ser(native: IntoSeriesT) -> Series[IntoSeriesT]: ...
+
+### (1) `Native`
+Minimal [`Protocol`]s that are [assignable to] *almost any* supported native type of that group:
+
+    class NativeThing(Protocol):
+        def something_common(self, *args: Any, **kwargs: Any) -> Any: ...
+
+Note:
+    This group is primarily a building block for more useful types.
+
+### (2) `Into`
+*Publicly* exported [`TypeAlias`]s of **(1)**:
+
+    IntoThing: TypeAlias = NativeThing
+
+**But**, occasionally, there'll be an edge-case which we can spell like:
+
+    IntoThing: TypeAlias = Union[, NativeThing]
+
+Tip:
+    Reach for these when there **isn't a need to preserve** the original native type.
+
+### (3) `IntoT`
+*Publicly* exported [`TypeVar`]s, bound to **(2)**:
+
+    IntoThingT = TypeVar("IntoThingT", bound=IntoThing)
+
+Important:
+    In most situations, you'll want to use these as they **do preserve** the original native type.
+
+Putting it all together, we can now add a *narwhals-level* wrapper:
+
+    class Thing(Generic[IntoThingT]):
+        def to_native(self) -> IntoThingT: ...
+
+## Matching to an `Implementation`
+This problem differs as we need to *create* a relationship between *otherwise-unrelated* types.
+
+Comparing the problems side-by-side, we can more clearly see this difference:
+
+    def wrapping_in_df(native: IntoDataFrameT) -> DataFrame[IntoDataFrameT]: ...
+    def matching_to_polars(native: pl.DataFrame) -> Literal[Implementation.POLARS]: ...
+
+### (4) `Native`
+If we want to describe a set of specific types and **match** them in [`@overload`s], then these are the tools we need.
+
+For common and easily-installed backends, [`TypeAlias`]s are composed of the native type(s):
+
+    NativePolars: TypeAlias = pl.DataFrame | pl.LazyFrame | pl.Series
+
+Otherwise, we need to define a [`Protocol`] which the native type(s) can **match** against *when* installed:
+
+    class NativeDask(NativeLazyFrame, Protocol):
+        _partition_type: type[pd.DataFrame]
+
+Tip:
+    The goal is to be as minimal as possible, while still being *specific-enough* to **not match** something else.
+
+Important:
+    See [ibis#9276 comment] for a more *in-depth* example that doesn't fit here 😄
+
+### (5) `is_native_`
+[Type guards] for **(4)**, *similar* to those found in `nw.dependencies`.
+
+They differ by checking **all** native types/protocols in a single call and using `Native` aliases.
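+
+For example, pairing **(4)** with **(5)** lets a type checker narrow a `NativeAny` step-by-step.
+The helper below is only a sketch - `implementation_of` is a hypothetical name, not part of this module:
+
+    def implementation_of(native: NativeAny) -> Implementation:
+        if is_native_polars(native):  # `native` is narrowed to `NativePolars`
+            return Implementation.POLARS
+        if is_native_arrow(native):  # `native` is narrowed to `NativeArrow`
+            return Implementation.PYARROW
+        return Implementation.UNKNOWN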
+ +[structural]: https://typing.python.org/en/latest/spec/glossary.html#term-structural +[nominal]: https://typing.python.org/en/latest/spec/glossary.html#term-nominal +[Native types]: https://narwhals-dev.github.io/narwhals/how_it_works/#polars-and-other-implementations +[Narwhals type]: https://narwhals-dev.github.io/narwhals/api-reference/dataframe/ +[`Implementation`]: https://narwhals-dev.github.io/narwhals/api-reference/implementation/ +[`Protocol`]: https://typing.python.org/en/latest/spec/protocol.html +[assignable to]: https://typing.python.org/en/latest/spec/glossary.html#term-assignable +[`TypeAlias`]: https://mypy.readthedocs.io/en/stable/kinds_of_types.html#type-aliases +[`TypeVar`]: https://mypy.readthedocs.io/en/stable/generics.html#type-variables-with-upper-bounds +[generic class]: https://docs.python.org/3/library/typing.html#user-defined-generic-types +[`@overload`s]: https://typing.python.org/en/latest/spec/overload.html +[ibis#9276 comment]: https://github.com/ibis-project/ibis/issues/9276#issuecomment-3292016818 +[Type guards]: https://typing.python.org/en/latest/spec/narrowing.html +""" + +from __future__ import annotations + +from collections.abc import Callable, Collection, Iterable, Sized +from typing import TYPE_CHECKING, Any, Protocol, TypeVar, Union, cast + +from narwhals.dependencies import ( + get_cudf, + get_modin, + get_pandas, + get_polars, + get_pyarrow, + is_dask_dataframe, + is_duckdb_relation, + is_ibis_table, + is_pyspark_connect_dataframe, + is_pyspark_dataframe, + is_sqlframe_dataframe, +) + +if TYPE_CHECKING: + import duckdb + import pandas as pd + import polars as pl + import pyarrow as pa + from sqlframe.base.dataframe import BaseDataFrame as _BaseDataFrame + from typing_extensions import Self, TypeAlias, TypeIs + + SQLFrameDataFrame = _BaseDataFrame[Any, Any, Any, Any, Any] + T = TypeVar("T") + _Guard: TypeAlias = "Callable[[Any], TypeIs[T]]" + Incomplete: TypeAlias = Any + +__all__ = [ + "IntoDataFrame", + "IntoDataFrameT", + "IntoFrame", + "IntoFrameT", + "IntoLazyFrame", + "IntoLazyFrameT", + "IntoSeries", + "IntoSeriesT", + "NativeAny", + "NativeArrow", + "NativeCuDF", + "NativeDask", + "NativeDataFrame", + "NativeDuckDB", + "NativeFrame", + "NativeIbis", + "NativeKnown", + "NativeLazyFrame", + "NativeModin", + "NativePandas", + "NativePandasLike", + "NativePandasLikeDataFrame", + "NativePandasLikeSeries", + "NativePolars", + "NativePySpark", + "NativePySparkConnect", + "NativeSQLFrame", + "NativeSeries", + "NativeSparkLike", + "NativeUnknown", + "is_native_arrow", + "is_native_cudf", + "is_native_dask", + "is_native_duckdb", + "is_native_ibis", + "is_native_modin", + "is_native_pandas", + "is_native_pandas_like", + "is_native_polars", + "is_native_pyspark", + "is_native_pyspark_connect", + "is_native_spark_like", + "is_native_sqlframe", +] + + +# All dataframes supported by Narwhals have a +# `columns` property. Their similarities don't extend +# _that_ much further unfortunately... +class NativeFrame(Protocol): + @property + def columns(self) -> Any: ... + def join(self, *args: Any, **kwargs: Any) -> Any: ... + + +class NativeDataFrame(Sized, NativeFrame, Protocol): ... + + +class NativeLazyFrame(NativeFrame, Protocol): + def explain(self, *args: Any, **kwargs: Any) -> Any: ... + + +class NativeSeries(Sized, Iterable[Any], Protocol): + def filter(self, *args: Any, **kwargs: Any) -> Any: ... 
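+
+
+# NOTE: Conformance to the protocols above is purely structural. As a sketch (illustrative
+# only, not exercised anywhere in this module): `pl.DataFrame` is assignable to
+# `NativeDataFrame` simply because it exposes `columns`, `join` and `__len__`, e.g.
+#
+#     frame: NativeDataFrame = pl.DataFrame({"a": [1, 2]})
+#
+# No inheritance or registration is involved.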
+ + +class _BasePandasLike(Sized, Protocol): + index: Any + """`mypy` doesn't like the asymmetric `property` setter in `pandas`.""" + + def __getitem__(self, key: Any, /) -> Any: ... + def __mul__(self, other: float | Collection[float] | Self, /) -> Self: ... + def __floordiv__(self, other: float | Collection[float] | Self, /) -> Self: ... + @property + def loc(self) -> Any: ... + @property + def shape(self) -> tuple[int, ...]: ... + def set_axis(self, labels: Any, *, axis: Any = ..., copy: bool = ...) -> Self: ... + def copy(self, deep: bool = ...) -> Self: ... # noqa: FBT001 + def rename(self, *args: Any, **kwds: Any) -> Self | Incomplete: + """`mypy` & `pyright` disagree on overloads. + + `Incomplete` used to fix [more important issue](https://github.com/narwhals-dev/narwhals/pull/3016#discussion_r2296139744). + """ + + +class _BasePandasLikeFrame(NativeDataFrame, _BasePandasLike, Protocol): ... + + +class _BasePandasLikeSeries(NativeSeries, _BasePandasLike, Protocol): + def where(self, cond: Any, other: Any = ..., /) -> Self | Incomplete: ... + + +class NativeDask(NativeLazyFrame, Protocol): + _partition_type: type[pd.DataFrame] + + +class _CuDFDataFrame(_BasePandasLikeFrame, Protocol): + def to_pylibcudf(self, *args: Any, **kwds: Any) -> Any: ... + + +class _CuDFSeries(_BasePandasLikeSeries, Protocol): + def to_pylibcudf(self, *args: Any, **kwds: Any) -> Any: ... + + +class NativeIbis(NativeFrame, Protocol): + def sql(self, *args: Any, **kwds: Any) -> Any: ... + def __pyarrow_result__(self, *args: Any, **kwds: Any) -> Any: ... + def __pandas_result__(self, *args: Any, **kwds: Any) -> Any: ... + def __polars_result__(self, *args: Any, **kwds: Any) -> Any: ... + + +class _ModinDataFrame(_BasePandasLikeFrame, Protocol): + _pandas_class: type[pd.DataFrame] + + +class _ModinSeries(_BasePandasLikeSeries, Protocol): + _pandas_class: type[pd.Series[Any]] + + +class _PySparkDataFrame(NativeLazyFrame, Protocol): + def dropDuplicatesWithinWatermark(self, *arg: Any, **kwargs: Any) -> Any: ... # noqa: N802 + + +NativePolars: TypeAlias = "pl.DataFrame | pl.LazyFrame | pl.Series" +NativeArrow: TypeAlias = "pa.Table | pa.ChunkedArray[Any]" +NativeDuckDB: TypeAlias = "duckdb.DuckDBPyRelation" +NativePandas: TypeAlias = "pd.DataFrame | pd.Series[Any]" +NativeModin: TypeAlias = "_ModinDataFrame | _ModinSeries" +NativeCuDF: TypeAlias = "_CuDFDataFrame | _CuDFSeries" +NativePandasLikeSeries: TypeAlias = "pd.Series[Any] | _CuDFSeries | _ModinSeries" +NativePandasLikeDataFrame: TypeAlias = "pd.DataFrame | _CuDFDataFrame | _ModinDataFrame" +NativePandasLike: TypeAlias = "NativePandasLikeDataFrame | NativePandasLikeSeries" +NativeSQLFrame: TypeAlias = "_BaseDataFrame[Any, Any, Any, Any, Any]" +NativePySpark: TypeAlias = _PySparkDataFrame +NativePySparkConnect: TypeAlias = _PySparkDataFrame +NativeSparkLike: TypeAlias = "NativeSQLFrame | NativePySpark | NativePySparkConnect" +NativeKnown: TypeAlias = "NativePolars | NativeArrow | NativePandasLike | NativeSparkLike | NativeDuckDB | NativeDask | NativeIbis" +NativeUnknown: TypeAlias = "NativeDataFrame | NativeSeries | NativeLazyFrame" +NativeAny: TypeAlias = "NativeKnown | NativeUnknown" + +IntoDataFrame: TypeAlias = NativeDataFrame +"""Anything which can be converted to a Narwhals DataFrame. + +Use this if your function accepts a narwhalifiable object but doesn't care about its backend. + +Examples: + >>> import narwhals as nw + >>> from narwhals.typing import IntoDataFrame + >>> def agnostic_shape(df_native: IntoDataFrame) -> tuple[int, int]: + ... 
df = nw.from_native(df_native, eager_only=True) + ... return df.shape +""" + +IntoLazyFrame: TypeAlias = Union[NativeLazyFrame, NativeIbis] +IntoFrame: TypeAlias = Union[IntoDataFrame, IntoLazyFrame] +"""Anything which can be converted to a Narwhals DataFrame or LazyFrame. + +Use this if your function can accept an object which can be converted to either +`nw.DataFrame` or `nw.LazyFrame` and it doesn't care about its backend. + +Examples: + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrame + >>> def agnostic_columns(df_native: IntoFrame) -> list[str]: + ... df = nw.from_native(df_native) + ... return df.collect_schema().names() +""" + +IntoSeries: TypeAlias = NativeSeries +"""Anything which can be converted to a Narwhals Series. + +Use this if your function can accept an object which can be converted to `nw.Series` +and it doesn't care about its backend. + +Examples: + >>> from typing import Any + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeries + >>> def agnostic_to_list(s_native: IntoSeries) -> list[Any]: + ... s = nw.from_native(s_native) + ... return s.to_list() +""" + +IntoFrameT = TypeVar("IntoFrameT", bound=IntoFrame) +"""TypeVar bound to object convertible to Narwhals DataFrame or Narwhals LazyFrame. + +Use this if your function accepts an object which is convertible to `nw.DataFrame` +or `nw.LazyFrame` and returns an object of the same type. + +Examples: + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> def agnostic_func(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.with_columns(c=nw.col("a") + 1).to_native() +""" + +IntoDataFrameT = TypeVar("IntoDataFrameT", bound=IntoDataFrame) +"""TypeVar bound to object convertible to Narwhals DataFrame. + +Use this if your function accepts an object which can be converted to `nw.DataFrame` +and returns an object of the same class. + +Examples: + >>> import narwhals as nw + >>> from narwhals.typing import IntoDataFrameT + >>> def agnostic_func(df_native: IntoDataFrameT) -> IntoDataFrameT: + ... df = nw.from_native(df_native, eager_only=True) + ... return df.with_columns(c=df["a"] + 1).to_native() +""" + +IntoLazyFrameT = TypeVar("IntoLazyFrameT", bound=IntoLazyFrame) +IntoSeriesT = TypeVar("IntoSeriesT", bound=IntoSeries) +"""TypeVar bound to object convertible to Narwhals Series. + +Use this if your function accepts an object which can be converted to `nw.Series` +and returns an object of the same class. + +Examples: + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + >>> def agnostic_abs(s_native: IntoSeriesT) -> IntoSeriesT: + ... s = nw.from_native(s_native, series_only=True) + ... 
return s.abs().to_native() +""" + + +def is_native_polars(obj: Any) -> TypeIs[NativePolars]: + return (pl := get_polars()) is not None and isinstance( + obj, (pl.DataFrame, pl.Series, pl.LazyFrame) + ) + + +def is_native_arrow(obj: Any) -> TypeIs[NativeArrow]: + return (pa := get_pyarrow()) is not None and isinstance( + obj, (pa.Table, pa.ChunkedArray) + ) + + +is_native_dask = cast("_Guard[NativeDask]", is_dask_dataframe) +is_native_duckdb: _Guard[NativeDuckDB] = is_duckdb_relation +is_native_sqlframe: _Guard[NativeSQLFrame] = is_sqlframe_dataframe +is_native_pyspark = cast("_Guard[NativePySpark]", is_pyspark_dataframe) +is_native_pyspark_connect = cast( + "_Guard[NativePySparkConnect]", is_pyspark_connect_dataframe +) +is_native_ibis = cast("_Guard[NativeIbis]", is_ibis_table) + + +def is_native_pandas(obj: Any) -> TypeIs[NativePandas]: + return (pd := get_pandas()) is not None and isinstance(obj, (pd.DataFrame, pd.Series)) + + +def is_native_modin(obj: Any) -> TypeIs[NativeModin]: + return (mpd := get_modin()) is not None and isinstance( + obj, (mpd.DataFrame, mpd.Series) + ) + + +def is_native_cudf(obj: Any) -> TypeIs[NativeCuDF]: + return (cudf := get_cudf()) is not None and isinstance( + obj, (cudf.DataFrame, cudf.Series) + ) # pragma: no cover + + +def is_native_pandas_like(obj: Any) -> TypeIs[NativePandasLike]: + return is_native_pandas(obj) or is_native_cudf(obj) or is_native_modin(obj) + + +def is_native_spark_like(obj: Any) -> TypeIs[NativeSparkLike]: + return ( + is_native_sqlframe(obj) + or is_native_pyspark(obj) + or is_native_pyspark_connect(obj) + ) diff --git a/narwhals/_pandas_like/typing.py b/narwhals/_pandas_like/typing.py index 054b011eac..55c644bb5e 100644 --- a/narwhals/_pandas_like/typing.py +++ b/narwhals/_pandas_like/typing.py @@ -10,12 +10,12 @@ import pandas as pd from typing_extensions import TypeAlias - from narwhals._namespace import ( + from narwhals._native import ( + NativePandasLikeDataFrame, _CuDFDataFrame, _CuDFSeries, _ModinDataFrame, _ModinSeries, - _NativePandasLikeDataFrame, ) from narwhals._pandas_like.expr import PandasLikeExpr from narwhals._pandas_like.series import PandasLikeSeries @@ -30,7 +30,7 @@ default="pd.Series[Any]", ) NativeDataFrameT = TypeVar( - "NativeDataFrameT", bound="_NativePandasLikeDataFrame", default="pd.DataFrame" + "NativeDataFrameT", bound="NativePandasLikeDataFrame", default="pd.DataFrame" ) NativeNDFrameT = TypeVar( "NativeNDFrameT", diff --git a/narwhals/_spark_like/dataframe.py b/narwhals/_spark_like/dataframe.py index 17d15c9f0e..4e4acb71b4 100644 --- a/narwhals/_spark_like/dataframe.py +++ b/narwhals/_spark_like/dataframe.py @@ -5,7 +5,7 @@ from typing import TYPE_CHECKING, Any from narwhals._exceptions import issue_warning -from narwhals._namespace import is_native_spark_like +from narwhals._native import is_native_spark_like from narwhals._spark_like.utils import ( catch_pyspark_connect_exception, catch_pyspark_sql_exception, diff --git a/narwhals/_utils.py b/narwhals/_utils.py index 65a2c183f1..e2ea550bea 100644 --- a/narwhals/_utils.py +++ b/narwhals/_utils.py @@ -81,20 +81,20 @@ NativeDataFrameT, NativeLazyFrameT, ) - from narwhals._namespace import ( - Namespace, - _NativeArrow, - _NativeCuDF, - _NativeDask, - _NativeDuckDB, - _NativeIbis, - _NativeModin, - _NativePandas, - _NativePandasLike, - _NativePolars, - _NativePySpark, - _NativePySparkConnect, - _NativeSQLFrame, + from narwhals._namespace import Namespace + from narwhals._native import ( + NativeArrow, + NativeCuDF, + NativeDask, + NativeDuckDB, + 
NativeIbis, + NativeModin, + NativePandas, + NativePandasLike, + NativePolars, + NativePySpark, + NativePySparkConnect, + NativeSQLFrame, ) from narwhals._translate import ArrowStreamExportable, IntoArrowTable, ToNarwhalsT_co from narwhals._typing import ( @@ -2051,36 +2051,36 @@ def __set_name__(self, owner: type[Any], name: str) -> None: self.__name__: str = name @overload - def __get__(self, instance: Narwhals[_NativePolars], owner: Any) -> _PolarsImpl: ... + def __get__(self, instance: Narwhals[NativePolars], owner: Any) -> _PolarsImpl: ... @overload - def __get__(self, instance: Narwhals[_NativePandas], owner: Any) -> _PandasImpl: ... + def __get__(self, instance: Narwhals[NativePandas], owner: Any) -> _PandasImpl: ... @overload - def __get__(self, instance: Narwhals[_NativeModin], owner: Any) -> _ModinImpl: ... + def __get__(self, instance: Narwhals[NativeModin], owner: Any) -> _ModinImpl: ... @overload - def __get__(self, instance: Narwhals[_NativeCuDF], owner: Any) -> _CuDFImpl: ... + def __get__(self, instance: Narwhals[NativeCuDF], owner: Any) -> _CuDFImpl: ... @overload def __get__( - self, instance: Narwhals[_NativePandasLike], owner: Any + self, instance: Narwhals[NativePandasLike], owner: Any ) -> _PandasLikeImpl: ... @overload - def __get__(self, instance: Narwhals[_NativeArrow], owner: Any) -> _ArrowImpl: ... + def __get__(self, instance: Narwhals[NativeArrow], owner: Any) -> _ArrowImpl: ... @overload def __get__( - self, instance: Narwhals[_NativePolars | _NativeArrow | _NativePandas], owner: Any + self, instance: Narwhals[NativePolars | NativeArrow | NativePandas], owner: Any ) -> _PolarsImpl | _PandasImpl | _ArrowImpl: ... @overload - def __get__(self, instance: Narwhals[_NativeDuckDB], owner: Any) -> _DuckDBImpl: ... + def __get__(self, instance: Narwhals[NativeDuckDB], owner: Any) -> _DuckDBImpl: ... @overload def __get__( - self, instance: Narwhals[_NativeSQLFrame], owner: Any + self, instance: Narwhals[NativeSQLFrame], owner: Any ) -> _SQLFrameImpl: ... @overload - def __get__(self, instance: Narwhals[_NativeDask], owner: Any) -> _DaskImpl: ... + def __get__(self, instance: Narwhals[NativeDask], owner: Any) -> _DaskImpl: ... @overload - def __get__(self, instance: Narwhals[_NativeIbis], owner: Any) -> _IbisImpl: ... + def __get__(self, instance: Narwhals[NativeIbis], owner: Any) -> _IbisImpl: ... @overload def __get__( - self, instance: Narwhals[_NativePySpark | _NativePySparkConnect], owner: Any + self, instance: Narwhals[NativePySpark | NativePySparkConnect], owner: Any ) -> _PySparkImpl | _PySparkConnectImpl: ... 
# NOTE: https://docs.python.org/3/howto/descriptor.html#invocation-from-a-class @overload diff --git a/narwhals/functions.py b/narwhals/functions.py index 21d8b1b34a..6baef07688 100644 --- a/narwhals/functions.py +++ b/narwhals/functions.py @@ -42,6 +42,7 @@ from typing_extensions import TypeAlias, TypeIs from narwhals._compliant import CompliantExpr, CompliantNamespace + from narwhals._native import NativeDataFrame, NativeLazyFrame, NativeSeries from narwhals._translate import IntoArrowTable from narwhals._typing import Backend, EagerAllowed, IntoBackend from narwhals.dataframe import DataFrame, LazyFrame @@ -52,9 +53,6 @@ IntoDType, IntoExpr, IntoSchema, - NativeDataFrame, - NativeLazyFrame, - NativeSeries, NonNestedLiteral, _1DArray, _2DArray, diff --git a/narwhals/stable/v1/typing.py b/narwhals/stable/v1/typing.py index 7581910607..ab5cd17552 100644 --- a/narwhals/stable/v1/typing.py +++ b/narwhals/stable/v1/typing.py @@ -2,35 +2,13 @@ from typing import TYPE_CHECKING, Any, Protocol, TypeVar, Union -if TYPE_CHECKING: - import sys - from collections.abc import Iterable, Sized - - from narwhals.stable.v1 import DataFrame, LazyFrame - - if sys.version_info >= (3, 10): - from typing import TypeAlias - else: - from typing_extensions import TypeAlias - - from narwhals.stable.v1 import Expr, Series - - # All dataframes supported by Narwhals have a - # `columns` property. Their similarities don't extend - # _that_ much further unfortunately... - class NativeFrame(Protocol): - @property - def columns(self) -> Any: ... - - def join(self, *args: Any, **kwargs: Any) -> Any: ... +from narwhals._native import IntoSeries, IntoSeriesT - class NativeDataFrame(Sized, NativeFrame, Protocol): ... - - class NativeLazyFrame(NativeFrame, Protocol): - def explain(self, *args: Any, **kwargs: Any) -> Any: ... +if TYPE_CHECKING: + from typing_extensions import TypeAlias - class NativeSeries(Sized, Iterable[Any], Protocol): - def filter(self, *args: Any, **kwargs: Any) -> Any: ... + from narwhals._native import NativeDataFrame, NativeLazyFrame + from narwhals.stable.v1 import DataFrame, Expr, LazyFrame, Series class DataFrameLike(Protocol): def __dataframe__(self, *args: Any, **kwargs: Any) -> Any: ... @@ -46,7 +24,6 @@ def __dataframe__(self, *args: Any, **kwargs: Any) -> Any: ... `nw.Expr`, e.g. `df.select('a')`. """ - IntoDataFrame: TypeAlias = Union["NativeDataFrame", "DataFrameLike"] """Anything which can be converted to a Narwhals DataFrame. @@ -61,7 +38,6 @@ def __dataframe__(self, *args: Any, **kwargs: Any) -> Any: ... """ IntoLazyFrame: TypeAlias = "NativeLazyFrame" - IntoFrame: TypeAlias = Union["IntoDataFrame", "IntoLazyFrame"] """Anything which can be converted to a Narwhals DataFrame or LazyFrame. @@ -90,21 +66,6 @@ def __dataframe__(self, *args: Any, **kwargs: Any) -> Any: ... ... return df.columns """ -IntoSeries: TypeAlias = "NativeSeries" -"""Anything which can be converted to a Narwhals Series. - -Use this if your function can accept an object which can be converted to `nw.Series` -and it doesn't care about its backend. - -Examples: - >>> from typing import Any - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeries - >>> def agnostic_to_list(s_native: IntoSeries) -> list[Any]: - ... s = nw.from_native(s_native) - ... return s.to_list() -""" - IntoFrameT = TypeVar("IntoFrameT", bound="IntoFrame") """TypeVar bound to object convertible to Narwhals DataFrame or Narwhals LazyFrame. @@ -134,7 +95,6 @@ def __dataframe__(self, *args: Any, **kwargs: Any) -> Any: ... 
""" IntoLazyFrameT = TypeVar("IntoLazyFrameT", bound="IntoLazyFrame") - FrameT = TypeVar("FrameT", "DataFrame[Any]", "LazyFrame[Any]") """TypeVar bound to Narwhals DataFrame or Narwhals LazyFrame. @@ -163,20 +123,6 @@ def __dataframe__(self, *args: Any, **kwargs: Any) -> Any: ... ... return df.with_columns(c=df["a"] + 1) """ -IntoSeriesT = TypeVar("IntoSeriesT", bound="IntoSeries") -"""TypeVar bound to object convertible to Narwhals Series. - -Use this if your function accepts an object which can be converted to `nw.Series` -and returns an object of the same class. - -Examples: - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT - >>> def agnostic_abs(s_native: IntoSeriesT) -> IntoSeriesT: - ... s = nw.from_native(s_native, series_only=True) - ... return s.abs().to_native() -""" - __all__ = [ "DataFrameT", diff --git a/narwhals/stable/v2/typing.py b/narwhals/stable/v2/typing.py index ac45701fdd..ca27111410 100644 --- a/narwhals/stable/v2/typing.py +++ b/narwhals/stable/v2/typing.py @@ -1,36 +1,22 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Any, Protocol, TypeVar, Union +from typing import TYPE_CHECKING, Any, TypeVar, Union + +from narwhals._native import ( + IntoDataFrame, + IntoDataFrameT, + IntoFrame, + IntoFrameT, + IntoLazyFrame, + IntoLazyFrameT, + IntoSeries, + IntoSeriesT, +) if TYPE_CHECKING: - import sys - from collections.abc import Iterable, Sized + from typing_extensions import TypeAlias - from narwhals.stable.v2 import DataFrame, LazyFrame - - if sys.version_info >= (3, 10): - from typing import TypeAlias - else: - from typing_extensions import TypeAlias - - from narwhals.stable.v2 import Expr, Series - - # All dataframes supported by Narwhals have a - # `columns` property. Their similarities don't extend - # _that_ much further unfortunately... - class NativeFrame(Protocol): - @property - def columns(self) -> Any: ... - - def join(self, *args: Any, **kwargs: Any) -> Any: ... - - class NativeDataFrame(Sized, NativeFrame, Protocol): ... - - class NativeLazyFrame(NativeFrame, Protocol): - def explain(self, *args: Any, **kwargs: Any) -> Any: ... - - class NativeSeries(Sized, Iterable[Any], Protocol): - def filter(self, *args: Any, **kwargs: Any) -> Any: ... + from narwhals.stable.v2 import DataFrame, Expr, LazyFrame, Series IntoExpr: TypeAlias = Union["Expr", str, "Series[Any]"] @@ -43,35 +29,6 @@ def filter(self, *args: Any, **kwargs: Any) -> Any: ... `nw.Expr`, e.g. `df.select('a')`. """ -IntoDataFrame: TypeAlias = "NativeDataFrame" -"""Anything which can be converted to a Narwhals DataFrame. - -Use this if your function accepts a narwhalifiable object but doesn't care about its backend. - -Examples: - >>> import narwhals as nw - >>> from narwhals.typing import IntoDataFrame - >>> def agnostic_shape(df_native: IntoDataFrame) -> tuple[int, int]: - ... df = nw.from_native(df_native, eager_only=True) - ... return df.shape -""" - -IntoLazyFrame: TypeAlias = "NativeLazyFrame" - -IntoFrame: TypeAlias = Union["IntoDataFrame", "IntoLazyFrame"] -"""Anything which can be converted to a Narwhals DataFrame or LazyFrame. - -Use this if your function can accept an object which can be converted to either -`nw.DataFrame` or `nw.LazyFrame` and it doesn't care about its backend. - -Examples: - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrame - >>> def agnostic_columns(df_native: IntoFrame) -> list[str]: - ... df = nw.from_native(df_native) - ... 
return df.collect_schema().names() -""" - Frame: TypeAlias = Union["DataFrame[Any]", "LazyFrame[Any]"] """Narwhals DataFrame or Narwhals LazyFrame. @@ -86,49 +43,6 @@ def filter(self, *args: Any, **kwargs: Any) -> Any: ... ... return df.columns """ -IntoSeries: TypeAlias = "NativeSeries" -"""Anything which can be converted to a Narwhals Series. - -Use this if your function can accept an object which can be converted to `nw.Series` -and it doesn't care about its backend. - -Examples: - >>> from typing import Any - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeries - >>> def agnostic_to_list(s_native: IntoSeries) -> list[Any]: - ... s = nw.from_native(s_native) - ... return s.to_list() -""" - -IntoFrameT = TypeVar("IntoFrameT", bound="IntoFrame") -"""TypeVar bound to object convertible to Narwhals DataFrame or Narwhals LazyFrame. - -Use this if your function accepts an object which is convertible to `nw.DataFrame` -or `nw.LazyFrame` and returns an object of the same type. - -Examples: - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> def agnostic_func(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.with_columns(c=nw.col("a") + 1).to_native() -""" - -IntoDataFrameT = TypeVar("IntoDataFrameT", bound="IntoDataFrame") -"""TypeVar bound to object convertible to Narwhals DataFrame. - -Use this if your function accepts an object which can be converted to `nw.DataFrame` -and returns an object of the same class. - -Examples: - >>> import narwhals as nw - >>> from narwhals.typing import IntoDataFrameT - >>> def agnostic_func(df_native: IntoDataFrameT) -> IntoDataFrameT: - ... df = nw.from_native(df_native, eager_only=True) - ... return df.with_columns(c=df["a"] + 1).to_native() -""" - FrameT = TypeVar("FrameT", "DataFrame[Any]", "LazyFrame[Any]") """TypeVar bound to Narwhals DataFrame or Narwhals LazyFrame. @@ -157,20 +71,6 @@ def filter(self, *args: Any, **kwargs: Any) -> Any: ... ... return df.with_columns(c=df["a"] + 1) """ -IntoSeriesT = TypeVar("IntoSeriesT", bound="IntoSeries") -"""TypeVar bound to object convertible to Narwhals Series. - -Use this if your function accepts an object which can be converted to `nw.Series` -and returns an object of the same class. - -Examples: - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT - >>> def agnostic_abs(s_native: IntoSeriesT) -> IntoSeriesT: - ... s = nw.from_native(s_native, series_only=True) - ... return s.abs().to_native() -""" - __all__ = [ "DataFrameT", @@ -181,6 +81,8 @@ def filter(self, *args: Any, **kwargs: Any) -> Any: ... 
"IntoExpr", "IntoFrame", "IntoFrameT", + "IntoLazyFrame", + "IntoLazyFrameT", "IntoSeries", "IntoSeriesT", ] diff --git a/narwhals/translate.py b/narwhals/translate.py index dd9d931f21..1886ef2843 100644 --- a/narwhals/translate.py +++ b/narwhals/translate.py @@ -6,7 +6,7 @@ from typing import TYPE_CHECKING, Any, Callable, Literal, TypeVar, overload from narwhals._constants import EPOCH, MS_PER_SECOND -from narwhals._namespace import ( +from narwhals._native import ( is_native_arrow, is_native_pandas_like, is_native_polars, diff --git a/narwhals/typing.py b/narwhals/typing.py index 356af6e66f..2425d01ebe 100644 --- a/narwhals/typing.py +++ b/narwhals/typing.py @@ -4,12 +4,22 @@ from typing import TYPE_CHECKING, Any, Literal, Protocol, TypeVar, Union from narwhals._compliant import CompliantDataFrame, CompliantLazyFrame, CompliantSeries +from narwhals._native import ( + IntoDataFrame, + IntoDataFrameT, + IntoFrame, + IntoFrameT, + IntoLazyFrame, + IntoLazyFrameT, + IntoSeries, + IntoSeriesT, +) from narwhals._typing import Backend, EagerAllowed, IntoBackend, LazyAllowed if TYPE_CHECKING: import datetime as dt import os - from collections.abc import Iterable, Sequence, Sized + from collections.abc import Sequence from decimal import Decimal from types import ModuleType @@ -20,29 +30,11 @@ from typing_extensions import TypeAlias from narwhals import dtypes - from narwhals._namespace import _NativeIbis from narwhals.dataframe import DataFrame, LazyFrame from narwhals.expr import Expr from narwhals.schema import Schema from narwhals.series import Series - # All dataframes supported by Narwhals have a - # `columns` property. Their similarities don't extend - # _that_ much further unfortunately... - class NativeFrame(Protocol): - @property - def columns(self) -> Any: ... - - def join(self, *args: Any, **kwargs: Any) -> Any: ... - - class NativeDataFrame(Sized, NativeFrame, Protocol): ... - - class NativeLazyFrame(NativeFrame, Protocol): - def explain(self, *args: Any, **kwargs: Any) -> Any: ... - - class NativeSeries(Sized, Iterable[Any], Protocol): - def filter(self, *args: Any, **kwargs: Any) -> Any: ... - class SupportsNativeNamespace(Protocol): def __native_namespace__(self) -> ModuleType: ... @@ -115,35 +107,6 @@ def Binary(self) -> type[dtypes.Binary]: ... which will be interpreted as a `nw.Expr`, e.g. `df.select('a')`. """ -IntoDataFrame: TypeAlias = "NativeDataFrame" -"""Anything which can be converted to a Narwhals DataFrame. - -Use this if your function accepts a narwhalifiable object but doesn't care about its backend. - -Examples: - >>> import narwhals as nw - >>> from narwhals.typing import IntoDataFrame - >>> def agnostic_shape(df_native: IntoDataFrame) -> tuple[int, int]: - ... df = nw.from_native(df_native, eager_only=True) - ... return df.shape -""" - -IntoLazyFrame: TypeAlias = Union["NativeLazyFrame", "_NativeIbis"] - -IntoFrame: TypeAlias = Union["IntoDataFrame", "IntoLazyFrame"] -"""Anything which can be converted to a Narwhals DataFrame or LazyFrame. - -Use this if your function can accept an object which can be converted to either -`nw.DataFrame` or `nw.LazyFrame` and it doesn't care about its backend. - -Examples: - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrame - >>> def agnostic_columns(df_native: IntoFrame) -> list[str]: - ... df = nw.from_native(df_native) - ... return df.collect_schema().names() -""" - Frame: TypeAlias = Union["DataFrame[Any]", "LazyFrame[Any]"] """Narwhals DataFrame or Narwhals LazyFrame. 
@@ -158,51 +121,6 @@ def Binary(self) -> type[dtypes.Binary]: ... ... return df.columns """ -IntoSeries: TypeAlias = "NativeSeries" -"""Anything which can be converted to a Narwhals Series. - -Use this if your function can accept an object which can be converted to `nw.Series` -and it doesn't care about its backend. - -Examples: - >>> from typing import Any - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeries - >>> def agnostic_to_list(s_native: IntoSeries) -> list[Any]: - ... s = nw.from_native(s_native) - ... return s.to_list() -""" - -IntoFrameT = TypeVar("IntoFrameT", bound="IntoFrame") -"""TypeVar bound to object convertible to Narwhals DataFrame or Narwhals LazyFrame. - -Use this if your function accepts an object which is convertible to `nw.DataFrame` -or `nw.LazyFrame` and returns an object of the same type. - -Examples: - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> def agnostic_func(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.with_columns(c=nw.col("a") + 1).to_native() -""" - -IntoDataFrameT = TypeVar("IntoDataFrameT", bound="IntoDataFrame") -"""TypeVar bound to object convertible to Narwhals DataFrame. - -Use this if your function accepts an object which can be converted to `nw.DataFrame` -and returns an object of the same class. - -Examples: - >>> import narwhals as nw - >>> from narwhals.typing import IntoDataFrameT - >>> def agnostic_func(df_native: IntoDataFrameT) -> IntoDataFrameT: - ... df = nw.from_native(df_native, eager_only=True) - ... return df.with_columns(c=df["a"] + 1).to_native() -""" - -IntoLazyFrameT = TypeVar("IntoLazyFrameT", bound="IntoLazyFrame") - FrameT = TypeVar("FrameT", "DataFrame[Any]", "LazyFrame[Any]") """TypeVar bound to Narwhals DataFrame or Narwhals LazyFrame. @@ -233,21 +151,6 @@ def Binary(self) -> type[dtypes.Binary]: ... LazyFrameT = TypeVar("LazyFrameT", bound="LazyFrame[Any]") SeriesT = TypeVar("SeriesT", bound="Series[Any]") - -IntoSeriesT = TypeVar("IntoSeriesT", bound="IntoSeries") -"""TypeVar bound to object convertible to Narwhals Series. - -Use this if your function accepts an object which can be converted to `nw.Series` -and returns an object of the same class. - -Examples: - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT - >>> def agnostic_abs(s_native: IntoSeriesT) -> IntoSeriesT: - ... s = nw.from_native(s_native, series_only=True) - ... return s.abs().to_native() -""" - DTypeBackend: TypeAlias = 'Literal["pyarrow", "numpy_nullable"] | None' SizeUnit: TypeAlias = Literal[ "b", @@ -478,6 +381,8 @@ def Binary(self) -> type[dtypes.Binary]: ... 
"IntoExpr", "IntoFrame", "IntoFrameT", + "IntoLazyFrame", + "IntoLazyFrameT", "IntoSeries", "IntoSeriesT", "LazyAllowed", diff --git a/tests/conftest.py b/tests/conftest.py index fd4a2d0000..650b8d4a3a 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -16,18 +16,16 @@ if TYPE_CHECKING: from collections.abc import Sequence - import duckdb import ibis import pandas as pd import polars as pl import pyarrow as pa from ibis.backends.duckdb import Backend as IbisDuckDBBackend - from pyspark.sql import DataFrame as PySparkDataFrame from typing_extensions import TypeAlias - from narwhals._spark_like.dataframe import SQLFrameDataFrame + from narwhals._native import NativeDask, NativeDuckDB, NativePySpark, NativeSQLFrame from narwhals._typing import EagerAllowed - from narwhals.typing import NativeDataFrame, NativeLazyFrame, NonNestedDType + from narwhals.typing import IntoDataFrame, NonNestedDType from tests.utils import ( Constructor, ConstructorEager, @@ -108,27 +106,27 @@ def pandas_pyarrow_constructor(obj: Data) -> pd.DataFrame: return pd.DataFrame(obj).convert_dtypes(dtype_backend="pyarrow") -def modin_constructor(obj: Data) -> NativeDataFrame: # pragma: no cover +def modin_constructor(obj: Data) -> IntoDataFrame: # pragma: no cover import modin.pandas as mpd import pandas as pd df = mpd.DataFrame(pd.DataFrame(obj)) - return cast("NativeDataFrame", df) + return cast("IntoDataFrame", df) -def modin_pyarrow_constructor(obj: Data) -> NativeDataFrame: # pragma: no cover +def modin_pyarrow_constructor(obj: Data) -> IntoDataFrame: # pragma: no cover import modin.pandas as mpd import pandas as pd df = mpd.DataFrame(pd.DataFrame(obj)).convert_dtypes(dtype_backend="pyarrow") - return cast("NativeDataFrame", df) + return cast("IntoDataFrame", df) -def cudf_constructor(obj: Data) -> NativeDataFrame: # pragma: no cover +def cudf_constructor(obj: Data) -> IntoDataFrame: # pragma: no cover import cudf df = cudf.DataFrame(obj) - return cast("NativeDataFrame", df) + return cast("IntoDataFrame", df) def polars_eager_constructor(obj: Data) -> pl.DataFrame: @@ -143,7 +141,7 @@ def polars_lazy_constructor(obj: Data) -> pl.LazyFrame: return pl.LazyFrame(obj) -def duckdb_lazy_constructor(obj: Data) -> duckdb.DuckDBPyRelation: +def duckdb_lazy_constructor(obj: Data) -> NativeDuckDB: pytest.importorskip("duckdb") pytest.importorskip("pyarrow") import duckdb @@ -155,16 +153,16 @@ def duckdb_lazy_constructor(obj: Data) -> duckdb.DuckDBPyRelation: return duckdb.table("_df") -def dask_lazy_p1_constructor(obj: Data) -> NativeLazyFrame: # pragma: no cover +def dask_lazy_p1_constructor(obj: Data) -> NativeDask: # pragma: no cover import dask.dataframe as dd - return cast("NativeLazyFrame", dd.from_dict(obj, npartitions=1)) + return cast("NativeDask", dd.from_dict(obj, npartitions=1)) -def dask_lazy_p2_constructor(obj: Data) -> NativeLazyFrame: # pragma: no cover +def dask_lazy_p2_constructor(obj: Data) -> NativeDask: # pragma: no cover import dask.dataframe as dd - return cast("NativeLazyFrame", dd.from_dict(obj, npartitions=2)) + return cast("NativeDask", dd.from_dict(obj, npartitions=2)) def pyarrow_table_constructor(obj: dict[str, Any]) -> pa.Table: @@ -174,7 +172,7 @@ def pyarrow_table_constructor(obj: dict[str, Any]) -> pa.Table: return pa.table(obj) -def pyspark_lazy_constructor() -> Callable[[Data], PySparkDataFrame]: # pragma: no cover +def pyspark_lazy_constructor() -> Callable[[Data], NativePySpark]: # pragma: no cover pytest.importorskip("pyspark") import warnings from atexit import register @@ -189,22 
+187,22 @@ def pyspark_lazy_constructor() -> Callable[[Data], PySparkDataFrame]: # pragma: register(session.stop) - def _constructor(obj: Data) -> PySparkDataFrame: + def _constructor(obj: Data) -> NativePySpark: _obj = deepcopy(obj) index_col_name = generate_temporary_column_name(n_bytes=8, columns=list(_obj)) _obj[index_col_name] = list(range(len(_obj[next(iter(_obj))]))) - - return ( + result = ( session.createDataFrame([*zip(*_obj.values())], schema=[*_obj.keys()]) .repartition(2) .orderBy(index_col_name) .drop(index_col_name) ) + return cast("NativePySpark", result) return _constructor -def sqlframe_pyspark_lazy_constructor(obj: Data) -> SQLFrameDataFrame: # pragma: no cover +def sqlframe_pyspark_lazy_constructor(obj: Data) -> NativeSQLFrame: # pragma: no cover pytest.importorskip("sqlframe") pytest.importorskip("duckdb") session = sqlframe_session() diff --git a/tests/expr_and_series/cast_test.py b/tests/expr_and_series/cast_test.py index 47d8ea0818..9121c3dc4c 100644 --- a/tests/expr_and_series/cast_test.py +++ b/tests/expr_and_series/cast_test.py @@ -18,7 +18,7 @@ ) if TYPE_CHECKING: - from narwhals.typing import NativeLazyFrame + from narwhals._native import NativeSQLFrame DATA = { "a": [1], @@ -283,12 +283,12 @@ def test_cast_struct(request: pytest.FixtureRequest, constructor: Constructor) - if "spark" in str(constructor): # pragma: no cover # Special handling for pyspark as it natively maps the input to # a column of type MAP - native_ldf = cast("NativeLazyFrame", native_df) + native_ldf = cast("NativeSQLFrame", native_df) _tmp_nw_compliant_frame = nw.from_native(native_ldf)._compliant_frame F = _tmp_nw_compliant_frame._F # type: ignore[attr-defined] T = _tmp_nw_compliant_frame._native_dtypes # type: ignore[attr-defined] # noqa: N806 - native_ldf = native_ldf.withColumn( # type: ignore[attr-defined] + native_ldf = native_ldf.withColumn( "a", F.struct( F.col("a.movie ").cast(T.StringType()).alias("movie "), diff --git a/tests/frame/join_test.py b/tests/frame/join_test.py index 46a869eab9..e2189da202 100644 --- a/tests/frame/join_test.py +++ b/tests/frame/join_test.py @@ -17,11 +17,11 @@ ) if TYPE_CHECKING: - from narwhals.typing import IntoLazyFrameT, JoinStrategy, NativeDataFrame + from narwhals.typing import IntoDataFrame, IntoLazyFrameT, JoinStrategy def from_native_lazy( - native: IntoLazyFrameT | NativeDataFrame, + native: IntoLazyFrameT | IntoDataFrame, ) -> nw.LazyFrame[IntoLazyFrameT] | nw.LazyFrame[Any]: """Every join test [needs to use `.lazy()` for typing]*. diff --git a/tests/utils.py b/tests/utils.py index deaafb4a37..1c0a24a8e1 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -22,7 +22,8 @@ from sqlframe.duckdb import DuckDBSession from typing_extensions import TypeAlias - from narwhals.typing import Frame, NativeDataFrame, NativeLazyFrame, TimeUnit + from narwhals._native import NativeLazyFrame + from narwhals.typing import Frame, IntoDataFrame, TimeUnit def get_module_version_as_tuple(module_name: str) -> tuple[int, ...]: @@ -42,8 +43,8 @@ def get_module_version_as_tuple(module_name: str) -> tuple[int, ...]: PYSPARK_VERSION: tuple[int, ...] = get_module_version_as_tuple("pyspark") CUDF_VERSION: tuple[int, ...] 
= get_module_version_as_tuple("cudf") -Constructor: TypeAlias = Callable[[Any], "NativeLazyFrame | NativeDataFrame"] -ConstructorEager: TypeAlias = Callable[[Any], "NativeDataFrame"] +Constructor: TypeAlias = Callable[[Any], "NativeLazyFrame | IntoDataFrame"] +ConstructorEager: TypeAlias = Callable[[Any], "IntoDataFrame"] ConstructorLazy: TypeAlias = Callable[[Any], "NativeLazyFrame"] ConstructorPandasLike: TypeAlias = Callable[[Any], "pd.DataFrame"]