diff --git a/narwhals/_arrow/dataframe.py b/narwhals/_arrow/dataframe.py index 3e05ab0431..7745a9e7d4 100644 --- a/narwhals/_arrow/dataframe.py +++ b/narwhals/_arrow/dataframe.py @@ -845,3 +845,5 @@ def unpivot( ) # TODO(Unassigned): Even with promote_options="permissive", pyarrow does not # upcast numeric to non-numeric (e.g. string) datatypes + + pivot = not_implemented() diff --git a/narwhals/_compliant/dataframe.py b/narwhals/_compliant/dataframe.py index e22cd60e35..08ac415380 100644 --- a/narwhals/_compliant/dataframe.py +++ b/narwhals/_compliant/dataframe.py @@ -37,12 +37,14 @@ from typing_extensions import TypeAlias from narwhals._compliant.group_by import CompliantGroupBy + from narwhals._compliant.group_by import DataFrameGroupBy from narwhals._translate import IntoArrowTable from narwhals.dtypes import DType from narwhals.schema import Schema from narwhals.typing import AsofJoinStrategy from narwhals.typing import JoinStrategy from narwhals.typing import LazyUniqueKeepStrategy + from narwhals.typing import PivotAgg from narwhals.typing import SizeUnit from narwhals.typing import UniqueKeepStrategy from narwhals.typing import _2DArray @@ -136,7 +138,7 @@ def gather_every(self, n: int, offset: int) -> Self: ... def get_column(self, name: str) -> CompliantSeriesT: ... def group_by( self, *keys: str, drop_null_keys: bool - ) -> CompliantGroupBy[Self, Any]: ... + ) -> DataFrameGroupBy[Self, Any]: ... def head(self, n: int) -> Self: ... def item(self, row: int | None, column: int | str | None) -> Any: ... def iter_columns(self) -> Iterator[CompliantSeriesT]: ... @@ -165,6 +167,16 @@ def join_asof( suffix: str, ) -> Self: ... def lazy(self, *, backend: Implementation | None) -> CompliantLazyFrame[Any, Any]: ... + def pivot( + self, + on: Sequence[str], + *, + index: Sequence[str] | None, + values: Sequence[str] | None, + aggregate_function: PivotAgg | None, + sort_columns: bool, + separator: str, + ) -> Self: ... 
def rename(self, mapping: Mapping[str, str]) -> Self: ... def row(self, index: int) -> tuple[Any, ...]: ... def rows( diff --git a/narwhals/_compliant/group_by.py b/narwhals/_compliant/group_by.py index 477f0d0904..37d37fb259 100644 --- a/narwhals/_compliant/group_by.py +++ b/narwhals/_compliant/group_by.py @@ -76,6 +76,13 @@ def __init__( def agg(self, *exprs: CompliantExprT_contra) -> CompliantFrameT_co: ... +class DataFrameGroupBy( + CompliantGroupBy[CompliantDataFrameT_co, CompliantExprT_contra], + Protocol38[CompliantDataFrameT_co, CompliantExprT_contra], +): + def __iter__(self) -> Iterator[tuple[Any, CompliantDataFrameT_co]]: ... + + class DepthTrackingGroupBy( CompliantGroupBy[CompliantFrameT_co, DepthTrackingExprT_contra], Protocol38[CompliantFrameT_co, DepthTrackingExprT_contra, NativeAggregationT_co], @@ -132,9 +139,9 @@ def _leaf_name(cls, expr: DepthTrackingExprAny, /) -> NarwhalsAggregation | Any: class EagerGroupBy( DepthTrackingGroupBy[CompliantDataFrameT_co, EagerExprT_contra, str], + DataFrameGroupBy[CompliantDataFrameT_co, EagerExprT_contra], Protocol38[CompliantDataFrameT_co, EagerExprT_contra], -): - def __iter__(self) -> Iterator[tuple[Any, CompliantDataFrameT_co]]: ... +): ... 
class LazyGroupBy( diff --git a/narwhals/_pandas_like/dataframe.py b/narwhals/_pandas_like/dataframe.py index 5e0da4beac..4d4493e2bb 100644 --- a/narwhals/_pandas_like/dataframe.py +++ b/narwhals/_pandas_like/dataframe.py @@ -65,6 +65,7 @@ from narwhals.typing import CompliantLazyFrame from narwhals.typing import DTypeBackend from narwhals.typing import JoinStrategy + from narwhals.typing import PivotAgg from narwhals.typing import SizeUnit from narwhals.typing import UniqueKeepStrategy from narwhals.typing import _1DArray @@ -1017,12 +1018,12 @@ def gather_every(self: Self, n: int, offset: int) -> Self: return self._with_native(self.native.iloc[offset::n], validate_column_names=False) def pivot( - self: Self, - on: list[str], + self, + on: Sequence[str], *, - index: list[str] | None, - values: list[str] | None, - aggregate_function: Any | None, + index: Sequence[str] | None, + values: Sequence[str] | None, + aggregate_function: PivotAgg | None, sort_columns: bool, separator: str, ) -> Self: diff --git a/narwhals/_pandas_like/utils.py b/narwhals/_pandas_like/utils.py index aea4a34a3a..e73369d1cf 100644 --- a/narwhals/_pandas_like/utils.py +++ b/narwhals/_pandas_like/utils.py @@ -5,6 +5,7 @@ from contextlib import suppress from typing import TYPE_CHECKING from typing import Any +from typing import Sequence from typing import Sized from typing import TypeVar from typing import cast @@ -652,9 +653,9 @@ def select_columns_by_name( def pivot_table( df: PandasLikeDataFrame, - values: list[str], - index: list[str], - columns: list[str], + values: Sequence[str], + index: Sequence[str], + columns: Sequence[str], aggregate_function: str | None, ) -> Any: dtypes = import_dtypes_module(df._version) diff --git a/narwhals/_polars/dataframe.py b/narwhals/_polars/dataframe.py index a6dae67223..6258e4218d 100644 --- a/narwhals/_polars/dataframe.py +++ b/narwhals/_polars/dataframe.py @@ -44,6 +44,7 @@ from narwhals.typing import CompliantDataFrame from narwhals.typing import 
CompliantLazyFrame from narwhals.typing import JoinStrategy + from narwhals.typing import PivotAgg from narwhals.typing import _2DArray from narwhals.utils import Version from narwhals.utils import _FullContext @@ -77,7 +78,6 @@ class PolarsDataFrame: select: Method[Self] sort: Method[Self] to_arrow: Method[pa.Table] - to_numpy: Method[_2DArray] to_pandas: Method[pd.DataFrame] unique: Method[Self] with_columns: Method[Self] @@ -232,6 +232,9 @@ def __array__( return self.native.__array__(dtype) return self.native.__array__(dtype) + def to_numpy(self, dtype: Any = None, *, copy: bool | None = None) -> _2DArray: + return self.native.to_numpy() + def collect_schema(self: Self) -> dict[str, DType]: if self._backend_version < (1,): return { @@ -412,15 +415,12 @@ def unpivot( ) def pivot( - self: Self, - on: list[str], + self, + on: Sequence[str], *, - index: list[str] | None, - values: list[str] | None, - aggregate_function: Literal[ - "min", "max", "first", "last", "sum", "mean", "median", "len" - ] - | None, + index: Sequence[str] | None, + values: Sequence[str] | None, + aggregate_function: PivotAgg | None, sort_columns: bool, separator: str, ) -> Self: diff --git a/narwhals/dataframe.py b/narwhals/dataframe.py index aea25f0753..f67572e1ae 100644 --- a/narwhals/dataframe.py +++ b/narwhals/dataframe.py @@ -33,11 +33,13 @@ from narwhals.utils import find_stacklevel from narwhals.utils import flatten from narwhals.utils import generate_repr +from narwhals.utils import is_compliant_dataframe from narwhals.utils import is_compliant_lazyframe from narwhals.utils import is_list_of from narwhals.utils import is_sequence_but_not_str from narwhals.utils import issue_deprecation_warning from narwhals.utils import parse_version +from narwhals.utils import supports_arrow_c_stream if TYPE_CHECKING: from io import BytesIO @@ -51,6 +53,8 @@ from typing_extensions import ParamSpec from typing_extensions import Self + from narwhals._compliant import CompliantDataFrame + from 
narwhals._compliant import CompliantLazyFrame from narwhals._compliant import IntoCompliantExpr from narwhals._compliant.typing import EagerNamespaceAny from narwhals.group_by import GroupBy @@ -443,14 +447,13 @@ def _lazyframe(self: Self) -> type[LazyFrame[Any]]: return LazyFrame def __init__( - self: Self, - df: Any, - *, - level: Literal["full", "lazy", "interchange"], + self: Self, df: Any, *, level: Literal["full", "lazy", "interchange"] ) -> None: self._level: Literal["full", "lazy", "interchange"] = level - if hasattr(df, "__narwhals_dataframe__"): - self._compliant_frame: Any = df.__narwhals_dataframe__() + # NOTE: Interchange support (`DataFrameLike`) is the source of the error + self._compliant_frame: CompliantDataFrame[Any, Any, DataFrameT] # type: ignore[type-var] + if is_compliant_dataframe(df): + self._compliant_frame = df.__narwhals_dataframe__() else: # pragma: no cover msg = f"Expected an object which implements `__narwhals_dataframe__`, got: {type(df)}" raise AssertionError(msg) @@ -479,13 +482,13 @@ def implementation(self: Self) -> Implementation: >>> df.implementation.is_polars() False """ - return self._compliant_frame._implementation # type: ignore[no-any-return] + return self._compliant_frame._implementation def __len__(self: Self) -> int: - return self._compliant_frame.__len__() # type: ignore[no-any-return] + return self._compliant_frame.__len__() def __array__(self: Self, dtype: Any = None, copy: bool | None = None) -> _2DArray: # noqa: FBT001 - return self._compliant_frame.__array__(dtype, copy=copy) # type: ignore[no-any-return] + return self._compliant_frame.__array__(dtype, copy=copy) def __repr__(self: Self) -> str: # pragma: no cover return generate_repr("Narwhals DataFrame", self.to_native().__repr__()) @@ -500,7 +503,7 @@ def __arrow_c_stream__(self: Self, requested_schema: object | None = None) -> ob for more. 
""" native_frame = self._compliant_frame._native_frame - if hasattr(native_frame, "__arrow_c_stream__"): + if supports_arrow_c_stream(native_frame): return native_frame.__arrow_c_stream__(requested_schema=requested_schema) try: import pyarrow as pa # ignore-banned-import @@ -589,8 +592,7 @@ def lazy( ) raise ValueError(msg) return self._lazyframe( - self._compliant_frame.lazy(backend=lazy_backend), - level="lazy", + self._compliant_frame.lazy(backend=lazy_backend), level="lazy" ) def to_native(self: Self) -> DataFrameT: @@ -614,7 +616,7 @@ def to_native(self: Self) -> DataFrameT: 1 2 7.0 b 2 3 8.0 c """ - return self._compliant_frame._native_frame # type: ignore[no-any-return] + return self._compliant_frame._native_frame def to_pandas(self: Self) -> pd.DataFrame: """Convert this DataFrame to a pandas DataFrame. @@ -635,7 +637,7 @@ def to_pandas(self: Self) -> pd.DataFrame: 1 2 7.0 b 2 3 8.0 c """ - return self._compliant_frame.to_pandas() # type: ignore[no-any-return] + return self._compliant_frame.to_pandas() def to_polars(self: Self) -> pl.DataFrame: """Convert this DataFrame to a polars DataFrame. @@ -659,7 +661,7 @@ def to_polars(self: Self) -> pl.DataFrame: │ 2 ┆ 7.0 │ └─────┴─────┘ """ - return self._compliant_frame.to_polars() # type: ignore[no-any-return] + return self._compliant_frame.to_polars() @overload def write_csv(self: Self, file: None = None) -> str: ... @@ -690,7 +692,7 @@ def write_csv(self: Self, file: str | Path | BytesIO | None = None) -> str | Non If we had passed a file name to `write_csv`, it would have been written to that file. """ - return self._compliant_frame.write_csv(file) # type: ignore[no-any-return] + return self._compliant_frame.write_csv(file) def write_parquet(self: Self, file: str | Path | BytesIO) -> None: """Write dataframe to parquet file. @@ -726,7 +728,7 @@ def to_numpy(self: Self) -> _2DArray: array([[1. , 6.5], [2. , 7. 
]]) """ - return self._compliant_frame.to_numpy() # type: ignore[no-any-return] + return self._compliant_frame.to_numpy(None, copy=None) @property def shape(self: Self) -> tuple[int, int]: @@ -743,7 +745,7 @@ def shape(self: Self) -> tuple[int, int]: >>> df.shape (2, 1) """ - return self._compliant_frame.shape # type: ignore[no-any-return] + return self._compliant_frame.shape def get_column(self: Self, name: str) -> Series[Any]: """Get a single column by name. @@ -771,10 +773,7 @@ def get_column(self: Self, name: str) -> Series[Any]: 1 2 Name: a, dtype: int64 """ - return self._series( - self._compliant_frame.get_column(name), - level=self._level, - ) + return self._series(self._compliant_frame.get_column(name), level=self._level) def estimated_size(self: Self, unit: SizeUnit = "b") -> int | float: """Return an estimation of the total (heap) allocated size of the `DataFrame`. @@ -796,7 +795,7 @@ def estimated_size(self: Self, unit: SizeUnit = "b") -> int | float: >>> df.estimated_size() 32 """ - return self._compliant_frame.estimated_size(unit=unit) # type: ignore[no-any-return] + return self._compliant_frame.estimated_size(unit=unit) @overload def __getitem__( # type: ignore[overload-overlap] @@ -952,15 +951,12 @@ def to_dict( """ if as_series: return { - key: self._series( - value, - level=self._level, - ) + key: self._series(value, level=self._level) for key, value in self._compliant_frame.to_dict( as_series=as_series ).items() } - return self._compliant_frame.to_dict(as_series=as_series) # type: ignore[no-any-return] + return self._compliant_frame.to_dict(as_series=as_series) def row(self: Self, index: int) -> tuple[Any, ...]: """Get values at given row. 
@@ -986,7 +982,7 @@ def row(self: Self, index: int) -> tuple[Any, ...]: >>> nw.from_native(df_native).row(1) (, ) """ - return self._compliant_frame.row(index) # type: ignore[no-any-return] + return self._compliant_frame.row(index) # inherited def pipe( @@ -1152,7 +1148,7 @@ def rows( >>> nw.from_native(df_native).rows() [(1, 6.0), (2, 7.0)] """ - return self._compliant_frame.rows(named=named) # type: ignore[no-any-return] + return self._compliant_frame.rows(named=named) # type: ignore[return-value] def iter_columns(self: Self) -> Iterator[Series[Any]]: """Returns an iterator over the columns of this DataFrame. @@ -1229,7 +1225,7 @@ def iter_rows( >>> next(iter_rows) (2, 7.0) """ - return self._compliant_frame.iter_rows(named=named, buffer_size=buffer_size) # type: ignore[no-any-return] + return self._compliant_frame.iter_rows(named=named, buffer_size=buffer_size) # type: ignore[return-value] def with_columns( self: Self, *exprs: IntoExpr | Iterable[IntoExpr], **named_exprs: IntoExpr @@ -1436,9 +1432,7 @@ def unique( if isinstance(subset, str): subset = [subset] return self._with_compliant( - self._compliant_frame.unique( - subset=subset, keep=keep, maintain_order=maintain_order - ) + self._compliant_frame.unique(subset, keep=keep, maintain_order=maintain_order) ) def filter( @@ -1794,10 +1788,7 @@ def is_unique(self: Self) -> Series[Any]: | dtype: bool | └───────────────┘ """ - return self._series( - self._compliant_frame.is_unique(), - level=self._level, - ) + return self._series(self._compliant_frame.is_unique(), level=self._level) def null_count(self: Self) -> Self: r"""Create a new DataFrame that shows the null counts per column. 
@@ -1987,7 +1978,7 @@ def to_arrow(self: Self) -> pa.Table: foo: [[1,null]] bar: [[2,3]] """ - return self._compliant_frame.to_arrow() # type: ignore[no-any-return] + return self._compliant_frame.to_arrow() def sample( self: Self, @@ -2185,15 +2176,11 @@ def _dataframe(self: Self) -> type[DataFrame[Any]]: return DataFrame def __init__( - self: Self, - df: Any, - *, - level: Literal["full", "lazy", "interchange"], + self: Self, df: Any, *, level: Literal["full", "lazy", "interchange"] ) -> None: self._level = level + self._compliant_frame: CompliantLazyFrame[Any, FrameT] # type: ignore[type-var] if is_compliant_lazyframe(df): - # NOTE: Blocked by (#2239) - # self._compliant_frame: CompliantLazyFrame[Any, FrameT] = df.__narwhals_lazyframe__() # noqa: ERA001 self._compliant_frame = df.__narwhals_lazyframe__() else: # pragma: no cover msg = f"Expected Polars LazyFrame or an object that implements `__narwhals_lazyframe__`, got: {type(df)}" @@ -2219,7 +2206,7 @@ def implementation(self: Self) -> Implementation: >>> nw.from_native(lf_native).implementation """ - return self._compliant_frame._implementation # type: ignore[no-any-return] + return self._compliant_frame._implementation def __getitem__(self: Self, item: str | slice) -> NoReturn: msg = "Slicing is not supported on LazyFrame" diff --git a/narwhals/dependencies.py b/narwhals/dependencies.py index e32e544abc..1f49ef9642 100644 --- a/narwhals/dependencies.py +++ b/narwhals/dependencies.py @@ -26,7 +26,6 @@ from narwhals.dataframe import DataFrame from narwhals.dataframe import LazyFrame from narwhals.series import Series - from narwhals.typing import DataFrameT from narwhals.typing import FrameT from narwhals.typing import IntoDataFrameT from narwhals.typing import IntoSeriesT @@ -364,8 +363,8 @@ def is_into_dataframe(native_dataframe: Any | IntoDataFrameT) -> TypeIs[IntoData def is_narwhals_dataframe( - df: Any | DataFrame[DataFrameT], -) -> TypeIs[DataFrame[DataFrameT]]: + df: DataFrame[IntoDataFrameT] | Any, +) 
-> TypeIs[DataFrame[IntoDataFrameT]]: """Check whether `df` is a Narwhals DataFrame. This is useful if you expect a user to pass in a Narwhals diff --git a/narwhals/stable/v1/__init__.py b/narwhals/stable/v1/__init__.py index ea310d8aa4..4743bc5ab5 100644 --- a/narwhals/stable/v1/__init__.py +++ b/narwhals/stable/v1/__init__.py @@ -101,6 +101,8 @@ from narwhals.typing import _2DArray FrameT = TypeVar("FrameT", "DataFrame[Any]", "LazyFrame[Any]") + DataFrameT = TypeVar("DataFrameT", bound="DataFrame[Any]") + LazyFrameT = TypeVar("LazyFrameT", bound="LazyFrame[Any]") SeriesT = TypeVar("SeriesT", bound="Series[Any]") IntoSeriesT = TypeVar("IntoSeriesT", bound="IntoSeries", default=Any) T = TypeVar("T", default=Any) @@ -651,6 +653,20 @@ def _stableify( def from_native(native_object: SeriesT, **kwds: Any) -> SeriesT: ... +@overload +def from_native(native_object: DataFrameT, **kwds: Any) -> DataFrameT: ... + + +@overload +def from_native(native_object: LazyFrameT, **kwds: Any) -> LazyFrameT: ... + + +@overload +def from_native( + native_object: DataFrameT | LazyFrameT, **kwds: Any +) -> DataFrameT | LazyFrameT: ... + + @overload def from_native( native_object: IntoDataFrameT | IntoSeriesT, diff --git a/narwhals/translate.py b/narwhals/translate.py index 4c7ca1193e..89544cbf32 100644 --- a/narwhals/translate.py +++ b/narwhals/translate.py @@ -44,12 +44,14 @@ from narwhals.dataframe import DataFrame from narwhals.dataframe import LazyFrame from narwhals.series import Series + from narwhals.typing import DataFrameT from narwhals.typing import IntoDataFrameT from narwhals.typing import IntoFrame from narwhals.typing import IntoFrameT from narwhals.typing import IntoLazyFrameT from narwhals.typing import IntoSeries from narwhals.typing import IntoSeriesT + from narwhals.typing import LazyFrameT from narwhals.typing import SeriesT T = TypeVar("T") @@ -134,14 +136,11 @@ def from_native(native_object: SeriesT, **kwds: Any) -> SeriesT: ... 
@overload -def from_native( - native_object: IntoDataFrameT | IntoSeries, - *, - pass_through: Literal[True], - eager_only: Literal[False] = ..., - series_only: Literal[False] = ..., - allow_series: Literal[True], -) -> DataFrame[IntoDataFrameT]: ... +def from_native(native_object: DataFrameT, **kwds: Any) -> DataFrameT: ... + + +@overload +def from_native(native_object: LazyFrameT, **kwds: Any) -> LazyFrameT: ... @overload @@ -235,7 +234,6 @@ def from_native( # type: ignore[overload-overlap] ) -> LazyFrame[IntoLazyFrameT]: ... -# NOTE: `pl.LazyFrame` originally matched here @overload def from_native( native_object: IntoDataFrameT, @@ -280,17 +278,6 @@ def from_native( ) -> Series[IntoSeriesT]: ... -@overload -def from_native( - native_object: IntoFrameT | IntoLazyFrameT, - *, - pass_through: Literal[False] = ..., - eager_only: Literal[False] = ..., - series_only: Literal[False] = ..., - allow_series: None = ..., -) -> DataFrame[IntoFrameT] | LazyFrame[IntoLazyFrameT]: ... - - # All params passed in as variables @overload def from_native( diff --git a/narwhals/typing.py b/narwhals/typing.py index ec7e7d5915..da5fc2096b 100644 --- a/narwhals/typing.py +++ b/narwhals/typing.py @@ -57,7 +57,7 @@ def __native_namespace__(self) -> ModuleType: ... which will be interpreted as a `nw.Expr`, e.g. `df.select('a')`. """ -IntoDataFrame: TypeAlias = Union["NativeFrame", "DataFrame[Any]", "DataFrameLike"] +IntoDataFrame: TypeAlias = Union["NativeFrame", "DataFrameLike"] """Anything which can be converted to a Narwhals DataFrame. Use this if your function accepts a narwhalifiable object but doesn't care about its backend. @@ -70,11 +70,9 @@ def __native_namespace__(self) -> ModuleType: ... ... 
return df.shape """ -IntoLazyFrame: TypeAlias = "NativeLazyFrame | LazyFrame[Any]" +IntoLazyFrame: TypeAlias = "NativeLazyFrame" -IntoFrame: TypeAlias = Union[ - "NativeFrame", "DataFrame[Any]", "LazyFrame[Any]", "DataFrameLike" -] +IntoFrame: TypeAlias = Union["IntoDataFrame", "IntoLazyFrame"] """Anything which can be converted to a Narwhals DataFrame or LazyFrame. Use this if your function can accept an object which can be converted to either diff --git a/tests/expr_and_series/cast_test.py b/tests/expr_and_series/cast_test.py index 801569685a..2208f63f28 100644 --- a/tests/expr_and_series/cast_test.py +++ b/tests/expr_and_series/cast_test.py @@ -4,6 +4,8 @@ from datetime import time from datetime import timedelta from datetime import timezone +from typing import TYPE_CHECKING +from typing import cast import pandas as pd import pytest @@ -16,6 +18,9 @@ from tests.utils import assert_equal_data from tests.utils import is_windows +if TYPE_CHECKING: + from narwhals.typing import NativeLazyFrame + DATA = { "a": [1], "b": [1], @@ -283,27 +288,30 @@ def test_cast_struct(request: pytest.FixtureRequest, constructor: Constructor) - native_df = constructor(data) + # NOTE: This branch needs to be rewritten to **not depend** on private `SparkLikeLazyFrame` properties if "spark" in str(constructor): # pragma: no cover # Special handling for pyspark as it natively maps the input to # a column of type MAP - _tmp_nw_compliant_frame = nw.from_native(native_df)._compliant_frame - F = _tmp_nw_compliant_frame._F # noqa: N806 - T = _tmp_nw_compliant_frame._native_dtypes # noqa: N806 + native_ldf = cast("NativeLazyFrame", native_df) + _tmp_nw_compliant_frame = nw.from_native(native_ldf)._compliant_frame + F = _tmp_nw_compliant_frame._F # type: ignore[attr-defined] # noqa: N806 + T = _tmp_nw_compliant_frame._native_dtypes # type: ignore[attr-defined] # noqa: N806 - native_df = native_df.withColumn( # type: ignore[union-attr] + native_ldf = native_ldf.withColumn( # type: 
ignore[attr-defined] "a", F.struct( F.col("a.movie ").cast(T.StringType()).alias("movie "), F.col("a.rating").cast(T.DoubleType()).alias("rating"), ), ) - assert nw.from_native(native_df).schema == nw.Schema( + assert nw.from_native(native_ldf).schema == nw.Schema( { "a": nw.Struct( [nw.Field("movie ", nw.String()), nw.Field("rating", nw.Float64())] ) } ) + native_df = native_ldf dtype = nw.Struct([nw.Field("movie ", nw.String()), nw.Field("rating", nw.Float32())]) result = nw.from_native(native_df).select(nw.col("a").cast(dtype)).lazy().collect() diff --git a/tests/translate/from_native_test.py b/tests/translate/from_native_test.py index 05675984a3..3902506ebb 100644 --- a/tests/translate/from_native_test.py +++ b/tests/translate/from_native_test.py @@ -1,5 +1,25 @@ +"""`from_native` runtime and static typing tests. + +# Static Typing +The style of the tests is *intentionally* repetitive, aiming to provide an individual scope +for each attempted `@overload` match. + +## `mypy` ignores +[inline config] is used to prevent [mypy specific errors] from hiding `pyright` diagnostics. + +[`--disallow-any-generics`] and [`var-annotated`] are ignored to verify we don't regress to +**prior false positive** behaviors identified in [#2239]. 
+ +[inline config]: https://mypy.readthedocs.io/en/stable/inline_config.html +[mypy specific errors]: https://discuss.python.org/t/ignore-mypy-specific-type-errors/58535 +[`--disallow-any-generics`]: https://mypy.readthedocs.io/en/stable/error_code_list2.html#check-that-type-arguments-exist-type-arg +[`var-annotated`]: https://mypy.readthedocs.io/en/stable/error_code_list.html#require-annotation-if-variable-type-is-unclear-var-annotated +[#2239]: https://github.com/narwhals-dev/narwhals/issues/2239 +""" + from __future__ import annotations +# mypy: disallow-any-generics=false, disable-error-code="var-annotated" import sys from contextlib import nullcontext as does_not_raise from importlib.util import find_spec @@ -371,6 +391,95 @@ def test_from_native_lazyframe() -> None: assert isinstance(unstable_lazy, nw.LazyFrame) +def test_dataframe_recursive() -> None: + pytest.importorskip("polars") + import polars as pl + + pl_frame = pl.DataFrame({"a": [1, 2, 3]}) + nw_frame = nw.from_native(pl_frame) + with pytest.raises(AssertionError): + nw.DataFrame(nw_frame, level="full") + + nw_frame_early_return = nw.from_native(nw_frame) + + if TYPE_CHECKING: + assert_type(pl_frame, pl.DataFrame) + assert_type(nw_frame, nw.DataFrame[pl.DataFrame]) + + nw_frame_depth_2 = nw.DataFrame(nw_frame, level="full") + # NOTE: Checking that the type is `DataFrame[Unknown]` + assert_type(nw_frame_depth_2, nw.DataFrame) + assert_type(nw_frame_early_return, nw.DataFrame[pl.DataFrame]) + + +def test_lazyframe_recursive() -> None: + pytest.importorskip("polars") + import polars as pl + + pl_frame = pl.DataFrame({"a": [1, 2, 3]}).lazy() + nw_frame = nw.from_native(pl_frame) + with pytest.raises(AssertionError): + nw.LazyFrame(nw_frame, level="lazy") + + nw_frame_early_return = nw.from_native(nw_frame) + + if TYPE_CHECKING: + assert_type(pl_frame, pl.LazyFrame) + assert_type(nw_frame, nw.LazyFrame[pl.LazyFrame]) + + nw_frame_depth_2 = nw.LazyFrame(nw_frame, level="lazy") + # NOTE: Checking that the 
type is `LazyFrame[Unknown]` + assert_type(nw_frame_depth_2, nw.LazyFrame) + assert_type(nw_frame_early_return, nw.LazyFrame[pl.LazyFrame]) + + +def test_dataframe_recursive_v1() -> None: + """`v1` always returns a `Union` for `DataFrame`.""" + pytest.importorskip("polars") + import polars as pl + + pl_frame = pl.DataFrame({"a": [1, 2, 3]}) + nw_frame = nw_v1.from_native(pl_frame) + with pytest.raises(AssertionError): + nw_v1.DataFrame(nw_frame, level="full") + + nw_frame_early_return = nw_v1.from_native(nw_frame) + + if TYPE_CHECKING: + assert_type(pl_frame, pl.DataFrame) + assert_type( + nw_frame, "nw_v1.DataFrame[pl.DataFrame] | nw_v1.LazyFrame[pl.DataFrame]" + ) + nw_frame_depth_2 = nw_v1.DataFrame(nw_frame, level="full") + # NOTE: Checking that the type is `DataFrame[Unknown]` + assert_type(nw_frame_depth_2, nw_v1.DataFrame) + assert_type( + nw_frame_early_return, + "nw_v1.DataFrame[pl.DataFrame] | nw_v1.LazyFrame[pl.DataFrame]", + ) + + +def test_lazyframe_recursive_v1() -> None: + pytest.importorskip("polars") + import polars as pl + + pl_frame = pl.DataFrame({"a": [1, 2, 3]}).lazy() + nw_frame = nw_v1.from_native(pl_frame) + with pytest.raises(AssertionError): + nw_v1.LazyFrame(nw_frame, level="lazy") + + nw_frame_early_return = nw_v1.from_native(nw_frame) + + if TYPE_CHECKING: + assert_type(pl_frame, pl.LazyFrame) + assert_type(nw_frame, nw_v1.LazyFrame[pl.LazyFrame]) + + nw_frame_depth_2 = nw_v1.LazyFrame(nw_frame, level="lazy") + # NOTE: Checking that the type is `LazyFrame[Unknown]` + assert_type(nw_frame_depth_2, nw_v1.LazyFrame) + assert_type(nw_frame_early_return, nw_v1.LazyFrame[pl.LazyFrame]) + + def test_series_recursive() -> None: """https://github.com/narwhals-dev/narwhals/issues/2239.""" pytest.importorskip("polars") @@ -387,9 +496,9 @@ def test_series_recursive() -> None: assert_type(pl_series, pl.Series) assert_type(nw_series, nw.Series[pl.Series]) - nw_series_depth_2 = nw.Series(nw_series, level="full") # type: ignore[var-annotated] + 
nw_series_depth_2 = nw.Series(nw_series, level="full") # NOTE: Checking that the type is `Series[Unknown]` - assert_type(nw_series_depth_2, nw.Series) # type: ignore[type-arg] + assert_type(nw_series_depth_2, nw.Series) assert_type(nw_series_early_return, nw.Series[pl.Series])