Skip to content

Commit 4d5fb30

Browse files
authored
chore: Spec CompliantLazyFrame (#2232)
* chore(typing): Mostly finish `CompliantLazyFrame` * fix: add missing `CompliantDataFrame.explode` * feat(typing): Adds `not_implemented.deprecated` Only way I've come up with to preserve the deprecation message * chore(typing): Finish `PolarsLazyFrame` * chore(typing): Finish `SparkLikeLazyFrame` * chore(typing): Finish `DaskLazyFrame` * chore(typing): Finish `DuckDBLazyFrame` * fix(typing): `PandasLikeDataFrame.explode` * chore(typing): Mark `.lazy` return as `Incomplete` I can't work out what the desired behavior is * chore(typing): Fill `Incomplete` for `*Expr` * revert: remove `.to_(arrow|pandas)` #2232 (comment) * fix(DRAFT): Expose `CompliantLazyFrame.native` - Will need something similar with `DataFrame` - Aiming to solve #2239 (comment) * fix: remove default in `CompliantLazyFrame.lazy` ```py narwhals/_compliant/dataframe.py:262: error: Definition of "lazy" in base class "CompliantDataFrame" is incompatible with definition in base class "CompliantLazyFrame" [misc] class EagerDataFrame( ^ ``` * fix(typing): Align `unique` signatures 3x of these: ```py error: Signature of "unique" incompatible with supertype "CompliantLazyFrame" [override] ``` Even though `maintain_order` is unused - `EagerDataFrame` (and subclasses) require the same signature * fix: coverage for `PolarsLazyFrame.native` https://github.com/narwhals-dev/narwhals/actions/runs/13929256423/job/38981936344 * fix(typing): Add missing `_change_version` method > error: Cannot access attribute "_change_version" for class "CompliantLazyFrame[Any, FrameT@_stableify]" * chore(typing): Mark intended annotation, that isn't valid yet `mypy` expands to an invalid type > "CompliantLazyFrame[Any, NativeFrame]" of "Any | CompliantLazyFrame[Any, NativeFrame] | CompliantLazyFrame[Any, DataFrame[Any]] | CompliantLazyFrame[Any, LazyFrame[Any]] | CompliantLazyFrame[Any, DataFrameLike]" * revert: remove `CompliantLazyFrame.lazy` #2232 (comment) * lol `maintain_order` default one side only 😅 Resolves #2232 (comment) 
* revert: don't widen `keep` for lazy * make `keep` a keyword again conflict from (#2247)
1 parent bfce451 commit 4d5fb30

File tree

12 files changed

+271
-112
lines changed

12 files changed

+271
-112
lines changed

narwhals/_arrow/dataframe.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,8 @@
5252
from narwhals._arrow.typing import Mask # type: ignore[attr-defined]
5353
from narwhals._arrow.typing import Order # type: ignore[attr-defined]
5454
from narwhals.dtypes import DType
55+
from narwhals.typing import CompliantDataFrame
56+
from narwhals.typing import CompliantLazyFrame
5557
from narwhals.typing import SizeUnit
5658
from narwhals.typing import _1DArray
5759
from narwhals.typing import _2DArray
@@ -69,11 +71,8 @@
6971
]
7072
PromoteOptions: TypeAlias = Literal["none", "default", "permissive"]
7173

72-
from narwhals.typing import CompliantDataFrame
73-
from narwhals.typing import CompliantLazyFrame
7474

75-
76-
class ArrowDataFrame(EagerDataFrame["ArrowSeries", "ArrowExpr"], CompliantLazyFrame):
75+
class ArrowDataFrame(EagerDataFrame["ArrowSeries", "ArrowExpr", "pa.Table"]):
7776
# --- not in the spec ---
7877
def __init__(
7978
self: Self,
@@ -349,6 +348,8 @@ def estimated_size(self: Self, unit: SizeUnit) -> int | float:
349348
sz = self._native_frame.nbytes
350349
return scale_bytes(sz, unit)
351350

351+
explode = not_implemented()
352+
352353
@property
353354
def columns(self: Self) -> list[str]:
354355
return self._native_frame.schema.names
@@ -573,7 +574,9 @@ def tail(self: Self, n: int) -> Self:
573574
else:
574575
return self._from_native_frame(df.slice(abs(n)), validate_column_names=False)
575576

576-
def lazy(self: Self, *, backend: Implementation | None = None) -> CompliantLazyFrame:
577+
def lazy(
578+
self: Self, *, backend: Implementation | None = None
579+
) -> CompliantLazyFrame[Any, Any]:
577580
from narwhals.utils import parse_version
578581

579582
if backend is None:

narwhals/_compliant/dataframe.py

Lines changed: 90 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,11 @@
1616
from narwhals._compliant.typing import CompliantSeriesT
1717
from narwhals._compliant.typing import EagerExprT_contra
1818
from narwhals._compliant.typing import EagerSeriesT
19+
from narwhals._compliant.typing import NativeFrameT_co
1920
from narwhals._expression_parsing import evaluate_output_names_and_aliases
21+
from narwhals.utils import Version
22+
from narwhals.utils import _StoresNative
23+
from narwhals.utils import deprecated
2024

2125
if TYPE_CHECKING:
2226
from io import BytesIO
@@ -70,6 +74,7 @@ def collect_schema(self) -> Mapping[str, DType]: ...
7074
def drop(self, columns: Sequence[str], *, strict: bool) -> Self: ...
7175
def drop_nulls(self, subset: Sequence[str] | None) -> Self: ...
7276
def estimated_size(self, unit: SizeUnit) -> int | float: ...
77+
def explode(self: Self, columns: Sequence[str]) -> Self: ...
7378
def filter(self, predicate: CompliantExprT_contra | Incomplete) -> Self: ...
7479
def gather_every(self, n: int, offset: int) -> Self: ...
7580
def get_column(self, name: str) -> CompliantSeriesT: ...
@@ -101,7 +106,7 @@ def join_asof(
101106
strategy: Literal["backward", "forward", "nearest"],
102107
suffix: str,
103108
) -> Self: ...
104-
def lazy(self, *, backend: Implementation | None) -> CompliantLazyFrame: ...
109+
def lazy(self, *, backend: Implementation | None) -> CompliantLazyFrame[Any, Any]: ...
105110
def rename(self, mapping: Mapping[str, str]) -> Self: ...
106111
def row(self, index: int) -> tuple[Any, ...]: ...
107112
def rows(
@@ -136,7 +141,7 @@ def unique(
136141
subset: Sequence[str] | None,
137142
*,
138143
keep: Literal["any", "first", "last", "none"],
139-
maintain_order: bool | None,
144+
maintain_order: bool | None = None,
140145
) -> Self: ...
141146
def unpivot(
142147
self,
@@ -155,26 +160,101 @@ def write_csv(self, file: str | Path | BytesIO | None) -> str | None: ...
155160
def write_parquet(self, file: str | Path | BytesIO) -> None: ...
156161

157162

158-
class CompliantLazyFrame(Protocol):
163+
class CompliantLazyFrame(
164+
_StoresNative[NativeFrameT_co], Protocol[CompliantExprT_contra, NativeFrameT_co]
165+
):
166+
_native_frame: Any
167+
_implementation: Implementation
168+
_backend_version: tuple[int, ...]
169+
_version: Version
170+
159171
def __narwhals_lazyframe__(self) -> Self: ...
160172
def __narwhals_namespace__(self) -> Any: ...
161-
def simple_select(
162-
self, *column_names: str
163-
) -> Self: ... # `select` where all args are column names.
164-
def aggregate(self, *exprs: Any) -> Self: # pragma: no cover
165-
... # `select` where all args are aggregations or literals
166-
# (so, no broadcasting is necessary).
173+
174+
def simple_select(self, *column_names: str) -> Self:
175+
"""`select` where all args are column names."""
176+
...
177+
178+
def aggregate(self, *exprs: CompliantExprT_contra) -> Self:
179+
"""`select` where all args are aggregations or literals.
180+
181+
(so, no broadcasting is necessary).
182+
"""
183+
...
184+
185+
def _change_version(self, version: Version) -> Self: ...
186+
187+
@property
188+
def native(self) -> NativeFrameT_co:
189+
return self._native_frame # type: ignore[no-any-return]
167190

168191
@property
169192
def columns(self) -> Sequence[str]: ...
170193
@property
171194
def schema(self) -> Mapping[str, DType]: ...
172195
def _iter_columns(self) -> Iterator[Any]: ...
196+
def collect(
197+
self, backend: Implementation | None, **kwargs: Any
198+
) -> CompliantDataFrame[Any, Any]: ...
199+
def collect_schema(self) -> Mapping[str, DType]: ...
200+
def drop(self, columns: Sequence[str], *, strict: bool) -> Self: ...
201+
def drop_nulls(self, subset: Sequence[str] | None) -> Self: ...
202+
def explode(self: Self, columns: Sequence[str]) -> Self: ...
203+
def filter(self, predicate: CompliantExprT_contra | Incomplete) -> Self: ...
204+
@deprecated(
205+
"`LazyFrame.gather_every` is deprecated and will be removed in a future version."
206+
)
207+
def gather_every(self, n: int, offset: int) -> Self: ...
208+
def group_by(self, *keys: str, drop_null_keys: bool) -> Incomplete: ...
209+
def head(self, n: int) -> Self: ...
210+
def join(
211+
self: Self,
212+
other: Self,
213+
*,
214+
how: Literal["left", "inner", "cross", "anti", "semi"],
215+
left_on: Sequence[str] | None,
216+
right_on: Sequence[str] | None,
217+
suffix: str,
218+
) -> Self: ...
219+
def join_asof(
220+
self: Self,
221+
other: Self,
222+
*,
223+
left_on: str | None,
224+
right_on: str | None,
225+
by_left: Sequence[str] | None,
226+
by_right: Sequence[str] | None,
227+
strategy: Literal["backward", "forward", "nearest"],
228+
suffix: str,
229+
) -> Self: ...
230+
def rename(self, mapping: Mapping[str, str]) -> Self: ...
231+
def select(self, *exprs: CompliantExprT_contra) -> Self: ...
232+
def sort(
233+
self, *by: str, descending: bool | Sequence[bool], nulls_last: bool
234+
) -> Self: ...
235+
@deprecated("`LazyFrame.tail` is deprecated and will be removed in a future version.")
236+
def tail(self, n: int) -> Self: ...
237+
def unique(
238+
self,
239+
subset: Sequence[str] | None,
240+
*,
241+
keep: Literal["any", "none"],
242+
) -> Self: ...
243+
def unpivot(
244+
self,
245+
on: Sequence[str] | None,
246+
index: Sequence[str] | None,
247+
variable_name: str,
248+
value_name: str,
249+
) -> Self: ...
250+
def with_columns(self, *exprs: CompliantExprT_contra) -> Self: ...
251+
def with_row_index(self, name: str) -> Self: ...
173252

174253

175254
class EagerDataFrame(
176255
CompliantDataFrame[EagerSeriesT, EagerExprT_contra],
177-
Protocol[EagerSeriesT, EagerExprT_contra],
256+
CompliantLazyFrame[EagerExprT_contra, NativeFrameT_co],
257+
Protocol[EagerSeriesT, EagerExprT_contra, NativeFrameT_co],
178258
):
179259
def _evaluate_expr(self, expr: EagerExprT_contra, /) -> EagerSeriesT:
180260
"""Evaluate `expr` and ensure it has a **single** output."""

narwhals/_compliant/selectors.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -65,9 +65,11 @@
6565
SeriesOrExprT = TypeVar("SeriesOrExprT", bound="CompliantSeries | NativeExpr")
6666
SeriesT = TypeVar("SeriesT", bound="CompliantSeries")
6767
ExprT = TypeVar("ExprT", bound="NativeExpr")
68-
FrameT = TypeVar("FrameT", bound="CompliantDataFrame[Any, Any] | CompliantLazyFrame")
68+
FrameT = TypeVar(
69+
"FrameT", bound="CompliantDataFrame[Any, Any] | CompliantLazyFrame[Any, Any]"
70+
)
6971
DataFrameT = TypeVar("DataFrameT", bound="CompliantDataFrame[Any, Any]")
70-
LazyFrameT = TypeVar("LazyFrameT", bound="CompliantLazyFrame")
72+
LazyFrameT = TypeVar("LazyFrameT", bound="CompliantLazyFrame[Any, Any]")
7173
SelectorOrExpr: TypeAlias = (
7274
"CompliantSelector[FrameT, SeriesOrExprT] | CompliantExpr[FrameT, SeriesOrExprT]"
7375
)
@@ -309,7 +311,7 @@ def __repr__(self: Self) -> str: # pragma: no cover
309311

310312

311313
def _eval_lhs_rhs(
312-
df: CompliantDataFrame[Any, Any] | CompliantLazyFrame,
314+
df: CompliantDataFrame[Any, Any] | CompliantLazyFrame[Any, Any],
313315
lhs: CompliantExpr[Any, Any],
314316
rhs: CompliantExpr[Any, Any],
315317
) -> tuple[Sequence[str], Sequence[str]]:

narwhals/_compliant/typing.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
from narwhals._compliant.namespace import EagerNamespace
1919
from narwhals._compliant.series import CompliantSeries
2020
from narwhals._compliant.series import EagerSeries
21+
from narwhals.typing import NativeFrame
2122

2223
__all__ = [
2324
"AliasName",
@@ -35,26 +36,28 @@
3536
bound="CompliantSeries | NativeExpr",
3637
covariant=True,
3738
)
39+
40+
NativeFrameT_co = TypeVar("NativeFrameT_co", bound="NativeFrame", covariant=True)
3841
CompliantFrameT = TypeVar(
39-
"CompliantFrameT", bound="CompliantDataFrame[Any, Any] | CompliantLazyFrame"
42+
"CompliantFrameT", bound="CompliantDataFrame[Any, Any] | CompliantLazyFrame[Any, Any]"
4043
)
4144
CompliantDataFrameT = TypeVar("CompliantDataFrameT", bound="CompliantDataFrame[Any, Any]")
42-
CompliantLazyFrameT = TypeVar("CompliantLazyFrameT", bound="CompliantLazyFrame")
45+
CompliantLazyFrameT = TypeVar("CompliantLazyFrameT", bound="CompliantLazyFrame[Any, Any]")
4346
IntoCompliantExpr: TypeAlias = "CompliantExpr[CompliantFrameT, CompliantSeriesOrNativeExprT_co] | CompliantSeriesOrNativeExprT_co"
4447
CompliantExprT = TypeVar("CompliantExprT", bound="CompliantExpr[Any, Any]")
4548
CompliantExprT_contra = TypeVar(
4649
"CompliantExprT_contra", bound="CompliantExpr[Any, Any]", contravariant=True
4750
)
4851

49-
EagerDataFrameT = TypeVar("EagerDataFrameT", bound="EagerDataFrame[Any, Any]")
52+
EagerDataFrameT = TypeVar("EagerDataFrameT", bound="EagerDataFrame[Any, Any, Any]")
5053
EagerSeriesT = TypeVar("EagerSeriesT", bound="EagerSeries[Any]")
5154
EagerSeriesT_co = TypeVar("EagerSeriesT_co", bound="EagerSeries[Any]", covariant=True)
5255
EagerExprT = TypeVar("EagerExprT", bound="EagerExpr[Any, Any]")
5356
EagerExprT_contra = TypeVar(
5457
"EagerExprT_contra", bound="EagerExpr[Any, Any]", contravariant=True
5558
)
5659
EagerNamespaceAny: TypeAlias = (
57-
"EagerNamespace[EagerDataFrame[Any, Any], EagerSeries[Any], EagerExpr[Any, Any]]"
60+
"EagerNamespace[EagerDataFrame[Any, Any, Any], EagerSeries[Any], EagerExpr[Any, Any]]"
5861
)
5962

6063
AliasNames: TypeAlias = Callable[[Sequence[str]], Sequence[str]]

narwhals/_dask/dataframe.py

Lines changed: 17 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from typing import Any
55
from typing import Iterator
66
from typing import Literal
7+
from typing import Mapping
78
from typing import Sequence
89

910
import dask.dataframe as dd
@@ -18,6 +19,7 @@
1819
from narwhals.utils import Implementation
1920
from narwhals.utils import check_column_exists
2021
from narwhals.utils import generate_temporary_column_name
22+
from narwhals.utils import not_implemented
2123
from narwhals.utils import parse_columns_to_drop
2224
from narwhals.utils import parse_version
2325
from narwhals.utils import validate_backend_version
@@ -35,7 +37,7 @@
3537
from narwhals.utils import Version
3638

3739

38-
class DaskLazyFrame(CompliantLazyFrame):
40+
class DaskLazyFrame(CompliantLazyFrame["DaskExpr", "dd.DataFrame"]):
3941
def __init__(
4042
self: Self,
4143
native_dataframe: dd.DataFrame,
@@ -168,7 +170,7 @@ def select(self: Self, *exprs: DaskExpr) -> Self:
168170
)
169171
return self._from_native_frame(df)
170172

171-
def drop_nulls(self: Self, subset: list[str] | None) -> Self:
173+
def drop_nulls(self: Self, subset: Sequence[str] | None) -> Self:
172174
if subset is None:
173175
return self._from_native_frame(self._native_frame.dropna())
174176
plx = self.__narwhals_namespace__()
@@ -189,7 +191,7 @@ def schema(self: Self) -> dict[str, DType]:
189191
def collect_schema(self: Self) -> dict[str, DType]:
190192
return self.schema
191193

192-
def drop(self: Self, columns: list[str], strict: bool) -> Self: # noqa: FBT001
194+
def drop(self: Self, columns: Sequence[str], *, strict: bool) -> Self:
193195
to_drop = parse_columns_to_drop(
194196
compliant_frame=self, columns=columns, strict=strict
195197
)
@@ -205,7 +207,7 @@ def with_row_index(self: Self, name: str) -> Self:
205207
)
206208
)
207209

208-
def rename(self: Self, mapping: dict[str, str]) -> Self:
210+
def rename(self: Self, mapping: Mapping[str, str]) -> Self:
209211
return self._from_native_frame(self._native_frame.rename(columns=mapping))
210212

211213
def head(self: Self, n: int) -> Self:
@@ -215,7 +217,7 @@ def head(self: Self, n: int) -> Self:
215217

216218
def unique(
217219
self: Self,
218-
subset: list[str] | None,
220+
subset: Sequence[str] | None,
219221
*,
220222
keep: Literal["any", "none"],
221223
) -> Self:
@@ -254,8 +256,8 @@ def join(
254256
other: Self,
255257
*,
256258
how: Literal["left", "inner", "cross", "anti", "semi"],
257-
left_on: list[str] | None,
258-
right_on: list[str] | None,
259+
left_on: Sequence[str] | None,
260+
right_on: Sequence[str] | None,
259261
suffix: str,
260262
) -> Self:
261263
if how == "cross":
@@ -286,7 +288,7 @@ def join(
286288
other_native = (
287289
select_columns_by_name(
288290
other._native_frame,
289-
right_on,
291+
list(right_on),
290292
self._backend_version,
291293
self._implementation,
292294
)
@@ -313,7 +315,7 @@ def join(
313315
other_native = (
314316
select_columns_by_name(
315317
other._native_frame,
316-
right_on,
318+
list(right_on),
317319
self._backend_version,
318320
self._implementation,
319321
)
@@ -364,8 +366,8 @@ def join_asof(
364366
*,
365367
left_on: str | None,
366368
right_on: str | None,
367-
by_left: list[str] | None,
368-
by_right: list[str] | None,
369+
by_left: Sequence[str] | None,
370+
by_right: Sequence[str] | None,
369371
strategy: Literal["backward", "forward", "nearest"],
370372
suffix: str,
371373
) -> Self:
@@ -412,8 +414,8 @@ def gather_every(self: Self, n: int, offset: int) -> Self:
412414

413415
def unpivot(
414416
self: Self,
415-
on: list[str] | None,
416-
index: list[str] | None,
417+
on: Sequence[str] | None,
418+
index: Sequence[str] | None,
417419
variable_name: str,
418420
value_name: str,
419421
) -> Self:
@@ -425,3 +427,5 @@ def unpivot(
425427
value_name=value_name,
426428
)
427429
)
430+
431+
explode = not_implemented()

0 commit comments

Comments
 (0)