Skip to content
Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
aa6578e
fix(typing): Narrow `IntoDataFrame`
dangotbanned Apr 7, 2025
51c5b63
fix(typing): Remove `DataFrame` from `IntoFrame`
dangotbanned Apr 7, 2025
6cbbc74
fix(typing): Narrow `IntoLazyFrame`, `IntoFrame`
dangotbanned Apr 7, 2025
255ab27
fix(typing): Annotate `DataFrame._compliant_frame`
dangotbanned Apr 7, 2025
ba2f6e1
chore: Add missing `CompliantDataFrame.pivot`
dangotbanned Apr 7, 2025
1815752
fix(typing): Ensure `__iter__` is available on group_by
dangotbanned Apr 7, 2025
07deea2
chore(typing): Fix most of `DataFrame`
dangotbanned Apr 7, 2025
3881822
chore(typing): Ignore interchange `[type-var]`
dangotbanned Apr 7, 2025
375fabc
test(typing): Barely fix dodgy spark typing
dangotbanned Apr 7, 2025
21e80ef
fix: Implement `to_numpy` to catch args
dangotbanned Apr 7, 2025
c124985
fix(typing): Annotate `LazyFrame._compliant_frame`
dangotbanned Apr 7, 2025
831a6be
chore(typing): Ignore and add note for `spark_like` cast
dangotbanned Apr 7, 2025
1725f36
chore(typing): Partial `v1` backport
dangotbanned Apr 7, 2025
c4bed59
fix(typing): Just preserve `v1` behavior
dangotbanned Apr 7, 2025
6a9fd91
simplify
dangotbanned Apr 7, 2025
ed65ad2
try old `Union`
dangotbanned Apr 7, 2025
b97149d
Merge remote-tracking branch 'upstream/main' into narrow-type-var-frame
dangotbanned Apr 8, 2025
6a66779
docs(typing): Provide more context on what and why
dangotbanned Apr 8, 2025
675329c
Merge branch 'main' into narrow-type-var-frame
dangotbanned Apr 8, 2025
5dd7825
Merge branch 'main' into narrow-type-var-frame
dangotbanned Apr 9, 2025
a1c51ff
Merge branch 'main' into narrow-type-var-frame
dangotbanned Apr 9, 2025
c2b328b
Merge branch 'main' into narrow-type-var-frame
dangotbanned Apr 10, 2025
b66d1bf
Merge branch 'main' into narrow-type-var-frame
dangotbanned Apr 12, 2025
ba525f1
Merge branch 'main' into narrow-type-var-frame
dangotbanned Apr 12, 2025
c45f1f4
Merge branch 'main' into narrow-type-var-frame
dangotbanned Apr 12, 2025
322b00d
Merge branch 'main' into narrow-type-var-frame
dangotbanned Apr 12, 2025
d6cb16b
chore(typing): Use `Sequence[str]` in `pivot`
dangotbanned Apr 12, 2025
cbd60d9
refactor(typing): Use `PivotAgg`
dangotbanned Apr 12, 2025
53722e8
Merge remote-tracking branch 'upstream/main' into narrow-type-var-frame
dangotbanned Apr 12, 2025
e111fc3
Merge branch 'main' into narrow-type-var-frame
dangotbanned Apr 13, 2025
23158d1
Merge branch 'main' into narrow-type-var-frame
dangotbanned Apr 13, 2025
bc72941
Merge branch 'main' into narrow-type-var-frame
dangotbanned Apr 14, 2025
a1fb349
Merge branch 'main' into narrow-type-var-frame
dangotbanned Apr 14, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions narwhals/_arrow/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -847,3 +847,5 @@ def unpivot(
)
# TODO(Unassigned): Even with promote_options="permissive", pyarrow does not
# upcast numeric to non-numeric (e.g. string) datatypes

pivot = not_implemented()
15 changes: 14 additions & 1 deletion narwhals/_compliant/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
from typing_extensions import TypeAlias

from narwhals._compliant.group_by import CompliantGroupBy
from narwhals._compliant.group_by import DataFrameGroupBy
from narwhals._translate import IntoArrowTable
from narwhals.dtypes import DType
from narwhals.schema import Schema
Expand Down Expand Up @@ -132,7 +133,7 @@ def gather_every(self, n: int, offset: int) -> Self: ...
def get_column(self, name: str) -> CompliantSeriesT: ...
def group_by(
self, *keys: str, drop_null_keys: bool
) -> CompliantGroupBy[Self, Any]: ...
) -> DataFrameGroupBy[Self, Any]: ...
def head(self, n: int) -> Self: ...
def item(self, row: int | None, column: int | str | None) -> Any: ...
def iter_columns(self) -> Iterator[CompliantSeriesT]: ...
Expand Down Expand Up @@ -161,6 +162,18 @@ def join_asof(
suffix: str,
) -> Self: ...
def lazy(self, *, backend: Implementation | None) -> CompliantLazyFrame[Any, Any]: ...
def pivot(
self,
on: str | Sequence[str],
*,
index: str | Sequence[str] | None,
values: str | Sequence[str] | None,
aggregate_function: Literal[
"min", "max", "first", "last", "sum", "mean", "median", "len", None
],
sort_columns: bool,
separator: str,
) -> Self: ...
def rename(self, mapping: Mapping[str, str]) -> Self: ...
def row(self, index: int) -> tuple[Any, ...]: ...
def rows(
Expand Down
11 changes: 9 additions & 2 deletions narwhals/_compliant/group_by.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,13 @@ def __init__(
def agg(self, *exprs: CompliantExprT_contra) -> CompliantFrameT_co: ...


class DataFrameGroupBy(
CompliantGroupBy[CompliantDataFrameT_co, CompliantExprT_contra],
Protocol38[CompliantDataFrameT_co, CompliantExprT_contra],
):
def __iter__(self) -> Iterator[tuple[Any, CompliantDataFrameT_co]]: ...
Comment on lines +79 to +83
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@FBruzzesi I forgot to mention this will probably be helpful in getting the typing working for (#2325).

It means you can use CompliantGroupBy for Polars* and put the unrelated parts here.

I needed to add it to resolve an unrelated issue (1815752)



class DepthTrackingGroupBy(
CompliantGroupBy[CompliantFrameT_co, DepthTrackingExprT_contra],
Protocol38[CompliantFrameT_co, DepthTrackingExprT_contra, NativeAggregationT_co],
Expand Down Expand Up @@ -132,9 +139,9 @@ def _leaf_name(cls, expr: DepthTrackingExprAny, /) -> NarwhalsAggregation | Any:

class EagerGroupBy(
DepthTrackingGroupBy[CompliantDataFrameT_co, EagerExprT_contra, str],
DataFrameGroupBy[CompliantDataFrameT_co, EagerExprT_contra],
Protocol38[CompliantDataFrameT_co, EagerExprT_contra],
):
def __iter__(self) -> Iterator[tuple[Any, CompliantDataFrameT_co]]: ...
): ...


class LazyGroupBy(
Expand Down
6 changes: 3 additions & 3 deletions narwhals/_pandas_like/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -1028,10 +1028,10 @@ def gather_every(self: Self, n: int, offset: int) -> Self:

def pivot(
self: Self,
on: list[str],
on: Sequence[str],
*,
index: list[str] | None,
values: list[str] | None,
index: Sequence[str] | None,
values: Sequence[str] | None,
aggregate_function: Any | None,
sort_columns: bool,
separator: str,
Expand Down
7 changes: 4 additions & 3 deletions narwhals/_pandas_like/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from contextlib import suppress
from typing import TYPE_CHECKING
from typing import Any
from typing import Sequence
from typing import Sized
from typing import TypeVar
from typing import cast
Expand Down Expand Up @@ -742,9 +743,9 @@ def select_columns_by_name(

def pivot_table(
df: PandasLikeDataFrame,
values: list[str],
index: list[str],
columns: list[str],
values: Sequence[str],
index: Sequence[str],
columns: Sequence[str],
aggregate_function: str | None,
) -> Any:
dtypes = import_dtypes_module(df._version)
Expand Down
17 changes: 9 additions & 8 deletions narwhals/_polars/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,6 @@ class PolarsDataFrame:
select: Method[Self]
sort: Method[Self]
to_arrow: Method[pa.Table]
to_numpy: Method[_2DArray]
to_pandas: Method[pd.DataFrame]
unique: Method[Self]
with_columns: Method[Self]
Expand Down Expand Up @@ -231,6 +230,9 @@ def __array__(
return self.native.__array__(dtype)
return self.native.__array__(dtype)

def to_numpy(self, dtype: Any = None, *, copy: bool | None = None) -> _2DArray:
return self.native.to_numpy()

def collect_schema(self: Self) -> dict[str, DType]:
if self._backend_version < (1,):
return {
Expand Down Expand Up @@ -411,15 +413,14 @@ def unpivot(
)

def pivot(
self: Self,
on: list[str],
self,
on: str | Sequence[str],
*,
index: list[str] | None,
values: list[str] | None,
index: str | Sequence[str] | None,
values: str | Sequence[str] | None,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why do these change? is Sequence[str] | None not fine?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

> thanks @dangotbanned! just got a question

Thanks @MarcoGorelli, happy to answer 😅

> why do these change? is `Sequence[str] | None` not fine?

the original annotation was overly narrow, since polars accepts much more than a list

I followed through to pandas to see if that was the source of using list[str], but IIRC it used an alias like IndexLabel.

To a type checker, `Sequence[str] | None` is sadly equivalent to `str | Sequence[str] | None`, because `str` is itself a `Sequence[str]` (python/typing#256).
But I prefer being more explicit that it can accept "one or more columns"

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if it's str we already make it a list at the narwhals level, can this just be Sequence[str] | None?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@MarcoGorelli sure thing!

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Resolved in (d6cb16b)

aggregate_function: Literal[
"min", "max", "first", "last", "sum", "mean", "median", "len"
]
| None,
"min", "max", "first", "last", "sum", "mean", "median", "len", None
],
sort_columns: bool,
separator: str,
) -> Self:
Expand Down
79 changes: 33 additions & 46 deletions narwhals/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,12 @@
from narwhals.utils import find_stacklevel
from narwhals.utils import flatten
from narwhals.utils import generate_repr
from narwhals.utils import is_compliant_dataframe
from narwhals.utils import is_compliant_lazyframe
from narwhals.utils import is_sequence_but_not_str
from narwhals.utils import issue_deprecation_warning
from narwhals.utils import parse_version
from narwhals.utils import supports_arrow_c_stream

if TYPE_CHECKING:
from io import BytesIO
Expand All @@ -50,6 +52,8 @@
from typing_extensions import ParamSpec
from typing_extensions import Self

from narwhals._compliant import CompliantDataFrame
from narwhals._compliant import CompliantLazyFrame
from narwhals._compliant import IntoCompliantExpr
from narwhals._compliant.typing import EagerNamespaceAny
from narwhals.group_by import GroupBy
Expand Down Expand Up @@ -441,14 +445,13 @@ def _lazyframe(self: Self) -> type[LazyFrame[Any]]:
return LazyFrame

def __init__(
self: Self,
df: Any,
*,
level: Literal["full", "lazy", "interchange"],
self: Self, df: Any, *, level: Literal["full", "lazy", "interchange"]
) -> None:
self._level: Literal["full", "lazy", "interchange"] = level
if hasattr(df, "__narwhals_dataframe__"):
self._compliant_frame: Any = df.__narwhals_dataframe__()
# NOTE: Interchange support (`DataFrameLike`) is the source of the error
self._compliant_frame: CompliantDataFrame[Any, Any, DataFrameT] # type: ignore[type-var]
if is_compliant_dataframe(df):
self._compliant_frame = df.__narwhals_dataframe__()
else: # pragma: no cover
msg = f"Expected an object which implements `__narwhals_dataframe__`, got: {type(df)}"
raise AssertionError(msg)
Expand Down Expand Up @@ -477,13 +480,13 @@ def implementation(self: Self) -> Implementation:
>>> df.implementation.is_polars()
False
"""
return self._compliant_frame._implementation # type: ignore[no-any-return]
return self._compliant_frame._implementation

def __len__(self: Self) -> int:
return self._compliant_frame.__len__() # type: ignore[no-any-return]
return self._compliant_frame.__len__()

def __array__(self: Self, dtype: Any = None, copy: bool | None = None) -> _2DArray: # noqa: FBT001
return self._compliant_frame.__array__(dtype, copy=copy) # type: ignore[no-any-return]
return self._compliant_frame.__array__(dtype, copy=copy)

def __repr__(self: Self) -> str: # pragma: no cover
return generate_repr("Narwhals DataFrame", self.to_native().__repr__())
Expand All @@ -498,7 +501,7 @@ def __arrow_c_stream__(self: Self, requested_schema: object | None = None) -> ob
for more.
"""
native_frame = self._compliant_frame._native_frame
if hasattr(native_frame, "__arrow_c_stream__"):
if supports_arrow_c_stream(native_frame):
return native_frame.__arrow_c_stream__(requested_schema=requested_schema)
try:
import pyarrow as pa # ignore-banned-import
Expand Down Expand Up @@ -587,8 +590,7 @@ def lazy(
)
raise ValueError(msg)
return self._lazyframe(
self._compliant_frame.lazy(backend=lazy_backend),
level="lazy",
self._compliant_frame.lazy(backend=lazy_backend), level="lazy"
)

def to_native(self: Self) -> DataFrameT:
Expand All @@ -612,7 +614,7 @@ def to_native(self: Self) -> DataFrameT:
1 2 7.0 b
2 3 8.0 c
"""
return self._compliant_frame._native_frame # type: ignore[no-any-return]
return self._compliant_frame._native_frame

def to_pandas(self: Self) -> pd.DataFrame:
"""Convert this DataFrame to a pandas DataFrame.
Expand All @@ -633,7 +635,7 @@ def to_pandas(self: Self) -> pd.DataFrame:
1 2 7.0 b
2 3 8.0 c
"""
return self._compliant_frame.to_pandas() # type: ignore[no-any-return]
return self._compliant_frame.to_pandas()

def to_polars(self: Self) -> pl.DataFrame:
"""Convert this DataFrame to a polars DataFrame.
Expand All @@ -657,7 +659,7 @@ def to_polars(self: Self) -> pl.DataFrame:
│ 2 ┆ 7.0 │
└─────┴─────┘
"""
return self._compliant_frame.to_polars() # type: ignore[no-any-return]
return self._compliant_frame.to_polars()

@overload
def write_csv(self: Self, file: None = None) -> str: ...
Expand Down Expand Up @@ -688,7 +690,7 @@ def write_csv(self: Self, file: str | Path | BytesIO | None = None) -> str | Non
If we had passed a file name to `write_csv`, it would have been
written to that file.
"""
return self._compliant_frame.write_csv(file) # type: ignore[no-any-return]
return self._compliant_frame.write_csv(file)

def write_parquet(self: Self, file: str | Path | BytesIO) -> None:
"""Write dataframe to parquet file.
Expand Down Expand Up @@ -724,7 +726,7 @@ def to_numpy(self: Self) -> _2DArray:
array([[1. , 6.5],
[2. , 7. ]])
"""
return self._compliant_frame.to_numpy() # type: ignore[no-any-return]
return self._compliant_frame.to_numpy(None, copy=None)

@property
def shape(self: Self) -> tuple[int, int]:
Expand All @@ -741,7 +743,7 @@ def shape(self: Self) -> tuple[int, int]:
>>> df.shape
(2, 1)
"""
return self._compliant_frame.shape # type: ignore[no-any-return]
return self._compliant_frame.shape

def get_column(self: Self, name: str) -> Series[Any]:
"""Get a single column by name.
Expand Down Expand Up @@ -769,10 +771,7 @@ def get_column(self: Self, name: str) -> Series[Any]:
1 2
Name: a, dtype: int64
"""
return self._series(
self._compliant_frame.get_column(name),
level=self._level,
)
return self._series(self._compliant_frame.get_column(name), level=self._level)

def estimated_size(self: Self, unit: SizeUnit = "b") -> int | float:
"""Return an estimation of the total (heap) allocated size of the `DataFrame`.
Expand All @@ -794,7 +793,7 @@ def estimated_size(self: Self, unit: SizeUnit = "b") -> int | float:
>>> df.estimated_size()
32
"""
return self._compliant_frame.estimated_size(unit=unit) # type: ignore[no-any-return]
return self._compliant_frame.estimated_size(unit=unit)

@overload
def __getitem__( # type: ignore[overload-overlap]
Expand Down Expand Up @@ -950,15 +949,12 @@ def to_dict(
"""
if as_series:
return {
key: self._series(
value,
level=self._level,
)
key: self._series(value, level=self._level)
for key, value in self._compliant_frame.to_dict(
as_series=as_series
).items()
}
return self._compliant_frame.to_dict(as_series=as_series) # type: ignore[no-any-return]
return self._compliant_frame.to_dict(as_series=as_series)

def row(self: Self, index: int) -> tuple[Any, ...]:
"""Get values at given row.
Expand All @@ -984,7 +980,7 @@ def row(self: Self, index: int) -> tuple[Any, ...]:
>>> nw.from_native(df_native).row(1)
(<pyarrow.Int64Scalar: 2>, <pyarrow.Int64Scalar: 5>)
"""
return self._compliant_frame.row(index) # type: ignore[no-any-return]
return self._compliant_frame.row(index)

# inherited
def pipe(
Expand Down Expand Up @@ -1150,7 +1146,7 @@ def rows(
>>> nw.from_native(df_native).rows()
[(1, 6.0), (2, 7.0)]
"""
return self._compliant_frame.rows(named=named) # type: ignore[no-any-return]
return self._compliant_frame.rows(named=named) # type: ignore[return-value]

def iter_columns(self: Self) -> Iterator[Series[Any]]:
"""Returns an iterator over the columns of this DataFrame.
Expand Down Expand Up @@ -1227,7 +1223,7 @@ def iter_rows(
>>> next(iter_rows)
(2, 7.0)
"""
return self._compliant_frame.iter_rows(named=named, buffer_size=buffer_size) # type: ignore[no-any-return]
return self._compliant_frame.iter_rows(named=named, buffer_size=buffer_size) # type: ignore[return-value]

def with_columns(
self: Self, *exprs: IntoExpr | Iterable[IntoExpr], **named_exprs: IntoExpr
Expand Down Expand Up @@ -1434,9 +1430,7 @@ def unique(
if isinstance(subset, str):
subset = [subset]
return self._with_compliant(
self._compliant_frame.unique(
subset=subset, keep=keep, maintain_order=maintain_order
)
self._compliant_frame.unique(subset, keep=keep, maintain_order=maintain_order)
)

def filter(
Expand Down Expand Up @@ -1792,10 +1786,7 @@ def is_unique(self: Self) -> Series[Any]:
| dtype: bool |
└───────────────┘
"""
return self._series(
self._compliant_frame.is_unique(),
level=self._level,
)
return self._series(self._compliant_frame.is_unique(), level=self._level)

def null_count(self: Self) -> Self:
r"""Create a new DataFrame that shows the null counts per column.
Expand Down Expand Up @@ -1988,7 +1979,7 @@ def to_arrow(self: Self) -> pa.Table:
foo: [[1,null]]
bar: [[2,3]]
"""
return self._compliant_frame.to_arrow() # type: ignore[no-any-return]
return self._compliant_frame.to_arrow()

def sample(
self: Self,
Expand Down Expand Up @@ -2186,15 +2177,11 @@ def _dataframe(self: Self) -> type[DataFrame[Any]]:
return DataFrame

def __init__(
self: Self,
df: Any,
*,
level: Literal["full", "lazy", "interchange"],
self: Self, df: Any, *, level: Literal["full", "lazy", "interchange"]
) -> None:
self._level = level
self._compliant_frame: CompliantLazyFrame[Any, FrameT] # type: ignore[type-var]
if is_compliant_lazyframe(df):
# NOTE: Blocked by (#2239)
# self._compliant_frame: CompliantLazyFrame[Any, FrameT] = df.__narwhals_lazyframe__() # noqa: ERA001
self._compliant_frame = df.__narwhals_lazyframe__()
else: # pragma: no cover
msg = f"Expected Polars LazyFrame or an object that implements `__narwhals_lazyframe__`, got: {type(df)}"
Expand All @@ -2220,7 +2207,7 @@ def implementation(self: Self) -> Implementation:
>>> nw.from_native(lf_native).implementation
<Implementation.DASK: 7>
"""
return self._compliant_frame._implementation # type: ignore[no-any-return]
return self._compliant_frame._implementation

def __getitem__(self: Self, item: str | slice) -> NoReturn:
msg = "Slicing is not supported on LazyFrame"
Expand Down
Loading
Loading