Skip to content
Merged
1 change: 1 addition & 0 deletions docs/api-reference/dataframe.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
- is_empty
- is_unique
- item
- iter_columns
- iter_rows
- join
- join_asof
Expand Down
11 changes: 11 additions & 0 deletions narwhals/_arrow/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,17 @@ def rows(self: Self, *, named: bool) -> list[tuple[Any, ...]] | list[dict[str, A
return list(self.iter_rows(named=False, buffer_size=512)) # type: ignore[return-value]
return self._native_frame.to_pylist()

def iter_columns(self) -> Iterator[ArrowSeries]:
    """Yield every column of the native frame wrapped as an ``ArrowSeries``.

    Names taken from ``self.columns`` are paired positionally with the
    objects produced by the native frame's ``itercolumns()``.

    Yields:
        One ``ArrowSeries`` per column, carrying the column's name and this
        frame's backend version and version.
    """
    # Imported inside the method, as in the original implementation.
    from narwhals._arrow.series import ArrowSeries

    column_names = self.columns
    native_columns = self._native_frame.itercolumns()
    for column_name, native_series in zip(column_names, native_columns):
        wrapped = ArrowSeries(
            native_series,
            name=column_name,
            backend_version=self._backend_version,
            version=self._version,
        )
        yield wrapped

def iter_rows(
self: Self, *, named: bool, buffer_size: int
) -> Iterator[tuple[Any, ...]] | Iterator[dict[str, Any]]:
Expand Down
9 changes: 9 additions & 0 deletions narwhals/_pandas_like/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -341,6 +341,15 @@ def rows(self: Self, *, named: bool) -> list[tuple[Any, ...]] | list[dict[str, A

return self._native_frame.to_dict(orient="records")

def iter_columns(self) -> Iterator[PandasLikeSeries]:
for _name, series in self._native_frame.items(): # noqa: PERF102
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See (#2064 (comment)) regarding false-positive (PERF102)

yield PandasLikeSeries(
series,
implementation=self._implementation,
backend_version=self._backend_version,
version=self._version,
)

def iter_rows(
self: Self,
*,
Expand Down
9 changes: 9 additions & 0 deletions narwhals/_polars/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from typing import TYPE_CHECKING
from typing import Any
from typing import Iterator
from typing import Literal
from typing import Sequence
from typing import overload
Expand Down Expand Up @@ -232,6 +233,14 @@ def get_column(self: Self, name: str) -> PolarsSeries:
version=self._version,
)

def iter_columns(self) -> Iterator[PolarsSeries]:
    """Yield every column of the native frame wrapped as a ``PolarsSeries``.

    Yields:
        One ``PolarsSeries`` per item produced by the native frame's
        ``iter_columns()``, carrying this frame's backend version and version.
    """
    # Imported inside the method, as in the original implementation.
    from narwhals._polars.series import PolarsSeries

    for native_series in self._native_frame.iter_columns():
        wrapped = PolarsSeries(
            native_series,
            backend_version=self._backend_version,
            version=self._version,
        )
        yield wrapped

@property
def columns(self: Self) -> list[str]:
    """Return the column names exactly as reported by the native frame."""
    native_frame = self._native_frame
    return native_frame.columns
Expand Down
31 changes: 31 additions & 0 deletions narwhals/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -1235,6 +1235,37 @@ def rows(
"""
return self._compliant_frame.rows(named=named) # type: ignore[no-any-return]

def iter_columns(self: Self) -> Iterator[Series[Any]]:
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

iter_columns and all other methods on DataFrame that include Series[Any] in their return type (e.g. get_column) could benefit from a change I have in #2064

class CompliantDataFrame(Generic[CompliantSeriesT_co], Protocol):
def __narwhals_dataframe__(self) -> Self: ...
def __narwhals_namespace__(self) -> Any: ...
def simple_select(
self, *column_names: str
) -> Self: ... # `select` where all args are column names.
def aggregate(self, *exprs: Any) -> Self:
... # `select` where all args are aggregations or literals
# (so, no broadcasting is necessary).
@property
def columns(self) -> Sequence[str]: ...
@property
def schema(self) -> Mapping[str, DType]: ...
def get_column(self, name: str) -> CompliantSeriesT_co: ...

On the public side, it would mean adding:

# narwhals.typing.py
SeriesT_co = TypeVar("SeriesT_co", bound="Series[Any]", covariant=True)

And making changes like this:

# narwhals.dataframe.py
from narwhals.typing import SeriesT_co

class DataFrame(BaseFrame[DataFrameT], Generic[SeriesT_co]):
    def get_column(self: Self, name: str) -> SeriesT_co: ...
    def iter_columns(self: Self) -> Iterator[SeriesT_co]: ...
    ...

So then you'd have things like:

DataFrame[pd.DataFrame, pd.Series]
DataFrame[pl.DataFrame, pl.Series]
DataFrame[pa.Table, pa.ChunkedArray]

The link between DataFrame and Series is something I've explored some more in #2055 also

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If the v1 backport merges, it might make this process easier.
We wouldn't have the complexity of the stable/unstable Series not having a free TypeVar.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would ❤️ love ❤️ to have this

"""Returns an iterator over the columns of this DataFrame.

Yields:
A Narwhals Series, backed by a native series.

Examples:
>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"foo": [1, 2], "bar": [6.0, 7.0]})
>>> iter_columns = nw.from_native(df_native).iter_columns()
>>> next(iter_columns)
┌───────────────────────┐
|    Narwhals Series    |
|-----------------------|
|0    1                 |
|1    2                 |
|Name: foo, dtype: int64|
└───────────────────────┘
>>> next(iter_columns)
┌─────────────────────────┐
|     Narwhals Series     |
|-------------------------|
|0    6.0                 |
|1    7.0                 |
|Name: bar, dtype: float64|
└─────────────────────────┘
"""
for series in self._compliant_frame.iter_columns():
yield self._series(series, level=self._level)

@overload
def iter_rows(
self: Self, *, named: Literal[False], buffer_size: int = ...
Expand Down
10 changes: 10 additions & 0 deletions tests/frame/columns_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@

if TYPE_CHECKING:
from tests.utils import Constructor
from tests.utils import ConstructorEager

# Shared input for the tests in this module: two integer columns ("a", "b")
# and one float column ("z"), three rows each.
data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8.0, 9.0]}


@pytest.mark.filterwarnings("ignore:Determining|Resolving.*")
Expand All @@ -17,3 +20,10 @@ def test_columns(constructor: Constructor) -> None:
result = df.columns
expected = ["a", "b", "z"]
assert result == expected


def test_iter_columns(constructor_eager: ConstructorEager) -> None:
    """Check that ``iter_columns`` yields every column, in order, with its values.

    Comparing Series objects with ``==`` produces an elementwise result rather
    than a plain ``bool``, so a dict-of-Series equality assertion does not
    reliably check the underlying values. Each yielded Series is therefore
    materialised to a Python list and compared against
    ``to_dict(as_series=False)``.
    """
    df = nw.from_native(constructor_eager(data), eager_only=True)
    expected = df.to_dict(as_series=False)
    result = {series.name: series.to_list() for series in df.iter_columns()}
    # Columns must come back in the frame's column order, one Series each.
    assert list(result) == df.columns
    assert result == expected
Comment on lines +25 to +29
Copy link
Member Author

@dangotbanned dangotbanned Feb 26, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I put the test here, following the lead of iter_rows:

def test_iter_rows(
request: Any,
constructor_eager: ConstructorEager,
named: bool, # noqa: FBT001
expected: list[tuple[Any, ...]] | list[dict[str, Any]],
) -> None:
if "cudf" in str(constructor_eager):
request.applymarker(pytest.mark.xfail)
data = {"a": [1, 3, 2], "_b": [4, 4, 6], "z": [7.0, 8.0, 9.0], "1": [5, 6, 7]}
df = nw.from_native(constructor_eager(data), eager_only=True)
result = list(df.iter_rows(named=named))
assert result == expected

Not sure if we want anything more complex?
Maybe some consideration for a roundtrip through pyarrow and make sure the name is preserved?

Loading