Skip to content
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 5 additions & 18 deletions narwhals/_pandas_like/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
from narwhals._pandas_like.utils import check_column_names_are_unique
from narwhals._pandas_like.utils import convert_str_slice_to_int_slice
from narwhals._pandas_like.utils import get_dtype_backend
from narwhals._pandas_like.utils import horizontal_concat
from narwhals._pandas_like.utils import native_to_narwhals_dtype
from narwhals._pandas_like.utils import object_native_to_narwhals_dtype
from narwhals._pandas_like.utils import pivot_table
Expand Down Expand Up @@ -504,11 +503,8 @@ def select(self: PandasLikeDataFrame, *exprs: PandasLikeExpr) -> PandasLikeDataF
# return empty dataframe, like Polars does
return self._with_native(self.native.__class__(), validate_column_names=False)
new_series = align_series_full_broadcast(*new_series)
df = horizontal_concat(
[s.native for s in new_series],
implementation=self._implementation,
backend_version=self._backend_version,
)
namespace = self.__narwhals_namespace__()
df = namespace._concat_horizontal([s.native for s in new_series])
return self._with_native(df, validate_column_names=True)

def drop_nulls(
Expand All @@ -531,13 +527,7 @@ def with_row_index(self: Self, name: str) -> Self:
row_index = namespace._series.from_iterable(
range(len(frame)), context=self, index=frame.index
).alias(name)
return self._with_native(
horizontal_concat(
[row_index.native, frame],
implementation=self._implementation,
backend_version=self._backend_version,
)
)
return self._with_native(namespace._concat_horizontal([row_index.native, frame]))

def row(self: Self, index: int) -> tuple[Any, ...]:
return tuple(x for x in self.native.iloc[index])
Expand Down Expand Up @@ -571,11 +561,8 @@ def with_columns(
series = self.native[name]
to_concat.append(series)
to_concat.extend(self._extract_comparand(s) for s in name_columns.values())
df = horizontal_concat(
to_concat,
implementation=self._implementation,
backend_version=self._backend_version,
)
namespace = self.__narwhals_namespace__()
df = namespace._concat_horizontal(to_concat)
return self._with_native(df, validate_column_names=False)

def rename(self: Self, mapping: Mapping[str, str]) -> Self:
Expand Down
8 changes: 2 additions & 6 deletions narwhals/_pandas_like/group_by.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@

from narwhals._compliant import EagerGroupBy
from narwhals._expression_parsing import evaluate_output_names_and_aliases
from narwhals._pandas_like.utils import horizontal_concat
from narwhals._pandas_like.utils import select_columns_by_name
from narwhals._pandas_like.utils import set_columns
from narwhals.utils import find_stacklevel
Expand Down Expand Up @@ -233,11 +232,8 @@ def agg(self: Self, *exprs: PandasLikeExpr) -> PandasLikeDataFrame: # noqa: PLR
pass
msg = f"Expected unique output names, got:{msg}"
raise ValueError(msg)
result = horizontal_concat(
dfs=result_aggs,
implementation=implementation,
backend_version=backend_version,
)
namespace = self.compliant.__narwhals_namespace__()
result = namespace._concat_horizontal(result_aggs)
else:
# No aggregation provided
result = self.compliant.__native_namespace__().DataFrame(
Expand Down
96 changes: 55 additions & 41 deletions narwhals/_pandas_like/namespace.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
from __future__ import annotations

import operator
import warnings
from functools import reduce
from typing import TYPE_CHECKING
from typing import Any
from typing import Iterable
from typing import Sequence

from narwhals._compliant import CompliantThen
from narwhals._compliant import EagerNamespace
Expand All @@ -16,15 +18,13 @@
from narwhals._pandas_like.selectors import PandasSelectorNamespace
from narwhals._pandas_like.series import PandasLikeSeries
from narwhals._pandas_like.utils import align_series_full_broadcast
from narwhals._pandas_like.utils import diagonal_concat
from narwhals._pandas_like.utils import horizontal_concat
from narwhals._pandas_like.utils import vertical_concat
from narwhals.utils import import_dtypes_module

if TYPE_CHECKING:
import pandas as pd
from typing_extensions import Self

from narwhals._pandas_like.typing import NDFrameT
from narwhals.dtypes import DType
from narwhals.typing import ConcatMethod
from narwhals.utils import Implementation
Expand Down Expand Up @@ -223,48 +223,62 @@ def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]:
context=self,
)

def _concat_diagonal(self, dfs: Sequence[NDFrameT], /) -> NDFrameT:
    """Concatenate (native) DataFrames diagonally.

    The frames are passed to the native namespace's ``concat`` with
    ``axis=0``; no column check is performed here.

    Arguments:
        dfs: Native objects to concatenate, in order.

    Returns:
        Whatever the native ``concat`` returns for ``axis=0``.
    """
    concat = self._implementation.to_native_namespace().concat
    # Only pandas itself (not the other pandas-like backends) gets the extra
    # keyword arguments, and only below major version 3.
    if self._implementation.is_pandas() and self._backend_version < (3,):
        if self._backend_version < (1,):
            # pandas < 1 additionally gets an explicit `sort=False`.
            return concat(dfs, axis=0, copy=False, sort=False)
        return concat(dfs, axis=0, copy=False)
    return concat(dfs, axis=0)

def _concat_horizontal(self, dfs: Sequence[NDFrameT], /) -> NDFrameT:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shouldn't the output type always be pd.DataFrame regardless of input in dfs?

Suggested change
def _concat_horizontal(self, dfs: Sequence[NDFrameT], /) -> NDFrameT:
def _concat_horizontal(self, dfs: Sequence[NDFrameT], /) -> pd.DataFrame:

Here a snippet:

import pandas as pd

s1 = pd.Series([1, 2, 3])
s2 = pd.Series(["a", "b", "c"])

type(pd.concat([s1], axis=1))
# pandas.core.frame.DataFrame

type(pd.concat([s1, s2], axis=1))
# pandas.core.frame.DataFrame

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well spotted!

So I added that type for horizontal, which gets used with a Series as well (IIRC in group_by maybe?).

vertical used some property that is only available on DataFrame, so I had to make that one narrower.

diagonal (I think) should work with either type, but is currently only used for DataFrame.

Replying from my phone, hope that all makes sense 😅

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh I see what you mean now!

Copy link
Member Author

@dangotbanned dangotbanned Apr 10, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

#2368 (comment)
@FBruzzesi I think you'd also need to change the type of dfs to be:

Sequence[pd.DataFrame] | Sequence[pd.Series[Any]]

If you kept the TypeVar in, pyright will tell you off 😉

Edit: I forgot about this new thing I learned 🤦‍♂️ #2283 (comment)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@FBruzzesi quite happy with this now (b6d0711)

Thanks for the pandas 🧠

image

image

image

"""Concatenate (native) DataFrames horizontally."""
concat = self._implementation.to_native_namespace().concat
if self._implementation.is_cudf():
with warnings.catch_warnings():
warnings.filterwarnings(
"ignore",
message="The behavior of array concatenation with empty entries is deprecated",
category=FutureWarning,
)
return concat(dfs, axis=1)
elif self._implementation.is_pandas() and self._backend_version < (3,):
return concat(dfs, axis=1, copy=False)
return concat(dfs, axis=1)

def _concat_vertical(self, dfs: Sequence[pd.DataFrame], /) -> pd.DataFrame:
    """Concatenate (native) DataFrames vertically.

    Every frame must have exactly the same column labels, in the same
    order, as the first one.

    Arguments:
        dfs: Native DataFrames to stack; the first one is the reference
            for the column check.

    Returns:
        The result of the native ``concat`` along ``axis=0``.

    Raises:
        TypeError: If any frame's columns differ from those of the first.
    """
    concat = self._implementation.to_native_namespace().concat
    cols_0 = dfs[0].columns
    for i, df in enumerate(dfs[1:], start=1):
        cols_current = df.columns
        # Compare lengths first: the element-wise `==` is only evaluated
        # when both column indexes have the same number of entries.
        if not (
            (len(cols_current) == len(cols_0)) and (cols_current == cols_0).all()
        ):
            msg = (
                "unable to vstack, column names don't match:\n"
                f"   - dataframe 0: {cols_0.to_list()}\n"
                f"   - dataframe {i}: {cols_current.to_list()}\n"
            )
            raise TypeError(msg)
    # Only pandas below major version 3 is given `copy=False`.
    if self._implementation.is_pandas() and self._backend_version < (3,):
        return concat(dfs, axis=0, copy=False)
    return concat(dfs, axis=0)

def concat(
    self, items: Iterable[PandasLikeDataFrame], *, how: ConcatMethod
) -> PandasLikeDataFrame:
    """Concatenate compliant DataFrames using the requested strategy.

    Arguments:
        items: Frames to concatenate; each one's ``native`` object is used.
        how: One of ``"horizontal"``, ``"vertical"`` or ``"diagonal"``.

    Returns:
        A new ``PandasLikeDataFrame`` built from the concatenated native
        frame via ``PandasLikeDataFrame.from_native``.

    Raises:
        NotImplementedError: If ``how`` is not one of the three methods above.
    """
    dfs: list[pd.DataFrame] = [item.native for item in items]
    # Each strategy is delegated to the matching `_concat_*` method on this
    # namespace.
    if how == "horizontal":
        native = self._concat_horizontal(dfs)
    elif how == "vertical":
        native = self._concat_vertical(dfs)
    elif how == "diagonal":
        native = self._concat_diagonal(dfs)
    else:
        raise NotImplementedError
    return PandasLikeDataFrame.from_native(native, context=self)

def when(self: Self, predicate: PandasLikeExpr) -> PandasWhen:
return PandasWhen.from_expr(predicate, context=self)
Expand Down
13 changes: 6 additions & 7 deletions narwhals/_pandas_like/typing.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,16 @@
from __future__ import annotations # pragma: no cover

from typing import TYPE_CHECKING # pragma: no cover
from typing import Union # pragma: no cover

if TYPE_CHECKING:
import sys
from typing import Any
from typing import TypeVar

if sys.version_info >= (3, 10):
from typing import TypeAlias
else:
from typing_extensions import TypeAlias
import pandas as pd
from typing_extensions import TypeAlias

from narwhals._pandas_like.expr import PandasLikeExpr
from narwhals._pandas_like.series import PandasLikeSeries

IntoPandasLikeExpr: TypeAlias = Union[PandasLikeExpr, PandasLikeSeries]
IntoPandasLikeExpr: TypeAlias = "PandasLikeExpr | PandasLikeSeries"
NDFrameT = TypeVar("NDFrameT", "pd.DataFrame", "pd.Series[Any]")
90 changes: 0 additions & 90 deletions narwhals/_pandas_like/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

import functools
import re
import warnings
from contextlib import suppress
from typing import TYPE_CHECKING
from typing import Any
Expand Down Expand Up @@ -130,95 +129,6 @@ def align_and_extract_native(
return lhs.native, rhs


def horizontal_concat(
    dfs: list[Any], *, implementation: Implementation, backend_version: tuple[int, ...]
) -> Any:
    """Concatenate (native) DataFrames horizontally.

    Should be in namespace.

    Arguments:
        dfs: Native objects to concatenate along ``axis=1``.
        implementation: Backend whose native namespace provides ``concat``.
        backend_version: Version of that backend, used to pick keyword arguments.

    Returns:
        The result of the native ``concat`` along ``axis=1``.

    Raises:
        TypeError: If ``implementation`` is not pandas-like.
    """
    if implementation is Implementation.CUDF:
        with warnings.catch_warnings():
            # Silence only this specific FutureWarning for the duration of
            # the cuDF concat call.
            warnings.filterwarnings(
                "ignore",
                message="The behavior of array concatenation with empty entries is deprecated",
                category=FutureWarning,
            )
            return implementation.to_native_namespace().concat(dfs, axis=1)

    if implementation.is_pandas_like():
        # Only pandas below major version 3 is given `copy=False`.
        extra_kwargs = (
            {"copy": False}
            if implementation is Implementation.PANDAS and backend_version < (3,)
            else {}
        )
        return implementation.to_native_namespace().concat(dfs, axis=1, **extra_kwargs)

    else:  # pragma: no cover
        msg = f"Expected pandas-like implementation ({PANDAS_LIKE_IMPLEMENTATION}), found {implementation}"
        raise TypeError(msg)


def vertical_concat(
    dfs: list[Any], *, implementation: Implementation, backend_version: tuple[int, ...]
) -> Any:
    """Concatenate (native) DataFrames vertically.

    Should be in namespace.

    Arguments:
        dfs: Native DataFrames to stack; the first one is the reference
            for the column check.
        implementation: Backend whose native namespace provides ``concat``.
        backend_version: Version of that backend, used to pick keyword arguments.

    Returns:
        The result of the native ``concat`` along ``axis=0``.

    Raises:
        AssertionError: If ``dfs`` is empty.
        TypeError: If any frame's columns differ from those of the first,
            or if ``implementation`` is not pandas-like.
    """
    if not dfs:
        msg = "No dataframes to concatenate"  # pragma: no cover
        raise AssertionError(msg)
    cols_0 = dfs[0].columns
    for i, df in enumerate(dfs[1:], start=1):
        cols_current = df.columns
        # Compare lengths first: the element-wise `==` is only evaluated
        # when both column indexes have the same number of entries.
        if not ((len(cols_current) == len(cols_0)) and (cols_current == cols_0).all()):
            msg = (
                "unable to vstack, column names don't match:\n"
                f"   - dataframe 0: {cols_0.to_list()}\n"
                f"   - dataframe {i}: {cols_current.to_list()}\n"
            )
            raise TypeError(msg)

    if implementation in PANDAS_LIKE_IMPLEMENTATION:
        # Only pandas below major version 3 is given `copy=False`.
        extra_kwargs = (
            {"copy": False}
            if implementation is Implementation.PANDAS and backend_version < (3,)
            else {}
        )
        return implementation.to_native_namespace().concat(dfs, axis=0, **extra_kwargs)

    else:  # pragma: no cover
        msg = f"Expected pandas-like implementation ({PANDAS_LIKE_IMPLEMENTATION}), found {implementation}"
        raise TypeError(msg)


def diagonal_concat(
    dfs: list[Any], *, implementation: Implementation, backend_version: tuple[int, ...]
) -> Any:
    """Concatenate (native) DataFrames diagonally.

    Should be in namespace.

    Arguments:
        dfs: Native DataFrames to concatenate along ``axis=0``.
        implementation: Backend whose native namespace provides ``concat``.
        backend_version: Version of that backend, used to pick keyword arguments.

    Returns:
        The result of the native ``concat`` along ``axis=0``.

    Raises:
        AssertionError: If ``dfs`` is empty.
        TypeError: If ``implementation`` is not pandas-like.
    """
    if not dfs:
        msg = "No dataframes to concatenate"  # pragma: no cover
        raise AssertionError(msg)

    if implementation in PANDAS_LIKE_IMPLEMENTATION:
        # Extra keyword arguments apply to pandas only: `copy=False` below
        # major version 3, plus `sort=False` below major version 1.
        extra_kwargs: dict[str, Any]
        if implementation is Implementation.PANDAS and backend_version < (1,):
            extra_kwargs = {"copy": False, "sort": False}
        elif implementation is Implementation.PANDAS and backend_version < (3,):
            extra_kwargs = {"copy": False}
        else:
            extra_kwargs = {}
        return implementation.to_native_namespace().concat(dfs, axis=0, **extra_kwargs)

    else:  # pragma: no cover
        msg = f"Expected pandas-like implementation ({PANDAS_LIKE_IMPLEMENTATION}), found {implementation}"
        raise TypeError(msg)


def set_index(
obj: T,
index: Any,
Expand Down
Loading