diff --git a/narwhals/_pandas_like/dataframe.py b/narwhals/_pandas_like/dataframe.py index 519307681e..a3f8ecc964 100644 --- a/narwhals/_pandas_like/dataframe.py +++ b/narwhals/_pandas_like/dataframe.py @@ -21,7 +21,6 @@ from narwhals._pandas_like.utils import check_column_names_are_unique from narwhals._pandas_like.utils import convert_str_slice_to_int_slice from narwhals._pandas_like.utils import get_dtype_backend -from narwhals._pandas_like.utils import horizontal_concat from narwhals._pandas_like.utils import native_to_narwhals_dtype from narwhals._pandas_like.utils import object_native_to_narwhals_dtype from narwhals._pandas_like.utils import pivot_table @@ -504,11 +503,8 @@ def select(self: PandasLikeDataFrame, *exprs: PandasLikeExpr) -> PandasLikeDataF # return empty dataframe, like Polars does return self._with_native(self.native.__class__(), validate_column_names=False) new_series = align_series_full_broadcast(*new_series) - df = horizontal_concat( - [s.native for s in new_series], - implementation=self._implementation, - backend_version=self._backend_version, - ) + namespace = self.__narwhals_namespace__() + df = namespace._concat_horizontal([s.native for s in new_series]) return self._with_native(df, validate_column_names=True) def drop_nulls( @@ -531,13 +527,7 @@ def with_row_index(self: Self, name: str) -> Self: row_index = namespace._series.from_iterable( range(len(frame)), context=self, index=frame.index ).alias(name) - return self._with_native( - horizontal_concat( - [row_index.native, frame], - implementation=self._implementation, - backend_version=self._backend_version, - ) - ) + return self._with_native(namespace._concat_horizontal([row_index.native, frame])) def row(self: Self, index: int) -> tuple[Any, ...]: return tuple(x for x in self.native.iloc[index]) @@ -571,11 +561,8 @@ def with_columns( series = self.native[name] to_concat.append(series) to_concat.extend(self._extract_comparand(s) for s in name_columns.values()) - df = horizontal_concat( - to_concat, - implementation=self._implementation, - backend_version=self._backend_version, - ) + namespace = self.__narwhals_namespace__() + df = namespace._concat_horizontal(to_concat) return self._with_native(df, validate_column_names=False) def rename(self: Self, mapping: Mapping[str, str]) -> Self: diff --git a/narwhals/_pandas_like/group_by.py b/narwhals/_pandas_like/group_by.py index bf633b5cd9..fd3e71ab9a 100644 --- a/narwhals/_pandas_like/group_by.py +++ b/narwhals/_pandas_like/group_by.py @@ -11,7 +11,6 @@ from narwhals._compliant import EagerGroupBy from narwhals._expression_parsing import evaluate_output_names_and_aliases -from narwhals._pandas_like.utils import horizontal_concat from narwhals._pandas_like.utils import select_columns_by_name from narwhals._pandas_like.utils import set_columns from narwhals.utils import find_stacklevel @@ -233,11 +232,8 @@ def agg(self: Self, *exprs: PandasLikeExpr) -> PandasLikeDataFrame: # noqa: PLR pass msg = f"Expected unique output names, got:{msg}" raise ValueError(msg) - result = horizontal_concat( - dfs=result_aggs, - implementation=implementation, - backend_version=backend_version, - ) + namespace = self.compliant.__narwhals_namespace__() + result = namespace._concat_horizontal(result_aggs) else: # No aggregation provided result = self.compliant.__native_namespace__().DataFrame( diff --git a/narwhals/_pandas_like/namespace.py b/narwhals/_pandas_like/namespace.py index 9553edc1f9..a7cf2a8e81 100644 --- a/narwhals/_pandas_like/namespace.py +++ b/narwhals/_pandas_like/namespace.py @@ -1,10 +1,13 @@ from __future__ import annotations import operator +import warnings from functools import reduce from typing import TYPE_CHECKING from typing import Any from typing import Iterable +from typing import Literal +from typing import Sequence from narwhals._compliant import CompliantThen from narwhals._compliant import EagerNamespace @@ -16,20 +19,21 @@ from narwhals._pandas_like.selectors import PandasSelectorNamespace from narwhals._pandas_like.series import PandasLikeSeries from narwhals._pandas_like.utils import align_series_full_broadcast -from narwhals._pandas_like.utils import diagonal_concat -from narwhals._pandas_like.utils import horizontal_concat -from narwhals._pandas_like.utils import vertical_concat from narwhals.utils import import_dtypes_module if TYPE_CHECKING: import pandas as pd from typing_extensions import Self + from narwhals._pandas_like.typing import NDFrameT from narwhals.dtypes import DType from narwhals.typing import ConcatMethod from narwhals.utils import Implementation from narwhals.utils import Version +VERTICAL: Literal[0] = 0 +HORIZONTAL: Literal[1] = 1 + class PandasLikeNamespace( EagerNamespace[ @@ -223,48 +227,66 @@ def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]: context=self, ) + @property + def _concat(self): # type: ignore[no-untyped-def] # noqa: ANN202 + """Return the **native** equivalent of `pd.concat`.""" + # NOTE: Leave un-annotated to allow `@overload` matching via inference. + if TYPE_CHECKING: + import pandas as pd + + return pd.concat + return self._implementation.to_native_namespace().concat + + def _concat_diagonal(self, dfs: Sequence[pd.DataFrame], /) -> pd.DataFrame: + if self._implementation.is_pandas() and self._backend_version < (3,): + if self._backend_version < (1,): + return self._concat(dfs, axis=VERTICAL, copy=False, sort=False) + return self._concat(dfs, axis=VERTICAL, copy=False) + return self._concat(dfs, axis=VERTICAL) + + def _concat_horizontal(self, dfs: Sequence[NDFrameT], /) -> pd.DataFrame: + if self._implementation.is_cudf(): + with warnings.catch_warnings(): + warnings.filterwarnings( + "ignore", + message="The behavior of array concatenation with empty entries is deprecated", + category=FutureWarning, + ) + return self._concat(dfs, axis=HORIZONTAL) + elif self._implementation.is_pandas() and self._backend_version < (3,): + return self._concat(dfs, axis=HORIZONTAL, copy=False) + return self._concat(dfs, axis=HORIZONTAL) + + def _concat_vertical(self, dfs: Sequence[pd.DataFrame], /) -> pd.DataFrame: + cols_0 = dfs[0].columns + for i, df in enumerate(dfs[1:], start=1): + cols_current = df.columns + if not ( + (len(cols_current) == len(cols_0)) and (cols_current == cols_0).all() + ): + msg = ( + "unable to vstack, column names don't match:\n" + f" - dataframe 0: {cols_0.to_list()}\n" + f" - dataframe {i}: {cols_current.to_list()}\n" + ) + raise TypeError(msg) + if self._implementation.is_pandas() and self._backend_version < (3,): + return self._concat(dfs, axis=VERTICAL, copy=False) + return self._concat(dfs, axis=VERTICAL) + def concat( self, items: Iterable[PandasLikeDataFrame], *, how: ConcatMethod ) -> PandasLikeDataFrame: - dfs: list[Any] = [item._native_frame for item in items] + dfs: list[pd.DataFrame] = [item.native for item in items] if how == "horizontal": - return PandasLikeDataFrame( - horizontal_concat( - dfs, - implementation=self._implementation, - backend_version=self._backend_version, - ), - implementation=self._implementation, - backend_version=self._backend_version, - version=self._version, - validate_column_names=True, - ) - if how == "vertical": - return PandasLikeDataFrame( - vertical_concat( - dfs, - implementation=self._implementation, - backend_version=self._backend_version, - ), - implementation=self._implementation, - backend_version=self._backend_version, - version=self._version, - validate_column_names=True, - ) - - if how == "diagonal": - return PandasLikeDataFrame( - diagonal_concat( - dfs, - implementation=self._implementation, - backend_version=self._backend_version, - ), - implementation=self._implementation, - backend_version=self._backend_version, - version=self._version, - validate_column_names=True, - ) - raise NotImplementedError + native = self._concat_horizontal(dfs) + elif how == "vertical": + native = self._concat_vertical(dfs) + elif how == "diagonal": + native = self._concat_diagonal(dfs) + else: + raise NotImplementedError + return self._dataframe.from_native(native, context=self) def when(self: Self, predicate: PandasLikeExpr) -> PandasWhen: return PandasWhen.from_expr(predicate, context=self) diff --git a/narwhals/_pandas_like/typing.py b/narwhals/_pandas_like/typing.py index 9def250676..a6eb160a00 100644 --- a/narwhals/_pandas_like/typing.py +++ b/narwhals/_pandas_like/typing.py @@ -1,17 +1,16 @@ from __future__ import annotations # pragma: no cover from typing import TYPE_CHECKING # pragma: no cover -from typing import Union # pragma: no cover if TYPE_CHECKING: - import sys + from typing import Any + from typing import TypeVar - if sys.version_info >= (3, 10): - from typing import TypeAlias - else: - from typing_extensions import TypeAlias + import pandas as pd + from typing_extensions import TypeAlias from narwhals._pandas_like.expr import PandasLikeExpr from narwhals._pandas_like.series import PandasLikeSeries - IntoPandasLikeExpr: TypeAlias = Union[PandasLikeExpr, PandasLikeSeries] + IntoPandasLikeExpr: TypeAlias = "PandasLikeExpr | PandasLikeSeries" + NDFrameT = TypeVar("NDFrameT", "pd.DataFrame", "pd.Series[Any]") diff --git a/narwhals/_pandas_like/utils.py b/narwhals/_pandas_like/utils.py index 6c7c0d1db5..aea4a34a3a 100644 --- a/narwhals/_pandas_like/utils.py +++ b/narwhals/_pandas_like/utils.py @@ -2,7 +2,6 @@ import functools import re -import warnings from contextlib import suppress from typing import TYPE_CHECKING from typing import Any @@ -130,95 +129,6 @@ def align_and_extract_native( return lhs.native, rhs -def horizontal_concat( - dfs: list[Any], *, implementation: Implementation, backend_version: tuple[int, ...] -) -> Any: - """Concatenate (native) DataFrames horizontally. - - Should be in namespace. - """ - if implementation is Implementation.CUDF: - with warnings.catch_warnings(): - warnings.filterwarnings( - "ignore", - message="The behavior of array concatenation with empty entries is deprecated", - category=FutureWarning, - ) - return implementation.to_native_namespace().concat(dfs, axis=1) - - if implementation.is_pandas_like(): - extra_kwargs = ( - {"copy": False} - if implementation is Implementation.PANDAS and backend_version < (3,) - else {} - ) - return implementation.to_native_namespace().concat(dfs, axis=1, **extra_kwargs) - - else: # pragma: no cover - msg = f"Expected pandas-like implementation ({PANDAS_LIKE_IMPLEMENTATION}), found {implementation}" - raise TypeError(msg) - - -def vertical_concat( - dfs: list[Any], *, implementation: Implementation, backend_version: tuple[int, ...] -) -> Any: - """Concatenate (native) DataFrames vertically. - - Should be in namespace. - """ - if not dfs: - msg = "No dataframes to concatenate" # pragma: no cover - raise AssertionError(msg) - cols_0 = dfs[0].columns - for i, df in enumerate(dfs[1:], start=1): - cols_current = df.columns - if not ((len(cols_current) == len(cols_0)) and (cols_current == cols_0).all()): - msg = ( - "unable to vstack, column names don't match:\n" - f" - dataframe 0: {cols_0.to_list()}\n" - f" - dataframe {i}: {cols_current.to_list()}\n" - ) - raise TypeError(msg) - - if implementation in PANDAS_LIKE_IMPLEMENTATION: - extra_kwargs = ( - {"copy": False} - if implementation is Implementation.PANDAS and backend_version < (3,) - else {} - ) - return implementation.to_native_namespace().concat(dfs, axis=0, **extra_kwargs) - - else: # pragma: no cover - msg = f"Expected pandas-like implementation ({PANDAS_LIKE_IMPLEMENTATION}), found {implementation}" - raise TypeError(msg) - - -def diagonal_concat( - dfs: list[Any], *, implementation: Implementation, backend_version: tuple[int, ...] -) -> Any: - """Concatenate (native) DataFrames diagonally. - - Should be in namespace. - """ - if not dfs: - msg = "No dataframes to concatenate" # pragma: no cover - raise AssertionError(msg) - - if implementation in PANDAS_LIKE_IMPLEMENTATION: - extra_kwargs = ( - {"copy": False, "sort": False} - if implementation is Implementation.PANDAS and backend_version < (1,) - else {"copy": False} - if implementation is Implementation.PANDAS and backend_version < (3,) - else {} - ) - return implementation.to_native_namespace().concat(dfs, axis=0, **extra_kwargs) - - else: # pragma: no cover - msg = f"Expected pandas-like implementation ({PANDAS_LIKE_IMPLEMENTATION}), found {implementation}" - raise TypeError(msg) - - def set_index( obj: T, index: Any,