-
Notifications
You must be signed in to change notification settings - Fork 170
refactor: Simplify PandasLikeNamespace.concat
#2368
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 10 commits
0062258
5030b47
31952b9
69f374e
b2c332f
4abd0a5
b6d0711
1fa9f8f
fa30706
657aef9
0aee5bf
4ba461f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||
|---|---|---|---|---|---|---|---|---|
| @@ -1,10 +1,13 @@ | ||||||||
| from __future__ import annotations | ||||||||
|
|
||||||||
| import operator | ||||||||
| import warnings | ||||||||
| from functools import reduce | ||||||||
| from typing import TYPE_CHECKING | ||||||||
| from typing import Any | ||||||||
| from typing import Iterable | ||||||||
| from typing import Literal | ||||||||
| from typing import Sequence | ||||||||
|
|
||||||||
| from narwhals._compliant import CompliantThen | ||||||||
| from narwhals._compliant import EagerNamespace | ||||||||
|
|
@@ -16,20 +19,21 @@ | |||||||
| from narwhals._pandas_like.selectors import PandasSelectorNamespace | ||||||||
| from narwhals._pandas_like.series import PandasLikeSeries | ||||||||
| from narwhals._pandas_like.utils import align_series_full_broadcast | ||||||||
| from narwhals._pandas_like.utils import diagonal_concat | ||||||||
| from narwhals._pandas_like.utils import horizontal_concat | ||||||||
| from narwhals._pandas_like.utils import vertical_concat | ||||||||
| from narwhals.utils import import_dtypes_module | ||||||||
|
|
||||||||
| if TYPE_CHECKING: | ||||||||
| import pandas as pd | ||||||||
| from typing_extensions import Self | ||||||||
|
|
||||||||
| from narwhals._pandas_like.typing import NDFrameT | ||||||||
| from narwhals.dtypes import DType | ||||||||
| from narwhals.typing import ConcatMethod | ||||||||
| from narwhals.utils import Implementation | ||||||||
| from narwhals.utils import Version | ||||||||
|
|
||||||||
| VERTICAL: Literal[0] = 0 | ||||||||
| HORIZONTAL: Literal[1] = 1 | ||||||||
|
|
||||||||
|
|
||||||||
| class PandasLikeNamespace( | ||||||||
| EagerNamespace[ | ||||||||
|
|
@@ -223,48 +227,66 @@ def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]: | |||||||
| context=self, | ||||||||
| ) | ||||||||
|
|
||||||||
| @property | ||||||||
| def _concat(self): # type: ignore[no-untyped-def] # noqa: ANN202 | ||||||||
| """Return the **native** equivalent of `pd.concat`.""" | ||||||||
| # NOTE: Leave un-annotated to allow `@overload` matching via inference. | ||||||||
| if TYPE_CHECKING: | ||||||||
| import pandas as pd | ||||||||
|
|
||||||||
| return pd.concat | ||||||||
| return self._implementation.to_native_namespace().concat | ||||||||
|
|
||||||||
| def _concat_diagonal(self, dfs: Sequence[NDFrameT], /) -> pd.DataFrame: | ||||||||
| if self._implementation.is_pandas() and self._backend_version < (3,): | ||||||||
| if self._backend_version < (1,): | ||||||||
| return self._concat(dfs, axis=VERTICAL, copy=False, sort=False) | ||||||||
| return self._concat(dfs, axis=VERTICAL, copy=False) | ||||||||
| return self._concat(dfs, axis=VERTICAL) | ||||||||
|
Comment on lines +241 to +245
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @MarcoGorelli should I have just copied it over? They have a subtle difference: narwhals/narwhals/_pandas_like/namespace.py, lines 273 to 275 in 657aef9
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Probably fine for now; perhaps let's make an issue and revisit? |
||||||||
|
|
||||||||
| def _concat_horizontal(self, dfs: Sequence[NDFrameT], /) -> pd.DataFrame: | ||||||||
| if self._implementation.is_cudf(): | ||||||||
| with warnings.catch_warnings(): | ||||||||
| warnings.filterwarnings( | ||||||||
| "ignore", | ||||||||
| message="The behavior of array concatenation with empty entries is deprecated", | ||||||||
| category=FutureWarning, | ||||||||
| ) | ||||||||
| return self._concat(dfs, axis=HORIZONTAL) | ||||||||
| elif self._implementation.is_pandas() and self._backend_version < (3,): | ||||||||
| return self._concat(dfs, axis=HORIZONTAL, copy=False) | ||||||||
| return self._concat(dfs, axis=HORIZONTAL) | ||||||||
|
|
||||||||
| def _concat_vertical(self, dfs: Sequence[pd.DataFrame], /) -> pd.DataFrame: | ||||||||
| cols_0 = dfs[0].columns | ||||||||
| for i, df in enumerate(dfs[1:], start=1): | ||||||||
| cols_current = df.columns | ||||||||
| if not ( | ||||||||
| (len(cols_current) == len(cols_0)) and (cols_current == cols_0).all() | ||||||||
| ): | ||||||||
| msg = ( | ||||||||
| "unable to vstack, column names don't match:\n" | ||||||||
| f" - dataframe 0: {cols_0.to_list()}\n" | ||||||||
| f" - dataframe {i}: {cols_current.to_list()}\n" | ||||||||
| ) | ||||||||
| raise TypeError(msg) | ||||||||
| if self._implementation.is_pandas() and self._backend_version < (3,): | ||||||||
| return self._concat(dfs, axis=VERTICAL, copy=False) | ||||||||
| return self._concat(dfs, axis=VERTICAL) | ||||||||
|
|
||||||||
| def concat( | ||||||||
| self, items: Iterable[PandasLikeDataFrame], *, how: ConcatMethod | ||||||||
| ) -> PandasLikeDataFrame: | ||||||||
| dfs: list[Any] = [item._native_frame for item in items] | ||||||||
| dfs: list[pd.DataFrame] = [item.native for item in items] | ||||||||
| if how == "horizontal": | ||||||||
| return PandasLikeDataFrame( | ||||||||
| horizontal_concat( | ||||||||
| dfs, | ||||||||
| implementation=self._implementation, | ||||||||
| backend_version=self._backend_version, | ||||||||
| ), | ||||||||
| implementation=self._implementation, | ||||||||
| backend_version=self._backend_version, | ||||||||
| version=self._version, | ||||||||
| validate_column_names=True, | ||||||||
| ) | ||||||||
| if how == "vertical": | ||||||||
| return PandasLikeDataFrame( | ||||||||
| vertical_concat( | ||||||||
| dfs, | ||||||||
| implementation=self._implementation, | ||||||||
| backend_version=self._backend_version, | ||||||||
| ), | ||||||||
| implementation=self._implementation, | ||||||||
| backend_version=self._backend_version, | ||||||||
| version=self._version, | ||||||||
| validate_column_names=True, | ||||||||
| ) | ||||||||
|
|
||||||||
| if how == "diagonal": | ||||||||
| return PandasLikeDataFrame( | ||||||||
| diagonal_concat( | ||||||||
| dfs, | ||||||||
| implementation=self._implementation, | ||||||||
| backend_version=self._backend_version, | ||||||||
| ), | ||||||||
| implementation=self._implementation, | ||||||||
| backend_version=self._backend_version, | ||||||||
| version=self._version, | ||||||||
| validate_column_names=True, | ||||||||
| ) | ||||||||
| raise NotImplementedError | ||||||||
| native = self._concat_horizontal(dfs) | ||||||||
| elif how == "vertical": | ||||||||
| native = self._concat_vertical(dfs) | ||||||||
| elif how == "diagonal": | ||||||||
| native = self._concat_diagonal(dfs) | ||||||||
| else: | ||||||||
| raise NotImplementedError | ||||||||
| return self._dataframe.from_native(native, context=self) | ||||||||
|
|
||||||||
| def when(self: Self, predicate: PandasLikeExpr) -> PandasWhen: | ||||||||
| return PandasWhen.from_expr(predicate, context=self) | ||||||||
|
|
||||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,17 +1,16 @@ | ||
| from __future__ import annotations # pragma: no cover | ||
|
|
||
| from typing import TYPE_CHECKING # pragma: no cover | ||
| from typing import Union # pragma: no cover | ||
|
|
||
| if TYPE_CHECKING: | ||
| import sys | ||
| from typing import Any | ||
| from typing import TypeVar | ||
|
|
||
| if sys.version_info >= (3, 10): | ||
| from typing import TypeAlias | ||
| else: | ||
| from typing_extensions import TypeAlias | ||
| import pandas as pd | ||
| from typing_extensions import TypeAlias | ||
|
|
||
| from narwhals._pandas_like.expr import PandasLikeExpr | ||
| from narwhals._pandas_like.series import PandasLikeSeries | ||
|
|
||
| IntoPandasLikeExpr: TypeAlias = Union[PandasLikeExpr, PandasLikeSeries] | ||
| IntoPandasLikeExpr: TypeAlias = "PandasLikeExpr | PandasLikeSeries" | ||
| NDFrameT = TypeVar("NDFrameT", "pd.DataFrame", "pd.Series[Any]") |
Uh oh!
There was an error while loading. Please reload this page.