|
1 | 1 | from __future__ import annotations |
2 | 2 |
|
3 | 3 | import operator |
| 4 | +import warnings |
4 | 5 | from functools import reduce |
5 | 6 | from typing import TYPE_CHECKING |
6 | 7 | from typing import Any |
7 | 8 | from typing import Iterable |
| 9 | +from typing import Literal |
| 10 | +from typing import Sequence |
8 | 11 |
|
9 | 12 | from narwhals._compliant import CompliantThen |
10 | 13 | from narwhals._compliant import EagerNamespace |
|
16 | 19 | from narwhals._pandas_like.selectors import PandasSelectorNamespace |
17 | 20 | from narwhals._pandas_like.series import PandasLikeSeries |
18 | 21 | from narwhals._pandas_like.utils import align_series_full_broadcast |
19 | | -from narwhals._pandas_like.utils import diagonal_concat |
20 | | -from narwhals._pandas_like.utils import horizontal_concat |
21 | | -from narwhals._pandas_like.utils import vertical_concat |
22 | 22 | from narwhals.utils import import_dtypes_module |
23 | 23 |
|
24 | 24 | if TYPE_CHECKING: |
25 | 25 | import pandas as pd |
26 | 26 | from typing_extensions import Self |
27 | 27 |
|
| 28 | + from narwhals._pandas_like.typing import NDFrameT |
28 | 29 | from narwhals.dtypes import DType |
29 | 30 | from narwhals.typing import ConcatMethod |
30 | 31 | from narwhals.utils import Implementation |
31 | 32 | from narwhals.utils import Version |
32 | 33 |
|
# Axis values handed to the native `concat` function as `axis=`.
# `VERTICAL` is used by the vertical and diagonal concat helpers.
VERTICAL: Literal[0] = 0
# `HORIZONTAL` is used by the horizontal concat helper.
HORIZONTAL: Literal[1] = 1
| 36 | + |
33 | 37 |
|
34 | 38 | class PandasLikeNamespace( |
35 | 39 | EagerNamespace[ |
@@ -223,48 +227,66 @@ def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]: |
223 | 227 | context=self, |
224 | 228 | ) |
225 | 229 |
|
@property
def _concat(self):  # type: ignore[no-untyped-def]  # noqa: ANN202
    """Expose the backend's own `concat` function (the counterpart of `pd.concat`).

    At runtime the function is looked up on the native namespace obtained
    from `self._implementation` each time the property is accessed.
    """
    # NOTE: The return annotation is omitted on purpose so type checkers infer
    # the signature (including `@overload`s) from the `pd.concat` branch below.
    if not TYPE_CHECKING:
        return self._implementation.to_native_namespace().concat
    import pandas as pd

    return pd.concat
| 239 | + |
def _concat_diagonal(self, dfs: Sequence[pd.DataFrame], /) -> pd.DataFrame:
    """Concatenate `dfs` along the vertical axis with no column-name validation.

    The keyword arguments forwarded to the native `concat` depend on the backend:

    - pandas older than 1: `copy=False, sort=False`
    - pandas from 1 up to (not including) 3: `copy=False`
    - everything else: no extra keywords
    """
    # Non-pandas backends never receive the pandas-specific keywords.
    if not self._implementation.is_pandas():
        return self._concat(dfs, axis=VERTICAL)
    version = self._backend_version
    if version < (1,):
        return self._concat(dfs, axis=VERTICAL, copy=False, sort=False)
    if version < (3,):
        return self._concat(dfs, axis=VERTICAL, copy=False)
    return self._concat(dfs, axis=VERTICAL)
| 246 | + |
def _concat_horizontal(self, dfs: Sequence[NDFrameT], /) -> pd.DataFrame:
    """Concatenate `dfs` along the horizontal axis using the native `concat`.

    - cuDF: the call runs with the "array concatenation with empty entries"
      `FutureWarning` filtered out.
    - pandas older than 3: `copy=False` is forwarded.
    - everything else: no extra keywords.
    """
    impl = self._implementation
    if impl.is_cudf():
        # The filter only applies inside this `catch_warnings` block.
        with warnings.catch_warnings():
            warnings.filterwarnings(
                "ignore",
                message="The behavior of array concatenation with empty entries is deprecated",
                category=FutureWarning,
            )
            return self._concat(dfs, axis=HORIZONTAL)
    if impl.is_pandas() and self._backend_version < (3,):
        return self._concat(dfs, axis=HORIZONTAL, copy=False)
    return self._concat(dfs, axis=HORIZONTAL)
| 259 | + |
| 260 | + def _concat_vertical(self, dfs: Sequence[pd.DataFrame], /) -> pd.DataFrame: |
| 261 | + cols_0 = dfs[0].columns |
| 262 | + for i, df in enumerate(dfs[1:], start=1): |
| 263 | + cols_current = df.columns |
| 264 | + if not ( |
| 265 | + (len(cols_current) == len(cols_0)) and (cols_current == cols_0).all() |
| 266 | + ): |
| 267 | + msg = ( |
| 268 | + "unable to vstack, column names don't match:\n" |
| 269 | + f" - dataframe 0: {cols_0.to_list()}\n" |
| 270 | + f" - dataframe {i}: {cols_current.to_list()}\n" |
| 271 | + ) |
| 272 | + raise TypeError(msg) |
| 273 | + if self._implementation.is_pandas() and self._backend_version < (3,): |
| 274 | + return self._concat(dfs, axis=VERTICAL, copy=False) |
| 275 | + return self._concat(dfs, axis=VERTICAL) |
| 276 | + |
def concat(
    self, items: Iterable[PandasLikeDataFrame], *, how: ConcatMethod
) -> PandasLikeDataFrame:
    """Concatenate `items` according to `how` and wrap the native result.

    Arguments:
        items: Frames to combine; the `.native` object of each one is collected
            up front, before `how` is inspected.
        how: One of `"horizontal"`, `"vertical"` or `"diagonal"`.

    Raises:
        NotImplementedError: If `how` is none of the three supported values.
    """
    frames: list[pd.DataFrame] = [item.native for item in items]
    if how == "vertical":
        combined = self._concat_vertical(frames)
    elif how == "diagonal":
        combined = self._concat_diagonal(frames)
    elif how == "horizontal":
        combined = self._concat_horizontal(frames)
    else:
        raise NotImplementedError
    return self._dataframe.from_native(combined, context=self)
268 | 290 |
|
269 | 291 | def when(self: Self, predicate: PandasLikeExpr) -> PandasWhen: |
270 | 292 | return PandasWhen.from_expr(predicate, context=self) |
|
0 commit comments