Skip to content
18 changes: 0 additions & 18 deletions narwhals/_dask/namespace.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,24 +175,6 @@ def concat(
backend_version=self._backend_version,
version=self._version,
)
if how == "horizontal":
all_column_names: list[str] = [
column for frame in dfs for column in frame.columns
]
if len(all_column_names) != len(set(all_column_names)): # pragma: no cover
duplicates = [
i for i in all_column_names if all_column_names.count(i) > 1
]
msg = (
f"Columns with name(s): {', '.join(duplicates)} "
"have more than one occurrence"
)
raise AssertionError(msg)
return DaskLazyFrame(
dd.concat(dfs, axis=1, join="outer"),
backend_version=self._backend_version,
version=self._version,
)
Comment on lines -178 to -195
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Beautiful 😍

if how == "diagonal":
return DaskLazyFrame(
dd.concat(dfs, axis=0, join="outer"),
Expand Down
11 changes: 3 additions & 8 deletions narwhals/_duckdb/namespace.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,21 +60,16 @@ def _lazyframe(self) -> type[DuckDBLazyFrame]:
return DuckDBLazyFrame

def concat(
self, items: Iterable[DuckDBLazyFrame], *, how: ConcatMethod
self: Self, items: Iterable[DuckDBLazyFrame], *, how: ConcatMethod
) -> DuckDBLazyFrame:
if how == "horizontal":
msg = "horizontal concat not supported for duckdb. Please join instead"
raise TypeError(msg)
if how == "diagonal":
msg = "Not implemented yet"
raise NotImplementedError(msg)
native_items = [item._native_frame for item in items]
items = list(items)
first = items[0]
schema = first.schema
if how == "vertical" and not all(x.schema == schema for x in items[1:]):
msg = "inputs should all have the same schema"
raise TypeError(msg)
res = reduce(lambda x, y: x.union(y), (item._native_frame for item in items))
res = reduce(lambda x, y: x.union(y), native_items)
return first._with_native(res)

def concat_str(
Expand Down
7 changes: 0 additions & 7 deletions narwhals/_spark_like/namespace.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,13 +192,6 @@ def concat(
self, items: Iterable[SparkLikeLazyFrame], *, how: ConcatMethod
) -> SparkLikeLazyFrame:
dfs = [item._native_frame for item in items]
if how == "horizontal":
msg = (
"Horizontal concatenation is not supported for LazyFrame backed by "
"a PySpark DataFrame."
)
raise NotImplementedError(msg)

if how == "vertical":
cols_0 = dfs[0].columns
for i, df in enumerate(dfs[1:], start=1):
Expand Down
23 changes: 17 additions & 6 deletions narwhals/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
from narwhals.dependencies import is_numpy_array
from narwhals.dependencies import is_numpy_array_2d
from narwhals.dependencies import is_pyarrow_table
from narwhals.exceptions import InvalidOperationError
from narwhals.expr import Expr
from narwhals.series import Series
from narwhals.translate import from_native
Expand Down Expand Up @@ -79,12 +80,13 @@ def concat(items: Iterable[FrameT], *, how: ConcatMethod = "vertical") -> FrameT

- vertical: Concatenate vertically. Column names must match.
- horizontal: Concatenate horizontally. If lengths don't match, then
missing rows are filled with null values.
missing rows are filled with null values. This is only supported
when all inputs are (eager) DataFrames.
- diagonal: Finds a union between the column schemas and fills missing column
values with null.

Returns:
A new DataFrame, Lazyframe resulting from the concatenation.
A new DataFrame or Lazyframe resulting from the concatenation.

Raises:
TypeError: The items to concatenate should either all be eager, or all lazy
Expand Down Expand Up @@ -151,16 +153,25 @@ def concat(items: Iterable[FrameT], *, how: ConcatMethod = "vertical") -> FrameT
|z: [[null,null],["x","y"]]|
└──────────────────────────┘
"""
if how not in {"horizontal", "vertical", "diagonal"}: # pragma: no cover
msg = "Only vertical, horizontal and diagonal concatenations are supported."
raise NotImplementedError(msg)
from narwhals.dataframe import LazyFrame

if not items:
msg = "No items to concatenate"
msg = "No items to concatenate."
raise ValueError(msg)
items = list(items)
validate_laziness(items)
if how not in {"horizontal", "vertical", "diagonal"}: # pragma: no cover
msg = "Only vertical, horizontal and diagonal concatenations are supported."
raise NotImplementedError(msg)
first_item = items[0]
plx = first_item.__narwhals_namespace__()
if isinstance(first_item, LazyFrame) and how == "horizontal":
msg = (
"Horizontal concatenation is not supported for LazyFrames.\n\n"
"Hint: you may want to use `join` instead."
)
raise InvalidOperationError(msg)

return first_item._with_compliant(
plx.concat([df._compliant_frame for df in items], how=how),
)
Expand Down
5 changes: 3 additions & 2 deletions narwhals/stable/v1/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2069,12 +2069,13 @@ def concat(items: Iterable[FrameT], *, how: ConcatMethod = "vertical") -> FrameT

- vertical: Concatenate vertically. Column names must match.
- horizontal: Concatenate horizontally. If lengths don't match, then
missing rows are filled with null values.
missing rows are filled with null values. This is only supported
when all inputs are (eager) DataFrames.
- diagonal: Finds a union between the column schemas and fills missing column
values with null.

Returns:
A new DataFrame, Lazyframe resulting from the concatenation.
A new DataFrame or Lazyframe resulting from the concatenation.

Raises:
TypeError: The items to concatenate should either all be eager, or all lazy
Expand Down
14 changes: 7 additions & 7 deletions tests/frame/concat_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,20 +3,18 @@
import pytest

import narwhals.stable.v1 as nw
from narwhals.exceptions import InvalidOperationError
from tests.utils import Constructor
from tests.utils import ConstructorEager
from tests.utils import assert_equal_data


def test_concat_horizontal(
constructor: Constructor, request: pytest.FixtureRequest
) -> None:
if ("pyspark" in str(constructor)) or "duckdb" in str(constructor):
request.applymarker(pytest.mark.xfail)
def test_concat_horizontal(constructor_eager: ConstructorEager) -> None:
data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8.0, 9.0]}
df_left = nw.from_native(constructor(data)).lazy()
df_left = nw.from_native(constructor_eager(data), eager_only=True)

data_right = {"c": [6, 12, -1], "d": [0, -4, 2]}
df_right = nw.from_native(constructor(data_right)).lazy()
df_right = nw.from_native(constructor_eager(data_right), eager_only=True)

result = nw.concat([df_left, df_right], how="horizontal")
expected = {
Expand All @@ -30,6 +28,8 @@ def test_concat_horizontal(

with pytest.raises(ValueError, match="No items"):
nw.concat([])
with pytest.raises(InvalidOperationError):
nw.concat([df_left.lazy()], how="horizontal")


def test_concat_vertical(constructor: Constructor) -> None:
Expand Down
13 changes: 13 additions & 0 deletions tests/series_only/hist_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,11 @@
from tests.utils import ConstructorEager
from tests.utils import assert_equal_data

xfail_hist = pytest.mark.xfail(
reason="https://github.com/narwhals-dev/narwhals/issues/2348", strict=False
Comment on lines +16 to +17
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  • I used this on every test to mimic what skip was doing
  • No idea which one(s) was causing the failures

)


data = {
"int": [0, 1, 2, 3, 4, 5, 6],
}
Expand Down Expand Up @@ -76,6 +81,7 @@
]


@xfail_hist
@pytest.mark.parametrize("params", bins_and_expected)
@pytest.mark.parametrize("include_breakpoint", [True, False])
@pytest.mark.filterwarnings(
Expand Down Expand Up @@ -161,6 +167,7 @@ def test_hist_bin(
assert_equal_data(result, expected)


@xfail_hist
@pytest.mark.parametrize("params", counts_and_expected)
@pytest.mark.parametrize("include_breakpoint", [True, False])
@pytest.mark.filterwarnings(
Expand Down Expand Up @@ -232,6 +239,7 @@ def test_hist_count(
)


@xfail_hist
@pytest.mark.filterwarnings(
"ignore:`Series.hist` is being called from the stable API although considered an unstable feature."
)
Expand Down Expand Up @@ -268,6 +276,7 @@ def test_hist_count_no_spread(
assert_equal_data(result, expected)


@xfail_hist
@pytest.mark.filterwarnings(
"ignore:`Series.hist` is being called from the stable API although considered an unstable feature."
)
Expand All @@ -283,6 +292,7 @@ def test_hist_bin_and_bin_count() -> None:
s.hist(bins=[1, 3], bin_count=4)


@xfail_hist
@pytest.mark.filterwarnings(
"ignore:`Series.hist` is being called from the stable API although considered an unstable feature."
)
Expand Down Expand Up @@ -331,6 +341,7 @@ def test_hist_small_bins(
s["values"].hist(bins=[1, 3], bin_count=4)


@xfail_hist
@pytest.mark.filterwarnings(
"ignore:`Series.hist` is being called from the stable API although considered an unstable feature."
)
Expand Down Expand Up @@ -365,6 +376,7 @@ def test_hist_non_monotonic(constructor_eager: ConstructorEager) -> None:
st.floats(min_value=0.001, max_value=1_000, allow_nan=False), max_size=50
),
)
@xfail_hist
@pytest.mark.filterwarnings(
"ignore:`Series.hist` is being called from the stable API although considered an unstable feature.",
"ignore:invalid value encountered in cast:RuntimeWarning",
Expand Down Expand Up @@ -421,6 +433,7 @@ def test_hist_bin_hypotheis(
),
bin_count=st.integers(min_value=0, max_value=1_000),
)
@xfail_hist
@pytest.mark.skipif(
POLARS_VERSION < (1, 15),
reason="hist(bin_count=...) behavior significantly changed after this version",
Expand Down
Loading