Skip to content

Commit b6ff4d5

Browse files
authored
fix: duckdb LazyFrame.unique was raising when column name was "group" (#3070)
1 parent d318ad3 commit b6ff4d5

File tree

3 files changed

+21
-6
lines changed

3 files changed

+21
-6
lines changed

narwhals/_duckdb/dataframe.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
catch_duckdb_exception,
1414
col,
1515
evaluate_exprs,
16+
join_column_names,
1617
lit,
1718
native_to_narwhals_dtype,
1819
window_expression,
@@ -396,7 +397,7 @@ def unique(
396397
.filter(col(name) == lit(1))
397398
.select(StarExpression(exclude=[count_name, idx_name]))
398399
)
399-
return self._with_native(self.native.unique(", ".join(self.columns)))
400+
return self._with_native(self.native.unique(join_column_names(*self.columns)))
400401

401402
def sort(self, *by: str, descending: bool | Sequence[bool], nulls_last: bool) -> Self:
402403
if isinstance(descending, bool):
@@ -499,16 +500,16 @@ def unpivot(
499500
msg = "`value_name` cannot be empty string for duckdb backend."
500501
raise NotImplementedError(msg)
501502

502-
unpivot_on = ", ".join(str(col(name)) for name in on_)
503+
unpivot_on = join_column_names(*on_)
503504
rel = self.native # noqa: F841
504505
# Replace with Python API once
505506
# https://github.com/duckdb/duckdb/discussions/16980 is addressed.
506507
query = f"""
507508
unpivot rel
508509
on {unpivot_on}
509510
into
510-
name "{variable_name}"
511-
value "{value_name}"
511+
name {col(variable_name)}
512+
value {col(value_name)}
512513
"""
513514
return self._with_native(
514515
duckdb.sql(query).select(*[*index_, variable_name, value_name])

narwhals/_duckdb/utils.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -295,6 +295,10 @@ def generate_partition_by_sql(*partition_by: str | Expression) -> str:
295295
return f"partition by {by_sql}"
296296

297297

298+
def join_column_names(*names: str) -> str:
    """Build a comma-separated SQL fragment from column names.

    Each name is wrapped with `col` and converted with `str` before joining,
    instead of being inserted into the fragment as a raw string.
    NOTE(review): presumably `str(col(name))` yields a quoted identifier,
    which is what lets names such as "group" through -- confirm.

    Args:
        *names: Column names to render.

    Returns:
        The rendered names joined by ", " (an empty string when no names
        are given).
    """
    rendered = [str(col(name)) for name in names]
    return ", ".join(rendered)
300+
301+
298302
def generate_order_by_sql(
299303
*order_by: str | Expression, descending: Sequence[bool], nulls_last: Sequence[bool]
300304
) -> str:

tests/frame/unique_test.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,6 @@
44

55
import pytest
66

7-
# We use nw instead of nw.stable.v1 to ensure that DuckDBPyRelation
8-
# becomes LazyFrame instead of DataFrame
97
import narwhals as nw
108
from narwhals.exceptions import ColumnNotFoundError
119
from tests.utils import DUCKDB_VERSION, Constructor, ConstructorEager, assert_equal_data
@@ -100,3 +98,15 @@ def test_unique_none(constructor: Constructor) -> None:
10098
if not isinstance(df, nw.LazyFrame):
10199
result = df.unique(maintain_order=True)
102100
assert_equal_data(result, data)
101+
102+
103+
def test_unique_3069(constructor: Constructor, request: pytest.FixtureRequest) -> None:
    """Check that `unique` works on a selection whose only column is named "group"."""
    if "ibis" in str(constructor):
        # https://github.com/ibis-project/ibis/issues/11591
        request.applymarker(pytest.mark.xfail)
    column = "group"
    data = {"name": ["a", "b", "c"], "group": ["d", "e", "f"], "value": [1, 2, 3]}
    frame = nw.from_native(constructor(data))
    # Select just the one column first so `unique` runs over "group" alone.
    result = frame.select(nw.col(column)).unique().sort(column)
    assert_equal_data(result, {"group": ["d", "e", "f"]})

0 commit comments

Comments
 (0)