Skip to content

Commit b1d40d2

Browse files
authored
chore!: use Python API more for DuckDB support (#2370)
1 parent b4982bc commit b1d40d2

File tree

5 files changed

+32
-18
lines changed

5 files changed

+32
-18
lines changed

.github/workflows/check_tpch_queries.yml

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -29,9 +29,8 @@ jobs:
2929
cache-dependency-glob: "pyproject.toml"
3030
- name: local-install
3131
run: |
32-
# once https://github.com/duckdb/duckdb/issues/16445
33-
# is addressed, try using `--pre`
3432
uv pip install -U -e ".[dask]" --group core-tests --system
33+
uv pip install -U --pre duckdb --system
3534
- name: generate-data
3635
run: cd tpch && python generate_data.py
3736
- name: tpch-tests

narwhals/_duckdb/dataframe.py

Lines changed: 19 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
11
from __future__ import annotations
22

3+
import contextlib
34
from functools import reduce
45
from operator import and_
56
from typing import TYPE_CHECKING
@@ -21,6 +22,7 @@
2122
from narwhals.exceptions import ColumnNotFoundError
2223
from narwhals.exceptions import InvalidOperationError
2324
from narwhals.typing import CompliantDataFrame
25+
from narwhals.typing import CompliantLazyFrame
2426
from narwhals.utils import Implementation
2527
from narwhals.utils import Version
2628
from narwhals.utils import generate_temporary_column_name
@@ -48,7 +50,8 @@
4850
from narwhals.typing import LazyUniqueKeepStrategy
4951
from narwhals.utils import _FullContext
5052

51-
from narwhals.typing import CompliantLazyFrame
53+
with contextlib.suppress(ImportError): # requires duckdb>=1.3.0
54+
from duckdb import SQLExpression # type: ignore[attr-defined, unused-ignore]
5255

5356

5457
class DuckDBLazyFrame(CompliantLazyFrame["DuckDBExpr", "duckdb.DuckDBPyRelation"]):
@@ -338,7 +341,7 @@ def join_asof(
338341
):
339342
select.append(f'rhs."{name}" as "{name}{suffix}"')
340343
elif right_on is None or name not in {right_on, *by_right}:
341-
select.append(f'"{name}"')
344+
select.append(str(col(name)))
342345
# Replace with Python API call once
343346
# https://github.com/duckdb/duckdb/discussions/16947 is addressed.
344347
query = f"""
@@ -359,23 +362,28 @@ def unique(
359362
self, subset: Sequence[str] | None, *, keep: LazyUniqueKeepStrategy
360363
) -> Self:
361364
if subset_ := subset if keep == "any" else (subset or self.columns):
365+
if self._backend_version < (1, 3):
366+
msg = (
367+
"At least version 1.3 of DuckDB is required for `unique` operation\n"
368+
"with `subset` specified."
369+
)
370+
raise NotImplementedError(msg)
362371
# Sanitise input
363372
if any(x not in self.columns for x in subset_):
364373
msg = f"Columns {set(subset_).difference(self.columns)} not found in {self.columns}."
365374
raise ColumnNotFoundError(msg)
366375
idx_name = generate_temporary_column_name(8, self.columns)
367376
count_name = generate_temporary_column_name(8, [*self.columns, idx_name])
368377
partition_by_sql = generate_partition_by_sql(*(subset_))
369-
rel = self.native # noqa: F841
370-
query = f"""
371-
select *,
372-
row_number() over ({partition_by_sql}) as "{idx_name}",
373-
count(*) over ({partition_by_sql}) as "{count_name}"
374-
from rel
375-
""" # noqa: S608
376378
name = count_name if keep == "none" else idx_name
379+
idx_expr = SQLExpression(
380+
f"{FunctionExpression('row_number')} over ({partition_by_sql})"
381+
).alias(idx_name)
382+
count_expr = SQLExpression(
383+
f"{FunctionExpression('count', StarExpression())} over ({partition_by_sql})"
384+
).alias(count_name)
377385
return self._with_native(
378-
duckdb.sql(query)
386+
self.native.select(StarExpression(), idx_expr, count_expr)
379387
.filter(col(name) == lit(1))
380388
.select(StarExpression(exclude=[count_name, idx_name]))
381389
)
@@ -465,7 +473,7 @@ def unpivot(
465473
msg = "`value_name` cannot be empty string for duckdb backend."
466474
raise NotImplementedError(msg)
467475

468-
unpivot_on = ", ".join(f'"{name}"' for name in on_)
476+
unpivot_on = ", ".join(str(col(name)) for name in on_)
469477
rel = self.native # noqa: F841
470478
# Replace with Python API once
471479
# https://github.com/duckdb/duckdb/discussions/16980 is addressed.

narwhals/_duckdb/expr.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -565,7 +565,7 @@ def func(window_inputs: WindowInputs) -> duckdb.Expression:
565565
)
566566
else:
567567
partition_by_sql = f"partition by {window_inputs.expr}"
568-
sql = f"row_number() over({partition_by_sql} {order_by_sql})"
568+
sql = f"{FunctionExpression('row_number')} over({partition_by_sql} {order_by_sql})"
569569
return SQLExpression(sql) == lit(1) # type: ignore[no-any-return, unused-ignore]
570570

571571
return self._with_window_function(func)
@@ -580,7 +580,7 @@ def func(window_inputs: WindowInputs) -> duckdb.Expression:
580580
)
581581
else:
582582
partition_by_sql = f"partition by {window_inputs.expr}"
583-
sql = f"row_number() over({partition_by_sql} {order_by_sql})"
583+
sql = f"{FunctionExpression('row_number')} over({partition_by_sql} {order_by_sql})"
584584
return SQLExpression(sql) == lit(1) # type: ignore[no-any-return, unused-ignore]
585585

586586
return self._with_window_function(func)

narwhals/_duckdb/utils.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -225,15 +225,15 @@ def narwhals_to_native_dtype(dtype: DType | type[DType], version: Version) -> st
225225
def generate_partition_by_sql(*partition_by: str) -> str:
226226
if not partition_by:
227227
return ""
228-
by_sql = ", ".join([f'"{x}"' for x in partition_by])
228+
by_sql = ", ".join([f"{col(x)}" for x in partition_by])
229229
return f"partition by {by_sql}"
230230

231231

232232
def generate_order_by_sql(*order_by: str, ascending: bool) -> str:
233233
if ascending:
234-
by_sql = ", ".join([f'"{x}" asc nulls first' for x in order_by])
234+
by_sql = ", ".join([f"{col(x)} asc nulls first" for x in order_by])
235235
else:
236-
by_sql = ", ".join([f'"{x}" desc nulls last' for x in order_by])
236+
by_sql = ", ".join([f"{col(x)} desc nulls last" for x in order_by])
237237
return f"order by {by_sql}"
238238

239239

tests/frame/unique_test.py

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,7 @@
88
# becomes LazyFrame instead of DataFrame
99
import narwhals as nw
1010
from narwhals.exceptions import ColumnNotFoundError
11+
from tests.utils import DUCKDB_VERSION
1112
from tests.utils import Constructor
1213
from tests.utils import ConstructorEager
1314
from tests.utils import assert_equal_data
@@ -36,6 +37,8 @@ def test_unique_eager(
3637

3738

3839
def test_unique_invalid_subset(constructor: Constructor) -> None:
40+
if "duckdb" in str(constructor) and DUCKDB_VERSION < (1, 3):
41+
pytest.skip()
3942
df_raw = constructor(data)
4043
df = nw.from_native(df_raw)
4144
with pytest.raises(ColumnNotFoundError):
@@ -56,6 +59,8 @@ def test_unique(
5659
keep: Literal["any", "none"],
5760
expected: dict[str, list[float]],
5861
) -> None:
62+
if "duckdb" in str(constructor) and DUCKDB_VERSION < (1, 3):
63+
pytest.skip()
5964
df_raw = constructor(data)
6065
df = nw.from_native(df_raw)
6166
result = df.unique(subset, keep=keep).sort("z")
@@ -76,6 +81,8 @@ def test_unique_full_subset(
7681
keep: Literal["any", "none"],
7782
expected: dict[str, list[float]],
7883
) -> None:
84+
if "duckdb" in str(constructor) and DUCKDB_VERSION < (1, 3):
85+
pytest.skip()
7986
data = {"a": [1, 1, 1, 2], "b": [3, 3, 4, 4]}
8087
df_raw = constructor(data)
8188
df = nw.from_native(df_raw)

0 commit comments

Comments
 (0)