Skip to content

Commit 13a1167

Browse files
authored
chore: pandas-nightly and duckdb-nightly fixes (#3158)
1 parent dded9ca commit 13a1167

29 files changed

+192
-56
lines changed

.github/workflows/extremes.yml

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ jobs:
2929
cache-suffix: min-versions-${{ matrix.python-version }}
3030
cache-dependency-glob: "pyproject.toml"
3131
- name: install-minimum-versions
32-
run: uv pip install pipdeptree tox virtualenv setuptools pandas==1.1.3 polars==0.20.4 numpy==1.19.3 pyarrow==13.0.0 "pyarrow-stubs<17" scipy==1.6.0 scikit-learn==1.1.0 duckdb==1.0 tzdata --system
32+
run: uv pip install pipdeptree tox virtualenv setuptools pandas==1.1.3 polars==0.20.4 numpy==1.19.3 pyarrow==13.0.0 "pyarrow-stubs<17" scipy==1.6.0 scikit-learn==1.1.0 duckdb==1.1 tzdata --system
3333
- name: install-reqs
3434
run: |
3535
uv pip install -e . --group tests --system
@@ -44,9 +44,9 @@ jobs:
4444
echo "$DEPS" | grep 'pyarrow==13.0.0'
4545
echo "$DEPS" | grep 'scipy==1.6.0'
4646
echo "$DEPS" | grep 'scikit-learn==1.1.0'
47-
echo "$DEPS" | grep 'duckdb==1.0'
47+
echo "$DEPS" | grep 'duckdb==1.1'
4848
- name: Run pytest
49-
run: pytest tests --cov=narwhals --cov=tests --cov-fail-under=50 --runslow --constructors=pandas,pyarrow,polars[eager],polars[lazy]
49+
run: pytest tests --cov=narwhals --cov=tests --cov-fail-under=50 --runslow --constructors=pandas,pyarrow,polars[eager],polars[lazy],duckdb
5050

5151
pretty_old_versions:
5252
strategy:
@@ -66,7 +66,7 @@ jobs:
6666
cache-suffix: pretty-old-versions-${{ matrix.python-version }}
6767
cache-dependency-glob: "pyproject.toml"
6868
- name: install-pretty-old-versions
69-
run: uv pip install pipdeptree tox virtualenv setuptools pandas==1.1.5 polars==0.20.4 numpy==1.19.3 pyarrow==14.0.0 "pyarrow-stubs<17" scipy==1.6.0 scikit-learn==1.1.0 duckdb==1.0 tzdata --system
69+
run: uv pip install pipdeptree tox virtualenv setuptools pandas==1.1.5 polars==0.20.4 numpy==1.19.3 pyarrow==14.0.0 "pyarrow-stubs<17" scipy==1.6.0 scikit-learn==1.1.0 duckdb==1.2 tzdata --system
7070
- name: install-reqs
7171
run: uv pip install -e . --group tests --system
7272
- name: show-deps
@@ -82,9 +82,9 @@ jobs:
8282
echo "$DEPS" | grep 'pyarrow==14.0.0'
8383
echo "$DEPS" | grep 'scipy==1.6.0'
8484
echo "$DEPS" | grep 'scikit-learn==1.1.0'
85-
echo "$DEPS" | grep 'duckdb==1.0'
85+
echo "$DEPS" | grep 'duckdb==1.2'
8686
- name: Run pytest
87-
run: pytest tests --cov=narwhals --cov=tests --cov-fail-under=50 --runslow --constructors=pandas,pyarrow,polars[eager],polars[lazy]
87+
run: pytest tests --cov=narwhals --cov=tests --cov-fail-under=50 --runslow --constructors=pandas,pyarrow,polars[eager],polars[lazy],duckdb
8888

8989
not_so_old_versions:
9090
strategy:
@@ -104,7 +104,7 @@ jobs:
104104
cache-suffix: not-so-old-versions-${{ matrix.python-version }}
105105
cache-dependency-glob: "pyproject.toml"
106106
- name: install-not-so-old-versions
107-
run: uv pip install tox virtualenv setuptools pandas==2.0.3 polars==0.20.8 numpy==1.24.4 pyarrow==15.0.0 "pyarrow-stubs<17" scipy==1.8.0 scikit-learn==1.3.0 duckdb==1.0 dask[dataframe]==2024.10 tzdata --system
107+
run: uv pip install tox virtualenv setuptools pandas==2.0.3 polars==0.20.8 numpy==1.24.4 pyarrow==15.0.0 "pyarrow-stubs<17" scipy==1.8.0 scikit-learn==1.3.0 duckdb==1.3 dask[dataframe]==2024.10 tzdata --system
108108
- name: install-reqs
109109
run: uv pip install -e . --group tests --system
110110
- name: show-deps
@@ -119,9 +119,9 @@ jobs:
119119
echo "$DEPS" | grep 'scipy==1.8.0'
120120
echo "$DEPS" | grep 'scikit-learn==1.3.0'
121121
echo "$DEPS" | grep 'dask==2024.10'
122-
echo "$DEPS" | grep 'duckdb==1.0'
122+
echo "$DEPS" | grep 'duckdb==1.3'
123123
- name: Run pytest
124-
run: pytest tests --cov=narwhals --cov=tests --cov-fail-under=50 --runslow --constructors=pandas,pyarrow,polars[eager],polars[lazy],dask
124+
run: pytest tests --cov=narwhals --cov=tests --cov-fail-under=50 --runslow --constructors=pandas,pyarrow,polars[eager],polars[lazy],dask,duckdb
125125

126126
nightlies:
127127
strategy:

narwhals/_duckdb/dataframe.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,14 +41,14 @@
4141
import pandas as pd
4242
import pyarrow as pa
4343
from duckdb import Expression
44-
from duckdb.typing import DuckDBPyType
4544
from typing_extensions import Self, TypeIs
4645

4746
from narwhals._compliant.typing import CompliantDataFrameAny
4847
from narwhals._duckdb.expr import DuckDBExpr
4948
from narwhals._duckdb.group_by import DuckDBGroupBy
5049
from narwhals._duckdb.namespace import DuckDBNamespace
5150
from narwhals._duckdb.series import DuckDBInterchangeSeries
51+
from narwhals._duckdb.utils import duckdb_dtypes
5252
from narwhals._typing import _EagerAllowedImpl
5353
from narwhals._utils import _LimitedContext
5454
from narwhals.dataframe import LazyFrame
@@ -76,7 +76,7 @@ def __init__(
7676
) -> None:
7777
self._native_frame: duckdb.DuckDBPyRelation = df
7878
self._version = version
79-
self._cached_native_schema: dict[str, DuckDBPyType] | None = None
79+
self._cached_native_schema: dict[str, duckdb_dtypes.DuckDBPyType] | None = None
8080
self._cached_columns: list[str] | None = None
8181
if validate_backend_version:
8282
self._validate_backend_version()

narwhals/_duckdb/expr_list.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from narwhals._compliant import LazyExprNamespace
66
from narwhals._compliant.any_namespace import ListNamespace
77
from narwhals._duckdb.utils import F, lit, when
8+
from narwhals._utils import requires
89

910
if TYPE_CHECKING:
1011
from duckdb import Expression
@@ -19,6 +20,7 @@ class DuckDBExprListNamespace(
1920
def len(self) -> DuckDBExpr:
2021
return self.compliant._with_elementwise(lambda expr: F("len", expr))
2122

23+
@requires.backend_version((1, 3)) # bugged before 1.3
2224
def unique(self) -> DuckDBExpr:
2325
def func(expr: Expression) -> Expression:
2426
expr_distinct = F("list_distinct", expr)

narwhals/_duckdb/namespace.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77

88
import duckdb
99
from duckdb import CoalesceOperator, Expression
10-
from duckdb.typing import BIGINT, VARCHAR
1110

1211
from narwhals._duckdb.dataframe import DuckDBLazyFrame
1312
from narwhals._duckdb.expr import DuckDBExpr
@@ -16,6 +15,7 @@
1615
DeferredTimeZone,
1716
F,
1817
concat_str,
18+
duckdb_dtypes,
1919
function,
2020
lit,
2121
narwhals_to_native_dtype,
@@ -108,9 +108,9 @@ def func(df: DuckDBLazyFrame) -> list[Expression]:
108108
cols_separated = [
109109
y
110110
for x in [
111-
(col.cast(VARCHAR),)
111+
(col.cast(duckdb_dtypes.VARCHAR),)
112112
if i == len(cols) - 1
113-
else (col.cast(VARCHAR), lit(separator))
113+
else (col.cast(duckdb_dtypes.VARCHAR), lit(separator))
114114
for i, col in enumerate(cols)
115115
]
116116
for y in x
@@ -130,7 +130,9 @@ def func(cols: Iterable[Expression]) -> Expression:
130130
cols = list(cols)
131131
return reduce(
132132
operator.add, (CoalesceOperator(col, lit(0)) for col in cols)
133-
) / reduce(operator.add, (col.isnotnull().cast(BIGINT) for col in cols))
133+
) / reduce(
134+
operator.add, (col.isnotnull().cast(duckdb_dtypes.BIGINT) for col in cols)
135+
)
134136

135137
return self._expr._from_elementwise_horizontal_op(func, *exprs)
136138

narwhals/_duckdb/utils.py

Lines changed: 43 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,13 @@
44
from typing import TYPE_CHECKING
55

66
import duckdb
7-
import duckdb.typing as duckdb_dtypes
87
from duckdb import Expression
9-
from duckdb.typing import DuckDBPyType
8+
9+
try:
10+
import duckdb.sqltypes as duckdb_dtypes
11+
except ModuleNotFoundError:
12+
# DuckDB pre 1.3
13+
import duckdb.typing as duckdb_dtypes
1014

1115
from narwhals._utils import Version, isinstance_or_issubclass, zip_strict
1216
from narwhals.exceptions import ColumnNotFoundError
@@ -131,7 +135,9 @@ def time_zone(self) -> str:
131135

132136

133137
def native_to_narwhals_dtype(
134-
duckdb_dtype: DuckDBPyType, version: Version, deferred_time_zone: DeferredTimeZone
138+
duckdb_dtype: duckdb_dtypes.DuckDBPyType,
139+
version: Version,
140+
deferred_time_zone: DeferredTimeZone,
135141
) -> DType:
136142
duckdb_dtype_id = duckdb_dtype.id
137143
dtypes = version.dtypes
@@ -216,7 +222,7 @@ def _non_nested_native_to_narwhals_dtype(duckdb_dtype_id: str, version: Version)
216222

217223

218224
dtypes = Version.MAIN.dtypes
219-
NW_TO_DUCKDB_DTYPES: Mapping[type[DType], DuckDBPyType] = {
225+
NW_TO_DUCKDB_DTYPES: Mapping[type[DType], duckdb_dtypes.DuckDBPyType] = {
220226
dtypes.Float64: duckdb_dtypes.DOUBLE,
221227
dtypes.Float32: duckdb_dtypes.FLOAT,
222228
dtypes.Binary: duckdb_dtypes.BLOB,
@@ -228,14 +234,14 @@ def _non_nested_native_to_narwhals_dtype(duckdb_dtype_id: str, version: Version)
228234
dtypes.Int16: duckdb_dtypes.SMALLINT,
229235
dtypes.Int32: duckdb_dtypes.INTEGER,
230236
dtypes.Int64: duckdb_dtypes.BIGINT,
231-
dtypes.Int128: DuckDBPyType("INT128"),
237+
dtypes.Int128: duckdb_dtypes.HUGEINT,
232238
dtypes.UInt8: duckdb_dtypes.UTINYINT,
233239
dtypes.UInt16: duckdb_dtypes.USMALLINT,
234240
dtypes.UInt32: duckdb_dtypes.UINTEGER,
235241
dtypes.UInt64: duckdb_dtypes.UBIGINT,
236-
dtypes.UInt128: DuckDBPyType("UINT128"),
242+
dtypes.UInt128: duckdb_dtypes.UHUGEINT,
237243
}
238-
TIME_UNIT_TO_TIMESTAMP: Mapping[TimeUnit, DuckDBPyType] = {
244+
TIME_UNIT_TO_TIMESTAMP: Mapping[TimeUnit, duckdb_dtypes.DuckDBPyType] = {
239245
"s": duckdb_dtypes.TIMESTAMP_S,
240246
"ms": duckdb_dtypes.TIMESTAMP_MS,
241247
"us": duckdb_dtypes.TIMESTAMP,
@@ -246,7 +252,7 @@ def _non_nested_native_to_narwhals_dtype(duckdb_dtype_id: str, version: Version)
246252

247253
def narwhals_to_native_dtype( # noqa: PLR0912, C901
248254
dtype: IntoDType, version: Version, deferred_time_zone: DeferredTimeZone
249-
) -> DuckDBPyType:
255+
) -> duckdb_dtypes.DuckDBPyType:
250256
dtypes = version.dtypes
251257
base_type = dtype.base_type()
252258
if duckdb_type := NW_TO_DUCKDB_DTYPES.get(base_type):
@@ -256,7 +262,7 @@ def narwhals_to_native_dtype( # noqa: PLR0912, C901
256262
msg = "Converting to Enum is not supported in narwhals.stable.v1"
257263
raise NotImplementedError(msg)
258264
if isinstance(dtype, dtypes.Enum):
259-
return DuckDBPyType(f"ENUM{dtype.categories!r}")
265+
return duckdb_dtypes.DuckDBPyType(f"ENUM{dtype.categories!r}")
260266
msg = "Can not cast / initialize Enum without categories present"
261267
raise ValueError(msg)
262268
if isinstance_or_issubclass(dtype, dtypes.Datetime):
@@ -291,7 +297,7 @@ def narwhals_to_native_dtype( # noqa: PLR0912, C901
291297
nw_inner = nw_inner.inner
292298
duckdb_inner = narwhals_to_native_dtype(nw_inner, version, deferred_time_zone)
293299
duckdb_shape_fmt = "".join(f"[{item}]" for item in dtype.shape)
294-
return DuckDBPyType(f"{duckdb_inner}{duckdb_shape_fmt}")
300+
return duckdb_dtypes.DuckDBPyType(f"{duckdb_inner}{duckdb_shape_fmt}")
295301
if issubclass(base_type, UNSUPPORTED_DTYPES):
296302
msg = f"Converting to {base_type.__name__} dtype is not supported for DuckDB."
297303
raise NotImplementedError(msg)
@@ -378,19 +384,39 @@ def function(name: str, *args: Expression) -> Expression:
378384
if name == "isnull":
379385
return args[0].isnull()
380386
if name == "count_distinct":
381-
try:
382-
from duckdb import SQLExpression
383-
except ModuleNotFoundError as exc: # pragma: no cover
384-
msg = f"DuckDB>=1.3.0 is required for this operation. Found: DuckDB {duckdb.__version__}"
385-
raise NotImplementedError(msg) from exc
386-
return SQLExpression(f"count(distinct {args[0]})")
387+
return sql_expression(f"count(distinct {args[0]})")
387388
return F(name, *args)
388389

389390

390391
def sql_expression(expr: str) -> Expression:
391392
try:
392393
from duckdb import SQLExpression
393-
except ModuleNotFoundError as exc: # pragma: no cover
394+
except ImportError as exc: # pragma: no cover
394395
msg = f"DuckDB>=1.3.0 is required for this operation. Found: DuckDB {duckdb.__version__}"
395396
raise NotImplementedError(msg) from exc
396397
return SQLExpression(expr)
398+
399+
400+
__all__ = [
401+
"UNITS_DICT",
402+
"DeferredTimeZone",
403+
"F",
404+
"catch_duckdb_exception",
405+
"col",
406+
"concat_str",
407+
"duckdb_dtypes",
408+
"evaluate_exprs",
409+
"fetch_rel_time_zone",
410+
"function",
411+
"generate_order_by_sql",
412+
"generate_partition_by_sql",
413+
"join_column_names",
414+
"lambda_expr",
415+
"lit",
416+
"narwhals_to_native_dtype",
417+
"native_to_narwhals_dtype",
418+
"parse_into_expression",
419+
"sql_expression",
420+
"when",
421+
"window_expression",
422+
]

narwhals/_utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -604,7 +604,7 @@ def _backend_version(self) -> tuple[int, ...]:
604604
Implementation.PYSPARK_CONNECT: (3, 5),
605605
Implementation.POLARS: (0, 20, 4),
606606
Implementation.DASK: (2024, 8),
607-
Implementation.DUCKDB: (1,),
607+
Implementation.DUCKDB: (1, 1),
608608
Implementation.IBIS: (6,),
609609
Implementation.SQLFRAME: (3, 22, 0),
610610
}

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ pyspark = ["pyspark>=3.5.0"]
4242
pyspark-connect = ["pyspark[connect]>=3.5.0"]
4343
polars = ["polars>=0.20.4"]
4444
dask = ["dask[dataframe]>=2024.8"]
45-
duckdb = ["duckdb>=1.0"]
45+
duckdb = ["duckdb>=1.1"]
4646
ibis = ["ibis-framework>=6.0.0", "rich", "packaging", "pyarrow_hotfix"]
4747
sqlframe = ["sqlframe>=3.22.0,!=3.39.3"]
4848

tests/dtypes_test.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -517,6 +517,7 @@ def test_datetime_w_tz_duckdb() -> None:
517517
assert result["b"] == nw.List(nw.List(nw.Datetime("us", "Asia/Kathmandu")))
518518

519519

520+
@pytest.mark.slow
520521
def test_datetime_w_tz_pyspark() -> None: # pragma: no cover
521522
pytest.importorskip("pyspark")
522523
session = pyspark_session()

tests/expr_and_series/dt/convert_time_zone_test.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,7 @@ def test_convert_time_zone_to_connection_tz_duckdb() -> None:
154154
)
155155

156156

157+
@pytest.mark.slow
157158
def test_convert_time_zone_to_connection_tz_pyspark() -> None: # pragma: no cover
158159
pytest.importorskip("pyspark")
159160

tests/expr_and_series/dt/replace_time_zone_test.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,7 @@ def test_replace_time_zone_to_connection_tz_duckdb() -> None:
142142
)
143143

144144

145+
@pytest.mark.slow
145146
def test_replace_time_zone_to_connection_tz_pyspark() -> None: # pragma: no cover
146147
pytest.importorskip("pyspark")
147148

0 commit comments

Comments
 (0)