Skip to content
Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions .github/workflows/extremes.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ jobs:
cache-suffix: min-versions-${{ matrix.python-version }}
cache-dependency-glob: "pyproject.toml"
- name: install-minimum-versions
run: uv pip install pipdeptree tox virtualenv setuptools pandas==1.1.3 polars==0.20.4 numpy==1.19.3 pyarrow==13.0.0 "pyarrow-stubs<17" scipy==1.6.0 scikit-learn==1.1.0 duckdb==1.0 tzdata --system
run: uv pip install pipdeptree tox virtualenv setuptools pandas==1.1.3 polars==0.20.4 numpy==1.19.3 pyarrow==13.0.0 "pyarrow-stubs<17" scipy==1.6.0 scikit-learn==1.1.0 duckdb==1.1 tzdata --system
- name: install-reqs
run: |
uv pip install -e . --group tests --system
Expand All @@ -44,9 +44,9 @@ jobs:
echo "$DEPS" | grep 'pyarrow==13.0.0'
echo "$DEPS" | grep 'scipy==1.6.0'
echo "$DEPS" | grep 'scikit-learn==1.1.0'
echo "$DEPS" | grep 'duckdb==1.0'
echo "$DEPS" | grep 'duckdb==1.1'
- name: Run pytest
run: pytest tests --cov=narwhals --cov=tests --cov-fail-under=50 --runslow --constructors=pandas,pyarrow,polars[eager],polars[lazy]
run: pytest tests --cov=narwhals --cov=tests --cov-fail-under=50 --runslow --constructors=pandas,pyarrow,polars[eager],polars[lazy],duckdb
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

😭


pretty_old_versions:
strategy:
Expand Down
4 changes: 2 additions & 2 deletions narwhals/_duckdb/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,14 +41,14 @@
import pandas as pd
import pyarrow as pa
from duckdb import Expression
from duckdb.typing import DuckDBPyType
from typing_extensions import Self, TypeIs

from narwhals._compliant.typing import CompliantDataFrameAny
from narwhals._duckdb.expr import DuckDBExpr
from narwhals._duckdb.group_by import DuckDBGroupBy
from narwhals._duckdb.namespace import DuckDBNamespace
from narwhals._duckdb.series import DuckDBInterchangeSeries
from narwhals._duckdb.utils import duckdb_dtypes
from narwhals._typing import _EagerAllowedImpl
from narwhals._utils import _LimitedContext
from narwhals.dataframe import LazyFrame
Expand Down Expand Up @@ -76,7 +76,7 @@ def __init__(
) -> None:
self._native_frame: duckdb.DuckDBPyRelation = df
self._version = version
self._cached_native_schema: dict[str, DuckDBPyType] | None = None
self._cached_native_schema: dict[str, duckdb_dtypes.DuckDBPyType] | None = None
self._cached_columns: list[str] | None = None
if validate_backend_version:
self._validate_backend_version()
Expand Down
2 changes: 2 additions & 0 deletions narwhals/_duckdb/expr_list.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from narwhals._compliant import LazyExprNamespace
from narwhals._compliant.any_namespace import ListNamespace
from narwhals._duckdb.utils import F, lit, when
from narwhals._utils import requires

if TYPE_CHECKING:
from duckdb import Expression
Expand All @@ -19,6 +20,7 @@ class DuckDBExprListNamespace(
def len(self) -> DuckDBExpr:
    """Return an expression that applies DuckDB's `len` function element-wise."""

    def _list_len(expr: Expression) -> Expression:
        # Same native call the original lambda made: F("len", <expr>).
        return F("len", expr)

    return self.compliant._with_elementwise(_list_len)

@requires.backend_version((1, 3)) # bugged before 1.3
def unique(self) -> DuckDBExpr:
def func(expr: Expression) -> Expression:
expr_distinct = F("list_distinct", expr)
Expand Down
10 changes: 6 additions & 4 deletions narwhals/_duckdb/namespace.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@

import duckdb
from duckdb import CoalesceOperator, Expression
from duckdb.typing import BIGINT, VARCHAR

from narwhals._duckdb.dataframe import DuckDBLazyFrame
from narwhals._duckdb.expr import DuckDBExpr
Expand All @@ -16,6 +15,7 @@
DeferredTimeZone,
F,
concat_str,
duckdb_dtypes,
function,
lit,
narwhals_to_native_dtype,
Expand Down Expand Up @@ -108,9 +108,9 @@ def func(df: DuckDBLazyFrame) -> list[Expression]:
cols_separated = [
y
for x in [
(col.cast(VARCHAR),)
(col.cast(duckdb_dtypes.VARCHAR),)
if i == len(cols) - 1
else (col.cast(VARCHAR), lit(separator))
else (col.cast(duckdb_dtypes.VARCHAR), lit(separator))
for i, col in enumerate(cols)
]
for y in x
Expand All @@ -130,7 +130,9 @@ def func(cols: Iterable[Expression]) -> Expression:
cols = list(cols)
return reduce(
operator.add, (CoalesceOperator(col, lit(0)) for col in cols)
) / reduce(operator.add, (col.isnotnull().cast(BIGINT) for col in cols))
) / reduce(
operator.add, (col.isnotnull().cast(duckdb_dtypes.BIGINT) for col in cols)
)

return self._expr._from_elementwise_horizontal_op(func, *exprs)

Expand Down
60 changes: 43 additions & 17 deletions narwhals/_duckdb/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,13 @@
from typing import TYPE_CHECKING

import duckdb
import duckdb.typing as duckdb_dtypes
from duckdb import Expression
from duckdb.typing import DuckDBPyType

try:
import duckdb.sqltypes as duckdb_dtypes
except ModuleNotFoundError:
# DuckDB pre 1.3
import duckdb.typing as duckdb_dtypes

from narwhals._utils import Version, isinstance_or_issubclass, zip_strict
from narwhals.exceptions import ColumnNotFoundError
Expand Down Expand Up @@ -131,7 +135,9 @@ def time_zone(self) -> str:


def native_to_narwhals_dtype(
duckdb_dtype: DuckDBPyType, version: Version, deferred_time_zone: DeferredTimeZone
duckdb_dtype: duckdb_dtypes.DuckDBPyType,
version: Version,
deferred_time_zone: DeferredTimeZone,
) -> DType:
duckdb_dtype_id = duckdb_dtype.id
dtypes = version.dtypes
Expand Down Expand Up @@ -216,7 +222,7 @@ def _non_nested_native_to_narwhals_dtype(duckdb_dtype_id: str, version: Version)


dtypes = Version.MAIN.dtypes
NW_TO_DUCKDB_DTYPES: Mapping[type[DType], DuckDBPyType] = {
NW_TO_DUCKDB_DTYPES: Mapping[type[DType], duckdb_dtypes.DuckDBPyType] = {
dtypes.Float64: duckdb_dtypes.DOUBLE,
dtypes.Float32: duckdb_dtypes.FLOAT,
dtypes.Binary: duckdb_dtypes.BLOB,
Expand All @@ -228,14 +234,14 @@ def _non_nested_native_to_narwhals_dtype(duckdb_dtype_id: str, version: Version)
dtypes.Int16: duckdb_dtypes.SMALLINT,
dtypes.Int32: duckdb_dtypes.INTEGER,
dtypes.Int64: duckdb_dtypes.BIGINT,
dtypes.Int128: DuckDBPyType("INT128"),
dtypes.Int128: duckdb_dtypes.HUGEINT,
dtypes.UInt8: duckdb_dtypes.UTINYINT,
dtypes.UInt16: duckdb_dtypes.USMALLINT,
dtypes.UInt32: duckdb_dtypes.UINTEGER,
dtypes.UInt64: duckdb_dtypes.UBIGINT,
dtypes.UInt128: DuckDBPyType("UINT128"),
dtypes.UInt128: duckdb_dtypes.UHUGEINT,
}
TIME_UNIT_TO_TIMESTAMP: Mapping[TimeUnit, DuckDBPyType] = {
TIME_UNIT_TO_TIMESTAMP: Mapping[TimeUnit, duckdb_dtypes.DuckDBPyType] = {
"s": duckdb_dtypes.TIMESTAMP_S,
"ms": duckdb_dtypes.TIMESTAMP_MS,
"us": duckdb_dtypes.TIMESTAMP,
Expand All @@ -246,7 +252,7 @@ def _non_nested_native_to_narwhals_dtype(duckdb_dtype_id: str, version: Version)

def narwhals_to_native_dtype( # noqa: PLR0912, C901
dtype: IntoDType, version: Version, deferred_time_zone: DeferredTimeZone
) -> DuckDBPyType:
) -> duckdb_dtypes.DuckDBPyType:
dtypes = version.dtypes
base_type = dtype.base_type()
if duckdb_type := NW_TO_DUCKDB_DTYPES.get(base_type):
Expand All @@ -256,7 +262,7 @@ def narwhals_to_native_dtype( # noqa: PLR0912, C901
msg = "Converting to Enum is not supported in narwhals.stable.v1"
raise NotImplementedError(msg)
if isinstance(dtype, dtypes.Enum):
return DuckDBPyType(f"ENUM{dtype.categories!r}")
return duckdb_dtypes.DuckDBPyType(f"ENUM{dtype.categories!r}")
msg = "Can not cast / initialize Enum without categories present"
raise ValueError(msg)
if isinstance_or_issubclass(dtype, dtypes.Datetime):
Expand Down Expand Up @@ -291,7 +297,7 @@ def narwhals_to_native_dtype( # noqa: PLR0912, C901
nw_inner = nw_inner.inner
duckdb_inner = narwhals_to_native_dtype(nw_inner, version, deferred_time_zone)
duckdb_shape_fmt = "".join(f"[{item}]" for item in dtype.shape)
return DuckDBPyType(f"{duckdb_inner}{duckdb_shape_fmt}")
return duckdb_dtypes.DuckDBPyType(f"{duckdb_inner}{duckdb_shape_fmt}")
if issubclass(base_type, UNSUPPORTED_DTYPES):
msg = f"Converting to {base_type.__name__} dtype is not supported for DuckDB."
raise NotImplementedError(msg)
Expand Down Expand Up @@ -378,19 +384,39 @@ def function(name: str, *args: Expression) -> Expression:
if name == "isnull":
return args[0].isnull()
if name == "count_distinct":
try:
from duckdb import SQLExpression
except ModuleNotFoundError as exc: # pragma: no cover
msg = f"DuckDB>=1.3.0 is required for this operation. Found: DuckDB {duckdb.__version__}"
raise NotImplementedError(msg) from exc
return SQLExpression(f"count(distinct {args[0]})")
return sql_expression(f"count(distinct {args[0]})")
return F(name, *args)


def sql_expression(expr: str) -> Expression:
    """Build a DuckDB expression from a raw SQL snippet.

    Args:
        expr: SQL text passed unchanged to `duckdb.SQLExpression`.

    Returns:
        The object returned by `SQLExpression(expr)`.

    Raises:
        NotImplementedError: If the installed DuckDB does not expose
            `SQLExpression`.
    """
    try:
        from duckdb import SQLExpression
    except ImportError as exc:  # pragma: no cover
        # `from pkg import name` raises plain ImportError when `pkg` imports
        # fine but lacks `name`. ModuleNotFoundError is the subclass raised
        # only when the module itself is missing, so catching it alone would
        # let the error escape on an older-but-installed DuckDB.
        msg = f"DuckDB>=1.3.0 is required for this operation. Found: DuckDB {duckdb.__version__}"
        raise NotImplementedError(msg) from exc
    return SQLExpression(expr)


# Names exported by this module.
# `duckdb_dtypes` is listed on purpose: it is the alias bound by this module's
# try/except import (`duckdb.sqltypes`, falling back to `duckdb.typing`), and
# sibling modules import it from here rather than importing DuckDB's type
# module themselves.
__all__ = [
"UNITS_DICT",
"DeferredTimeZone",
"F",
"catch_duckdb_exception",
"col",
"concat_str",
"duckdb_dtypes",
"evaluate_exprs",
"fetch_rel_time_zone",
"function",
"generate_order_by_sql",
"generate_partition_by_sql",
"join_column_names",
"lambda_expr",
"lit",
"narwhals_to_native_dtype",
"native_to_narwhals_dtype",
"parse_into_expression",
"sql_expression",
"when",
"window_expression",
]
2 changes: 1 addition & 1 deletion narwhals/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -604,7 +604,7 @@ def _backend_version(self) -> tuple[int, ...]:
Implementation.PYSPARK_CONNECT: (3, 5),
Implementation.POLARS: (0, 20, 4),
Implementation.DASK: (2024, 8),
Implementation.DUCKDB: (1,),
Implementation.DUCKDB: (1, 1),
Implementation.IBIS: (6,),
Implementation.SQLFRAME: (3, 22, 0),
}
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ pyspark = ["pyspark>=3.5.0"]
pyspark-connect = ["pyspark[connect]>=3.5.0"]
polars = ["polars>=0.20.4"]
dask = ["dask[dataframe]>=2024.8"]
duckdb = ["duckdb>=1.0"]
duckdb = ["duckdb>=1.1"]
ibis = ["ibis-framework>=6.0.0", "rich", "packaging", "pyarrow_hotfix"]
sqlframe = ["sqlframe>=3.22.0,!=3.39.3"]

Expand Down
1 change: 1 addition & 0 deletions tests/dtypes_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -517,6 +517,7 @@ def test_datetime_w_tz_duckdb() -> None:
assert result["b"] == nw.List(nw.List(nw.Datetime("us", "Asia/Kathmandu")))


@pytest.mark.slow
def test_datetime_w_tz_pyspark() -> None: # pragma: no cover
pytest.importorskip("pyspark")
session = pyspark_session()
Expand Down
1 change: 1 addition & 0 deletions tests/expr_and_series/dt/convert_time_zone_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,7 @@ def test_convert_time_zone_to_connection_tz_duckdb() -> None:
)


@pytest.mark.slow
def test_convert_time_zone_to_connection_tz_pyspark() -> None: # pragma: no cover
pytest.importorskip("pyspark")

Expand Down
1 change: 1 addition & 0 deletions tests/expr_and_series/dt/replace_time_zone_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,7 @@ def test_replace_time_zone_to_connection_tz_duckdb() -> None:
)


@pytest.mark.slow
def test_replace_time_zone_to_connection_tz_pyspark() -> None: # pragma: no cover
pytest.importorskip("pyspark")

Expand Down
7 changes: 6 additions & 1 deletion tests/expr_and_series/fill_nan_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
modin_constructor,
pandas_constructor,
)
from tests.utils import Constructor, ConstructorEager, assert_equal_data
from tests.utils import PANDAS_VERSION, Constructor, ConstructorEager, assert_equal_data

NON_NULLABLE_CONSTRUCTORS = [
pandas_constructor,
Expand All @@ -31,6 +31,9 @@ def test_fill_nan(constructor: Constructor) -> None:
# no nan vs null distinction
expected = {"float": [-1.0, 1.0, 3.0], "float_na": [3.0, 1.0, 3.0]}
assert result.lazy().collect()["float_na"].null_count() == 0
elif "pandas" in str(constructor) and PANDAS_VERSION >= (3,):
expected = {"float": [-1.0, 1.0, None], "float_na": [None, 1.0, None]}
assert result.lazy().collect()["float_na"].null_count() == 2
else:
expected = {"float": [-1.0, 1.0, None], "float_na": [3.0, 1.0, None]}
assert result.lazy().collect()["float_na"].null_count() == 1
Expand All @@ -46,5 +49,7 @@ def test_fill_nan_series(constructor_eager: ConstructorEager) -> None:
if any(constructor_eager is c for c in NON_NULLABLE_CONSTRUCTORS):
# no nan vs null distinction
assert_equal_data({"a": result}, {"a": [999.0, 1.0, 999.0]})
elif "pandas" in str(constructor_eager) and PANDAS_VERSION >= (3,):
assert_equal_data({"a": result}, {"a": [None, 1.0, None]})
else:
assert_equal_data({"a": result}, {"a": [999.0, 1.0, None]})
2 changes: 2 additions & 0 deletions tests/expr_and_series/fill_null_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ def test_fill_null(constructor: Constructor) -> None:


def test_fill_null_w_aggregate(constructor: Constructor) -> None:
if "duckdb" in str(constructor) and DUCKDB_VERSION < (1, 3):
pytest.skip()
data = {"a": [0.5, None, 2.0, 3.0, 4.5], "b": ["xx", "yy", "zz", None, "yy"]}
df = nw.from_native(constructor(data))

Expand Down
12 changes: 11 additions & 1 deletion tests/expr_and_series/first_last_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,13 @@
import pytest

import narwhals as nw
from tests.utils import POLARS_VERSION, PYARROW_VERSION, Constructor, assert_equal_data
from tests.utils import (
DUCKDB_VERSION,
POLARS_VERSION,
PYARROW_VERSION,
Constructor,
assert_equal_data,
)

if TYPE_CHECKING:
from narwhals.typing import PythonLiteral
Expand Down Expand Up @@ -95,6 +101,8 @@ def test_first_expr_over_order_by(
if "ibis" in str(constructor):
# https://github.com/ibis-project/ibis/issues/11656
request.applymarker(pytest.mark.xfail)
if "duckdb" in str(constructor) and DUCKDB_VERSION < (1, 3):
pytest.skip()
frame = nw.from_native(
constructor(
{
Expand Down Expand Up @@ -139,6 +147,8 @@ def test_first_expr_over_order_by_partition_by(
if "ibis" in str(constructor):
# https://github.com/ibis-project/ibis/issues/11656
request.applymarker(pytest.mark.xfail)
if "duckdb" in str(constructor) and DUCKDB_VERSION < (1, 3):
pytest.skip()
frame = nw.from_native(
constructor(
{"a": [1, 1, 2], "b": [4, 5, 6], "c": [None, 7, 8], "i": [1, None, 2]}
Expand Down
Loading
Loading