diff --git a/.github/workflows/extremes.yml b/.github/workflows/extremes.yml index f4c9488df4..a94dcfaa2f 100644 --- a/.github/workflows/extremes.yml +++ b/.github/workflows/extremes.yml @@ -29,7 +29,7 @@ jobs: cache-suffix: min-versions-${{ matrix.python-version }} cache-dependency-glob: "pyproject.toml" - name: install-minimum-versions - run: uv pip install pipdeptree tox virtualenv setuptools pandas==1.1.3 polars==0.20.4 numpy==1.19.3 pyarrow==13.0.0 "pyarrow-stubs<17" scipy==1.6.0 scikit-learn==1.1.0 duckdb==1.0 tzdata --system + run: uv pip install pipdeptree tox virtualenv setuptools pandas==1.1.3 polars==0.20.4 numpy==1.19.3 pyarrow==13.0.0 "pyarrow-stubs<17" scipy==1.6.0 scikit-learn==1.1.0 duckdb==1.1 tzdata --system - name: install-reqs run: | uv pip install -e . --group tests --system @@ -44,9 +44,9 @@ jobs: echo "$DEPS" | grep 'pyarrow==13.0.0' echo "$DEPS" | grep 'scipy==1.6.0' echo "$DEPS" | grep 'scikit-learn==1.1.0' - echo "$DEPS" | grep 'duckdb==1.0' + echo "$DEPS" | grep 'duckdb==1.1' - name: Run pytest - run: pytest tests --cov=narwhals --cov=tests --cov-fail-under=50 --runslow --constructors=pandas,pyarrow,polars[eager],polars[lazy] + run: pytest tests --cov=narwhals --cov=tests --cov-fail-under=50 --runslow --constructors=pandas,pyarrow,polars[eager],polars[lazy],duckdb pretty_old_versions: strategy: @@ -66,7 +66,7 @@ jobs: cache-suffix: pretty-old-versions-${{ matrix.python-version }} cache-dependency-glob: "pyproject.toml" - name: install-pretty-old-versions - run: uv pip install pipdeptree tox virtualenv setuptools pandas==1.1.5 polars==0.20.4 numpy==1.19.3 pyarrow==14.0.0 "pyarrow-stubs<17" scipy==1.6.0 scikit-learn==1.1.0 duckdb==1.0 tzdata --system + run: uv pip install pipdeptree tox virtualenv setuptools pandas==1.1.5 polars==0.20.4 numpy==1.19.3 pyarrow==14.0.0 "pyarrow-stubs<17" scipy==1.6.0 scikit-learn==1.1.0 duckdb==1.2 tzdata --system - name: install-reqs run: uv pip install -e . --group tests --system - name: show-deps @@ -82,9 +82,9 @@ jobs: echo "$DEPS" | grep 'pyarrow==14.0.0' echo "$DEPS" | grep 'scipy==1.6.0' echo "$DEPS" | grep 'scikit-learn==1.1.0' - echo "$DEPS" | grep 'duckdb==1.0' + echo "$DEPS" | grep 'duckdb==1.2' - name: Run pytest - run: pytest tests --cov=narwhals --cov=tests --cov-fail-under=50 --runslow --constructors=pandas,pyarrow,polars[eager],polars[lazy] + run: pytest tests --cov=narwhals --cov=tests --cov-fail-under=50 --runslow --constructors=pandas,pyarrow,polars[eager],polars[lazy],duckdb not_so_old_versions: strategy: @@ -104,7 +104,7 @@ jobs: cache-suffix: not-so-old-versions-${{ matrix.python-version }} cache-dependency-glob: "pyproject.toml" - name: install-not-so-old-versions - run: uv pip install tox virtualenv setuptools pandas==2.0.3 polars==0.20.8 numpy==1.24.4 pyarrow==15.0.0 "pyarrow-stubs<17" scipy==1.8.0 scikit-learn==1.3.0 duckdb==1.0 dask[dataframe]==2024.10 tzdata --system + run: uv pip install tox virtualenv setuptools pandas==2.0.3 polars==0.20.8 numpy==1.24.4 pyarrow==15.0.0 "pyarrow-stubs<17" scipy==1.8.0 scikit-learn==1.3.0 duckdb==1.3 dask[dataframe]==2024.10 tzdata --system - name: install-reqs run: uv pip install -e . --group tests --system - name: show-deps @@ -119,9 +119,9 @@ jobs: echo "$DEPS" | grep 'scipy==1.8.0' echo "$DEPS" | grep 'scikit-learn==1.3.0' echo "$DEPS" | grep 'dask==2024.10' - echo "$DEPS" | grep 'duckdb==1.0' + echo "$DEPS" | grep 'duckdb==1.3' - name: Run pytest - run: pytest tests --cov=narwhals --cov=tests --cov-fail-under=50 --runslow --constructors=pandas,pyarrow,polars[eager],polars[lazy],dask + run: pytest tests --cov=narwhals --cov=tests --cov-fail-under=50 --runslow --constructors=pandas,pyarrow,polars[eager],polars[lazy],dask,duckdb nightlies: strategy: diff --git a/narwhals/_duckdb/dataframe.py b/narwhals/_duckdb/dataframe.py index 4c42a73d2e..0736c1e99a 100644 --- a/narwhals/_duckdb/dataframe.py +++ b/narwhals/_duckdb/dataframe.py @@ -41,7 +41,6 @@ import pandas as pd import pyarrow as pa from duckdb import Expression - from duckdb.typing import DuckDBPyType from typing_extensions import Self, TypeIs from narwhals._compliant.typing import CompliantDataFrameAny @@ -49,6 +48,7 @@ from narwhals._duckdb.group_by import DuckDBGroupBy from narwhals._duckdb.namespace import DuckDBNamespace from narwhals._duckdb.series import DuckDBInterchangeSeries + from narwhals._duckdb.utils import duckdb_dtypes from narwhals._typing import _EagerAllowedImpl from narwhals._utils import _LimitedContext from narwhals.dataframe import LazyFrame @@ -76,7 +76,7 @@ def __init__( ) -> None: self._native_frame: duckdb.DuckDBPyRelation = df self._version = version - self._cached_native_schema: dict[str, DuckDBPyType] | None = None + self._cached_native_schema: dict[str, duckdb_dtypes.DuckDBPyType] | None = None self._cached_columns: list[str] | None = None if validate_backend_version: self._validate_backend_version() diff --git a/narwhals/_duckdb/expr_list.py b/narwhals/_duckdb/expr_list.py index 08a12b3f28..b726f2fc78 100644 --- a/narwhals/_duckdb/expr_list.py +++ b/narwhals/_duckdb/expr_list.py @@ -5,6 +5,7 @@ from narwhals._compliant import LazyExprNamespace from narwhals._compliant.any_namespace import ListNamespace from narwhals._duckdb.utils import F, lit, when +from narwhals._utils import requires if TYPE_CHECKING: from duckdb import Expression @@ -19,6 +20,7 @@ class DuckDBExprListNamespace( def len(self) -> DuckDBExpr: return self.compliant._with_elementwise(lambda expr: F("len", expr)) + @requires.backend_version((1, 3)) # bugged before 1.3 def unique(self) -> DuckDBExpr: def func(expr: Expression) -> Expression: expr_distinct = F("list_distinct", expr) diff --git a/narwhals/_duckdb/namespace.py b/narwhals/_duckdb/namespace.py index 009404c428..c0e8f541dd 100644 --- a/narwhals/_duckdb/namespace.py +++ b/narwhals/_duckdb/namespace.py @@ -7,7 +7,6 @@ import duckdb from duckdb import CoalesceOperator, Expression -from duckdb.typing import BIGINT, VARCHAR from narwhals._duckdb.dataframe import DuckDBLazyFrame from narwhals._duckdb.expr import DuckDBExpr @@ -16,6 +15,7 @@ DeferredTimeZone, F, concat_str, + duckdb_dtypes, function, lit, narwhals_to_native_dtype, @@ -108,9 +108,9 @@ def func(df: DuckDBLazyFrame) -> list[Expression]: cols_separated = [ y for x in [ - (col.cast(VARCHAR),) + (col.cast(duckdb_dtypes.VARCHAR),) if i == len(cols) - 1 - else (col.cast(VARCHAR), lit(separator)) + else (col.cast(duckdb_dtypes.VARCHAR), lit(separator)) for i, col in enumerate(cols) ] for y in x @@ -130,7 +130,9 @@ def func(cols: Iterable[Expression]) -> Expression: cols = list(cols) return reduce( operator.add, (CoalesceOperator(col, lit(0)) for col in cols) - ) / reduce(operator.add, (col.isnotnull().cast(BIGINT) for col in cols)) + ) / reduce( + operator.add, (col.isnotnull().cast(duckdb_dtypes.BIGINT) for col in cols) + ) return self._expr._from_elementwise_horizontal_op(func, *exprs) diff --git a/narwhals/_duckdb/utils.py b/narwhals/_duckdb/utils.py index 243c9bf0bc..a2576af249 100644 --- a/narwhals/_duckdb/utils.py +++ b/narwhals/_duckdb/utils.py @@ -4,9 +4,13 @@ from typing import TYPE_CHECKING import duckdb -import duckdb.typing as duckdb_dtypes from duckdb import Expression -from duckdb.typing import DuckDBPyType + +try: + import duckdb.sqltypes as duckdb_dtypes +except ModuleNotFoundError: + # DuckDB pre 1.3 + import duckdb.typing as duckdb_dtypes from narwhals._utils import Version, isinstance_or_issubclass, zip_strict from narwhals.exceptions import ColumnNotFoundError @@ -131,7 +135,9 @@ def time_zone(self) -> str: def native_to_narwhals_dtype( - duckdb_dtype: DuckDBPyType, version: Version, deferred_time_zone: DeferredTimeZone + duckdb_dtype: duckdb_dtypes.DuckDBPyType, + version: Version, + deferred_time_zone: DeferredTimeZone, ) -> DType: duckdb_dtype_id = duckdb_dtype.id dtypes = version.dtypes @@ -216,7 +222,7 @@ def _non_nested_native_to_narwhals_dtype(duckdb_dtype_id: str, version: Version) dtypes = Version.MAIN.dtypes -NW_TO_DUCKDB_DTYPES: Mapping[type[DType], DuckDBPyType] = { +NW_TO_DUCKDB_DTYPES: Mapping[type[DType], duckdb_dtypes.DuckDBPyType] = { dtypes.Float64: duckdb_dtypes.DOUBLE, dtypes.Float32: duckdb_dtypes.FLOAT, dtypes.Binary: duckdb_dtypes.BLOB, @@ -228,14 +234,14 @@ def _non_nested_native_to_narwhals_dtype(duckdb_dtype_id: str, version: Version) dtypes.Int16: duckdb_dtypes.SMALLINT, dtypes.Int32: duckdb_dtypes.INTEGER, dtypes.Int64: duckdb_dtypes.BIGINT, - dtypes.Int128: DuckDBPyType("INT128"), + dtypes.Int128: duckdb_dtypes.HUGEINT, dtypes.UInt8: duckdb_dtypes.UTINYINT, dtypes.UInt16: duckdb_dtypes.USMALLINT, dtypes.UInt32: duckdb_dtypes.UINTEGER, dtypes.UInt64: duckdb_dtypes.UBIGINT, - dtypes.UInt128: DuckDBPyType("UINT128"), + dtypes.UInt128: duckdb_dtypes.UHUGEINT, } -TIME_UNIT_TO_TIMESTAMP: Mapping[TimeUnit, DuckDBPyType] = { +TIME_UNIT_TO_TIMESTAMP: Mapping[TimeUnit, duckdb_dtypes.DuckDBPyType] = { "s": duckdb_dtypes.TIMESTAMP_S, "ms": duckdb_dtypes.TIMESTAMP_MS, "us": duckdb_dtypes.TIMESTAMP, @@ -246,7 +252,7 @@ def _non_nested_native_to_narwhals_dtype(duckdb_dtype_id: str, version: Version) def narwhals_to_native_dtype( # noqa: PLR0912, C901 dtype: IntoDType, version: Version, deferred_time_zone: DeferredTimeZone -) -> DuckDBPyType: +) -> duckdb_dtypes.DuckDBPyType: dtypes = version.dtypes base_type = dtype.base_type() if duckdb_type := NW_TO_DUCKDB_DTYPES.get(base_type): @@ -256,7 +262,7 @@ def narwhals_to_native_dtype( # noqa: PLR0912, C901 msg = "Converting to Enum is not supported in narwhals.stable.v1" raise NotImplementedError(msg) if isinstance(dtype, dtypes.Enum): - return DuckDBPyType(f"ENUM{dtype.categories!r}") + return duckdb_dtypes.DuckDBPyType(f"ENUM{dtype.categories!r}") msg = "Can not cast / initialize Enum without categories present" raise ValueError(msg) if isinstance_or_issubclass(dtype, dtypes.Datetime): @@ -291,7 +297,7 @@ def narwhals_to_native_dtype( # noqa: PLR0912, C901 nw_inner = nw_inner.inner duckdb_inner = narwhals_to_native_dtype(nw_inner, version, deferred_time_zone) duckdb_shape_fmt = "".join(f"[{item}]" for item in dtype.shape) - return DuckDBPyType(f"{duckdb_inner}{duckdb_shape_fmt}") + return duckdb_dtypes.DuckDBPyType(f"{duckdb_inner}{duckdb_shape_fmt}") if issubclass(base_type, UNSUPPORTED_DTYPES): msg = f"Converting to {base_type.__name__} dtype is not supported for DuckDB." raise NotImplementedError(msg) @@ -378,19 +384,39 @@ def function(name: str, *args: Expression) -> Expression: if name == "isnull": return args[0].isnull() if name == "count_distinct": - try: - from duckdb import SQLExpression - except ModuleNotFoundError as exc: # pragma: no cover - msg = f"DuckDB>=1.3.0 is required for this operation. Found: DuckDB {duckdb.__version__}" - raise NotImplementedError(msg) from exc - return SQLExpression(f"count(distinct {args[0]})") + return sql_expression(f"count(distinct {args[0]})") return F(name, *args) def sql_expression(expr: str) -> Expression: try: from duckdb import SQLExpression - except ModuleNotFoundError as exc: # pragma: no cover + except ImportError as exc: # pragma: no cover msg = f"DuckDB>=1.3.0 is required for this operation. Found: DuckDB {duckdb.__version__}" raise NotImplementedError(msg) from exc return SQLExpression(expr) + + +__all__ = [ + "UNITS_DICT", + "DeferredTimeZone", + "F", + "catch_duckdb_exception", + "col", + "concat_str", + "duckdb_dtypes", + "evaluate_exprs", + "fetch_rel_time_zone", + "function", + "generate_order_by_sql", + "generate_partition_by_sql", + "join_column_names", + "lambda_expr", + "lit", + "narwhals_to_native_dtype", + "native_to_narwhals_dtype", + "parse_into_expression", + "sql_expression", + "when", + "window_expression", +] diff --git a/narwhals/_utils.py b/narwhals/_utils.py index 1bd94fe547..3647717812 100644 --- a/narwhals/_utils.py +++ b/narwhals/_utils.py @@ -604,7 +604,7 @@ def _backend_version(self) -> tuple[int, ...]: Implementation.PYSPARK_CONNECT: (3, 5), Implementation.POLARS: (0, 20, 4), Implementation.DASK: (2024, 8), - Implementation.DUCKDB: (1,), + Implementation.DUCKDB: (1, 1), Implementation.IBIS: (6,), Implementation.SQLFRAME: (3, 22, 0), } diff --git a/pyproject.toml b/pyproject.toml index eb1b3bdc4f..da62cde2e4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -42,7 +42,7 @@ pyspark = ["pyspark>=3.5.0"] pyspark-connect = ["pyspark[connect]>=3.5.0"] polars = ["polars>=0.20.4"] dask = ["dask[dataframe]>=2024.8"] -duckdb = ["duckdb>=1.0"] +duckdb = ["duckdb>=1.1"] ibis = ["ibis-framework>=6.0.0", "rich", "packaging", "pyarrow_hotfix"] sqlframe = ["sqlframe>=3.22.0,!=3.39.3"] diff --git a/tests/dtypes_test.py b/tests/dtypes_test.py index 4ff9134c21..d63384647f 100644 --- a/tests/dtypes_test.py +++ b/tests/dtypes_test.py @@ -517,6 +517,7 @@ def test_datetime_w_tz_duckdb() -> None: assert result["b"] == nw.List(nw.List(nw.Datetime("us", "Asia/Kathmandu"))) +@pytest.mark.slow def test_datetime_w_tz_pyspark() -> None: # pragma: no cover pytest.importorskip("pyspark") session = pyspark_session() diff --git a/tests/expr_and_series/dt/convert_time_zone_test.py b/tests/expr_and_series/dt/convert_time_zone_test.py index 40e5f08d77..65d1a6e3b6 100644 --- a/tests/expr_and_series/dt/convert_time_zone_test.py +++ b/tests/expr_and_series/dt/convert_time_zone_test.py @@ -154,6 +154,7 @@ def test_convert_time_zone_to_connection_tz_duckdb() -> None: ) +@pytest.mark.slow def test_convert_time_zone_to_connection_tz_pyspark() -> None: # pragma: no cover pytest.importorskip("pyspark") diff --git a/tests/expr_and_series/dt/replace_time_zone_test.py b/tests/expr_and_series/dt/replace_time_zone_test.py index d0e90cdadd..1c9dff7d59 100644 --- a/tests/expr_and_series/dt/replace_time_zone_test.py +++ b/tests/expr_and_series/dt/replace_time_zone_test.py @@ -142,6 +142,7 @@ def test_replace_time_zone_to_connection_tz_duckdb() -> None: ) +@pytest.mark.slow def test_replace_time_zone_to_connection_tz_pyspark() -> None: # pragma: no cover pytest.importorskip("pyspark") diff --git a/tests/expr_and_series/fill_nan_test.py b/tests/expr_and_series/fill_nan_test.py index b1c2b4c228..82ba374013 100644 --- a/tests/expr_and_series/fill_nan_test.py +++ b/tests/expr_and_series/fill_nan_test.py @@ -7,7 +7,7 @@ modin_constructor, pandas_constructor, ) -from tests.utils import Constructor, ConstructorEager, assert_equal_data +from tests.utils import PANDAS_VERSION, Constructor, ConstructorEager, assert_equal_data NON_NULLABLE_CONSTRUCTORS = [ pandas_constructor, @@ -31,6 +31,9 @@ def test_fill_nan(constructor: Constructor) -> None: # no nan vs null distinction expected = {"float": [-1.0, 1.0, 3.0], "float_na": [3.0, 1.0, 3.0]} assert result.lazy().collect()["float_na"].null_count() == 0 + elif "pandas" in str(constructor) and PANDAS_VERSION >= (3,): + expected = {"float": [-1.0, 1.0, None], "float_na": [None, 1.0, None]} + assert result.lazy().collect()["float_na"].null_count() == 2 else: expected = {"float": [-1.0, 1.0, None], "float_na": [3.0, 1.0, None]} assert result.lazy().collect()["float_na"].null_count() == 1 @@ -46,5 +49,7 @@ def test_fill_nan_series(constructor_eager: ConstructorEager) -> None: if any(constructor_eager is c for c in NON_NULLABLE_CONSTRUCTORS): # no nan vs null distinction assert_equal_data({"a": result}, {"a": [999.0, 1.0, 999.0]}) + elif "pandas" in str(constructor_eager) and PANDAS_VERSION >= (3,): + assert_equal_data({"a": result}, {"a": [None, 1.0, None]}) else: assert_equal_data({"a": result}, {"a": [999.0, 1.0, None]}) diff --git a/tests/expr_and_series/fill_null_test.py b/tests/expr_and_series/fill_null_test.py index 014e92ccfb..139c862fff 100644 --- a/tests/expr_and_series/fill_null_test.py +++ b/tests/expr_and_series/fill_null_test.py @@ -34,6 +34,8 @@ def test_fill_null(constructor: Constructor) -> None: def test_fill_null_w_aggregate(constructor: Constructor) -> None: + if "duckdb" in str(constructor) and DUCKDB_VERSION < (1, 3): + pytest.skip() data = {"a": [0.5, None, 2.0, 3.0, 4.5], "b": ["xx", "yy", "zz", None, "yy"]} df = nw.from_native(constructor(data)) diff --git a/tests/expr_and_series/first_last_test.py b/tests/expr_and_series/first_last_test.py index 67a4ff843a..c1e35673ec 100644 --- a/tests/expr_and_series/first_last_test.py +++ b/tests/expr_and_series/first_last_test.py @@ -5,7 +5,13 @@ import pytest import narwhals as nw -from tests.utils import POLARS_VERSION, PYARROW_VERSION, Constructor, assert_equal_data +from tests.utils import ( + DUCKDB_VERSION, + POLARS_VERSION, + PYARROW_VERSION, + Constructor, + assert_equal_data, +) if TYPE_CHECKING: from narwhals.typing import PythonLiteral @@ -95,6 +101,8 @@ def test_first_expr_over_order_by( if "ibis" in str(constructor): # https://github.com/ibis-project/ibis/issues/11656 request.applymarker(pytest.mark.xfail) + if "duckdb" in str(constructor) and DUCKDB_VERSION < (1, 3): + pytest.skip() frame = nw.from_native( constructor( { @@ -139,6 +147,8 @@ def test_first_expr_over_order_by_partition_by( if "ibis" in str(constructor): # https://github.com/ibis-project/ibis/issues/11656 request.applymarker(pytest.mark.xfail) + if "duckdb" in str(constructor) and DUCKDB_VERSION < (1, 3): + pytest.skip() frame = nw.from_native( constructor( {"a": [1, 1, 2], "b": [4, 5, 6], "c": [None, 7, 8], "i": [1, None, 2]} diff --git a/tests/expr_and_series/is_close_test.py b/tests/expr_and_series/is_close_test.py index c5bb4df551..0835357ceb 100644 --- a/tests/expr_and_series/is_close_test.py +++ b/tests/expr_and_series/is_close_test.py @@ -6,7 +6,7 @@ from __future__ import annotations -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Any import pytest @@ -18,7 +18,7 @@ modin_constructor, pandas_constructor, ) -from tests.utils import Constructor, ConstructorEager, assert_equal_data +from tests.utils import PANDAS_VERSION, Constructor, ConstructorEager, assert_equal_data if TYPE_CHECKING: from narwhals.typing import NumericLiteral @@ -32,7 +32,7 @@ NULL_PLACEHOLDER, NAN_PLACEHOLDER = 9999.0, -1.0 INF_POS, INF_NEG = float("inf"), float("-inf") -data = { +data: dict[str, Any] = { "x": [1.001, NULL_PLACEHOLDER, NAN_PLACEHOLDER, INF_POS, INF_NEG, INF_POS], "y": [1.005, NULL_PLACEHOLDER, NAN_PLACEHOLDER, INF_POS, 3.0, INF_NEG], "non_numeric": list("number"), @@ -109,7 +109,7 @@ def test_is_close_series_with_series( rel_tol: float, *, nans_equal: bool, - expected: list[float], + expected: list[Any], ) -> None: df = nw.from_native(constructor_eager(data), eager_only=True) x, y = df["x"], df["y"] @@ -122,6 +122,11 @@ def test_is_close_series_with_series( if constructor_eager in NON_NULLABLE_CONSTRUCTORS: expected = [v if v is not None else nans_equal for v in expected] + elif "pandas" in str(constructor_eager) and PANDAS_VERSION >= (3,): + expected = [ + v if data["y"][i] not in {NULL_PLACEHOLDER, NAN_PLACEHOLDER} else None + for i, v in enumerate(expected) + ] assert_equal_data({"result": result}, {"result": expected}) @@ -133,7 +138,7 @@ def test_is_close_series_with_scalar( rel_tol: float, *, nans_equal: bool, - expected: list[float], + expected: list[Any], ) -> None: df = nw.from_native(constructor_eager(data), eager_only=True) y = df["y"] @@ -145,6 +150,11 @@ def test_is_close_series_with_scalar( if constructor_eager in NON_NULLABLE_CONSTRUCTORS: expected = [v if v is not None else False for v in expected] + elif "pandas" in str(constructor_eager) and PANDAS_VERSION >= (3,): + expected = [ + v if data["y"][i] not in {NULL_PLACEHOLDER, NAN_PLACEHOLDER} else None + for i, v in enumerate(expected) + ] assert_equal_data({"result": result}, {"result": expected}) @@ -157,7 +167,7 @@ def test_is_close_expr_with_expr( rel_tol: float, *, nans_equal: bool, - expected: list[float], + expected: list[Any], ) -> None: if "sqlframe" in str(constructor): # TODO(FBruzzesi): Figure out a MRE and report upstream @@ -185,6 +195,11 @@ def test_is_close_expr_with_expr( ) if constructor in NON_NULLABLE_CONSTRUCTORS: expected = [v if v is not None else nans_equal for v in expected] + elif "pandas" in str(constructor) and PANDAS_VERSION >= (3,): + expected = [ + v if data["y"][i] not in {NULL_PLACEHOLDER, NAN_PLACEHOLDER} else None + for i, v in enumerate(expected) + ] assert_equal_data(result, {"idx": data["idx"], "result": expected}) @@ -197,7 +212,7 @@ def test_is_close_expr_with_scalar( rel_tol: float, *, nans_equal: bool, - expected: list[float], + expected: list[Any], ) -> None: if "sqlframe" in str(constructor): # TODO(FBruzzesi): Figure out a MRE and report upstream @@ -221,4 +236,9 @@ def test_is_close_expr_with_scalar( ) if constructor in NON_NULLABLE_CONSTRUCTORS: expected = [v if v is not None else False for v in expected] + elif "pandas" in str(constructor) and PANDAS_VERSION >= (3,): + expected = [ + v if data["y"][i] not in {NULL_PLACEHOLDER, NAN_PLACEHOLDER} else None + for i, v in enumerate(expected) + ] assert_equal_data(result, {"idx": data["idx"], "result": expected}) diff --git a/tests/expr_and_series/is_nan_test.py b/tests/expr_and_series/is_nan_test.py index 8027065a35..92af6446d0 100644 --- a/tests/expr_and_series/is_nan_test.py +++ b/tests/expr_and_series/is_nan_test.py @@ -14,7 +14,7 @@ modin_constructor, pandas_constructor, ) -from tests.utils import Constructor, ConstructorEager, assert_equal_data +from tests.utils import PANDAS_VERSION, Constructor, ConstructorEager, assert_equal_data NON_NULLABLE_CONSTRUCTORS = [ pandas_constructor, @@ -43,6 +43,13 @@ def test_nan(constructor: Constructor) -> None: "float": [False, False, True], "float_na": [True, False, True], } + elif "pandas" in str(constructor) and PANDAS_VERSION >= (3,): + # NaN values are coerced into NA for nullable datatypes by default + expected = { + "int": [False, False, None], + "float": [False, False, None], + "float_na": [None, False, None], + } else: # Null are preserved and should be differentiated for nullable datatypes expected = { @@ -82,6 +89,13 @@ def test_nan_series(constructor_eager: ConstructorEager) -> None: "float": [False, False, True], "float_na": [True, False, True], } + elif "pandas" in str(constructor_eager) and PANDAS_VERSION >= (3,): + # NaN values are coerced into NA for nullable datatypes by default + expected = { + "int": [False, False, None], + "float": [False, False, None], + "float_na": [None, False, None], + } else: # Null are preserved and should be differentiated for nullable datatypes expected = { diff --git a/tests/expr_and_series/list/unique_test.py b/tests/expr_and_series/list/unique_test.py index 057843d9e3..3d7c9dd039 100644 --- a/tests/expr_and_series/list/unique_test.py +++ b/tests/expr_and_series/list/unique_test.py @@ -5,6 +5,7 @@ import pytest import narwhals as nw +from tests.utils import DUCKDB_VERSION if TYPE_CHECKING: from tests.utils import Constructor, ConstructorEager @@ -19,6 +20,8 @@ def test_unique_expr(request: pytest.FixtureRequest, constructor: Constructor) - for backend in ("dask", "modin", "cudf", "pyarrow", "pandas") ): request.applymarker(pytest.mark.xfail) + if "duckdb" in str(constructor) and DUCKDB_VERSION < (1, 3): + pytest.skip() result = ( nw.from_native(constructor(data)) .select(nw.col("a").cast(nw.List(nw.Int32())).list.unique()) diff --git a/tests/expr_and_series/n_unique_test.py b/tests/expr_and_series/n_unique_test.py index 3b9e75d343..66af2ac705 100644 --- a/tests/expr_and_series/n_unique_test.py +++ b/tests/expr_and_series/n_unique_test.py @@ -3,12 +3,14 @@ import pytest import narwhals as nw -from tests.utils import Constructor, ConstructorEager, assert_equal_data +from tests.utils import DUCKDB_VERSION, Constructor, ConstructorEager, assert_equal_data data = {"a": [1.0, None, None, 3.0], "b": [1.0, None, 4.0, 5.0]} def test_n_unique(constructor: Constructor) -> None: + if "duckdb" in str(constructor) and DUCKDB_VERSION < (1, 3): + pytest.skip() df = nw.from_native(constructor(data)) result = df.select(nw.all().n_unique()) expected = {"a": [3], "b": [4]} @@ -22,6 +24,8 @@ def test_n_unique_over(constructor: Constructor, request: pytest.FixtureRequest) if "pyspark" in str(constructor) and "sqlframe" not in str(constructor): # "Distinct window functions are not supported" request.applymarker(pytest.mark.xfail) + if "duckdb" in str(constructor) and DUCKDB_VERSION < (1, 3): + pytest.skip() data = {"a": [1, None, None, 1, 2, 2, 2, None, 3], "b": [1, 1, 1, 1, 1, 1, 1, 2, 2]} df = nw.from_native(constructor(data)) result = df.with_columns( diff --git a/tests/expr_and_series/over_test.py b/tests/expr_and_series/over_test.py index b32368f6e4..91612d1d2a 100644 --- a/tests/expr_and_series/over_test.py +++ b/tests/expr_and_series/over_test.py @@ -454,6 +454,8 @@ def test_over_quantile(constructor: Constructor, request: pytest.FixtureRequest) if any(x in str(constructor) for x in ("pyarrow_table", "pyspark", "cudf")): # cudf: https://github.com/rapidsai/cudf/issues/18159 request.applymarker(pytest.mark.xfail) + if "duckdb" in str(constructor) and DUCKDB_VERSION < (1, 3): + pytest.skip() data = {"a": [1, 2, 3, 4, 5, 6], "b": ["x", "x", "x", "y", "y", "y"]} diff --git a/tests/expr_and_series/reduction_test.py b/tests/expr_and_series/reduction_test.py index 7d5149e551..2c672ce341 100644 --- a/tests/expr_and_series/reduction_test.py +++ b/tests/expr_and_series/reduction_test.py @@ -96,9 +96,10 @@ def test_empty_scalar_reduction_with_columns( ) -> None: if "duckdb" in str(constructor) and DUCKDB_VERSION < (1, 3): pytest.skip() - if any( - x in str(constructor) for x in ("duckdb", "sqlframe", "ibis") - ) and DUCKDB_VERSION >= (1, 4): + if any(x in str(constructor) for x in ("sqlframe", "ibis")) and DUCKDB_VERSION >= ( + 1, + 4, + ): request.applymarker(pytest.mark.xfail) from itertools import chain diff --git a/tests/expr_and_series/skew_test.py b/tests/expr_and_series/skew_test.py index 8ac8820711..6bfd8f0d68 100644 --- a/tests/expr_and_series/skew_test.py +++ b/tests/expr_and_series/skew_test.py @@ -3,7 +3,7 @@ import pytest import narwhals as nw -from tests.utils import Constructor, ConstructorEager, assert_equal_data +from tests.utils import DUCKDB_VERSION, Constructor, ConstructorEager, assert_equal_data @pytest.mark.parametrize( @@ -44,6 +44,8 @@ def test_skew_expr( if "ibis" in str(constructor): # https://github.com/ibis-project/ibis/issues/11176 request.applymarker(pytest.mark.xfail) + if "duckdb" in str(constructor) and DUCKDB_VERSION < (1, 3): + pytest.skip() if "pyspark" in str(constructor) and int(request.node.callspec.id[-1]) == 0: # Can not infer schema from empty dataset. diff --git a/tests/expr_and_series/str/zfill_test.py b/tests/expr_and_series/str/zfill_test.py index 7ecc3d36fd..78f59cecb1 100644 --- a/tests/expr_and_series/str/zfill_test.py +++ b/tests/expr_and_series/str/zfill_test.py @@ -17,7 +17,7 @@ def test_str_zfill(request: pytest.FixtureRequest, constructor: Constructor) -> None: - if uses_pyarrow_backend(constructor): + if uses_pyarrow_backend(constructor) and PANDAS_VERSION < (3,): reason = ( "pandas with pyarrow backend doesn't support str.zfill, see " "https://github.com/pandas-dev/pandas/issues/61485" @@ -43,7 +43,7 @@ def test_str_zfill(request: pytest.FixtureRequest, constructor: Constructor) -> def test_str_zfill_series( request: pytest.FixtureRequest, constructor_eager: ConstructorEager ) -> None: - if uses_pyarrow_backend(constructor_eager): + if uses_pyarrow_backend(constructor_eager) and PANDAS_VERSION < (3,): reason = ( "pandas with pyarrow backend doesn't support str.zfill, see " "https://github.com/pandas-dev/pandas/issues/61485" diff --git a/tests/expr_and_series/unary_test.py b/tests/expr_and_series/unary_test.py index 3af34b6333..038136a902 100644 --- a/tests/expr_and_series/unary_test.py +++ b/tests/expr_and_series/unary_test.py @@ -5,12 +5,14 @@ import pytest import narwhals as nw -from tests.utils import Constructor, ConstructorEager, assert_equal_data +from tests.utils import DUCKDB_VERSION, Constructor, ConstructorEager, assert_equal_data def test_unary(constructor: Constructor, request: pytest.FixtureRequest) -> None: if "ibis" in str(constructor): request.applymarker(pytest.mark.xfail) + if "duckdb" in str(constructor) and DUCKDB_VERSION < (1, 3): + pytest.skip() data = {"a": [1, 3, 2], "b": [4, 4, 6], "c": [7.0, 8.0, None], "z": [7.0, 8.0, 9.0]} result = nw.from_native(constructor(data)).select( @@ -73,6 +75,8 @@ def test_unary_two_elements( ) -> None: if "ibis" in str(constructor): request.applymarker(pytest.mark.xfail) + if "duckdb" in str(constructor) and DUCKDB_VERSION < (1, 3): + pytest.skip() data = {"a": [1, 2], "b": [2, 10], "c": [2.0, None]} result = nw.from_native(constructor(data)).select( a_nunique=nw.col("a").n_unique(), @@ -122,6 +126,8 @@ def test_unary_one_element( request.applymarker(pytest.mark.xfail) if "ibis" in str(constructor): request.applymarker(pytest.mark.xfail) + if "duckdb" in str(constructor) and DUCKDB_VERSION < (1, 3): + pytest.skip() data = {"a": [1], "b": [2], "c": [None]} # Dask runs into a divide by zero RuntimeWarning for 1 element skew. context = ( diff --git a/tests/expr_and_series/when_test.py b/tests/expr_and_series/when_test.py index cfcaad680c..46b07c66d7 100644 --- a/tests/expr_and_series/when_test.py +++ b/tests/expr_and_series/when_test.py @@ -7,7 +7,7 @@ import narwhals as nw from narwhals.exceptions import InvalidOperationError, MultiOutputExpressionError -from tests.utils import Constructor, ConstructorEager, assert_equal_data +from tests.utils import DUCKDB_VERSION, Constructor, ConstructorEager, assert_equal_data if TYPE_CHECKING: from narwhals.typing import _1DArray @@ -198,6 +198,8 @@ def test_when_then_otherwise_aggregate_with_columns( expected: list[int], constructor: Constructor, ) -> None: + if "duckdb" in str(constructor) and DUCKDB_VERSION < (1, 3): + pytest.skip() df = nw.from_native(constructor({"a": [1, 2, 3], "b": [4, 5, 6]})) expr = nw.when(condition).then(then).otherwise(otherwise) result = df.with_columns(a_when=expr) diff --git a/tests/frame/group_by_test.py b/tests/frame/group_by_test.py index 20e8153c9e..0f99ca583b 100644 --- a/tests/frame/group_by_test.py +++ b/tests/frame/group_by_test.py @@ -13,6 +13,7 @@ import narwhals as nw from narwhals.exceptions import DuplicateError, InvalidOperationError from tests.utils import ( + DUCKDB_VERSION, PANDAS_VERSION, POLARS_VERSION, PYARROW_VERSION, @@ -134,6 +135,8 @@ def test_group_by_depth_1_agg( pytest.skip( "Known issue with variance calculation in pandas 2.0.x with pyarrow backend in groupby operations" ) + if "duckdb" in str(constructor) and DUCKDB_VERSION < (1, 3): + pytest.skip() data = {"a": [1, 1, 1, 2], "b": [1, None, 2, 3]} expr = getattr(nw.col("b"), attr)() result = nw.from_native(constructor(data)).group_by("a").agg(expr).sort("a") @@ -204,6 +207,8 @@ def test_group_by_median(constructor: Constructor) -> None: def test_group_by_n_unique_w_missing(constructor: Constructor) -> None: + if "duckdb" in str(constructor) and DUCKDB_VERSION < (1, 3): + pytest.skip() data = {"a": [1, 1, 2], "b": [4, None, 5], "c": [None, None, 7], "d": [1, 1, 3]} result = ( nw.from_native(constructor(data)) @@ -391,6 +396,8 @@ def test_all_kind_of_aggs( pytest.skip( "Pandas < 1.4.0 does not support multiple aggregations with the same column" ) + if "duckdb" in str(constructor) and DUCKDB_VERSION < (1, 3): + pytest.skip() df = nw.from_native(constructor({"a": [1, 1, 1, 2, 2, 2], "b": [4, 5, 6, 0, 5, 5]})) result = ( df.group_by("a") @@ -530,6 +537,8 @@ def test_group_by_raise_if_not_preserves_length( def test_group_by_window(constructor: Constructor) -> None: + if "duckdb" in str(constructor) and DUCKDB_VERSION < (1, 3): + pytest.skip() data = {"a": [1, 2, 2, None], "b": [1, 1, 2, 2], "x": [1, 2, 3, 4]} df = nw.from_native(constructor(data)) result = ( diff --git a/tests/frame/top_k_test.py b/tests/frame/top_k_test.py index d46961b21f..d0ba228df0 100644 --- a/tests/frame/top_k_test.py +++ b/tests/frame/top_k_test.py @@ -3,13 +3,15 @@ import pytest import narwhals as nw -from tests.utils import POLARS_VERSION, Constructor, assert_equal_data +from tests.utils import DUCKDB_VERSION, POLARS_VERSION, Constructor, assert_equal_data def test_top_k(constructor: Constructor) -> None: if "polars" in str(constructor) and POLARS_VERSION < (1, 0): # old polars versions do not sort nulls last pytest.skip() + if "duckdb" in str(constructor) and DUCKDB_VERSION < (1, 3): + pytest.skip() data = {"a": ["a", "f", "a", "d", "b", "c"], "b c": [None, None, 2, 3, 6, 1]} df = nw.from_native(constructor(data)) result = df.top_k(4, by="b c") @@ -25,6 +27,8 @@ def test_top_k_by_multiple(constructor: Constructor) -> None: if "polars" in str(constructor) and POLARS_VERSION < (0, 20, 22): # bug in old version pytest.skip() + if "duckdb" in str(constructor) and DUCKDB_VERSION < (1, 3): + pytest.skip() data = { "a": ["a", "f", "a", "d", "b", "c"], "b": [2, 2, 2, 3, 1, 1], diff --git a/tests/frame/unique_test.py b/tests/frame/unique_test.py index c80e4a677a..0aa3f175f0 100644 --- a/tests/frame/unique_test.py +++ b/tests/frame/unique_test.py @@ -47,6 +47,8 @@ def test_unique_first_last( if "dask" in str(constructor): # https://github.com/dask/dask/issues/12073 request.applymarker(pytest.mark.xfail) + if "duckdb" in str(constructor) and DUCKDB_VERSION < (1, 3): + pytest.skip() data = {"i": [0, 1, None, 2], "a": [1, 3, 2, 1], "b": [4, 4, 4, 6]} df_raw = constructor(data) df = nw.from_native(df_raw) @@ -70,6 +72,8 @@ def test_unique_first_last_no_subset( keep: Literal["first", "last"], expected: dict[str, list[float]], ) -> None: + if "duckdb" in str(constructor) and DUCKDB_VERSION < (1, 3): + pytest.skip() data = {"i": [0, 1, 1, 2], "b": [4, 4, 4, 6]} df_raw = constructor(data) df = nw.from_native(df_raw) @@ -139,6 +143,8 @@ def test_unique_invalid_keep(constructor: Constructor) -> None: @pytest.mark.filterwarnings("ignore:.*backwards-compatibility:UserWarning") def test_unique_none(constructor: Constructor) -> None: + if "duckdb" in str(constructor) and DUCKDB_VERSION < (1, 3): + pytest.skip() df_raw = constructor(data) df = nw.from_native(df_raw) @@ -154,6 +160,8 @@ def test_unique_3069(constructor: Constructor, request: pytest.FixtureRequest) - if "ibis" in str(constructor): # https://github.com/ibis-project/ibis/issues/11591 request.applymarker(pytest.mark.xfail) + if "duckdb" in str(constructor) and DUCKDB_VERSION < (1, 3): + pytest.skip() data = {"name": ["a", "b", "c"], "group": ["d", "e", "f"], "value": [1, 2, 3]} df = nw.from_native(constructor(data)) unique_to_get = "group" diff --git a/tests/frame/with_row_index_test.py b/tests/frame/with_row_index_test.py index c9d5f59fe0..85f3447e6e 100644 --- a/tests/frame/with_row_index_test.py +++ b/tests/frame/with_row_index_test.py @@ -5,7 +5,13 @@ import pytest import narwhals as nw -from tests.utils import PANDAS_VERSION, Constructor, ConstructorEager, assert_equal_data +from tests.utils import ( + DUCKDB_VERSION, + PANDAS_VERSION, + Constructor, + ConstructorEager, + assert_equal_data, +) if TYPE_CHECKING: from collections.abc import Sequence @@ -36,6 +42,8 @@ def test_with_row_index_lazy( ): # pragma: no cover reason = "ValueError: first not supported for non-numeric data." pytest.skip(reason=reason) + if "duckdb" in str(constructor) and DUCKDB_VERSION < (1, 3): + pytest.skip() result = ( nw.from_native(constructor(data)) diff --git a/tests/utils.py b/tests/utils.py index e32ad0bbbd..4e06c35063 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -121,7 +121,7 @@ def assert_equal_data(result: Any, expected: Mapping[str, Any]) -> None: elif isinstance(lhs, float) and math.isnan(lhs): are_equivalent_values = rhs is None or math.isnan(rhs) elif isinstance(rhs, float) and math.isnan(rhs): - are_equivalent_values = lhs is None or math.isnan(lhs) + are_equivalent_values = lhs is None or pd.isna(lhs) or math.isnan(lhs) elif lhs is None: are_equivalent_values = rhs is None elif isinstance(lhs, list) and isinstance(rhs, list): diff --git a/tests/v1_test.py b/tests/v1_test.py index 5ef46423ea..37d5dc1779 100644 --- a/tests/v1_test.py +++ b/tests/v1_test.py @@ -34,6 +34,7 @@ ) from narwhals.utils import Version from tests.utils import ( + DUCKDB_VERSION, PANDAS_VERSION, POLARS_VERSION, PYARROW_VERSION, @@ -421,6 +422,8 @@ def test_all_horizontal() -> None: def test_with_row_index(constructor: Constructor) -> None: + if "duckdb" in str(constructor) and DUCKDB_VERSION < (1, 3): + pytest.skip() data = {"abc": ["foo", "bars"], "xyz": [100, 200], "const": [42, 42]} frame = nw_v1.from_native(constructor(data))