diff --git a/.github/workflows/extremes.yml b/.github/workflows/extremes.yml
index f4c9488df4..a94dcfaa2f 100644
--- a/.github/workflows/extremes.yml
+++ b/.github/workflows/extremes.yml
@@ -29,7 +29,7 @@ jobs:
           cache-suffix: min-versions-${{ matrix.python-version }}
           cache-dependency-glob: "pyproject.toml"
       - name: install-minimum-versions
-        run: uv pip install pipdeptree tox virtualenv setuptools pandas==1.1.3 polars==0.20.4 numpy==1.19.3 pyarrow==13.0.0 "pyarrow-stubs<17" scipy==1.6.0 scikit-learn==1.1.0 duckdb==1.0 tzdata --system
+        run: uv pip install pipdeptree tox virtualenv setuptools pandas==1.1.3 polars==0.20.4 numpy==1.19.3 pyarrow==13.0.0 "pyarrow-stubs<17" scipy==1.6.0 scikit-learn==1.1.0 duckdb==1.1 tzdata --system
       - name: install-reqs
         run: |
           uv pip install -e . --group tests --system
@@ -44,9 +44,9 @@ jobs:
           echo "$DEPS" | grep 'pyarrow==13.0.0'
           echo "$DEPS" | grep 'scipy==1.6.0'
           echo "$DEPS" | grep 'scikit-learn==1.1.0'
-          echo "$DEPS" | grep 'duckdb==1.0'
+          echo "$DEPS" | grep 'duckdb==1.1'
       - name: Run pytest
-        run: pytest tests --cov=narwhals --cov=tests --cov-fail-under=50 --runslow --constructors=pandas,pyarrow,polars[eager],polars[lazy]
+        run: pytest tests --cov=narwhals --cov=tests --cov-fail-under=50 --runslow --constructors=pandas,pyarrow,polars[eager],polars[lazy],duckdb
 
   pretty_old_versions:
     strategy:
@@ -66,7 +66,7 @@ jobs:
           cache-suffix: pretty-old-versions-${{ matrix.python-version }}
           cache-dependency-glob: "pyproject.toml"
       - name: install-pretty-old-versions
-        run: uv pip install pipdeptree tox virtualenv setuptools pandas==1.1.5 polars==0.20.4 numpy==1.19.3 pyarrow==14.0.0 "pyarrow-stubs<17" scipy==1.6.0 scikit-learn==1.1.0 duckdb==1.0 tzdata --system
+        run: uv pip install pipdeptree tox virtualenv setuptools pandas==1.1.5 polars==0.20.4 numpy==1.19.3 pyarrow==14.0.0 "pyarrow-stubs<17" scipy==1.6.0 scikit-learn==1.1.0 duckdb==1.2 tzdata --system
       - name: install-reqs
         run: uv pip install -e . --group tests --system
       - name: show-deps
@@ -82,9 +82,9 @@ jobs:
           echo "$DEPS" | grep 'pyarrow==14.0.0'
           echo "$DEPS" | grep 'scipy==1.6.0'
           echo "$DEPS" | grep 'scikit-learn==1.1.0'
-          echo "$DEPS" | grep 'duckdb==1.0'
+          echo "$DEPS" | grep 'duckdb==1.2'
       - name: Run pytest
-        run: pytest tests --cov=narwhals --cov=tests --cov-fail-under=50 --runslow --constructors=pandas,pyarrow,polars[eager],polars[lazy]
+        run: pytest tests --cov=narwhals --cov=tests --cov-fail-under=50 --runslow --constructors=pandas,pyarrow,polars[eager],polars[lazy],duckdb
 
   not_so_old_versions:
     strategy:
@@ -104,7 +104,7 @@ jobs:
           cache-suffix: not-so-old-versions-${{ matrix.python-version }}
           cache-dependency-glob: "pyproject.toml"
       - name: install-not-so-old-versions
-        run: uv pip install tox virtualenv setuptools pandas==2.0.3 polars==0.20.8 numpy==1.24.4 pyarrow==15.0.0 "pyarrow-stubs<17" scipy==1.8.0 scikit-learn==1.3.0 duckdb==1.0 dask[dataframe]==2024.10 tzdata --system
+        run: uv pip install tox virtualenv setuptools pandas==2.0.3 polars==0.20.8 numpy==1.24.4 pyarrow==15.0.0 "pyarrow-stubs<17" scipy==1.8.0 scikit-learn==1.3.0 duckdb==1.3 dask[dataframe]==2024.10 tzdata --system
       - name: install-reqs
         run: uv pip install -e . --group tests --system
       - name: show-deps
@@ -119,9 +119,9 @@ jobs:
           echo "$DEPS" | grep 'scipy==1.8.0'
           echo "$DEPS" | grep 'scikit-learn==1.3.0'
           echo "$DEPS" | grep 'dask==2024.10'
-          echo "$DEPS" | grep 'duckdb==1.0'
+          echo "$DEPS" | grep 'duckdb==1.3'
       - name: Run pytest
-        run: pytest tests --cov=narwhals --cov=tests --cov-fail-under=50 --runslow --constructors=pandas,pyarrow,polars[eager],polars[lazy],dask
+        run: pytest tests --cov=narwhals --cov=tests --cov-fail-under=50 --runslow --constructors=pandas,pyarrow,polars[eager],polars[lazy],dask,duckdb
 
   nightlies:
     strategy:
diff --git a/narwhals/_duckdb/dataframe.py b/narwhals/_duckdb/dataframe.py
index 4c42a73d2e..0736c1e99a 100644
--- a/narwhals/_duckdb/dataframe.py
+++ b/narwhals/_duckdb/dataframe.py
@@ -41,7 +41,6 @@
     import pandas as pd
     import pyarrow as pa
     from duckdb import Expression
-    from duckdb.typing import DuckDBPyType
     from typing_extensions import Self, TypeIs
 
     from narwhals._compliant.typing import CompliantDataFrameAny
@@ -49,6 +48,7 @@
     from narwhals._duckdb.group_by import DuckDBGroupBy
     from narwhals._duckdb.namespace import DuckDBNamespace
     from narwhals._duckdb.series import DuckDBInterchangeSeries
+    from narwhals._duckdb.utils import duckdb_dtypes
     from narwhals._typing import _EagerAllowedImpl
     from narwhals._utils import _LimitedContext
     from narwhals.dataframe import LazyFrame
@@ -76,7 +76,7 @@ def __init__(
     ) -> None:
         self._native_frame: duckdb.DuckDBPyRelation = df
         self._version = version
-        self._cached_native_schema: dict[str, DuckDBPyType] | None = None
+        self._cached_native_schema: dict[str, duckdb_dtypes.DuckDBPyType] | None = None
         self._cached_columns: list[str] | None = None
         if validate_backend_version:
             self._validate_backend_version()
diff --git a/narwhals/_duckdb/expr_list.py b/narwhals/_duckdb/expr_list.py
index 08a12b3f28..b726f2fc78 100644
--- a/narwhals/_duckdb/expr_list.py
+++ b/narwhals/_duckdb/expr_list.py
@@ -5,6 +5,7 @@
 from narwhals._compliant import LazyExprNamespace
 from narwhals._compliant.any_namespace import ListNamespace
 from narwhals._duckdb.utils import F, lit, when
+from narwhals._utils import requires
 
 if TYPE_CHECKING:
     from duckdb import Expression
@@ -19,6 +20,7 @@ class DuckDBExprListNamespace(
     def len(self) -> DuckDBExpr:
         return self.compliant._with_elementwise(lambda expr: F("len", expr))
 
+    @requires.backend_version((1, 3))  # bugged before 1.3
     def unique(self) -> DuckDBExpr:
         def func(expr: Expression) -> Expression:
             expr_distinct = F("list_distinct", expr)
diff --git a/narwhals/_duckdb/namespace.py b/narwhals/_duckdb/namespace.py
index 009404c428..c0e8f541dd 100644
--- a/narwhals/_duckdb/namespace.py
+++ b/narwhals/_duckdb/namespace.py
@@ -7,7 +7,6 @@
 
 import duckdb
 from duckdb import CoalesceOperator, Expression
-from duckdb.typing import BIGINT, VARCHAR
 
 from narwhals._duckdb.dataframe import DuckDBLazyFrame
 from narwhals._duckdb.expr import DuckDBExpr
@@ -16,6 +15,7 @@
     DeferredTimeZone,
     F,
     concat_str,
+    duckdb_dtypes,
     function,
     lit,
     narwhals_to_native_dtype,
@@ -108,9 +108,9 @@ def func(df: DuckDBLazyFrame) -> list[Expression]:
                 cols_separated = [
                     y
                     for x in [
-                        (col.cast(VARCHAR),)
+                        (col.cast(duckdb_dtypes.VARCHAR),)
                         if i == len(cols) - 1
-                        else (col.cast(VARCHAR), lit(separator))
+                        else (col.cast(duckdb_dtypes.VARCHAR), lit(separator))
                         for i, col in enumerate(cols)
                     ]
                     for y in x
@@ -130,7 +130,9 @@ def func(cols: Iterable[Expression]) -> Expression:
             cols = list(cols)
             return reduce(
                 operator.add, (CoalesceOperator(col, lit(0)) for col in cols)
-            ) / reduce(operator.add, (col.isnotnull().cast(BIGINT) for col in cols))
+            ) / reduce(
+                operator.add, (col.isnotnull().cast(duckdb_dtypes.BIGINT) for col in cols)
+            )
 
         return self._expr._from_elementwise_horizontal_op(func, *exprs)
 
diff --git a/narwhals/_duckdb/utils.py b/narwhals/_duckdb/utils.py
index 243c9bf0bc..a2576af249 100644
--- a/narwhals/_duckdb/utils.py
+++ b/narwhals/_duckdb/utils.py
@@ -4,9 +4,13 @@
 from typing import TYPE_CHECKING
 
 import duckdb
-import duckdb.typing as duckdb_dtypes
 from duckdb import Expression
-from duckdb.typing import DuckDBPyType
+
+try:
+    import duckdb.sqltypes as duckdb_dtypes
+except ModuleNotFoundError:
+    # Older DuckDB releases lack `duckdb.sqltypes`; fall back to `duckdb.typing`
+    import duckdb.typing as duckdb_dtypes
 
 from narwhals._utils import Version, isinstance_or_issubclass, zip_strict
 from narwhals.exceptions import ColumnNotFoundError
@@ -131,7 +135,9 @@ def time_zone(self) -> str:
 
 
 def native_to_narwhals_dtype(
-    duckdb_dtype: DuckDBPyType, version: Version, deferred_time_zone: DeferredTimeZone
+    duckdb_dtype: duckdb_dtypes.DuckDBPyType,
+    version: Version,
+    deferred_time_zone: DeferredTimeZone,
 ) -> DType:
     duckdb_dtype_id = duckdb_dtype.id
     dtypes = version.dtypes
@@ -216,7 +222,7 @@ def _non_nested_native_to_narwhals_dtype(duckdb_dtype_id: str, version: Version)
 
 
 dtypes = Version.MAIN.dtypes
-NW_TO_DUCKDB_DTYPES: Mapping[type[DType], DuckDBPyType] = {
+NW_TO_DUCKDB_DTYPES: Mapping[type[DType], duckdb_dtypes.DuckDBPyType] = {
     dtypes.Float64: duckdb_dtypes.DOUBLE,
     dtypes.Float32: duckdb_dtypes.FLOAT,
     dtypes.Binary: duckdb_dtypes.BLOB,
@@ -228,14 +234,14 @@ def _non_nested_native_to_narwhals_dtype(duckdb_dtype_id: str, version: Version)
     dtypes.Int16: duckdb_dtypes.SMALLINT,
     dtypes.Int32: duckdb_dtypes.INTEGER,
     dtypes.Int64: duckdb_dtypes.BIGINT,
-    dtypes.Int128: DuckDBPyType("INT128"),
+    dtypes.Int128: duckdb_dtypes.HUGEINT,
     dtypes.UInt8: duckdb_dtypes.UTINYINT,
     dtypes.UInt16: duckdb_dtypes.USMALLINT,
     dtypes.UInt32: duckdb_dtypes.UINTEGER,
     dtypes.UInt64: duckdb_dtypes.UBIGINT,
-    dtypes.UInt128: DuckDBPyType("UINT128"),
+    dtypes.UInt128: duckdb_dtypes.UHUGEINT,
 }
-TIME_UNIT_TO_TIMESTAMP: Mapping[TimeUnit, DuckDBPyType] = {
+TIME_UNIT_TO_TIMESTAMP: Mapping[TimeUnit, duckdb_dtypes.DuckDBPyType] = {
     "s": duckdb_dtypes.TIMESTAMP_S,
     "ms": duckdb_dtypes.TIMESTAMP_MS,
     "us": duckdb_dtypes.TIMESTAMP,
@@ -246,7 +252,7 @@ def _non_nested_native_to_narwhals_dtype(duckdb_dtype_id: str, version: Version)
 
 def narwhals_to_native_dtype(  # noqa: PLR0912, C901
     dtype: IntoDType, version: Version, deferred_time_zone: DeferredTimeZone
-) -> DuckDBPyType:
+) -> duckdb_dtypes.DuckDBPyType:
     dtypes = version.dtypes
     base_type = dtype.base_type()
     if duckdb_type := NW_TO_DUCKDB_DTYPES.get(base_type):
@@ -256,7 +262,7 @@ def narwhals_to_native_dtype(  # noqa: PLR0912, C901
             msg = "Converting to Enum is not supported in narwhals.stable.v1"
             raise NotImplementedError(msg)
         if isinstance(dtype, dtypes.Enum):
-            return DuckDBPyType(f"ENUM{dtype.categories!r}")
+            return duckdb_dtypes.DuckDBPyType(f"ENUM{dtype.categories!r}")
         msg = "Can not cast / initialize Enum without categories present"
         raise ValueError(msg)
     if isinstance_or_issubclass(dtype, dtypes.Datetime):
@@ -291,7 +297,7 @@ def narwhals_to_native_dtype(  # noqa: PLR0912, C901
             nw_inner = nw_inner.inner
         duckdb_inner = narwhals_to_native_dtype(nw_inner, version, deferred_time_zone)
         duckdb_shape_fmt = "".join(f"[{item}]" for item in dtype.shape)
-        return DuckDBPyType(f"{duckdb_inner}{duckdb_shape_fmt}")
+        return duckdb_dtypes.DuckDBPyType(f"{duckdb_inner}{duckdb_shape_fmt}")
     if issubclass(base_type, UNSUPPORTED_DTYPES):
         msg = f"Converting to {base_type.__name__} dtype is not supported for DuckDB."
         raise NotImplementedError(msg)
@@ -378,19 +384,39 @@ def function(name: str, *args: Expression) -> Expression:
     if name == "isnull":
         return args[0].isnull()
     if name == "count_distinct":
-        try:
-            from duckdb import SQLExpression
-        except ModuleNotFoundError as exc:  # pragma: no cover
-            msg = f"DuckDB>=1.3.0 is required for this operation. Found: DuckDB {duckdb.__version__}"
-            raise NotImplementedError(msg) from exc
-        return SQLExpression(f"count(distinct {args[0]})")
+        return sql_expression(f"count(distinct {args[0]})")
     return F(name, *args)
 
 
 def sql_expression(expr: str) -> Expression:
     try:
         from duckdb import SQLExpression
-    except ModuleNotFoundError as exc:  # pragma: no cover
+    except ImportError as exc:  # pragma: no cover
         msg = f"DuckDB>=1.3.0 is required for this operation. Found: DuckDB {duckdb.__version__}"
         raise NotImplementedError(msg) from exc
     return SQLExpression(expr)
+
+
+__all__ = [  # names exported by `from narwhals._duckdb.utils import *`
+    "UNITS_DICT",
+    "DeferredTimeZone",
+    "F",
+    "catch_duckdb_exception",
+    "col",
+    "concat_str",
+    "duckdb_dtypes",
+    "evaluate_exprs",
+    "fetch_rel_time_zone",
+    "function",
+    "generate_order_by_sql",
+    "generate_partition_by_sql",
+    "join_column_names",
+    "lambda_expr",
+    "lit",
+    "narwhals_to_native_dtype",
+    "native_to_narwhals_dtype",
+    "parse_into_expression",
+    "sql_expression",
+    "when",
+    "window_expression",
+]
diff --git a/narwhals/_utils.py b/narwhals/_utils.py
index 1bd94fe547..3647717812 100644
--- a/narwhals/_utils.py
+++ b/narwhals/_utils.py
@@ -604,7 +604,7 @@ def _backend_version(self) -> tuple[int, ...]:
     Implementation.PYSPARK_CONNECT: (3, 5),
     Implementation.POLARS: (0, 20, 4),
     Implementation.DASK: (2024, 8),
-    Implementation.DUCKDB: (1,),
+    Implementation.DUCKDB: (1, 1),
     Implementation.IBIS: (6,),
     Implementation.SQLFRAME: (3, 22, 0),
 }
diff --git a/pyproject.toml b/pyproject.toml
index eb1b3bdc4f..da62cde2e4 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -42,7 +42,7 @@ pyspark = ["pyspark>=3.5.0"]
 pyspark-connect = ["pyspark[connect]>=3.5.0"]
 polars = ["polars>=0.20.4"]
 dask = ["dask[dataframe]>=2024.8"]
-duckdb = ["duckdb>=1.0"]
+duckdb = ["duckdb>=1.1"]
 ibis = ["ibis-framework>=6.0.0", "rich", "packaging", "pyarrow_hotfix"]
 sqlframe = ["sqlframe>=3.22.0,!=3.39.3"]
 
diff --git a/tests/dtypes_test.py b/tests/dtypes_test.py
index 4ff9134c21..d63384647f 100644
--- a/tests/dtypes_test.py
+++ b/tests/dtypes_test.py
@@ -517,6 +517,7 @@ def test_datetime_w_tz_duckdb() -> None:
     assert result["b"] == nw.List(nw.List(nw.Datetime("us", "Asia/Kathmandu")))
 
 
+@pytest.mark.slow
 def test_datetime_w_tz_pyspark() -> None:  # pragma: no cover
     pytest.importorskip("pyspark")
     session = pyspark_session()
diff --git a/tests/expr_and_series/dt/convert_time_zone_test.py b/tests/expr_and_series/dt/convert_time_zone_test.py
index 40e5f08d77..65d1a6e3b6 100644
--- a/tests/expr_and_series/dt/convert_time_zone_test.py
+++ b/tests/expr_and_series/dt/convert_time_zone_test.py
@@ -154,6 +154,7 @@ def test_convert_time_zone_to_connection_tz_duckdb() -> None:
         )
 
 
+@pytest.mark.slow
 def test_convert_time_zone_to_connection_tz_pyspark() -> None:  # pragma: no cover
     pytest.importorskip("pyspark")
 
diff --git a/tests/expr_and_series/dt/replace_time_zone_test.py b/tests/expr_and_series/dt/replace_time_zone_test.py
index d0e90cdadd..1c9dff7d59 100644
--- a/tests/expr_and_series/dt/replace_time_zone_test.py
+++ b/tests/expr_and_series/dt/replace_time_zone_test.py
@@ -142,6 +142,7 @@ def test_replace_time_zone_to_connection_tz_duckdb() -> None:
         )
 
 
+@pytest.mark.slow
 def test_replace_time_zone_to_connection_tz_pyspark() -> None:  # pragma: no cover
     pytest.importorskip("pyspark")
 
diff --git a/tests/expr_and_series/fill_nan_test.py b/tests/expr_and_series/fill_nan_test.py
index b1c2b4c228..82ba374013 100644
--- a/tests/expr_and_series/fill_nan_test.py
+++ b/tests/expr_and_series/fill_nan_test.py
@@ -7,7 +7,7 @@
     modin_constructor,
     pandas_constructor,
 )
-from tests.utils import Constructor, ConstructorEager, assert_equal_data
+from tests.utils import PANDAS_VERSION, Constructor, ConstructorEager, assert_equal_data
 
 NON_NULLABLE_CONSTRUCTORS = [
     pandas_constructor,
@@ -31,6 +31,9 @@ def test_fill_nan(constructor: Constructor) -> None:
         # no nan vs null distinction
         expected = {"float": [-1.0, 1.0, 3.0], "float_na": [3.0, 1.0, 3.0]}
         assert result.lazy().collect()["float_na"].null_count() == 0
+    elif "pandas" in str(constructor) and PANDAS_VERSION >= (3,):
+        expected = {"float": [-1.0, 1.0, None], "float_na": [None, 1.0, None]}
+        assert result.lazy().collect()["float_na"].null_count() == 2
     else:
         expected = {"float": [-1.0, 1.0, None], "float_na": [3.0, 1.0, None]}
         assert result.lazy().collect()["float_na"].null_count() == 1
@@ -46,5 +49,7 @@ def test_fill_nan_series(constructor_eager: ConstructorEager) -> None:
     if any(constructor_eager is c for c in NON_NULLABLE_CONSTRUCTORS):
         # no nan vs null distinction
         assert_equal_data({"a": result}, {"a": [999.0, 1.0, 999.0]})
+    elif "pandas" in str(constructor_eager) and PANDAS_VERSION >= (3,):
+        assert_equal_data({"a": result}, {"a": [None, 1.0, None]})
     else:
         assert_equal_data({"a": result}, {"a": [999.0, 1.0, None]})
diff --git a/tests/expr_and_series/fill_null_test.py b/tests/expr_and_series/fill_null_test.py
index 014e92ccfb..139c862fff 100644
--- a/tests/expr_and_series/fill_null_test.py
+++ b/tests/expr_and_series/fill_null_test.py
@@ -34,6 +34,8 @@ def test_fill_null(constructor: Constructor) -> None:
 
 
 def test_fill_null_w_aggregate(constructor: Constructor) -> None:
+    if "duckdb" in str(constructor) and DUCKDB_VERSION < (1, 3):
+        pytest.skip()
     data = {"a": [0.5, None, 2.0, 3.0, 4.5], "b": ["xx", "yy", "zz", None, "yy"]}
     df = nw.from_native(constructor(data))
 
diff --git a/tests/expr_and_series/first_last_test.py b/tests/expr_and_series/first_last_test.py
index 67a4ff843a..c1e35673ec 100644
--- a/tests/expr_and_series/first_last_test.py
+++ b/tests/expr_and_series/first_last_test.py
@@ -5,7 +5,13 @@
 import pytest
 
 import narwhals as nw
-from tests.utils import POLARS_VERSION, PYARROW_VERSION, Constructor, assert_equal_data
+from tests.utils import (
+    DUCKDB_VERSION,
+    POLARS_VERSION,
+    PYARROW_VERSION,
+    Constructor,
+    assert_equal_data,
+)
 
 if TYPE_CHECKING:
     from narwhals.typing import PythonLiteral
@@ -95,6 +101,8 @@ def test_first_expr_over_order_by(
     if "ibis" in str(constructor):
         # https://github.com/ibis-project/ibis/issues/11656
         request.applymarker(pytest.mark.xfail)
+    if "duckdb" in str(constructor) and DUCKDB_VERSION < (1, 3):
+        pytest.skip()
     frame = nw.from_native(
         constructor(
             {
@@ -139,6 +147,8 @@ def test_first_expr_over_order_by_partition_by(
     if "ibis" in str(constructor):
         # https://github.com/ibis-project/ibis/issues/11656
         request.applymarker(pytest.mark.xfail)
+    if "duckdb" in str(constructor) and DUCKDB_VERSION < (1, 3):
+        pytest.skip()
     frame = nw.from_native(
         constructor(
             {"a": [1, 1, 2], "b": [4, 5, 6], "c": [None, 7, 8], "i": [1, None, 2]}
diff --git a/tests/expr_and_series/is_close_test.py b/tests/expr_and_series/is_close_test.py
index c5bb4df551..0835357ceb 100644
--- a/tests/expr_and_series/is_close_test.py
+++ b/tests/expr_and_series/is_close_test.py
@@ -6,7 +6,7 @@
 
 from __future__ import annotations
 
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, Any
 
 import pytest
 
@@ -18,7 +18,7 @@
     modin_constructor,
     pandas_constructor,
 )
-from tests.utils import Constructor, ConstructorEager, assert_equal_data
+from tests.utils import PANDAS_VERSION, Constructor, ConstructorEager, assert_equal_data
 
 if TYPE_CHECKING:
     from narwhals.typing import NumericLiteral
@@ -32,7 +32,7 @@
 NULL_PLACEHOLDER, NAN_PLACEHOLDER = 9999.0, -1.0
 INF_POS, INF_NEG = float("inf"), float("-inf")
 
-data = {
+data: dict[str, Any] = {
     "x": [1.001, NULL_PLACEHOLDER, NAN_PLACEHOLDER, INF_POS, INF_NEG, INF_POS],
     "y": [1.005, NULL_PLACEHOLDER, NAN_PLACEHOLDER, INF_POS, 3.0, INF_NEG],
     "non_numeric": list("number"),
@@ -109,7 +109,7 @@ def test_is_close_series_with_series(
     rel_tol: float,
     *,
     nans_equal: bool,
-    expected: list[float],
+    expected: list[Any],
 ) -> None:
     df = nw.from_native(constructor_eager(data), eager_only=True)
     x, y = df["x"], df["y"]
@@ -122,6 +122,11 @@ def test_is_close_series_with_series(
 
     if constructor_eager in NON_NULLABLE_CONSTRUCTORS:
         expected = [v if v is not None else nans_equal for v in expected]
+    elif "pandas" in str(constructor_eager) and PANDAS_VERSION >= (3,):
+        expected = [
+            v if data["y"][i] not in {NULL_PLACEHOLDER, NAN_PLACEHOLDER} else None
+            for i, v in enumerate(expected)
+        ]
     assert_equal_data({"result": result}, {"result": expected})
 
 
@@ -133,7 +138,7 @@ def test_is_close_series_with_scalar(
     rel_tol: float,
     *,
     nans_equal: bool,
-    expected: list[float],
+    expected: list[Any],
 ) -> None:
     df = nw.from_native(constructor_eager(data), eager_only=True)
     y = df["y"]
@@ -145,6 +150,11 @@ def test_is_close_series_with_scalar(
 
     if constructor_eager in NON_NULLABLE_CONSTRUCTORS:
         expected = [v if v is not None else False for v in expected]
+    elif "pandas" in str(constructor_eager) and PANDAS_VERSION >= (3,):
+        expected = [
+            v if data["y"][i] not in {NULL_PLACEHOLDER, NAN_PLACEHOLDER} else None
+            for i, v in enumerate(expected)
+        ]
     assert_equal_data({"result": result}, {"result": expected})
 
 
@@ -157,7 +167,7 @@ def test_is_close_expr_with_expr(
     rel_tol: float,
     *,
     nans_equal: bool,
-    expected: list[float],
+    expected: list[Any],
 ) -> None:
     if "sqlframe" in str(constructor):
         # TODO(FBruzzesi): Figure out a MRE and report upstream
@@ -185,6 +195,11 @@ def test_is_close_expr_with_expr(
     )
     if constructor in NON_NULLABLE_CONSTRUCTORS:
         expected = [v if v is not None else nans_equal for v in expected]
+    elif "pandas" in str(constructor) and PANDAS_VERSION >= (3,):
+        expected = [
+            v if data["y"][i] not in {NULL_PLACEHOLDER, NAN_PLACEHOLDER} else None
+            for i, v in enumerate(expected)
+        ]
     assert_equal_data(result, {"idx": data["idx"], "result": expected})
 
 
@@ -197,7 +212,7 @@ def test_is_close_expr_with_scalar(
     rel_tol: float,
     *,
     nans_equal: bool,
-    expected: list[float],
+    expected: list[Any],
 ) -> None:
     if "sqlframe" in str(constructor):
         # TODO(FBruzzesi): Figure out a MRE and report upstream
@@ -221,4 +236,9 @@ def test_is_close_expr_with_scalar(
     )
     if constructor in NON_NULLABLE_CONSTRUCTORS:
         expected = [v if v is not None else False for v in expected]
+    elif "pandas" in str(constructor) and PANDAS_VERSION >= (3,):
+        expected = [
+            v if data["y"][i] not in {NULL_PLACEHOLDER, NAN_PLACEHOLDER} else None
+            for i, v in enumerate(expected)
+        ]
     assert_equal_data(result, {"idx": data["idx"], "result": expected})
diff --git a/tests/expr_and_series/is_nan_test.py b/tests/expr_and_series/is_nan_test.py
index 8027065a35..92af6446d0 100644
--- a/tests/expr_and_series/is_nan_test.py
+++ b/tests/expr_and_series/is_nan_test.py
@@ -14,7 +14,7 @@
     modin_constructor,
     pandas_constructor,
 )
-from tests.utils import Constructor, ConstructorEager, assert_equal_data
+from tests.utils import PANDAS_VERSION, Constructor, ConstructorEager, assert_equal_data
 
 NON_NULLABLE_CONSTRUCTORS = [
     pandas_constructor,
@@ -43,6 +43,13 @@ def test_nan(constructor: Constructor) -> None:
             "float": [False, False, True],
             "float_na": [True, False, True],
         }
+    elif "pandas" in str(constructor) and PANDAS_VERSION >= (3,):
+        # NaN values are coerced into NA for nullable datatypes by default
+        expected = {
+            "int": [False, False, None],
+            "float": [False, False, None],
+            "float_na": [None, False, None],
+        }
     else:
         # Null are preserved and should be differentiated for nullable datatypes
         expected = {
@@ -82,6 +89,13 @@ def test_nan_series(constructor_eager: ConstructorEager) -> None:
             "float": [False, False, True],
             "float_na": [True, False, True],
         }
+    elif "pandas" in str(constructor_eager) and PANDAS_VERSION >= (3,):
+        # NaN values are coerced into NA for nullable datatypes by default
+        expected = {
+            "int": [False, False, None],
+            "float": [False, False, None],
+            "float_na": [None, False, None],
+        }
     else:
         # Null are preserved and should be differentiated for nullable datatypes
         expected = {
diff --git a/tests/expr_and_series/list/unique_test.py b/tests/expr_and_series/list/unique_test.py
index 057843d9e3..3d7c9dd039 100644
--- a/tests/expr_and_series/list/unique_test.py
+++ b/tests/expr_and_series/list/unique_test.py
@@ -5,6 +5,7 @@
 import pytest
 
 import narwhals as nw
+from tests.utils import DUCKDB_VERSION
 
 if TYPE_CHECKING:
     from tests.utils import Constructor, ConstructorEager
@@ -19,6 +20,8 @@ def test_unique_expr(request: pytest.FixtureRequest, constructor: Constructor) -
         for backend in ("dask", "modin", "cudf", "pyarrow", "pandas")
     ):
         request.applymarker(pytest.mark.xfail)
+    if "duckdb" in str(constructor) and DUCKDB_VERSION < (1, 3):
+        pytest.skip()
     result = (
         nw.from_native(constructor(data))
         .select(nw.col("a").cast(nw.List(nw.Int32())).list.unique())
diff --git a/tests/expr_and_series/n_unique_test.py b/tests/expr_and_series/n_unique_test.py
index 3b9e75d343..66af2ac705 100644
--- a/tests/expr_and_series/n_unique_test.py
+++ b/tests/expr_and_series/n_unique_test.py
@@ -3,12 +3,14 @@
 import pytest
 
 import narwhals as nw
-from tests.utils import Constructor, ConstructorEager, assert_equal_data
+from tests.utils import DUCKDB_VERSION, Constructor, ConstructorEager, assert_equal_data
 
 data = {"a": [1.0, None, None, 3.0], "b": [1.0, None, 4.0, 5.0]}
 
 
 def test_n_unique(constructor: Constructor) -> None:
+    if "duckdb" in str(constructor) and DUCKDB_VERSION < (1, 3):
+        pytest.skip()
     df = nw.from_native(constructor(data))
     result = df.select(nw.all().n_unique())
     expected = {"a": [3], "b": [4]}
@@ -22,6 +24,8 @@ def test_n_unique_over(constructor: Constructor, request: pytest.FixtureRequest)
     if "pyspark" in str(constructor) and "sqlframe" not in str(constructor):
         # "Distinct window functions are not supported"
         request.applymarker(pytest.mark.xfail)
+    if "duckdb" in str(constructor) and DUCKDB_VERSION < (1, 3):
+        pytest.skip()
     data = {"a": [1, None, None, 1, 2, 2, 2, None, 3], "b": [1, 1, 1, 1, 1, 1, 1, 2, 2]}
     df = nw.from_native(constructor(data))
     result = df.with_columns(
diff --git a/tests/expr_and_series/over_test.py b/tests/expr_and_series/over_test.py
index b32368f6e4..91612d1d2a 100644
--- a/tests/expr_and_series/over_test.py
+++ b/tests/expr_and_series/over_test.py
@@ -454,6 +454,8 @@ def test_over_quantile(constructor: Constructor, request: pytest.FixtureRequest)
     if any(x in str(constructor) for x in ("pyarrow_table", "pyspark", "cudf")):
         # cudf: https://github.com/rapidsai/cudf/issues/18159
         request.applymarker(pytest.mark.xfail)
+    if "duckdb" in str(constructor) and DUCKDB_VERSION < (1, 3):
+        pytest.skip()
 
     data = {"a": [1, 2, 3, 4, 5, 6], "b": ["x", "x", "x", "y", "y", "y"]}
 
diff --git a/tests/expr_and_series/reduction_test.py b/tests/expr_and_series/reduction_test.py
index 7d5149e551..2c672ce341 100644
--- a/tests/expr_and_series/reduction_test.py
+++ b/tests/expr_and_series/reduction_test.py
@@ -96,9 +96,10 @@ def test_empty_scalar_reduction_with_columns(
 ) -> None:
     if "duckdb" in str(constructor) and DUCKDB_VERSION < (1, 3):
         pytest.skip()
-    if any(
-        x in str(constructor) for x in ("duckdb", "sqlframe", "ibis")
-    ) and DUCKDB_VERSION >= (1, 4):
+    if any(x in str(constructor) for x in ("sqlframe", "ibis")) and DUCKDB_VERSION >= (
+        1,
+        4,
+    ):
         request.applymarker(pytest.mark.xfail)
     from itertools import chain
 
diff --git a/tests/expr_and_series/skew_test.py b/tests/expr_and_series/skew_test.py
index 8ac8820711..6bfd8f0d68 100644
--- a/tests/expr_and_series/skew_test.py
+++ b/tests/expr_and_series/skew_test.py
@@ -3,7 +3,7 @@
 import pytest
 
 import narwhals as nw
-from tests.utils import Constructor, ConstructorEager, assert_equal_data
+from tests.utils import DUCKDB_VERSION, Constructor, ConstructorEager, assert_equal_data
 
 
 @pytest.mark.parametrize(
@@ -44,6 +44,8 @@ def test_skew_expr(
     if "ibis" in str(constructor):
         # https://github.com/ibis-project/ibis/issues/11176
         request.applymarker(pytest.mark.xfail)
+    if "duckdb" in str(constructor) and DUCKDB_VERSION < (1, 3):
+        pytest.skip()
 
     if "pyspark" in str(constructor) and int(request.node.callspec.id[-1]) == 0:
         # Can not infer schema from empty dataset.
diff --git a/tests/expr_and_series/str/zfill_test.py b/tests/expr_and_series/str/zfill_test.py
index 7ecc3d36fd..78f59cecb1 100644
--- a/tests/expr_and_series/str/zfill_test.py
+++ b/tests/expr_and_series/str/zfill_test.py
@@ -17,7 +17,7 @@
 
 
 def test_str_zfill(request: pytest.FixtureRequest, constructor: Constructor) -> None:
-    if uses_pyarrow_backend(constructor):
+    if uses_pyarrow_backend(constructor) and PANDAS_VERSION < (3,):
         reason = (
             "pandas with pyarrow backend doesn't support str.zfill, see "
             "https://github.com/pandas-dev/pandas/issues/61485"
@@ -43,7 +43,7 @@ def test_str_zfill(request: pytest.FixtureRequest, constructor: Constructor) ->
 def test_str_zfill_series(
     request: pytest.FixtureRequest, constructor_eager: ConstructorEager
 ) -> None:
-    if uses_pyarrow_backend(constructor_eager):
+    if uses_pyarrow_backend(constructor_eager) and PANDAS_VERSION < (3,):
         reason = (
             "pandas with pyarrow backend doesn't support str.zfill, see "
             "https://github.com/pandas-dev/pandas/issues/61485"
diff --git a/tests/expr_and_series/unary_test.py b/tests/expr_and_series/unary_test.py
index 3af34b6333..038136a902 100644
--- a/tests/expr_and_series/unary_test.py
+++ b/tests/expr_and_series/unary_test.py
@@ -5,12 +5,14 @@
 import pytest
 
 import narwhals as nw
-from tests.utils import Constructor, ConstructorEager, assert_equal_data
+from tests.utils import DUCKDB_VERSION, Constructor, ConstructorEager, assert_equal_data
 
 
 def test_unary(constructor: Constructor, request: pytest.FixtureRequest) -> None:
     if "ibis" in str(constructor):
         request.applymarker(pytest.mark.xfail)
+    if "duckdb" in str(constructor) and DUCKDB_VERSION < (1, 3):
+        pytest.skip()
 
     data = {"a": [1, 3, 2], "b": [4, 4, 6], "c": [7.0, 8.0, None], "z": [7.0, 8.0, 9.0]}
     result = nw.from_native(constructor(data)).select(
@@ -73,6 +75,8 @@ def test_unary_two_elements(
 ) -> None:
     if "ibis" in str(constructor):
         request.applymarker(pytest.mark.xfail)
+    if "duckdb" in str(constructor) and DUCKDB_VERSION < (1, 3):
+        pytest.skip()
     data = {"a": [1, 2], "b": [2, 10], "c": [2.0, None]}
     result = nw.from_native(constructor(data)).select(
         a_nunique=nw.col("a").n_unique(),
@@ -122,6 +126,8 @@ def test_unary_one_element(
         request.applymarker(pytest.mark.xfail)
     if "ibis" in str(constructor):
         request.applymarker(pytest.mark.xfail)
+    if "duckdb" in str(constructor) and DUCKDB_VERSION < (1, 3):
+        pytest.skip()
     data = {"a": [1], "b": [2], "c": [None]}
     # Dask runs into a divide by zero RuntimeWarning for 1 element skew.
     context = (
diff --git a/tests/expr_and_series/when_test.py b/tests/expr_and_series/when_test.py
index cfcaad680c..46b07c66d7 100644
--- a/tests/expr_and_series/when_test.py
+++ b/tests/expr_and_series/when_test.py
@@ -7,7 +7,7 @@
 
 import narwhals as nw
 from narwhals.exceptions import InvalidOperationError, MultiOutputExpressionError
-from tests.utils import Constructor, ConstructorEager, assert_equal_data
+from tests.utils import DUCKDB_VERSION, Constructor, ConstructorEager, assert_equal_data
 
 if TYPE_CHECKING:
     from narwhals.typing import _1DArray
@@ -198,6 +198,8 @@ def test_when_then_otherwise_aggregate_with_columns(
     expected: list[int],
     constructor: Constructor,
 ) -> None:
+    if "duckdb" in str(constructor) and DUCKDB_VERSION < (1, 3):
+        pytest.skip()
     df = nw.from_native(constructor({"a": [1, 2, 3], "b": [4, 5, 6]}))
     expr = nw.when(condition).then(then).otherwise(otherwise)
     result = df.with_columns(a_when=expr)
diff --git a/tests/frame/group_by_test.py b/tests/frame/group_by_test.py
index 20e8153c9e..0f99ca583b 100644
--- a/tests/frame/group_by_test.py
+++ b/tests/frame/group_by_test.py
@@ -13,6 +13,7 @@
 import narwhals as nw
 from narwhals.exceptions import DuplicateError, InvalidOperationError
 from tests.utils import (
+    DUCKDB_VERSION,
     PANDAS_VERSION,
     POLARS_VERSION,
     PYARROW_VERSION,
@@ -134,6 +135,8 @@ def test_group_by_depth_1_agg(
         pytest.skip(
             "Known issue with variance calculation in pandas 2.0.x with pyarrow backend in groupby operations"
         )
+    if "duckdb" in str(constructor) and DUCKDB_VERSION < (1, 3):
+        pytest.skip()
     data = {"a": [1, 1, 1, 2], "b": [1, None, 2, 3]}
     expr = getattr(nw.col("b"), attr)()
     result = nw.from_native(constructor(data)).group_by("a").agg(expr).sort("a")
@@ -204,6 +207,8 @@ def test_group_by_median(constructor: Constructor) -> None:
 
 
 def test_group_by_n_unique_w_missing(constructor: Constructor) -> None:
+    if "duckdb" in str(constructor) and DUCKDB_VERSION < (1, 3):
+        pytest.skip()
     data = {"a": [1, 1, 2], "b": [4, None, 5], "c": [None, None, 7], "d": [1, 1, 3]}
     result = (
         nw.from_native(constructor(data))
@@ -391,6 +396,8 @@ def test_all_kind_of_aggs(
         pytest.skip(
             "Pandas < 1.4.0 does not support multiple aggregations with the same column"
         )
+    if "duckdb" in str(constructor) and DUCKDB_VERSION < (1, 3):
+        pytest.skip()
     df = nw.from_native(constructor({"a": [1, 1, 1, 2, 2, 2], "b": [4, 5, 6, 0, 5, 5]}))
     result = (
         df.group_by("a")
@@ -530,6 +537,8 @@ def test_group_by_raise_if_not_preserves_length(
 
 
 def test_group_by_window(constructor: Constructor) -> None:
+    if "duckdb" in str(constructor) and DUCKDB_VERSION < (1, 3):
+        pytest.skip()
     data = {"a": [1, 2, 2, None], "b": [1, 1, 2, 2], "x": [1, 2, 3, 4]}
     df = nw.from_native(constructor(data))
     result = (
diff --git a/tests/frame/top_k_test.py b/tests/frame/top_k_test.py
index d46961b21f..d0ba228df0 100644
--- a/tests/frame/top_k_test.py
+++ b/tests/frame/top_k_test.py
@@ -3,13 +3,15 @@
 import pytest
 
 import narwhals as nw
-from tests.utils import POLARS_VERSION, Constructor, assert_equal_data
+from tests.utils import DUCKDB_VERSION, POLARS_VERSION, Constructor, assert_equal_data
 
 
 def test_top_k(constructor: Constructor) -> None:
     if "polars" in str(constructor) and POLARS_VERSION < (1, 0):
         # old polars versions do not sort nulls last
         pytest.skip()
+    if "duckdb" in str(constructor) and DUCKDB_VERSION < (1, 3):
+        pytest.skip()
     data = {"a": ["a", "f", "a", "d", "b", "c"], "b c": [None, None, 2, 3, 6, 1]}
     df = nw.from_native(constructor(data))
     result = df.top_k(4, by="b c")
@@ -25,6 +27,8 @@ def test_top_k_by_multiple(constructor: Constructor) -> None:
     if "polars" in str(constructor) and POLARS_VERSION < (0, 20, 22):
         # bug in old version
         pytest.skip()
+    if "duckdb" in str(constructor) and DUCKDB_VERSION < (1, 3):
+        pytest.skip()
     data = {
         "a": ["a", "f", "a", "d", "b", "c"],
         "b": [2, 2, 2, 3, 1, 1],
diff --git a/tests/frame/unique_test.py b/tests/frame/unique_test.py
index c80e4a677a..0aa3f175f0 100644
--- a/tests/frame/unique_test.py
+++ b/tests/frame/unique_test.py
@@ -47,6 +47,8 @@ def test_unique_first_last(
     if "dask" in str(constructor):
         # https://github.com/dask/dask/issues/12073
         request.applymarker(pytest.mark.xfail)
+    if "duckdb" in str(constructor) and DUCKDB_VERSION < (1, 3):
+        pytest.skip()
     data = {"i": [0, 1, None, 2], "a": [1, 3, 2, 1], "b": [4, 4, 4, 6]}
     df_raw = constructor(data)
     df = nw.from_native(df_raw)
@@ -70,6 +72,8 @@ def test_unique_first_last_no_subset(
     keep: Literal["first", "last"],
     expected: dict[str, list[float]],
 ) -> None:
+    if "duckdb" in str(constructor) and DUCKDB_VERSION < (1, 3):
+        pytest.skip()
     data = {"i": [0, 1, 1, 2], "b": [4, 4, 4, 6]}
     df_raw = constructor(data)
     df = nw.from_native(df_raw)
@@ -139,6 +143,8 @@ def test_unique_invalid_keep(constructor: Constructor) -> None:
 
 @pytest.mark.filterwarnings("ignore:.*backwards-compatibility:UserWarning")
 def test_unique_none(constructor: Constructor) -> None:
+    if "duckdb" in str(constructor) and DUCKDB_VERSION < (1, 3):
+        pytest.skip()
     df_raw = constructor(data)
     df = nw.from_native(df_raw)
 
@@ -154,6 +160,8 @@ def test_unique_3069(constructor: Constructor, request: pytest.FixtureRequest) -
     if "ibis" in str(constructor):
         # https://github.com/ibis-project/ibis/issues/11591
         request.applymarker(pytest.mark.xfail)
+    if "duckdb" in str(constructor) and DUCKDB_VERSION < (1, 3):
+        pytest.skip()
     data = {"name": ["a", "b", "c"], "group": ["d", "e", "f"], "value": [1, 2, 3]}
     df = nw.from_native(constructor(data))
     unique_to_get = "group"
diff --git a/tests/frame/with_row_index_test.py b/tests/frame/with_row_index_test.py
index c9d5f59fe0..85f3447e6e 100644
--- a/tests/frame/with_row_index_test.py
+++ b/tests/frame/with_row_index_test.py
@@ -5,7 +5,13 @@
 import pytest
 
 import narwhals as nw
-from tests.utils import PANDAS_VERSION, Constructor, ConstructorEager, assert_equal_data
+from tests.utils import (
+    DUCKDB_VERSION,
+    PANDAS_VERSION,
+    Constructor,
+    ConstructorEager,
+    assert_equal_data,
+)
 
 if TYPE_CHECKING:
     from collections.abc import Sequence
@@ -36,6 +42,8 @@ def test_with_row_index_lazy(
     ):  # pragma: no cover
         reason = "ValueError: first not supported for non-numeric data."
         pytest.skip(reason=reason)
+    if "duckdb" in str(constructor) and DUCKDB_VERSION < (1, 3):
+        pytest.skip()
 
     result = (
         nw.from_native(constructor(data))
diff --git a/tests/utils.py b/tests/utils.py
index e32ad0bbbd..4e06c35063 100644
--- a/tests/utils.py
+++ b/tests/utils.py
@@ -121,7 +121,7 @@ def assert_equal_data(result: Any, expected: Mapping[str, Any]) -> None:
             elif isinstance(lhs, float) and math.isnan(lhs):
                 are_equivalent_values = rhs is None or math.isnan(rhs)
             elif isinstance(rhs, float) and math.isnan(rhs):
-                are_equivalent_values = lhs is None or math.isnan(lhs)
+                are_equivalent_values = lhs is None or pd.isna(lhs) or math.isnan(lhs)
             elif lhs is None:
                 are_equivalent_values = rhs is None
             elif isinstance(lhs, list) and isinstance(rhs, list):
diff --git a/tests/v1_test.py b/tests/v1_test.py
index 5ef46423ea..37d5dc1779 100644
--- a/tests/v1_test.py
+++ b/tests/v1_test.py
@@ -34,6 +34,7 @@
 )
 from narwhals.utils import Version
 from tests.utils import (
+    DUCKDB_VERSION,
     PANDAS_VERSION,
     POLARS_VERSION,
     PYARROW_VERSION,
@@ -421,6 +422,8 @@ def test_all_horizontal() -> None:
 
 
 def test_with_row_index(constructor: Constructor) -> None:
+    if "duckdb" in str(constructor) and DUCKDB_VERSION < (1, 3):
+        pytest.skip()
     data = {"abc": ["foo", "bars"], "xyz": [100, 200], "const": [42, 42]}
 
     frame = nw_v1.from_native(constructor(data))