Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion narwhals/_duckdb/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@
import pandas as pd
import pyarrow as pa
from duckdb import Expression
from duckdb.typing import DuckDBPyType
from duckdb.sqltypes import DuckDBPyType
from typing_extensions import Self, TypeIs

from narwhals._compliant.typing import CompliantDataFrameAny
Expand Down
10 changes: 6 additions & 4 deletions narwhals/_duckdb/namespace.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@

import duckdb
from duckdb import CoalesceOperator, Expression
from duckdb.typing import BIGINT, VARCHAR

from narwhals._duckdb.dataframe import DuckDBLazyFrame
from narwhals._duckdb.expr import DuckDBExpr
Expand All @@ -16,6 +15,7 @@
DeferredTimeZone,
F,
concat_str,
duckdb_dtypes,
function,
lit,
narwhals_to_native_dtype,
Expand Down Expand Up @@ -108,9 +108,9 @@ def func(df: DuckDBLazyFrame) -> list[Expression]:
cols_separated = [
y
for x in [
(col.cast(VARCHAR),)
(col.cast(duckdb_dtypes.VARCHAR),)
if i == len(cols) - 1
else (col.cast(VARCHAR), lit(separator))
else (col.cast(duckdb_dtypes.VARCHAR), lit(separator))
for i, col in enumerate(cols)
]
for y in x
Expand All @@ -130,7 +130,9 @@ def func(cols: Iterable[Expression]) -> Expression:
cols = list(cols)
return reduce(
operator.add, (CoalesceOperator(col, lit(0)) for col in cols)
) / reduce(operator.add, (col.isnotnull().cast(BIGINT) for col in cols))
) / reduce(
operator.add, (col.isnotnull().cast(duckdb_dtypes.BIGINT) for col in cols)
)

return self._expr._from_elementwise_horizontal_op(func, *exprs)

Expand Down
25 changes: 15 additions & 10 deletions narwhals/_duckdb/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,12 @@
from typing import TYPE_CHECKING

import duckdb
import duckdb.typing as duckdb_dtypes
from duckdb import Expression
from duckdb.typing import DuckDBPyType

try:
import duckdb.sqltypes as duckdb_dtypes
except ModuleNotFoundError:
import duckdb.typing as duckdb_dtypes

from narwhals._utils import Version, isinstance_or_issubclass, zip_strict
from narwhals.exceptions import ColumnNotFoundError
Expand Down Expand Up @@ -131,7 +134,9 @@ def time_zone(self) -> str:


def native_to_narwhals_dtype(
duckdb_dtype: DuckDBPyType, version: Version, deferred_time_zone: DeferredTimeZone
duckdb_dtype: duckdb_dtypes.DuckDBPyType,
version: Version,
deferred_time_zone: DeferredTimeZone,
) -> DType:
duckdb_dtype_id = duckdb_dtype.id
dtypes = version.dtypes
Expand Down Expand Up @@ -216,7 +221,7 @@ def _non_nested_native_to_narwhals_dtype(duckdb_dtype_id: str, version: Version)


dtypes = Version.MAIN.dtypes
NW_TO_DUCKDB_DTYPES: Mapping[type[DType], DuckDBPyType] = {
NW_TO_DUCKDB_DTYPES: Mapping[type[DType], duckdb_dtypes.DuckDBPyType] = {
dtypes.Float64: duckdb_dtypes.DOUBLE,
dtypes.Float32: duckdb_dtypes.FLOAT,
dtypes.Binary: duckdb_dtypes.BLOB,
Expand All @@ -228,14 +233,14 @@ def _non_nested_native_to_narwhals_dtype(duckdb_dtype_id: str, version: Version)
dtypes.Int16: duckdb_dtypes.SMALLINT,
dtypes.Int32: duckdb_dtypes.INTEGER,
dtypes.Int64: duckdb_dtypes.BIGINT,
dtypes.Int128: DuckDBPyType("INT128"),
dtypes.Int128: duckdb_dtypes.DuckDBPyType("INT128"),
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we use the predefined DuckDB type constant here instead of constructing the type from its string name:

Suggested change
dtypes.Int128: duckdb_dtypes.DuckDBPyType("INT128"),
dtypes.Int128: duckdb_dtypes.HUGEINT,

(and likewise `dtypes.UInt128: duckdb_dtypes.UHUGEINT` for the unsigned entry below)?

dtypes.UInt8: duckdb_dtypes.UTINYINT,
dtypes.UInt16: duckdb_dtypes.USMALLINT,
dtypes.UInt32: duckdb_dtypes.UINTEGER,
dtypes.UInt64: duckdb_dtypes.UBIGINT,
dtypes.UInt128: DuckDBPyType("UINT128"),
dtypes.UInt128: duckdb_dtypes.DuckDBPyType("UINT128"),
}
TIME_UNIT_TO_TIMESTAMP: Mapping[TimeUnit, DuckDBPyType] = {
TIME_UNIT_TO_TIMESTAMP: Mapping[TimeUnit, duckdb_dtypes.DuckDBPyType] = {
"s": duckdb_dtypes.TIMESTAMP_S,
"ms": duckdb_dtypes.TIMESTAMP_MS,
"us": duckdb_dtypes.TIMESTAMP,
Expand All @@ -246,7 +251,7 @@ def _non_nested_native_to_narwhals_dtype(duckdb_dtype_id: str, version: Version)

def narwhals_to_native_dtype( # noqa: PLR0912, C901
dtype: IntoDType, version: Version, deferred_time_zone: DeferredTimeZone
) -> DuckDBPyType:
) -> duckdb_dtypes.DuckDBPyType:
dtypes = version.dtypes
base_type = dtype.base_type()
if duckdb_type := NW_TO_DUCKDB_DTYPES.get(base_type):
Expand All @@ -256,7 +261,7 @@ def narwhals_to_native_dtype( # noqa: PLR0912, C901
msg = "Converting to Enum is not supported in narwhals.stable.v1"
raise NotImplementedError(msg)
if isinstance(dtype, dtypes.Enum):
return DuckDBPyType(f"ENUM{dtype.categories!r}")
return duckdb_dtypes.DuckDBPyType(f"ENUM{dtype.categories!r}")
msg = "Can not cast / initialize Enum without categories present"
raise ValueError(msg)
if isinstance_or_issubclass(dtype, dtypes.Datetime):
Expand Down Expand Up @@ -291,7 +296,7 @@ def narwhals_to_native_dtype( # noqa: PLR0912, C901
nw_inner = nw_inner.inner
duckdb_inner = narwhals_to_native_dtype(nw_inner, version, deferred_time_zone)
duckdb_shape_fmt = "".join(f"[{item}]" for item in dtype.shape)
return DuckDBPyType(f"{duckdb_inner}{duckdb_shape_fmt}")
return duckdb_dtypes.DuckDBPyType(f"{duckdb_inner}{duckdb_shape_fmt}")
if issubclass(base_type, UNSUPPORTED_DTYPES):
msg = f"Converting to {base_type.__name__} dtype is not supported for DuckDB."
raise NotImplementedError(msg)
Expand Down
7 changes: 7 additions & 0 deletions tests/expr_and_series/is_nan_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,13 @@ def test_nan_series(constructor_eager: ConstructorEager) -> None:
"float": [False, False, True],
"float_na": [True, False, True],
}
elif "pandas" in str(constructor_eager) and PANDAS_VERSION >= (3,):
# NaN values are coerced into NA for nullable datatypes by default
expected = {
"int": [False, False, None],
"float": [False, False, None],
"float_na": [None, False, None],
}
else:
# Nulls are preserved and should be differentiated for nullable datatypes
expected = {
Expand Down
4 changes: 2 additions & 2 deletions tests/expr_and_series/str/zfill_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@


def test_str_zfill(request: pytest.FixtureRequest, constructor: Constructor) -> None:
if uses_pyarrow_backend(constructor):
if uses_pyarrow_backend(constructor) and PANDAS_VERSION < (3,):
reason = (
"pandas with pyarrow backend doesn't support str.zfill, see "
"https://github.com/pandas-dev/pandas/issues/61485"
Expand All @@ -43,7 +43,7 @@ def test_str_zfill(request: pytest.FixtureRequest, constructor: Constructor) ->
def test_str_zfill_series(
request: pytest.FixtureRequest, constructor_eager: ConstructorEager
) -> None:
if uses_pyarrow_backend(constructor_eager):
if uses_pyarrow_backend(constructor_eager) and PANDAS_VERSION < (3,):
reason = (
"pandas with pyarrow backend doesn't support str.zfill, see "
"https://github.com/pandas-dev/pandas/issues/61485"
Expand Down
Loading