Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/api-reference/dtypes.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
- Unknown
- UnsignedIntegerType
- Time
- Binary
show_root_heading: false
show_source: false
show_bases: false
2 changes: 2 additions & 0 deletions narwhals/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from narwhals.dataframe import DataFrame
from narwhals.dataframe import LazyFrame
from narwhals.dtypes import Array
from narwhals.dtypes import Binary
from narwhals.dtypes import Boolean
from narwhals.dtypes import Categorical
from narwhals.dtypes import Date
Expand Down Expand Up @@ -85,6 +86,7 @@

__all__ = [
"Array",
"Binary",
"Boolean",
"Categorical",
"DataFrame",
Expand Down
4 changes: 4 additions & 0 deletions narwhals/_arrow/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,8 @@ def native_to_narwhals_dtype(dtype: pa.DataType, version: Version) -> DType:
return dtypes.Decimal()
if pa.types.is_time32(dtype) or pa.types.is_time64(dtype):
return dtypes.Time()
if pa.types.is_binary(dtype):
return dtypes.Binary()
return dtypes.Unknown() # pragma: no cover


Expand Down Expand Up @@ -211,6 +213,8 @@ def narwhals_to_native_dtype(dtype: DType | type[DType], version: Version) -> pa
return pa.list_(inner, list_size=list_size)
if isinstance_or_issubclass(dtype, dtypes.Time):
return pa.time64("ns")
if isinstance_or_issubclass(dtype, dtypes.Binary):
return pa.binary()

msg = f"Unknown dtype: {dtype}" # pragma: no cover
raise AssertionError(msg)
Expand Down
3 changes: 3 additions & 0 deletions narwhals/_dask/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,9 @@ def narwhals_to_native_dtype(dtype: DType | type[DType], version: Version) -> An
if isinstance_or_issubclass(dtype, dtypes.Time):  # pragma: no cover
    # Raise rather than return: returning the exception instance would hand
    # the caller a NotImplementedError object as if it were a native dtype.
    msg = "Converting to Time dtype is not supported yet"
    raise NotImplementedError(msg)
if isinstance_or_issubclass(dtype, dtypes.Binary):  # pragma: no cover
    # Same contract as the Time branch above: signal the unsupported
    # conversion by raising, consistent with the other backends.
    msg = "Converting to Binary dtype is not supported yet"
    raise NotImplementedError(msg)

msg = f"Unknown dtype: {dtype}" # pragma: no cover
raise AssertionError(msg)
Expand Down
4 changes: 4 additions & 0 deletions narwhals/_duckdb/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,8 @@ def native_to_narwhals_dtype(duckdb_dtype: str, version: Version) -> DType:
return dtypes.Decimal()
if duckdb_dtype == "TIME":
return dtypes.Time()
if duckdb_dtype == "BLOB":
return dtypes.Binary()
return dtypes.Unknown() # pragma: no cover


Expand Down Expand Up @@ -154,6 +156,8 @@ def narwhals_to_native_dtype(dtype: DType | type[DType], version: Version) -> st
return "BOOLEAN"
if isinstance_or_issubclass(dtype, dtypes.Time):
return "TIME"
if isinstance_or_issubclass(dtype, dtypes.Binary):
return "BLOB"
if isinstance_or_issubclass(dtype, dtypes.Categorical):
msg = "Categorical not supported by DuckDB"
raise NotImplementedError(msg)
Expand Down
2 changes: 2 additions & 0 deletions narwhals/_ibis/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,8 @@ def native_to_narwhals_dtype(ibis_dtype: Any, version: Version) -> DType:
return dtypes.Decimal()
if ibis_dtype.is_time():
return dtypes.Time()
if ibis_dtype.is_binary():
return dtypes.Binary()
return dtypes.Unknown() # pragma: no cover


Expand Down
4 changes: 3 additions & 1 deletion narwhals/_pandas_like/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -394,6 +394,8 @@ def non_object_native_to_narwhals_dtype(dtype: str, version: Version) -> DType:
return dtypes.Decimal()
if dtype.startswith("time") and dtype.endswith("[pyarrow]"):
return dtypes.Time()
if dtype.startswith("binary") and dtype.endswith("[pyarrow]"):
return dtypes.Binary()
return dtypes.Unknown() # pragma: no cover


Expand Down Expand Up @@ -593,7 +595,7 @@ def narwhals_to_native_dtype( # noqa: PLR0915
msg = "Converting to Enum is not (yet) supported"
raise NotImplementedError(msg)
if isinstance_or_issubclass(
dtype, (dtypes.Struct, dtypes.Array, dtypes.List, dtypes.Time)
dtype, (dtypes.Struct, dtypes.Array, dtypes.List, dtypes.Time, dtypes.Binary)
):
if implementation is Implementation.PANDAS and backend_version >= (2, 2):
try:
Expand Down
4 changes: 4 additions & 0 deletions narwhals/_polars/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,8 @@ def native_to_narwhals_dtype(
return dtypes.Decimal()
if dtype == pl.Time:
return dtypes.Time()
if dtype == pl.Binary:
return dtypes.Binary()
return dtypes.Unknown()


Expand Down Expand Up @@ -192,6 +194,8 @@ def narwhals_to_native_dtype(
return pl.Date()
if dtype == dtypes.Time:
return pl.Time()
if dtype == dtypes.Binary:
return pl.Binary()
if dtype == dtypes.Decimal:
msg = "Casting to Decimal is not supported yet."
raise NotImplementedError(msg)
Expand Down
4 changes: 4 additions & 0 deletions narwhals/_spark_like/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,8 @@ def native_to_narwhals_dtype(
for field in dtype
]
)
if isinstance(dtype, native.BinaryType):
return dtypes.Binary()
return dtypes.Unknown()


Expand Down Expand Up @@ -135,6 +137,8 @@ def narwhals_to_native_dtype(
for field in dtype.fields
]
)
if isinstance_or_issubclass(dtype, dtypes.Binary):
return native.BinaryType()

if isinstance_or_issubclass(
dtype,
Expand Down
24 changes: 24 additions & 0 deletions narwhals/dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -696,3 +696,27 @@ class Time(TemporalType):
>>> nw.from_native(rel).schema["t"]
Time
"""


class Binary(DType):
    """Binary type.

    Narwhals dtype for columns holding raw bytes (for example `b"test1"`).

    It corresponds to the following native types: `pl.Binary` for Polars,
    `pa.binary()` for PyArrow, `BLOB` for DuckDB, `BinaryType` for PySpark,
    and `binary[pyarrow]` for pandas-like backends.

    Examples:
        >>> import polars as pl
        >>> import narwhals as nw
        >>> import pyarrow as pa
        >>> import duckdb
        >>> data = [b"test1", b"test2"]
        >>> ser_pl = pl.Series(data, dtype=pl.Binary)
        >>> ser_pa = pa.chunked_array([pa.array(data, type=pa.binary())])
        >>> rel = duckdb.sql(
        ...     "SELECT * FROM (VALUES (BLOB 'test1'), (BLOB 'test2')) AS df(t)"
        ... )

        >>> nw.from_native(ser_pl, series_only=True).dtype
        Binary
        >>> nw.from_native(ser_pa, series_only=True).dtype
        Binary
        >>> nw.from_native(rel).schema["t"]
        Binary
    """
2 changes: 2 additions & 0 deletions narwhals/stable/v1/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
from narwhals.series import Series as NwSeries
from narwhals.stable.v1 import dtypes
from narwhals.stable.v1.dtypes import Array
from narwhals.stable.v1.dtypes import Binary
from narwhals.stable.v1.dtypes import Boolean
from narwhals.stable.v1.dtypes import Categorical
from narwhals.stable.v1.dtypes import Date
Expand Down Expand Up @@ -2500,6 +2501,7 @@ def scan_parquet(

__all__ = [
"Array",
"Binary",
"Boolean",
"Categorical",
"DataFrame",
Expand Down
2 changes: 2 additions & 0 deletions narwhals/stable/v1/_dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from typing import TYPE_CHECKING

from narwhals.dtypes import Array
from narwhals.dtypes import Binary
from narwhals.dtypes import Boolean
from narwhals.dtypes import Categorical
from narwhals.dtypes import Date
Expand Down Expand Up @@ -73,6 +74,7 @@ def __hash__(self: Self) -> int:

__all__ = [
"Array",
"Binary",
"Boolean",
"Categorical",
"DType",
Expand Down
2 changes: 2 additions & 0 deletions narwhals/stable/v1/dtypes.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from __future__ import annotations

from narwhals.stable.v1._dtypes import Array
from narwhals.stable.v1._dtypes import Binary
from narwhals.stable.v1._dtypes import Boolean
from narwhals.stable.v1._dtypes import Categorical
from narwhals.stable.v1._dtypes import Date
Expand Down Expand Up @@ -37,6 +38,7 @@

__all__ = [
"Array",
"Binary",
"Boolean",
"Categorical",
"DType",
Expand Down
1 change: 1 addition & 0 deletions narwhals/typing.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,7 @@ class DTypes:
Array: type[dtypes.Array]
Unknown: type[dtypes.Unknown]
Time: type[dtypes.Time]
Binary: type[dtypes.Binary]


__all__ = [
Expand Down
1 change: 1 addition & 0 deletions tests/dtypes_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -296,6 +296,7 @@ def test_dtype_is_x() -> None:
nw.UInt64,
nw.UInt128,
nw.Unknown,
nw.Binary,
)

is_signed_integer = {nw.Int8, nw.Int16, nw.Int32, nw.Int64, nw.Int128}
Expand Down
22 changes: 22 additions & 0 deletions tests/expr_and_series/cast_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -332,3 +332,25 @@ def test_cast_time(request: pytest.FixtureRequest, constructor: Constructor) ->
df = nw.from_native(constructor(data))
result = df.select(nw.col("a").cast(nw.Time()))
assert result.collect_schema() == {"a": nw.Time()}


def test_cast_binary(request: pytest.FixtureRequest, constructor: Constructor) -> None:
    """Check that a string column can be cast to `nw.Binary` and back.

    The test casts column `a` to Binary (`b`) and round-trips it back to
    String (`c`), then verifies both the resulting schema and that the
    round-tripped values equal the original strings.

    Args:
        request: pytest request object, used to mark the test as an expected
            failure for backends that cannot perform the cast.
        constructor: backend-specific callable that builds a native frame
            from a dict of columns.
    """
    constructor_name = str(constructor)

    # pandas older than 2.2 is expected to fail this cast.
    if "pandas" in constructor_name and PANDAS_VERSION < (2, 2):
        request.applymarker(pytest.mark.xfail)

    # dask and modin are expected to fail; cudf is included as well, as
    # requested in review of this change.
    if any(backend in constructor_name for backend in ("cudf", "dask", "modin")):
        request.applymarker(pytest.mark.xfail)

    data = {"a": ["test1", "test2"]}
    df = nw.from_native(constructor(data))
    result = df.select(
        "a",
        b=nw.col("a").cast(nw.Binary()),
        c=nw.col("a").cast(nw.Binary()).cast(nw.String()),
    )
    assert result.collect_schema() == {
        "a": nw.String(),
        "b": nw.Binary(),
        "c": nw.String(),
    }
    # Round-tripping through Binary must not alter the string values.
    assert_equal_data(result.select("c"), {"c": data["a"]})
Loading