Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions narwhals/_arrow/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,26 @@ def __init__(
self._version = version
validate_backend_version(self._implementation, self._backend_version)

@classmethod
def from_dict(
    cls,
    data: Mapping[str, Any],
    /,
    *,
    context: _FullContext,
    schema: Mapping[str, DType] | Schema | None,
) -> Self:
    """Construct a frame from a mapping of column names to column data.

    Arguments:
        data: Mapping handed to `pa.Table.from_pydict`.
        context: Object whose `_backend_version` and `_version` are forwarded
            to the constructor.
        schema: Optional Narwhals schema. When it is not None it is converted
            with `Schema(schema).to_arrow()` and used as the table schema.

    Returns:
        A new instance wrapping the resulting table, built with
        `validate_column_names=True`.
    """
    # Imported locally, as in the original (presumably to avoid an import cycle).
    from narwhals.schema import Schema

    if schema is None:
        arrow_schema = None
    else:
        arrow_schema = Schema(schema).to_arrow()

    table = pa.Table.from_pydict(data, schema=arrow_schema)
    return cls(
        table,
        backend_version=context._backend_version,
        version=context._version,
        validate_column_names=True,
    )

@classmethod
def from_numpy(
cls,
Expand Down
13 changes: 13 additions & 0 deletions narwhals/_compliant/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from narwhals._compliant.typing import EagerSeriesT
from narwhals._compliant.typing import NativeFrameT_co
from narwhals._expression_parsing import evaluate_output_names_and_aliases
from narwhals._translate import DictConvertible
from narwhals._translate import NumpyConvertible
from narwhals.utils import Version
from narwhals.utils import _StoresNative
Expand Down Expand Up @@ -47,9 +48,12 @@

T = TypeVar("T")

# Dict shapes used as the `ToDict` type argument of `DictConvertible` in the
# bases of `CompliantDataFrame`: a dict of compliant series, or a dict of lists.
_ToDict: TypeAlias = "dict[str, CompliantSeriesT] | dict[str, list[Any]]" # noqa: PYI047


class CompliantDataFrame(
NumpyConvertible["_2DArray", "_2DArray"],
DictConvertible["_ToDict[CompliantSeriesT]", Mapping[str, Any]],
_StoresNative[NativeFrameT_co],
Sized,
Protocol[CompliantSeriesT, CompliantExprT_contra, NativeFrameT_co],
Expand All @@ -62,6 +66,15 @@ class CompliantDataFrame(
def __narwhals_dataframe__(self) -> Self: ...
def __narwhals_namespace__(self) -> Any: ...
@classmethod
def from_dict(
    cls,
    data: Mapping[str, Any],
    /,
    *,
    context: _FullContext,
    schema: Mapping[str, DType] | Schema | None,
) -> Self:
    """Alternate constructor from a mapping of column names to column data.

    Protocol stub only; each backend supplies the implementation.

    Arguments:
        data: Mapping of column name to column values (positional-only).
        context: Object carrying the backend/version information the
            implementation needs to build the frame.
        schema: Optional Narwhals schema, or None.

    Returns:
        A new instance of the implementing class.
    """
    ...
@classmethod
def from_numpy(
cls,
data: _2DArray,
Expand Down
50 changes: 50 additions & 0 deletions narwhals/_pandas_like/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from narwhals._compliant import EagerDataFrame
from narwhals._pandas_like.series import PANDAS_TO_NUMPY_DTYPE_MISSING
from narwhals._pandas_like.series import PandasLikeSeries
from narwhals._pandas_like.utils import align_and_extract_native
from narwhals._pandas_like.utils import align_series_full_broadcast
from narwhals._pandas_like.utils import check_column_names_are_unique
from narwhals._pandas_like.utils import convert_str_slice_to_int_slice
Expand Down Expand Up @@ -113,6 +114,55 @@ def __init__(
if validate_column_names:
check_column_names_are_unique(native_dataframe.columns)

@classmethod
def from_dict(
    cls,
    data: Mapping[str, Any],
    /,
    *,
    context: _FullContext,
    schema: Mapping[str, DType] | Schema | None,
) -> Self:
    """Construct a frame from a mapping of column names to column data.

    Values that are instances of the backend's native `Series` are wrapped in
    `PandasLikeSeries`. The first such value is stored unchanged and becomes
    the reference; every later native series is replaced by the second item
    returned from `align_and_extract_native(reference, series)`. All other
    values are passed through to `DataFrame.from_dict` untouched.

    Arguments:
        data: Mapping of column name to column values.
        context: Object providing `_implementation`, `_backend_version` and
            `_version`.
        schema: Optional Narwhals schema. When truthy, the frame is cast with
            `astype` to `Schema(schema).to_pandas(...)`.

    Returns:
        A new instance wrapping the native frame, built with
        `validate_column_names=True`.
    """
    # Imported locally, as in the original (presumably to avoid an import cycle).
    from narwhals.schema import Schema

    implementation = context._implementation
    backend_version = context._backend_version
    version = context._version

    native_ns = implementation.to_native_namespace()
    Series = cast("type[pd.Series[Any]]", native_ns.Series)  # noqa: N806
    DataFrame = cast("type[pd.DataFrame]", native_ns.DataFrame)  # noqa: N806

    columns: dict[str, pd.Series[Any] | Any] = {}
    reference: PandasLikeSeries | None = None
    for name, values in data.items():
        if not isinstance(values, Series):
            # Not a native series (e.g. a plain list): nothing to align.
            columns[name] = values
            continue

        wrapped = PandasLikeSeries(
            values,
            implementation=implementation,
            backend_version=backend_version,
            version=version,
        )
        if left_most_missing := reference is None:
            # First native series seen: keep it as-is and align later ones to it.
            reference = wrapped
        columns[name] = (
            values
            if left_most_missing
            else align_and_extract_native(reference, wrapped)[1]
        )

    native = DataFrame.from_dict(columns)
    # Truthiness check: a falsy schema (None, or an empty mapping) skips the cast.
    if schema:
        dtype_backends: Iterable[DTypeBackend] = (
            get_dtype_backend(dtype, implementation) for dtype in native.dtypes
        )
        native = native.astype(Schema(schema).to_pandas(dtype_backends))

    return cls(
        native,
        implementation=implementation,
        backend_version=backend_version,
        version=version,
        validate_column_names=True,
    )

@classmethod
def from_numpy(
cls,
Expand Down
17 changes: 17 additions & 0 deletions narwhals/_polars/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,23 @@ def __init__(
self._version = version
validate_backend_version(self._implementation, self._backend_version)

@classmethod
def from_dict(
    cls,
    data: Mapping[str, Any],
    /,
    *,
    context: _FullContext,
    schema: Mapping[str, DType] | Schema | None,
) -> Self:
    """Construct a frame from a mapping of column names to column data.

    Arguments:
        data: Mapping handed to `pl.from_dict`.
        context: Object whose `_backend_version` and `_version` are forwarded
            to the constructor.
        schema: Optional Narwhals schema. When it is not None it is converted
            with `Schema(schema).to_polars()` and passed to `pl.from_dict`.

    Returns:
        A new instance wrapping the resulting native frame.
    """
    # Imported locally, as in the original (presumably to avoid an import cycle).
    from narwhals.schema import Schema

    if schema is None:
        polars_schema = None
    else:
        polars_schema = Schema(schema).to_polars()

    frame = pl.from_dict(data, polars_schema)
    return cls(
        frame,
        backend_version=context._backend_version,
        version=context._version,
    )

@classmethod
def from_numpy(
cls,
Expand Down
28 changes: 28 additions & 0 deletions narwhals/_translate.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from typing import TYPE_CHECKING
from typing import Any
from typing import Iterable
from typing import Mapping
from typing import Protocol

if TYPE_CHECKING:
Expand Down Expand Up @@ -70,3 +71,30 @@ class FromIterable(Protocol[FromIterableT_contra]):
def from_iterable(
cls, data: Iterable[FromIterableT_contra], *args: Any, **kwds: Any
) -> Self: ...


# Return type of `ToDict.to_dict`. Covariant, bounded by `Mapping[str, Any]`,
# and falling back to `dict[str, Any]` when left unparametrized
# (`default=` is the PEP 696 type-parameter default).
ToDictDT_co = TypeVar(
"ToDictDT_co", bound=Mapping[str, Any], covariant=True, default="dict[str, Any]"
)
# Type of the `data` argument accepted by `FromDict.from_dict`. Contravariant,
# bounded by `Mapping[str, Any]`, which is also its default.
FromDictDT_contra = TypeVar(
"FromDictDT_contra",
bound=Mapping[str, Any],
contravariant=True,
default=Mapping[str, Any],
)


class ToDict(Protocol[ToDictDT_co]):
    """Structural type for objects that expose a `to_dict` method."""

    def to_dict(self, *args: Any, **kwds: Any) -> ToDictDT_co:
        """Return the object's dictionary representation, typed as `ToDictDT_co`."""
        ...


class FromDict(Protocol[FromDictDT_contra]):
    """Structural type for classes that expose a `from_dict` alternate constructor."""

    @classmethod
    def from_dict(
        cls, data: FromDictDT_contra, *args: Any, **kwds: Any
    ) -> Self:
        """Build an instance of `cls` from `data`, typed as `FromDictDT_contra`."""
        ...


class DictConvertible(
    ToDict[ToDictDT_co],
    FromDict[FromDictDT_contra],
    Protocol[ToDictDT_co, FromDictDT_contra],
):
    """Protocol combining `ToDict` and `FromDict`.

    The first type parameter is the `to_dict` return type, the second is the
    `from_dict` input type.
    """
106 changes: 35 additions & 71 deletions narwhals/functions.py
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This diff 🎻🎻

Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,10 @@
from narwhals._expression_parsing import extract_compliant
from narwhals._expression_parsing import infer_kind
from narwhals._expression_parsing import is_scalar_like
from narwhals.dependencies import is_narwhals_series
from narwhals.dependencies import is_numpy_array
from narwhals.dependencies import is_numpy_array_2d
from narwhals.expr import Expr
from narwhals.schema import Schema
from narwhals.series import Series
from narwhals.translate import from_native
from narwhals.translate import to_native
Expand All @@ -50,12 +50,11 @@

from narwhals._compliant import CompliantExpr
from narwhals._compliant import CompliantNamespace
from narwhals._pandas_like.series import PandasLikeSeries
from narwhals.dataframe import DataFrame
from narwhals.dataframe import LazyFrame
from narwhals.dtypes import DType
from narwhals.schema import Schema
from narwhals.series import Series
from narwhals.typing import DTypeBackend
from narwhals.typing import IntoDataFrameT
from narwhals.typing import IntoExpr
from narwhals.typing import IntoFrameT
Expand Down Expand Up @@ -329,91 +328,56 @@ def from_dict(
| 1 2 4 |
└──────────────────┘
"""
return _from_dict_impl(data, schema, backend=backend)
return _from_dict_impl(data, schema, backend=backend, version=Version.MAIN)


def _from_dict_impl(
data: Mapping[str, Any],
schema: Mapping[str, DType] | Schema | None = None,
schema: Mapping[str, DType] | Schema | None,
*,
backend: ModuleType | Implementation | str | None = None,
backend: ModuleType | Implementation | str | None,
version: Version,
) -> DataFrame[Any]:
from narwhals.series import Series

if not data:
msg = "from_dict cannot be called with empty dictionary"
raise ValueError(msg)
if backend is None:
for val in data.values():
if isinstance(val, Series):
native_namespace = val.__native_namespace__()
break
else:
msg = "Calling `from_dict` without `backend` is only supported if all input values are already Narwhals Series"
raise TypeError(msg)
data = {key: to_native(value, pass_through=True) for key, value in data.items()}
eager_backend = Implementation.from_native_namespace(native_namespace)
else:
eager_backend = Implementation.from_backend(backend)
native_namespace = eager_backend.to_native_namespace()

supported_eager_backends = (
Implementation.POLARS,
Implementation.PANDAS,
Implementation.PYARROW,
Implementation.MODIN,
Implementation.CUDF,
)
if eager_backend is not None and eager_backend not in supported_eager_backends:
msg = f"Unsupported `backend` value.\nExpected one of {supported_eager_backends} or None, got: {eager_backend}."
raise ValueError(msg)
if eager_backend is Implementation.POLARS:
schema_pl = Schema(schema).to_polars() if schema else None
native_frame = native_namespace.from_dict(data, schema=schema_pl)
elif eager_backend.is_pandas_like():
from narwhals._pandas_like.utils import align_and_extract_native

aligned_data = {}
left_most_series = None
for key, native_series in data.items():
if isinstance(native_series, native_namespace.Series):
compliant_series = from_native(
native_series, series_only=True
)._compliant_series
if left_most_series is None:
left_most_series = cast("PandasLikeSeries", compliant_series)
aligned_data[key] = native_series
else:
aligned_data[key] = align_and_extract_native(
left_most_series, compliant_series
)[1]
else:
aligned_data[key] = native_series

native_frame = native_namespace.DataFrame.from_dict(aligned_data)

if schema:
from narwhals._pandas_like.utils import get_dtype_backend

it: Iterable[DTypeBackend] = (
get_dtype_backend(native_type, eager_backend)
for native_type in native_frame.dtypes
)
pd_schema = Schema(schema).to_pandas(it)
native_frame = native_frame.astype(pd_schema)

elif eager_backend is Implementation.PYARROW:
pa_schema = Schema(schema).to_arrow() if schema is not None else schema
native_frame = native_namespace.table(data, schema=pa_schema)
else: # pragma: no cover
data, backend = _from_dict_no_backend(data)
implementation = Implementation.from_backend(backend)
if is_eager_allowed(implementation):
ns = _into_compliant_namespace(implementation, version)
frame = ns._dataframe.from_dict(data, schema=schema, context=ns)
return from_native(frame, eager_only=True)
elif implementation is Implementation.UNKNOWN: # pragma: no cover
native_namespace = implementation.to_native_namespace()
try:
# implementation is UNKNOWN, Narwhals extension using this feature should
# implement `from_dict` function in the top-level namespace.
native_frame = native_namespace.from_dict(data, schema=schema)
except AttributeError as e:
msg = "Unknown namespace is expected to implement `from_dict` function."
raise AttributeError(msg) from e
return from_native(native_frame, eager_only=True)
return from_native(native_frame, eager_only=True)
msg = (
f"Unsupported `backend` value.\nExpected one of "
f"{Implementation.POLARS, Implementation.PANDAS, Implementation.PYARROW, Implementation.MODIN, Implementation.CUDF} "
f"or None, got: {implementation}."
)
raise ValueError(msg)


def _from_dict_no_backend(
    data: Mapping[str, Series[Any] | Any], /
) -> tuple[dict[str, Series[Any] | Any], ModuleType]:
    """Infer the native namespace from `data` when no backend was given.

    Arguments:
        data: Mapping of column name to column values.

    Returns:
        A tuple of a new dict in which every value has been passed through
        `to_native(value, pass_through=True)`, and the native namespace
        reported by the first Narwhals Series found among the values.

    Raises:
        TypeError: If none of the values is a Narwhals Series.
    """
    # NOTE(review): the message says "all input values", but a single Narwhals
    # Series anywhere in `data` is enough to pass this check - confirm intent.
    first_series = next(
        (value for value in data.values() if is_narwhals_series(value)), None
    )
    if first_series is None:
        msg = "Calling `from_dict` without `backend` is only supported if all input values are already Narwhals Series"
        raise TypeError(msg)

    native_namespace = first_series.__native_namespace__()
    unwrapped = {
        name: to_native(value, pass_through=True) for name, value in data.items()
    }
    return unwrapped, native_namespace


@deprecate_native_namespace(warn_version="1.31.0", required=True)
Expand Down
2 changes: 1 addition & 1 deletion narwhals/stable/v1/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2296,7 +2296,7 @@ def from_dict(
A new DataFrame.
"""
return _stableify( # type: ignore[no-any-return]
_from_dict_impl(data, schema, backend=backend)
_from_dict_impl(data, schema, backend=backend, version=Version.V1)
)


Expand Down
Loading