Skip to content

Commit 3072d49

Browse files
refactor: Add CompliantDataFrame.from_dict (#2304)
* feat(typing): Add `DictConvertible` protocol
* feat(typing): Add `CompliantFrame.from_dict`
* feat: Add `PolarsDataFrame.from_dict`
* feat: Add `ArrowDataFrame.from_dict`
* refactor: Use `Table.from_pydict` instead
  No typing issues and slightly faster route to the same call
* feat: Add `PandasLikeDataFrame.from_dict`
* chore: Include `Version` in private signatures
* refactor: Update `_from_dict_impl`
* refactor: Split out `_from_dict_no_backend`
  No preference on the name - but I find this a lot easier to read
* fix: `3.8` compat
  https://github.com/narwhals-dev/narwhals/actions/runs/14115367918/job/39544135815?pr=2304
* perf: Skip `from_native` when we know `PandasLikeSeries`
  Resolves #2304 (comment)
* fix: version
  Co-authored-by: Francesco Bruzzesi <[email protected]>

---------

Co-authored-by: Francesco Bruzzesi <[email protected]>
1 parent 5f1f19f commit 3072d49

File tree

7 files changed

+164
-72
lines changed

7 files changed

+164
-72
lines changed

narwhals/_arrow/dataframe.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,26 @@ def __init__(
9393
self._version = version
9494
validate_backend_version(self._implementation, self._backend_version)
9595

96+
@classmethod
def from_dict(
    cls,
    data: Mapping[str, Any],
    /,
    *,
    context: _FullContext,
    schema: Mapping[str, DType] | Schema | None,
) -> Self:
    """Construct the frame from a mapping of column names to values.

    Arguments:
        data: Mapping handed to `pa.Table.from_pydict`.
        context: Object supplying `_backend_version` and `_version` for
            the new frame.
        schema: Optional Narwhals schema. When not None it is converted
            with `Schema(schema).to_arrow()` and forwarded to pyarrow.

    Returns:
        A new instance wrapping the resulting `pa.Table`.
    """
    from narwhals.schema import Schema

    if schema is None:
        arrow_schema = None
    else:
        arrow_schema = Schema(schema).to_arrow()
    table = pa.Table.from_pydict(data, schema=arrow_schema)
    return cls(
        table,
        backend_version=context._backend_version,
        version=context._version,
        validate_column_names=True,
    )
115+
96116
@classmethod
97117
def from_numpy(
98118
cls,

narwhals/_compliant/dataframe.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
from narwhals._compliant.typing import EagerSeriesT
1919
from narwhals._compliant.typing import NativeFrameT_co
2020
from narwhals._expression_parsing import evaluate_output_names_and_aliases
21+
from narwhals._translate import DictConvertible
2122
from narwhals._translate import NumpyConvertible
2223
from narwhals.utils import Version
2324
from narwhals.utils import _StoresNative
@@ -47,9 +48,12 @@
4748

4849
T = TypeVar("T")
4950

51+
_ToDict: TypeAlias = "dict[str, CompliantSeriesT] | dict[str, list[Any]]" # noqa: PYI047
52+
5053

5154
class CompliantDataFrame(
5255
NumpyConvertible["_2DArray", "_2DArray"],
56+
DictConvertible["_ToDict[CompliantSeriesT]", Mapping[str, Any]],
5357
_StoresNative[NativeFrameT_co],
5458
Sized,
5559
Protocol[CompliantSeriesT, CompliantExprT_contra, NativeFrameT_co],
@@ -62,6 +66,15 @@ class CompliantDataFrame(
6266
def __narwhals_dataframe__(self) -> Self: ...
6367
def __narwhals_namespace__(self) -> Any: ...
6468
@classmethod
def from_dict(
    cls, data: Mapping[str, Any], /, *, context: _FullContext, schema: Mapping[str, DType] | Schema | None
) -> Self:
    """Alternate constructor: build a frame from a column-name -> values mapping.

    Protocol stub only; each backend supplies the implementation.

    Arguments:
        data: Mapping of column names to column values (positional-only).
        context: Keyword-only context object passed by the caller.
        schema: Keyword-only Narwhals schema, or None.

    Returns:
        A new instance of the implementing class.
    """
    ...
77+
@classmethod
6578
def from_numpy(
6679
cls,
6780
data: _2DArray,

narwhals/_pandas_like/dataframe.py

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
from narwhals._compliant import EagerDataFrame
1717
from narwhals._pandas_like.series import PANDAS_TO_NUMPY_DTYPE_MISSING
1818
from narwhals._pandas_like.series import PandasLikeSeries
19+
from narwhals._pandas_like.utils import align_and_extract_native
1920
from narwhals._pandas_like.utils import align_series_full_broadcast
2021
from narwhals._pandas_like.utils import check_column_names_are_unique
2122
from narwhals._pandas_like.utils import convert_str_slice_to_int_slice
@@ -113,6 +114,55 @@ def __init__(
113114
if validate_column_names:
114115
check_column_names_are_unique(native_dataframe.columns)
115116

117+
@classmethod
def from_dict(
    cls,
    data: Mapping[str, Any],
    /,
    *,
    context: _FullContext,
    schema: Mapping[str, DType] | Schema | None,
) -> Self:
    """Construct the frame from a mapping of column names to values.

    Values that are instances of the backend's native `Series` are wrapped
    in `PandasLikeSeries`. The first such series is kept as-is; every later
    one is passed through `align_and_extract_native` against that first
    series. All other values are forwarded unchanged.

    Arguments:
        data: Mapping handed (after the step above) to the native
            `DataFrame.from_dict`.
        context: Object supplying `_implementation`, `_backend_version`
            and `_version`.
        schema: Optional Narwhals schema. When truthy, the native frame is
            cast with `astype` using `Schema(schema).to_pandas(...)`.

    Returns:
        A new instance wrapping the native dataframe.
    """
    from narwhals.schema import Schema

    impl = context._implementation
    backend_ver = context._backend_version
    nw_version = context._version
    native_ns = impl.to_native_namespace()
    Series = cast("type[pd.Series[Any]]", native_ns.Series)  # noqa: N806
    DataFrame = cast("type[pd.DataFrame]", native_ns.DataFrame)  # noqa: N806

    columns: dict[str, pd.Series[Any] | Any] = {}
    anchor: PandasLikeSeries | None = None
    for key, value in data.items():
        if not isinstance(value, Series):
            # Non-series values go to the native constructor untouched.
            columns[key] = value
            continue
        wrapped = PandasLikeSeries(
            value,
            implementation=impl,
            backend_version=backend_ver,
            version=nw_version,
        )
        if anchor is None:
            # First native series seen: it becomes the reference for the rest.
            anchor = wrapped
            columns[key] = value
            continue
        columns[key] = align_and_extract_native(anchor, wrapped)[1]

    frame = DataFrame.from_dict(columns)
    if schema:
        backends: Iterable[DTypeBackend] = (
            get_dtype_backend(native_dtype, impl) for native_dtype in frame.dtypes
        )
        target_dtypes = Schema(schema).to_pandas(backends)
        frame = frame.astype(target_dtypes)
    return cls(
        frame,
        implementation=impl,
        backend_version=backend_ver,
        version=nw_version,
        validate_column_names=True,
    )
165+
116166
@classmethod
117167
def from_numpy(
118168
cls,

narwhals/_polars/dataframe.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,23 @@ def __init__(
9494
self._version = version
9595
validate_backend_version(self._implementation, self._backend_version)
9696

97+
@classmethod
def from_dict(
    cls,
    data: Mapping[str, Any],
    /,
    *,
    context: _FullContext,
    schema: Mapping[str, DType] | Schema | None,
) -> Self:
    """Construct the frame from a mapping of column names to values.

    Arguments:
        data: Mapping handed to `pl.from_dict`.
        context: Object supplying `_backend_version` and `_version` for
            the new frame.
        schema: Optional Narwhals schema. When not None it is converted
            with `Schema(schema).to_polars()` and forwarded to Polars.

    Returns:
        A new instance wrapping the resulting native frame.
    """
    from narwhals.schema import Schema

    if schema is None:
        polars_schema = None
    else:
        polars_schema = Schema(schema).to_polars()
    frame = pl.from_dict(data, polars_schema)
    return cls(
        frame,
        backend_version=context._backend_version,
        version=context._version,
    )
113+
97114
@classmethod
98115
def from_numpy(
99116
cls,

narwhals/_translate.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
from typing import TYPE_CHECKING
44
from typing import Any
55
from typing import Iterable
6+
from typing import Mapping
67
from typing import Protocol
78

89
if TYPE_CHECKING:
@@ -70,3 +71,30 @@ class FromIterable(Protocol[FromIterableT_contra]):
7071
def from_iterable(
7172
cls, data: Iterable[FromIterableT_contra], *args: Any, **kwds: Any
7273
) -> Self: ...
74+
75+
76+
# Return type of `ToDict.to_dict`: covariant, bounded by `Mapping[str, Any]`,
# and defaulting to `dict[str, Any]` when left unparameterized.
ToDictDT_co = TypeVar(
    "ToDictDT_co",
    bound=Mapping[str, Any],
    covariant=True,
    default="dict[str, Any]",
)
# Input type of `FromDict.from_dict`: contravariant, bounded by and defaulting
# to `Mapping[str, Any]`.
FromDictDT_contra = TypeVar(
    "FromDictDT_contra", bound=Mapping[str, Any], contravariant=True, default=Mapping[str, Any]
)
85+
86+
87+
class ToDict(Protocol[ToDictDT_co]):
    """Structural type for objects that expose a `to_dict` method."""

    def to_dict(self, *args: Any, **kwds: Any) -> ToDictDT_co:
        """Return this object's contents as a `ToDictDT_co` mapping."""
        ...
89+
90+
91+
class FromDict(Protocol[FromDictDT_contra]):
    """Structural type for classes that expose a `from_dict` constructor."""

    @classmethod
    def from_dict(
        cls,
        data: FromDictDT_contra,
        *args: Any,
        **kwds: Any,
    ) -> Self:
        """Build an instance of the class from `data`."""
        ...
94+
95+
96+
class DictConvertible(
    ToDict[ToDictDT_co], FromDict[FromDictDT_contra], Protocol[ToDictDT_co, FromDictDT_contra]
):
    """Combination of `ToDict` and `FromDict`: supports both `to_dict` and `from_dict`."""

narwhals/functions.py

Lines changed: 35 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,10 @@
2323
from narwhals._expression_parsing import extract_compliant
2424
from narwhals._expression_parsing import infer_kind
2525
from narwhals._expression_parsing import is_scalar_like
26+
from narwhals.dependencies import is_narwhals_series
2627
from narwhals.dependencies import is_numpy_array
2728
from narwhals.dependencies import is_numpy_array_2d
2829
from narwhals.expr import Expr
29-
from narwhals.schema import Schema
3030
from narwhals.series import Series
3131
from narwhals.translate import from_native
3232
from narwhals.translate import to_native
@@ -50,12 +50,11 @@
5050

5151
from narwhals._compliant import CompliantExpr
5252
from narwhals._compliant import CompliantNamespace
53-
from narwhals._pandas_like.series import PandasLikeSeries
5453
from narwhals.dataframe import DataFrame
5554
from narwhals.dataframe import LazyFrame
5655
from narwhals.dtypes import DType
56+
from narwhals.schema import Schema
5757
from narwhals.series import Series
58-
from narwhals.typing import DTypeBackend
5958
from narwhals.typing import IntoDataFrameT
6059
from narwhals.typing import IntoExpr
6160
from narwhals.typing import IntoFrameT
@@ -329,91 +328,56 @@ def from_dict(
329328
| 1 2 4 |
330329
└──────────────────┘
331330
"""
332-
return _from_dict_impl(data, schema, backend=backend)
331+
return _from_dict_impl(data, schema, backend=backend, version=Version.MAIN)
333332

334333

335334
def _from_dict_impl(
336335
data: Mapping[str, Any],
337-
schema: Mapping[str, DType] | Schema | None = None,
336+
schema: Mapping[str, DType] | Schema | None,
338337
*,
339-
backend: ModuleType | Implementation | str | None = None,
338+
backend: ModuleType | Implementation | str | None,
339+
version: Version,
340340
) -> DataFrame[Any]:
341-
from narwhals.series import Series
342-
343341
if not data:
344342
msg = "from_dict cannot be called with empty dictionary"
345343
raise ValueError(msg)
346344
if backend is None:
347-
for val in data.values():
348-
if isinstance(val, Series):
349-
native_namespace = val.__native_namespace__()
350-
break
351-
else:
352-
msg = "Calling `from_dict` without `backend` is only supported if all input values are already Narwhals Series"
353-
raise TypeError(msg)
354-
data = {key: to_native(value, pass_through=True) for key, value in data.items()}
355-
eager_backend = Implementation.from_native_namespace(native_namespace)
356-
else:
357-
eager_backend = Implementation.from_backend(backend)
358-
native_namespace = eager_backend.to_native_namespace()
359-
360-
supported_eager_backends = (
361-
Implementation.POLARS,
362-
Implementation.PANDAS,
363-
Implementation.PYARROW,
364-
Implementation.MODIN,
365-
Implementation.CUDF,
366-
)
367-
if eager_backend is not None and eager_backend not in supported_eager_backends:
368-
msg = f"Unsupported `backend` value.\nExpected one of {supported_eager_backends} or None, got: {eager_backend}."
369-
raise ValueError(msg)
370-
if eager_backend is Implementation.POLARS:
371-
schema_pl = Schema(schema).to_polars() if schema else None
372-
native_frame = native_namespace.from_dict(data, schema=schema_pl)
373-
elif eager_backend.is_pandas_like():
374-
from narwhals._pandas_like.utils import align_and_extract_native
375-
376-
aligned_data = {}
377-
left_most_series = None
378-
for key, native_series in data.items():
379-
if isinstance(native_series, native_namespace.Series):
380-
compliant_series = from_native(
381-
native_series, series_only=True
382-
)._compliant_series
383-
if left_most_series is None:
384-
left_most_series = cast("PandasLikeSeries", compliant_series)
385-
aligned_data[key] = native_series
386-
else:
387-
aligned_data[key] = align_and_extract_native(
388-
left_most_series, compliant_series
389-
)[1]
390-
else:
391-
aligned_data[key] = native_series
392-
393-
native_frame = native_namespace.DataFrame.from_dict(aligned_data)
394-
395-
if schema:
396-
from narwhals._pandas_like.utils import get_dtype_backend
397-
398-
it: Iterable[DTypeBackend] = (
399-
get_dtype_backend(native_type, eager_backend)
400-
for native_type in native_frame.dtypes
401-
)
402-
pd_schema = Schema(schema).to_pandas(it)
403-
native_frame = native_frame.astype(pd_schema)
404-
405-
elif eager_backend is Implementation.PYARROW:
406-
pa_schema = Schema(schema).to_arrow() if schema is not None else schema
407-
native_frame = native_namespace.table(data, schema=pa_schema)
408-
else: # pragma: no cover
345+
data, backend = _from_dict_no_backend(data)
346+
implementation = Implementation.from_backend(backend)
347+
if is_eager_allowed(implementation):
348+
ns = _into_compliant_namespace(implementation, version)
349+
frame = ns._dataframe.from_dict(data, schema=schema, context=ns)
350+
return from_native(frame, eager_only=True)
351+
elif implementation is Implementation.UNKNOWN: # pragma: no cover
352+
native_namespace = implementation.to_native_namespace()
409353
try:
410354
# implementation is UNKNOWN, Narwhals extension using this feature should
411355
# implement `from_dict` function in the top-level namespace.
412356
native_frame = native_namespace.from_dict(data, schema=schema)
413357
except AttributeError as e:
414358
msg = "Unknown namespace is expected to implement `from_dict` function."
415359
raise AttributeError(msg) from e
416-
return from_native(native_frame, eager_only=True)
360+
return from_native(native_frame, eager_only=True)
361+
msg = (
362+
f"Unsupported `backend` value.\nExpected one of "
363+
f"{Implementation.POLARS, Implementation.PANDAS, Implementation.PYARROW, Implementation.MODIN, Implementation.CUDF} "
364+
f"or None, got: {implementation}."
365+
)
366+
raise ValueError(msg)
367+
368+
369+
def _from_dict_no_backend(
    data: Mapping[str, Series[Any] | Any], /
) -> tuple[dict[str, Series[Any] | Any], ModuleType]:
    """Infer the native namespace from the data when no backend was given.

    The namespace is taken from the first value for which
    `is_narwhals_series` is true; every value is then passed through
    `to_native(..., pass_through=True)`.

    Arguments:
        data: Mapping of column names to values.

    Returns:
        A `(converted_data, native_namespace)` pair.

    Raises:
        TypeError: If no value in `data` is a Narwhals Series.
    """
    first_series = next(
        (candidate for candidate in data.values() if is_narwhals_series(candidate)),
        None,
    )
    if first_series is None:
        msg = "Calling `from_dict` without `backend` is only supported if all input values are already Narwhals Series"
        raise TypeError(msg)
    native_namespace = first_series.__native_namespace__()
    converted = {}
    for name, column in data.items():
        converted[name] = to_native(column, pass_through=True)
    return converted, native_namespace
417381

418382

419383
@deprecate_native_namespace(warn_version="1.31.0", required=True)

narwhals/stable/v1/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2287,7 +2287,7 @@ def from_dict(
22872287
A new DataFrame.
22882288
"""
22892289
return _stableify( # type: ignore[no-any-return]
2290-
_from_dict_impl(data, schema, backend=backend)
2290+
_from_dict_impl(data, schema, backend=backend, version=Version.V1)
22912291
)
22922292

22932293

0 commit comments

Comments
 (0)