From 3b5093585c8696e127e1531d46fa0db7b35e8f2d Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Wed, 26 Mar 2025 22:13:10 +0000 Subject: [PATCH 01/10] chore: Accept `dtype` in `CompliantSeries.from_iterable` --- narwhals/_compliant/series.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/narwhals/_compliant/series.py b/narwhals/_compliant/series.py index 4e1ef8f354..084ab9eb84 100644 --- a/narwhals/_compliant/series.py +++ b/narwhals/_compliant/series.py @@ -70,7 +70,13 @@ def _to_expr(self) -> CompliantExpr[Any, Self]: ... def from_numpy(cls, data: Into1DArray, /, *, context: _FullContext) -> Self: ... @classmethod def from_iterable( - cls, data: Iterable[Any], /, *, context: _FullContext, name: str = "" + cls, + data: Iterable[Any], + /, + *, + context: _FullContext, + name: str = "", + dtype: DType | type[DType] | None = None, ) -> Self: ... def _change_version(self, version: Version) -> Self: ... From 1feb8cce6a456ac7e131b537cb260cbe24a675a9 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Wed, 26 Mar 2025 22:13:45 +0000 Subject: [PATCH 02/10] feat: Add `PolarsSeries.from_iterable` --- narwhals/_polars/series.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/narwhals/_polars/series.py b/narwhals/_polars/series.py index cfe8131924..62d27dad03 100644 --- a/narwhals/_polars/series.py +++ b/narwhals/_polars/series.py @@ -2,6 +2,7 @@ from typing import TYPE_CHECKING from typing import Any +from typing import Iterable from typing import Sequence from typing import cast from typing import overload @@ -74,6 +75,28 @@ def _change_version(self: Self, version: Version) -> Self: self.native, backend_version=self._backend_version, version=version ) + @classmethod + def from_iterable( + cls, + data: Iterable[Any], + *, + context: _FullContext, + name: str = "", + dtype: DType | type[DType] | None = None, + ) -> Self: + version = context._version + backend_version = context._backend_version + dtype_pl = ( + narwhals_to_native_dtype(dtype, version, backend_version) if dtype else None + ) + # NOTE: `Iterable` is fine, annotation is overly narrow + # https://github.com/pola-rs/polars/blob/82d57a4ee41f87c11ca1b1af15488459727efdd7/py-polars/polars/series/series.py#L332-L333 + return cls( + pl.Series(name=name, values=cast("Sequence[Any]", data), dtype=dtype_pl), + backend_version=backend_version, + version=version, + ) + @classmethod def from_numpy(cls, data: Into1DArray, /, *, context: _FullContext) -> Self: return cls( From d286331ee0cfc2fdadc9eb681aef732b0246cdff Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Wed, 26 Mar 2025 22:14:24 +0000 Subject: [PATCH 03/10] feat: Update `ArrowSeries.from_iterable` --- narwhals/_arrow/series.py | 13 ++++++++++--- narwhals/_arrow/utils.py | 4 +++- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/narwhals/_arrow/series.py b/narwhals/_arrow/series.py index 08ec2e514a..0f033e29bb 100644 --- a/narwhals/_arrow/series.py +++ b/narwhals/_arrow/series.py @@ -141,13 +141,20 @@ def _from_native_series( @classmethod def from_iterable( - cls, data: Iterable[Any], *, context: _FullContext, name: str = "" + cls, + data: Iterable[Any], + *, + context: _FullContext, + name: str = "", + dtype: DType | type[DType] | None = None, ) -> Self: + version = context._version + dtype_pa = narwhals_to_native_dtype(dtype, version) if dtype else None return cls( - chunked_array([data]), + chunked_array([data], dtype_pa), name=name, backend_version=context._backend_version, - version=context._version, + version=version, ) def _from_scalar(self, value: Any) -> Self: diff --git a/narwhals/_arrow/utils.py b/narwhals/_arrow/utils.py index ce10365f5c..79c4ccc872 100644 --- a/narwhals/_arrow/utils.py +++ b/narwhals/_arrow/utils.py @@ -74,11 +74,13 @@ def extract_py_scalar(value: Any, /) -> Any: def chunked_array( arr: ArrowArray | list[Iterable[pa.Scalar[Any]]] | ArrowChunkedArray, + dtype: pa.DataType | None = None, + /, ) -> ArrowChunkedArray: if isinstance(arr, pa.ChunkedArray): return arr if isinstance(arr, list): - return pa.chunked_array(cast("Any", arr)) + return pa.chunked_array(cast("Any", arr), dtype) else: return pa.chunked_array([arr], arr.type) From 9f8c7a6ca85ce4f2bca1c8ae04895c0af1fadc24 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Wed, 26 Mar 2025 22:15:01 +0000 Subject: [PATCH 04/10] feat: Update `PandasLikeSeries.from_iterable` --- narwhals/_pandas_like/series.py | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/narwhals/_pandas_like/series.py b/narwhals/_pandas_like/series.py index e5a237be1c..308d85d94e 100644 --- a/narwhals/_pandas_like/series.py +++ b/narwhals/_pandas_like/series.py @@ -173,18 +173,30 @@ def from_iterable( *, context: _FullContext, name: str = "", + dtype: DType | type[DType] | None = None, index: Any = None, ) -> Self: - return cls( - native_series_from_iterable( + implementation = context._implementation + backend_version = context._backend_version + version = context._version + if dtype: + pd_dtype = narwhals_to_native_dtype( + dtype, None, implementation, backend_version, version + ) + ns = implementation.to_native_namespace() + series = ns.Series(data, name=name, dtype=pd_dtype) + else: + series = native_series_from_iterable( data, - name=name, + name, index=[] if index is None else index, - implementation=context._implementation, - ), - implementation=context._implementation, - backend_version=context._backend_version, - version=context._version, + implementation=implementation, + ) + return cls( + series, + implementation=implementation, + backend_version=backend_version, + version=version, ) @classmethod From cc73797ad6c185e7eb3b62da05b528228b4cf909 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Wed, 26 Mar 2025 22:16:28 +0000 Subject: [PATCH 05/10] refactor: Simplify `nw.new_series` Follow up to https://github.com/narwhals-dev/narwhals/pull/2283#issuecomment-2749477440 --- narwhals/functions.py | 46 +++++-------------------------------------- 1 file changed, 5 insertions(+), 41 deletions(-) diff --git a/narwhals/functions.py b/narwhals/functions.py index a91d9fd8b6..33122086e0 100644 --- a/narwhals/functions.py +++ b/narwhals/functions.py @@ -44,7 +44,6 @@ if TYPE_CHECKING: from types import ModuleType - import pyarrow as pa from typing_extensions import Self from typing_extensions import TypeAlias from typing_extensions import TypeIs @@ -260,50 +259,15 @@ def _new_series_impl( version: Version, ) -> Series[Any]: implementation = Implementation.from_backend(backend) - native_namespace = implementation.to_native_namespace() - - if implementation is Implementation.POLARS: - if dtype: - from narwhals._polars.utils import ( - narwhals_to_native_dtype as polars_narwhals_to_native_dtype, - ) - - backend_version = parse_version(native_namespace.__version__) - dtype_pl = polars_narwhals_to_native_dtype( - dtype, version=version, backend_version=backend_version - ) - else: - dtype_pl = None - - native_series = native_namespace.Series(name=name, values=values, dtype=dtype_pl) - elif implementation.is_pandas_like(): - if dtype: - from narwhals._pandas_like.utils import ( - narwhals_to_native_dtype as pandas_like_narwhals_to_native_dtype, - ) - - backend_version = parse_version(native_namespace) - pd_dtype = pandas_like_narwhals_to_native_dtype( - dtype, None, implementation, backend_version, version - ) - native_series = native_namespace.Series(values, name=name, dtype=pd_dtype) - else: - native_series = native_namespace.Series(values, name=name) - - elif implementation is Implementation.PYARROW: - pa_dtype: pa.DataType | None = None - if dtype: - from narwhals._arrow.utils import ( - narwhals_to_native_dtype as arrow_narwhals_to_native_dtype, - ) - - pa_dtype = arrow_narwhals_to_native_dtype(dtype, version=version) - native_series = native_namespace.chunked_array([values], type=pa_dtype) - + if is_eager_allowed(implementation): + ns = _into_compliant_namespace(implementation, version) + series = ns._series.from_iterable(values, name=name, context=ns, dtype=dtype) + return from_native(series, series_only=True) elif implementation is Implementation.DASK: # pragma: no cover msg = "Dask support in Narwhals is lazy-only, so `new_series` is not supported" raise NotImplementedError(msg) else: # pragma: no cover + native_namespace = implementation.to_native_namespace() try: # implementation is UNKNOWN, Narwhals extension using this feature should # implement `from_dict` function in the top-level namespace. From 9f7b34316a8dc414cf1d82def20503fa4c7097aa Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Wed, 26 Mar 2025 22:24:14 +0000 Subject: [PATCH 06/10] refactor: Simplify `chunked_array` Don't need much of that now the stubs are working --- narwhals/_arrow/utils.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/narwhals/_arrow/utils.py b/narwhals/_arrow/utils.py index 79c4ccc872..04af96a312 100644 --- a/narwhals/_arrow/utils.py +++ b/narwhals/_arrow/utils.py @@ -34,6 +34,7 @@ # NOTE: stubs don't allow for `ChunkedArray[StructArray]` # Intended to represent the `.chunks` property storing `list[pa.StructArray]` ChunkedArrayStructArray: TypeAlias = ArrowChunkedArray + ArrayAny: TypeAlias = "ArrowArray | ArrowChunkedArray" _T = TypeVar("_T") @@ -73,14 +74,12 @@ def extract_py_scalar(value: Any, /) -> Any: def chunked_array( - arr: ArrowArray | list[Iterable[pa.Scalar[Any]]] | ArrowChunkedArray, - dtype: pa.DataType | None = None, - /, + arr: ArrayAny | list[Iterable[Any]], dtype: pa.DataType | None = None, / ) -> ArrowChunkedArray: if isinstance(arr, pa.ChunkedArray): return arr if isinstance(arr, list): - return pa.chunked_array(cast("Any", arr), dtype) + return pa.chunked_array(arr, dtype) else: return pa.chunked_array([arr], arr.type) From 1b00be29a0313ecfcbd1be769e6d96168116fcfe Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Wed, 26 Mar 2025 22:30:20 +0000 Subject: [PATCH 07/10] chore: Remove `from_dict` comment, fix exception message --- narwhals/functions.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/narwhals/functions.py b/narwhals/functions.py index 33122086e0..c0f6443d72 100644 --- a/narwhals/functions.py +++ b/narwhals/functions.py @@ -269,11 +269,9 @@ def _new_series_impl( else: # pragma: no cover native_namespace = implementation.to_native_namespace() try: - # implementation is UNKNOWN, Narwhals extension using this feature should - # implement `from_dict` function in the top-level namespace. native_series = native_namespace.new_series(name, values, dtype) except AttributeError as e: - msg = "Unknown namespace is expected to implement `Series` constructor." + msg = "Unknown namespace is expected to implement `new_series` constructor." raise AttributeError(msg) from e return from_native(native_series, series_only=True).alias(name) From 919e66fe2b3e677560e52008c2eed74d82c0e672 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Wed, 26 Mar 2025 22:35:48 +0000 Subject: [PATCH 08/10] fix coverage https://github.com/narwhals-dev/narwhals/actions/runs/14094631540/job/39479303897 --- narwhals/functions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/narwhals/functions.py b/narwhals/functions.py index c0f6443d72..0702c3cf68 100644 --- a/narwhals/functions.py +++ b/narwhals/functions.py @@ -273,7 +273,7 @@ def _new_series_impl( except AttributeError as e: msg = "Unknown namespace is expected to implement `new_series` constructor." raise AttributeError(msg) from e - return from_native(native_series, series_only=True).alias(name) + return from_native(native_series, series_only=True).alias(name) @deprecate_native_namespace(warn_version="1.26.0") From 3124d5012aa1f2462be642783295f8cabedc30e4 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Wed, 26 Mar 2025 22:40:29 +0000 Subject: [PATCH 09/10] try fix coverage again https://github.com/narwhals-dev/narwhals/actions/runs/14094720896/job/39479560835 --- narwhals/functions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/narwhals/functions.py b/narwhals/functions.py index 0702c3cf68..797f69c349 100644 --- a/narwhals/functions.py +++ b/narwhals/functions.py @@ -270,10 +270,10 @@ def _new_series_impl( native_namespace = implementation.to_native_namespace() try: native_series = native_namespace.new_series(name, values, dtype) + return from_native(native_series, series_only=True).alias(name) except AttributeError as e: msg = "Unknown namespace is expected to implement `new_series` constructor." raise AttributeError(msg) from e - return from_native(native_series, series_only=True).alias(name) @deprecate_native_namespace(warn_version="1.26.0") From 9f3b03be814d19e48772aa6edd742726d9b4f48c Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Thu, 27 Mar 2025 14:45:33 +0000 Subject: [PATCH 10/10] refactor: Factor-out `native_series_from_iterable` --- narwhals/_pandas_like/group_by.py | 9 ++------- narwhals/_pandas_like/series.py | 19 ++++++++----------- narwhals/_pandas_like/utils.py | 21 --------------------- 3 files changed, 10 insertions(+), 39 deletions(-) diff --git a/narwhals/_pandas_like/group_by.py b/narwhals/_pandas_like/group_by.py index bed561f24e..49a96e89d8 100644 --- a/narwhals/_pandas_like/group_by.py +++ b/narwhals/_pandas_like/group_by.py @@ -12,7 +12,6 @@ from narwhals._compliant import EagerGroupBy from narwhals._expression_parsing import evaluate_output_names_and_aliases from narwhals._pandas_like.utils import horizontal_concat -from narwhals._pandas_like.utils import native_series_from_iterable from narwhals._pandas_like.utils import select_columns_by_name from narwhals._pandas_like.utils import set_columns from narwhals.utils import find_stacklevel @@ -283,12 +282,8 @@ def func(df: Any) -> Any: for result_keys in results_keys: out_group.append(result_keys.native.iloc[0]) out_names.append(result_keys.name) - return native_series_from_iterable( - out_group, - index=out_names, - name="", - implementation=implementation, - ) + ns = self.compliant.__narwhals_namespace__() + return ns._series.from_iterable(out_group, index=out_names, context=ns).native if implementation.is_pandas() and backend_version >= (2, 2): result_complex = self._grouped.apply(func, include_groups=False) diff --git a/narwhals/_pandas_like/series.py b/narwhals/_pandas_like/series.py index 308d85d94e..cf35ffad88 100644 --- a/narwhals/_pandas_like/series.py +++ b/narwhals/_pandas_like/series.py @@ -21,7 +21,6 @@ from narwhals._pandas_like.utils import align_and_extract_native from narwhals._pandas_like.utils import get_dtype_backend from narwhals._pandas_like.utils import narwhals_to_native_dtype -from narwhals._pandas_like.utils import native_series_from_iterable from narwhals._pandas_like.utils import native_to_narwhals_dtype from narwhals._pandas_like.utils import object_native_to_narwhals_dtype from narwhals._pandas_like.utils import rename @@ -179,21 +178,19 @@ def from_iterable( implementation = context._implementation backend_version = context._backend_version version = context._version + ns = implementation.to_native_namespace() + kwds: dict[str, Any] = {} if dtype: - pd_dtype = narwhals_to_native_dtype( + kwds["dtype"] = narwhals_to_native_dtype( dtype, None, implementation, backend_version, version ) - ns = implementation.to_native_namespace() - series = ns.Series(data, name=name, dtype=pd_dtype) else: - series = native_series_from_iterable( - data, - name, - index=[] if index is None else index, - implementation=implementation, - ) + if implementation.is_pandas(): + kwds["copy"] = False + if index is not None and len(index): + kwds["index"] = index return cls( - series, + ns.Series(data, name=name, **kwds), implementation=implementation, backend_version=backend_version, version=version, diff --git a/narwhals/_pandas_like/utils.py b/narwhals/_pandas_like/utils.py index fbeeca1d78..c839813d00 100644 --- a/narwhals/_pandas_like/utils.py +++ b/narwhals/_pandas_like/utils.py @@ -6,7 +6,6 @@ from contextlib import suppress from typing import TYPE_CHECKING from typing import Any -from typing import Iterable from typing import Sized from typing import TypeVar from typing import cast @@ -218,26 +217,6 @@ def diagonal_concat( raise TypeError(msg) -def native_series_from_iterable( - data: Iterable[Any], - name: str, - index: Any, - implementation: Implementation, -) -> Any: - """Return native series.""" - if implementation in PANDAS_LIKE_IMPLEMENTATION: - extra_kwargs = {"copy": False} if implementation is Implementation.PANDAS else {} - if len(index) == 0: - index = None - return implementation.to_native_namespace().Series( - data, name=name, index=index, **extra_kwargs - ) - - else: # pragma: no cover - msg = f"Expected pandas-like implementation ({PANDAS_LIKE_IMPLEMENTATION}), found {implementation}" - raise TypeError(msg) - - def set_index( obj: T, index: Any,