From 4b812d3498ad5dbb43a17f7bd5b0e4b0aa3bf789 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Wed, 12 Mar 2025 19:03:26 +0000 Subject: [PATCH 01/16] feat(typing): Add `NumpyConvertible` protocol --- narwhals/_translate.py | 61 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) create mode 100644 narwhals/_translate.py diff --git a/narwhals/_translate.py b/narwhals/_translate.py new file mode 100644 index 0000000000..c48a88d9a4 --- /dev/null +++ b/narwhals/_translate.py @@ -0,0 +1,61 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING +from typing import Any +from typing import Protocol + +if TYPE_CHECKING: + from typing_extensions import Self + from typing_extensions import TypeVar + + +else: + import sys + from importlib.util import find_spec + + if sys.version_info >= (3, 13): + from typing import TypeVar + elif find_spec("typing_extensions"): + from typing_extensions import TypeVar + else: + from typing import TypeVar as _TypeVar + + def TypeVar( # noqa: ANN202, N802 + name: str, + *constraints: Any, + bound: Any | None = None, + covariant: bool = False, + contravariant: bool = False, + **kwds: Any, # noqa: ARG001 + ): + return _TypeVar( + name, + *constraints, + bound=bound, + covariant=covariant, + contravariant=contravariant, + ) + + +ToNumpyT_co = TypeVar("ToNumpyT_co", covariant=True) +FromNumpyDT_contra = TypeVar( + "FromNumpyDT_contra", contravariant=True, default=ToNumpyT_co +) +FromNumpyT_contra = TypeVar("FromNumpyT_contra", contravariant=True) + + +class ToNumpy(Protocol[ToNumpyT_co]): + def to_numpy(self, *args: Any, **kwds: Any) -> ToNumpyT_co: ... + + +class FromNumpy(Protocol[FromNumpyT_contra]): + @classmethod + def from_numpy(cls, data: FromNumpyT_contra, *args: Any, **kwds: Any) -> Self: ... 
+ + +class NumpyConvertible( + ToNumpy[ToNumpyT_co], + FromNumpy[FromNumpyDT_contra], + Protocol[ToNumpyT_co, FromNumpyDT_contra], +): + def to_numpy(self, dtype: Any, *, copy: bool | None) -> ToNumpyT_co: ... From 18b07f92ab9d2deeb79491d6c9b0dce778e2e97f Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Wed, 12 Mar 2025 19:04:24 +0000 Subject: [PATCH 02/16] feat(typing): Extend `CompliantSeries` w/ `NumpyConvertible` --- narwhals/_compliant/series.py | 10 +++++++++- narwhals/typing.py | 1 + 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/narwhals/_compliant/series.py b/narwhals/_compliant/series.py index e6be377046..89e60ab280 100644 --- a/narwhals/_compliant/series.py +++ b/narwhals/_compliant/series.py @@ -6,6 +6,9 @@ from typing import Protocol from typing import TypeVar +from narwhals._translate import NumpyConvertible +from narwhals.typing import _1DArray + if TYPE_CHECKING: from typing_extensions import Self @@ -15,6 +18,7 @@ from narwhals._compliant.namespace import EagerNamespace from narwhals.dtypes import DType from narwhals.typing import NativeSeries + from narwhals.typing import _NumpyScalar from narwhals.utils import Implementation from narwhals.utils import Version from narwhals.utils import _FullContext @@ -24,7 +28,7 @@ NativeSeriesT_co = TypeVar("NativeSeriesT_co", bound="NativeSeries", covariant=True) -class CompliantSeries(Protocol): +class CompliantSeries(NumpyConvertible[_1DArray, "_1DArray | _NumpyScalar"], Protocol): @property def dtype(self) -> DType: ... @property @@ -36,6 +40,10 @@ def alias(self, name: str) -> Self: ... def __narwhals_namespace__(self) -> Any: ... # CompliantNamespace[Any, Self]: ... def _from_native_series(self, series: Any) -> Self: ... def _to_expr(self) -> Any: ... # CompliantExpr[Any, Self]: ... + @classmethod + def from_numpy( + cls, data: _1DArray | _NumpyScalar, /, *, context: _FullContext + ) -> Self: ... 
class EagerSeries(CompliantSeries, Protocol[NativeSeriesT_co]): diff --git a/narwhals/typing.py b/narwhals/typing.py index 49ad668335..6d1e43e547 100644 --- a/narwhals/typing.py +++ b/narwhals/typing.py @@ -203,6 +203,7 @@ def __native_namespace__(self) -> ModuleType: ... _1DArray: TypeAlias = "_NDArray[tuple[int]]" # noqa: PYI042, PYI047 _2DArray: TypeAlias = "_NDArray[tuple[int, int]]" # noqa: PYI042, PYI047 _AnyDArray: TypeAlias = "_NDArray[tuple[int, ...]]" # noqa: PYI047 +_NumpyScalar: TypeAlias = "np.generic[Any]" # noqa: PYI047 class DTypes: From 1401aa1611ce5568857c9bbb346646ebb96154fa Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Wed, 12 Mar 2025 19:05:08 +0000 Subject: [PATCH 03/16] fix(typing): Add missing args for `ArrowSeries.to_numpy` All other backends already had these --- narwhals/_arrow/series.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/narwhals/_arrow/series.py b/narwhals/_arrow/series.py index dbb359b9db..af2b348b4b 100644 --- a/narwhals/_arrow/series.py +++ b/narwhals/_arrow/series.py @@ -437,7 +437,7 @@ def to_list(self: Self) -> list[Any]: def __array__(self: Self, dtype: Any = None, *, copy: bool | None = None) -> _1DArray: return self.native.__array__(dtype=dtype, copy=copy) - def to_numpy(self: Self) -> _1DArray: + def to_numpy(self: Self, dtype: Any = None, *, copy: bool | None = None) -> _1DArray: return self.native.to_numpy() def alias(self: Self, name: str) -> Self: From d02d83feab55567d6efff89820b6e7e0007b530b Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Wed, 12 Mar 2025 19:14:14 +0000 Subject: [PATCH 04/16] feat: add `ArrowSeries.from_numpy` --- narwhals/_arrow/series.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/narwhals/_arrow/series.py b/narwhals/_arrow/series.py index af2b348b4b..29a923899c 100644 --- a/narwhals/_arrow/series.py +++ b/narwhals/_arrow/series.py @@ -27,6 +27,7 @@ 
from narwhals._arrow.utils import nulls_like from narwhals._arrow.utils import pad_series from narwhals._compliant import EagerSeries +from narwhals.dependencies import is_numpy_array_1d from narwhals.exceptions import InvalidOperationError from narwhals.utils import Implementation from narwhals.utils import generate_temporary_column_name @@ -54,6 +55,7 @@ from narwhals.dtypes import DType from narwhals.typing import _1DArray from narwhals.typing import _2DArray + from narwhals.typing import _NumpyScalar from narwhals.utils import Version from narwhals.utils import _FullContext @@ -156,6 +158,14 @@ def _from_scalar(self, value: Any) -> Self: value = value.as_py() return super()._from_scalar(value) + @classmethod + def from_numpy( + cls, data: _1DArray | _NumpyScalar, /, *, context: _FullContext + ) -> Self: + return cls._from_iterable( + data if is_numpy_array_1d(data) else [data], name="", context=context + ) + def __narwhals_namespace__(self: Self) -> ArrowNamespace: from narwhals._arrow.namespace import ArrowNamespace From 9c81781542822963ad42e21ea91d3ac857ee8701 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Wed, 12 Mar 2025 19:24:25 +0000 Subject: [PATCH 05/16] feat: add `PandasLikeSeries.from_numpy` --- narwhals/_pandas_like/series.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/narwhals/_pandas_like/series.py b/narwhals/_pandas_like/series.py index 41c6198398..7d1a46c114 100644 --- a/narwhals/_pandas_like/series.py +++ b/narwhals/_pandas_like/series.py @@ -26,6 +26,7 @@ from narwhals._pandas_like.utils import rename from narwhals._pandas_like.utils import select_columns_by_name from narwhals._pandas_like.utils import set_index +from narwhals.dependencies import is_numpy_array_1d from narwhals.dependencies import is_numpy_scalar from narwhals.exceptions import InvalidOperationError from narwhals.utils import Implementation @@ -47,6 +48,7 @@ from narwhals.dtypes import DType from 
narwhals.typing import _1DArray from narwhals.typing import _AnyDArray + from narwhals.typing import _NumpyScalar from narwhals.utils import Version from narwhals.utils import _FullContext @@ -191,6 +193,25 @@ def _from_iterable( version=context._version, ) + @classmethod + def from_numpy( + cls, data: _1DArray | _NumpyScalar, /, *, context: _FullContext + ) -> Self: + implementation = context._implementation + if implementation.is_pandas_like(): + arr = data if is_numpy_array_1d(data) else [data] + return cls( + implementation.to_native_namespace().Series(arr, name=""), + implementation=implementation, + backend_version=context._backend_version, + version=context._version, + ) + else: # pragma: no cover + from narwhals._pandas_like.utils import PANDAS_LIKE_IMPLEMENTATION + + msg = f"Expected pandas-like implementation ({PANDAS_LIKE_IMPLEMENTATION}), found {implementation}" + raise TypeError(msg) + def __len__(self: Self) -> int: return len(self.native) From b575e37066c1ee563dd32a29e54085530a835aa3 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Wed, 12 Mar 2025 19:31:06 +0000 Subject: [PATCH 06/16] feat: add `PolarsSeries.from_numpy` --- narwhals/_polars/series.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/narwhals/_polars/series.py b/narwhals/_polars/series.py index 830f562f8a..5b2a043072 100644 --- a/narwhals/_polars/series.py +++ b/narwhals/_polars/series.py @@ -13,6 +13,7 @@ from narwhals._polars.utils import extract_native from narwhals._polars.utils import narwhals_to_native_dtype from narwhals._polars.utils import native_to_narwhals_dtype +from narwhals.dependencies import is_numpy_array_1d from narwhals.utils import Implementation from narwhals.utils import validate_backend_version @@ -27,7 +28,9 @@ from narwhals._polars.namespace import PolarsNamespace from narwhals.dtypes import DType from narwhals.typing import _1DArray + from narwhals.typing import _NumpyScalar from 
narwhals.utils import Version + from narwhals.utils import _FullContext T = TypeVar("T") @@ -71,6 +74,16 @@ def _change_version(self: Self, version: Version) -> Self: self._native_series, backend_version=self._backend_version, version=version ) + @classmethod + def from_numpy( + cls, data: _1DArray | _NumpyScalar, /, *, context: _FullContext + ) -> Self: + return cls( + pl.Series(data if is_numpy_array_1d(data) else [data]), + backend_version=context._backend_version, + version=context._version, + ) + def _from_native_series(self: Self, series: pl.Series) -> Self: return self.__class__( series, backend_version=self._backend_version, version=self._version From 48c81797450b037b4e8732dd9b0b3bb5f96fa99e Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Wed, 12 Mar 2025 19:51:30 +0000 Subject: [PATCH 07/16] feat(DRAFT): add `PolarsSeries.to_numpy` --- narwhals/_polars/series.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/narwhals/_polars/series.py b/narwhals/_polars/series.py index 5b2a043072..8f0b0454c6 100644 --- a/narwhals/_polars/series.py +++ b/narwhals/_polars/series.py @@ -589,6 +589,10 @@ def hist( def to_polars(self: Self) -> pl.Series: return self._native_series + # TODO @dangotbanned: review converting `copy` to a version compat argument + def to_numpy(self, dtype: Any = None, *, copy: bool | None = None) -> _1DArray: + return self.native.to_numpy() + @property def dt(self: Self) -> PolarsSeriesDateTimeNamespace: return PolarsSeriesDateTimeNamespace(self) From f5405b4ea0cad17496e696badd126c706559959b Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Wed, 12 Mar 2025 19:54:26 +0000 Subject: [PATCH 08/16] fix: resolve circular import --- narwhals/_compliant/series.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/narwhals/_compliant/series.py b/narwhals/_compliant/series.py index 89e60ab280..e129870680 100644 --- 
a/narwhals/_compliant/series.py +++ b/narwhals/_compliant/series.py @@ -7,7 +7,6 @@ from typing import TypeVar from narwhals._translate import NumpyConvertible -from narwhals.typing import _1DArray if TYPE_CHECKING: from typing_extensions import Self @@ -18,6 +17,7 @@ from narwhals._compliant.namespace import EagerNamespace from narwhals.dtypes import DType from narwhals.typing import NativeSeries + from narwhals.typing import _1DArray from narwhals.typing import _NumpyScalar from narwhals.utils import Implementation from narwhals.utils import Version @@ -28,7 +28,7 @@ NativeSeriesT_co = TypeVar("NativeSeriesT_co", bound="NativeSeries", covariant=True) -class CompliantSeries(NumpyConvertible[_1DArray, "_1DArray | _NumpyScalar"], Protocol): +class CompliantSeries(NumpyConvertible["_1DArray", "_1DArray | _NumpyScalar"], Protocol): @property def dtype(self) -> DType: ... @property From 2a86bbbf830d180c67278e6cb0cc74df29914e9e Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Wed, 12 Mar 2025 20:54:55 +0000 Subject: [PATCH 09/16] refactor: Replace all `_create_compliant_series` --- narwhals/_arrow/expr.py | 22 +++++++++------------- narwhals/_arrow/namespace.py | 3 --- narwhals/_compliant/namespace.py | 17 ++++++----------- narwhals/_compliant/typing.py | 5 +++++ narwhals/_expression_parsing.py | 5 ++++- narwhals/_pandas_like/dataframe.py | 13 +++++-------- narwhals/_pandas_like/expr.py | 10 ++++++---- narwhals/_pandas_like/namespace.py | 9 --------- narwhals/_pandas_like/utils.py | 25 ------------------------- narwhals/_polars/namespace.py | 5 ----- narwhals/dataframe.py | 5 +++-- 11 files changed, 38 insertions(+), 81 deletions(-) diff --git a/narwhals/_arrow/expr.py b/narwhals/_arrow/expr.py index 360132e80b..2fabb7ed04 100644 --- a/narwhals/_arrow/expr.py +++ b/narwhals/_arrow/expr.py @@ -1,5 +1,6 @@ from __future__ import annotations +from functools import partial from typing import TYPE_CHECKING from typing import 
Any from typing import Callable @@ -209,22 +210,17 @@ def map_batches( return_dtype: DType | type[DType] | None, ) -> Self: def func(df: ArrowDataFrame) -> list[ArrowSeries]: - input_series_list = self._call(df) + input_series_list = self(df) output_names = [input_series.name for input_series in input_series_list] result = [function(series) for series in input_series_list] - - if is_numpy_array(result[0]): - result = [ - df.__narwhals_namespace__() - ._create_compliant_series(array) - .alias(output_name) - for array, output_name in zip(result, output_names) - ] - elif (np := get_numpy()) is not None and np.isscalar(result[0]): + if is_numpy_array(result[0]) or ( + (np := get_numpy()) is not None and np.isscalar(result[0]) + ): + from_numpy = partial( + self.__narwhals_namespace__()._series.from_numpy, context=self + ) result = [ - df.__narwhals_namespace__() - ._create_compliant_series([array]) - .alias(output_name) + from_numpy(array).alias(output_name) for array, output_name in zip(result, output_names) ] if return_dtype is not None: diff --git a/narwhals/_arrow/namespace.py b/narwhals/_arrow/namespace.py index 22e2e56184..4716e3f917 100644 --- a/narwhals/_arrow/namespace.py +++ b/narwhals/_arrow/namespace.py @@ -56,9 +56,6 @@ def _expr(self) -> type[ArrowExpr]: def _series(self) -> type[ArrowSeries]: return ArrowSeries - def _create_compliant_series(self: Self, value: Any) -> ArrowSeries: - return self._series._from_iterable(value, name="", context=self) - # --- not in spec --- def __init__( self: Self, *, backend_version: tuple[int, ...], version: Version diff --git a/narwhals/_compliant/namespace.py b/narwhals/_compliant/namespace.py index 688f2770c2..b29c45921f 100644 --- a/narwhals/_compliant/namespace.py +++ b/narwhals/_compliant/namespace.py @@ -9,12 +9,13 @@ from narwhals._compliant.typing import EagerDataFrameT from narwhals._compliant.typing import EagerExprT from narwhals._compliant.typing import EagerSeriesT_co -from narwhals.utils import deprecated 
if TYPE_CHECKING: from narwhals._compliant.expr import CompliantExpr from narwhals._compliant.selectors import CompliantSelectorNamespace from narwhals.dtypes import DType + from narwhals.utils import Implementation + from narwhals.utils import Version __all__ = ["CompliantNamespace", "EagerNamespace"] @@ -34,18 +35,12 @@ class EagerNamespace( CompliantNamespace[EagerDataFrameT, EagerSeriesT_co], Protocol[EagerDataFrameT, EagerSeriesT_co, EagerExprT], ): + _implementation: Implementation + _backend_version: tuple[int, ...] + _version: Version + @property def _expr(self) -> type[EagerExprT]: ... @property def _series(self) -> type[EagerSeriesT_co]: ... def all_horizontal(self, *exprs: EagerExprT) -> EagerExprT: ... - - @deprecated( - "Internally used for `numpy.ndarray` -> `CompliantSeries`\n" - "Also referenced in untyped `nw.dataframe.DataFrame._extract_compliant`\n" - "See Also:\n" - " - https://github.com/narwhals-dev/narwhals/pull/2149#discussion_r1986283345\n" - " - https://github.com/narwhals-dev/narwhals/issues/2116\n" - " - https://github.com/narwhals-dev/narwhals/pull/2169" - ) - def _create_compliant_series(self, value: Any) -> EagerSeriesT_co: ... 
diff --git a/narwhals/_compliant/typing.py b/narwhals/_compliant/typing.py index 2513097a50..0e6fb997ce 100644 --- a/narwhals/_compliant/typing.py +++ b/narwhals/_compliant/typing.py @@ -15,6 +15,7 @@ from narwhals._compliant.expr import CompliantExpr from narwhals._compliant.expr import EagerExpr from narwhals._compliant.expr import NativeExpr + from narwhals._compliant.namespace import EagerNamespace from narwhals._compliant.series import CompliantSeries from narwhals._compliant.series import EagerSeries @@ -47,5 +48,9 @@ EagerSeriesT = TypeVar("EagerSeriesT", bound="EagerSeries[Any]") EagerSeriesT_co = TypeVar("EagerSeriesT_co", bound="EagerSeries[Any]", covariant=True) EagerExprT = TypeVar("EagerExprT", bound="EagerExpr[Any, Any]") +EagerNamespaceAny: TypeAlias = ( + "EagerNamespace[EagerDataFrame[Any], EagerSeries[Any], EagerExpr[Any, Any]]" +) + AliasNames: TypeAlias = Callable[[Sequence[str]], Sequence[str]] AliasName: TypeAlias = Callable[[str], str] diff --git a/narwhals/_expression_parsing.py b/narwhals/_expression_parsing.py index f5d091c4eb..9d1487357f 100644 --- a/narwhals/_expression_parsing.py +++ b/narwhals/_expression_parsing.py @@ -12,6 +12,7 @@ from typing import Literal from typing import Sequence from typing import TypeVar +from typing import cast from narwhals.dependencies import is_narwhals_series from narwhals.dependencies import is_numpy_array @@ -27,6 +28,7 @@ from narwhals._compliant import CompliantFrameT from narwhals._compliant import CompliantNamespace from narwhals._compliant import CompliantSeriesOrNativeExprT_co + from narwhals._compliant.typing import EagerNamespaceAny from narwhals.expr import Expr from narwhals.typing import CompliantDataFrame from narwhals.typing import CompliantLazyFrame @@ -103,7 +105,8 @@ def extract_compliant( if is_narwhals_series(other): return other._compliant_series._to_expr() if is_numpy_array(other): - return plx._create_compliant_series(other)._to_expr() # type: ignore[attr-defined] + ns = 
cast("EagerNamespaceAny", plx) + return ns._series.from_numpy(other, context=ns)._to_expr() return other diff --git a/narwhals/_pandas_like/dataframe.py b/narwhals/_pandas_like/dataframe.py index d3e57bae89..d55e49b874 100644 --- a/narwhals/_pandas_like/dataframe.py +++ b/narwhals/_pandas_like/dataframe.py @@ -16,7 +16,6 @@ from narwhals._pandas_like.utils import align_series_full_broadcast from narwhals._pandas_like.utils import check_column_names_are_unique from narwhals._pandas_like.utils import convert_str_slice_to_int_slice -from narwhals._pandas_like.utils import create_compliant_series from narwhals._pandas_like.utils import extract_dataframe_comparand from narwhals._pandas_like.utils import horizontal_concat from narwhals._pandas_like.utils import native_to_narwhals_dtype @@ -433,16 +432,14 @@ def estimated_size(self: Self, unit: SizeUnit) -> int | float: return scale_bytes(sz, unit=unit) def with_row_index(self: Self, name: str) -> Self: - row_index = create_compliant_series( - range(len(self._native_frame)), - index=self._native_frame.index, - implementation=self._implementation, - backend_version=self._backend_version, - version=self._version, + frame = self._native_frame + namespace = self.__narwhals_namespace__() + row_index = namespace._series._from_iterable( + range(len(frame)), name="", context=self, index=frame.index ).alias(name) return self._from_native_frame( horizontal_concat( - [row_index._native_series, self._native_frame], + [row_index.native, frame], implementation=self._implementation, backend_version=self._backend_version, ) diff --git a/narwhals/_pandas_like/expr.py b/narwhals/_pandas_like/expr.py index bc18626778..fa5c97d862 100644 --- a/narwhals/_pandas_like/expr.py +++ b/narwhals/_pandas_like/expr.py @@ -1,6 +1,7 @@ from __future__ import annotations import re +from functools import partial from typing import TYPE_CHECKING from typing import Any from typing import Callable @@ -305,16 +306,17 @@ def map_batches( return_dtype: DType | 
type[DType] | None, ) -> Self: def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]: - input_series_list = self._call(df) + input_series_list = self(df) output_names = [input_series.name for input_series in input_series_list] result = [function(series) for series in input_series_list] if is_numpy_array(result[0]) or ( (np := get_numpy()) is not None and np.isscalar(result[0]) ): + from_numpy = partial( + self.__narwhals_namespace__()._series.from_numpy, context=self + ) result = [ - df.__narwhals_namespace__() - ._create_compliant_series(array) - .alias(output_name) + from_numpy(array).alias(output_name) for array, output_name in zip(result, output_names) ] if return_dtype is not None: diff --git a/narwhals/_pandas_like/namespace.py b/narwhals/_pandas_like/namespace.py index e9902e6f8b..0b8ebafa67 100644 --- a/narwhals/_pandas_like/namespace.py +++ b/narwhals/_pandas_like/namespace.py @@ -19,7 +19,6 @@ from narwhals._pandas_like.selectors import PandasSelectorNamespace from narwhals._pandas_like.series import PandasLikeSeries from narwhals._pandas_like.utils import align_series_full_broadcast -from narwhals._pandas_like.utils import create_compliant_series from narwhals._pandas_like.utils import diagonal_concat from narwhals._pandas_like.utils import extract_dataframe_comparand from narwhals._pandas_like.utils import horizontal_concat @@ -66,14 +65,6 @@ def __init__( self._backend_version = backend_version self._version = version - def _create_compliant_series(self: Self, value: Any) -> PandasLikeSeries: - return create_compliant_series( - value, - implementation=self._implementation, - backend_version=self._backend_version, - version=self._version, - ) - # --- selection --- def col(self: Self, *column_names: str) -> PandasLikeExpr: return self._expr.from_column_names( diff --git a/narwhals/_pandas_like/utils.py b/narwhals/_pandas_like/utils.py index a442c64029..0fb7b352b3 100644 --- a/narwhals/_pandas_like/utils.py +++ b/narwhals/_pandas_like/utils.py @@ 
-147,31 +147,6 @@ def extract_dataframe_comparand( return other._native_series -def create_compliant_series( - iterable: Any, - index: Any = None, - *, - implementation: Implementation, - backend_version: tuple[int, ...], - version: Version, -) -> PandasLikeSeries: - from narwhals._pandas_like.series import PandasLikeSeries - - if implementation in PANDAS_LIKE_IMPLEMENTATION: - series = implementation.to_native_namespace().Series( - iterable, index=index, name="" - ) - return PandasLikeSeries( - series, - implementation=implementation, - backend_version=backend_version, - version=version, - ) - else: # pragma: no cover - msg = f"Expected pandas-like implementation ({PANDAS_LIKE_IMPLEMENTATION}), found {implementation}" - raise TypeError(msg) - - def horizontal_concat( dfs: list[Any], *, implementation: Implementation, backend_version: tuple[int, ...] ) -> Any: diff --git a/narwhals/_polars/namespace.py b/narwhals/_polars/namespace.py index 2bcf62f949..20d191f941 100644 --- a/narwhals/_polars/namespace.py +++ b/narwhals/_polars/namespace.py @@ -55,11 +55,6 @@ def _expr(self) -> type[PolarsExpr]: def _series(self) -> type[PolarsSeries]: return PolarsSeries - def _create_compliant_series(self, value: Any) -> PolarsSeries: - return self._series( - pl.Series(value), backend_version=self._backend_version, version=self._version - ) - def nth(self: Self, *indices: int) -> PolarsExpr: if self._backend_version < (1, 0, 0): msg = "`nth` is only supported for Polars>=1.0.0. Please use `col` for columns selection instead." 
diff --git a/narwhals/dataframe.py b/narwhals/dataframe.py index 2bbeb3918a..fa7f19fa6f 100644 --- a/narwhals/dataframe.py +++ b/narwhals/dataframe.py @@ -50,6 +50,7 @@ from typing_extensions import Self from narwhals._compliant import IntoCompliantExpr + from narwhals._compliant.typing import EagerNamespaceAny from narwhals.group_by import GroupBy from narwhals.group_by import LazyGroupBy from narwhals.series import Series @@ -425,7 +426,7 @@ def _extract_compliant(self: Self, arg: Any) -> Any: from narwhals.expr import Expr from narwhals.series import Series - plx = self.__narwhals_namespace__() + plx: EagerNamespaceAny = self.__narwhals_namespace__() if isinstance(arg, BaseFrame): return arg._compliant_frame if isinstance(arg, Series): @@ -443,7 +444,7 @@ def _extract_compliant(self: Self, arg: Any) -> Any: ) raise TypeError(msg) if is_numpy_array(arg): - return plx._create_compliant_series(arg)._to_expr() + return plx._series.from_numpy(arg, context=plx)._to_expr() raise InvalidIntoExprError.from_invalid_type(type(arg)) @property From ca8fcdfae72ab694e58def524b6b8db0b3380226 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Wed, 12 Mar 2025 21:11:45 +0000 Subject: [PATCH 10/16] refactor: Move `map_batches` up to `EagerExpr` --- narwhals/_arrow/expr.py | 37 ---------------------------------- narwhals/_compliant/expr.py | 34 +++++++++++++++++++++++++++++++ narwhals/_compliant/series.py | 2 ++ narwhals/_pandas_like/expr.py | 38 ----------------------------------- 4 files changed, 36 insertions(+), 75 deletions(-) diff --git a/narwhals/_arrow/expr.py b/narwhals/_arrow/expr.py index 2fabb7ed04..bc72bb4674 100644 --- a/narwhals/_arrow/expr.py +++ b/narwhals/_arrow/expr.py @@ -1,6 +1,5 @@ from __future__ import annotations -from functools import partial from typing import TYPE_CHECKING from typing import Any from typing import Callable @@ -14,8 +13,6 @@ from narwhals._expression_parsing import ExprKind from 
narwhals._expression_parsing import evaluate_output_names_and_aliases from narwhals._expression_parsing import is_scalar_like -from narwhals.dependencies import get_numpy -from narwhals.dependencies import is_numpy_array from narwhals.exceptions import ColumnNotFoundError from narwhals.utils import Implementation from narwhals.utils import generate_temporary_column_name @@ -26,7 +23,6 @@ from narwhals._arrow.dataframe import ArrowDataFrame from narwhals._arrow.namespace import ArrowNamespace - from narwhals.dtypes import DType from narwhals.utils import Version from narwhals.utils import _FullContext @@ -204,39 +200,6 @@ def func(df: ArrowDataFrame) -> Sequence[ArrowSeries]: version=self._version, ) - def map_batches( - self: Self, - function: Callable[[Any], Any], - return_dtype: DType | type[DType] | None, - ) -> Self: - def func(df: ArrowDataFrame) -> list[ArrowSeries]: - input_series_list = self(df) - output_names = [input_series.name for input_series in input_series_list] - result = [function(series) for series in input_series_list] - if is_numpy_array(result[0]) or ( - (np := get_numpy()) is not None and np.isscalar(result[0]) - ): - from_numpy = partial( - self.__narwhals_namespace__()._series.from_numpy, context=self - ) - result = [ - from_numpy(array).alias(output_name) - for array, output_name in zip(result, output_names) - ] - if return_dtype is not None: - result = [series.cast(return_dtype) for series in result] - return result - - return self.__class__( - func, - depth=self._depth + 1, - function_name=self._function_name + "->map_batches", - evaluate_output_names=self._evaluate_output_names, - alias_output_names=self._alias_output_names, - backend_version=self._backend_version, - version=self._version, - ) - def cum_count(self: Self, *, reverse: bool) -> Self: return self._reuse_series("cum_count", reverse=reverse) diff --git a/narwhals/_compliant/expr.py b/narwhals/_compliant/expr.py index 30e9bfa0a7..9e7aed1b63 100644 --- 
a/narwhals/_compliant/expr.py +++ b/narwhals/_compliant/expr.py @@ -29,6 +29,8 @@ from narwhals._compliant.typing import EagerSeriesT from narwhals._compliant.typing import NativeExprT_co from narwhals._expression_parsing import evaluate_output_names_and_aliases +from narwhals.dependencies import get_numpy +from narwhals.dependencies import is_numpy_array from narwhals.dtypes import DType from narwhals.utils import _ExprNamespace from narwhals.utils import deprecated @@ -764,6 +766,38 @@ def rolling_var( ddof=ddof, ) + def map_batches( + self: Self, + function: Callable[[Any], Any], + return_dtype: DType | type[DType] | None, + ) -> Self: + def func(df: EagerDataFrameT) -> Sequence[EagerSeriesT]: + input_series_list = self(df) + output_names = [input_series.name for input_series in input_series_list] + result = [function(series) for series in input_series_list] + if is_numpy_array(result[0]) or ( + (np := get_numpy()) is not None and np.isscalar(result[0]) + ): + from_numpy = partial( + self.__narwhals_namespace__()._series.from_numpy, context=self + ) + result = [ + from_numpy(array).alias(output_name) + for array, output_name in zip(result, output_names) + ] + if return_dtype is not None: + result = [series.cast(return_dtype) for series in result] + return result + + return self._from_callable( + func, + depth=self._depth + 1, + function_name=self._function_name + "->map_batches", + evaluate_output_names=self._evaluate_output_names, + alias_output_names=self._alias_output_names, + context=self, + ) + @property def cat(self) -> EagerExprCatNamespace[Self]: return EagerExprCatNamespace(self) diff --git a/narwhals/_compliant/series.py b/narwhals/_compliant/series.py index e129870680..d3e575f859 100644 --- a/narwhals/_compliant/series.py +++ b/narwhals/_compliant/series.py @@ -68,3 +68,5 @@ def __narwhals_namespace__(self) -> EagerNamespace[Any, Self, Any]: ... 
def _to_expr(self) -> EagerExpr[Any, Any]: return self.__narwhals_namespace__()._expr._from_series(self) # type: ignore[no-any-return] + + def cast(self, dtype: DType | type[DType]) -> Self: ... diff --git a/narwhals/_pandas_like/expr.py b/narwhals/_pandas_like/expr.py index fa5c97d862..104a4efc00 100644 --- a/narwhals/_pandas_like/expr.py +++ b/narwhals/_pandas_like/expr.py @@ -1,7 +1,6 @@ from __future__ import annotations import re -from functools import partial from typing import TYPE_CHECKING from typing import Any from typing import Callable @@ -14,8 +13,6 @@ from narwhals._expression_parsing import is_elementary_expression from narwhals._pandas_like.group_by import AGGREGATIONS_TO_PANDAS_EQUIVALENT from narwhals._pandas_like.series import PandasLikeSeries -from narwhals.dependencies import get_numpy -from narwhals.dependencies import is_numpy_array from narwhals.exceptions import ColumnNotFoundError from narwhals.utils import generate_temporary_column_name @@ -24,7 +21,6 @@ from narwhals._pandas_like.dataframe import PandasLikeDataFrame from narwhals._pandas_like.namespace import PandasLikeNamespace - from narwhals.dtypes import DType from narwhals.utils import Implementation from narwhals.utils import Version from narwhals.utils import _FullContext @@ -300,40 +296,6 @@ def func(df: PandasLikeDataFrame) -> Sequence[PandasLikeSeries]: version=self._version, ) - def map_batches( - self: Self, - function: Callable[[Any], Any], - return_dtype: DType | type[DType] | None, - ) -> Self: - def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]: - input_series_list = self(df) - output_names = [input_series.name for input_series in input_series_list] - result = [function(series) for series in input_series_list] - if is_numpy_array(result[0]) or ( - (np := get_numpy()) is not None and np.isscalar(result[0]) - ): - from_numpy = partial( - self.__narwhals_namespace__()._series.from_numpy, context=self - ) - result = [ - from_numpy(array).alias(output_name) - for 
array, output_name in zip(result, output_names) - ] - if return_dtype is not None: - result = [series.cast(return_dtype) for series in result] - return result - - return self.__class__( - func, - depth=self._depth + 1, - function_name=self._function_name + "->map_batches", - evaluate_output_names=self._evaluate_output_names, - alias_output_names=self._alias_output_names, - implementation=self._implementation, - backend_version=self._backend_version, - version=self._version, - ) - def cum_count(self: Self, *, reverse: bool) -> Self: return self._reuse_series("cum_count", call_kwargs={"reverse": reverse}) From d33f1aef991c45b6d6c8e664eb5ce74a49795444 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Wed, 12 Mar 2025 21:21:43 +0000 Subject: [PATCH 11/16] coverage https://github.com/narwhals-dev/narwhals/actions/runs/13821678447/job/38668299244?pr=2196 --- narwhals/_translate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/narwhals/_translate.py b/narwhals/_translate.py index c48a88d9a4..870eadc6b1 100644 --- a/narwhals/_translate.py +++ b/narwhals/_translate.py @@ -9,7 +9,7 @@ from typing_extensions import TypeVar -else: +else: # pragma: no cover import sys from importlib.util import find_spec From 9af92285d05f92dbc7bc087ca59733afdd292a4e Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Thu, 13 Mar 2025 11:25:09 +0000 Subject: [PATCH 12/16] refactor(typing): Use `Into1DArray` alias - Less repetition, but also helps document what the 2nd `TypeVar` is for (`from_`) - It has to be in that position to follow the rules of https://typing.python.org/en/latest/spec/generics.html#default-ordering-and-subscription-rules --- narwhals/_arrow/series.py | 6 ++---- narwhals/_compliant/series.py | 10 ++++------ narwhals/_pandas_like/series.py | 6 ++---- narwhals/_polars/series.py | 6 ++---- narwhals/typing.py | 6 ++++-- 5 files changed, 14 insertions(+), 20 
deletions(-) diff --git a/narwhals/_arrow/series.py b/narwhals/_arrow/series.py index 29a923899c..45171f13d2 100644 --- a/narwhals/_arrow/series.py +++ b/narwhals/_arrow/series.py @@ -53,9 +53,9 @@ from narwhals._arrow.typing import _AsPyType from narwhals._arrow.typing import _BasicDataType from narwhals.dtypes import DType + from narwhals.typing import Into1DArray from narwhals.typing import _1DArray from narwhals.typing import _2DArray - from narwhals.typing import _NumpyScalar from narwhals.utils import Version from narwhals.utils import _FullContext @@ -159,9 +159,7 @@ def _from_scalar(self, value: Any) -> Self: return super()._from_scalar(value) @classmethod - def from_numpy( - cls, data: _1DArray | _NumpyScalar, /, *, context: _FullContext - ) -> Self: + def from_numpy(cls, data: Into1DArray, /, *, context: _FullContext) -> Self: return cls._from_iterable( data if is_numpy_array_1d(data) else [data], name="", context=context ) diff --git a/narwhals/_compliant/series.py b/narwhals/_compliant/series.py index d3e575f859..85f4b0a8c6 100644 --- a/narwhals/_compliant/series.py +++ b/narwhals/_compliant/series.py @@ -16,9 +16,9 @@ from narwhals._compliant.namespace import CompliantNamespace # noqa: F401 from narwhals._compliant.namespace import EagerNamespace from narwhals.dtypes import DType + from narwhals.typing import Into1DArray from narwhals.typing import NativeSeries - from narwhals.typing import _1DArray - from narwhals.typing import _NumpyScalar + from narwhals.typing import _1DArray # noqa: F401 from narwhals.utils import Implementation from narwhals.utils import Version from narwhals.utils import _FullContext @@ -28,7 +28,7 @@ NativeSeriesT_co = TypeVar("NativeSeriesT_co", bound="NativeSeries", covariant=True) -class CompliantSeries(NumpyConvertible["_1DArray", "_1DArray | _NumpyScalar"], Protocol): +class CompliantSeries(NumpyConvertible["_1DArray", "Into1DArray"], Protocol): @property def dtype(self) -> DType: ... 
@property @@ -41,9 +41,7 @@ def __narwhals_namespace__(self) -> Any: ... # CompliantNamespace[Any, Self]: . def _from_native_series(self, series: Any) -> Self: ... def _to_expr(self) -> Any: ... # CompliantExpr[Any, Self]: ... @classmethod - def from_numpy( - cls, data: _1DArray | _NumpyScalar, /, *, context: _FullContext - ) -> Self: ... + def from_numpy(cls, data: Into1DArray, /, *, context: _FullContext) -> Self: ... class EagerSeries(CompliantSeries, Protocol[NativeSeriesT_co]): diff --git a/narwhals/_pandas_like/series.py b/narwhals/_pandas_like/series.py index 7d1a46c114..bc6c4af1c6 100644 --- a/narwhals/_pandas_like/series.py +++ b/narwhals/_pandas_like/series.py @@ -46,9 +46,9 @@ from narwhals._pandas_like.dataframe import PandasLikeDataFrame from narwhals._pandas_like.namespace import PandasLikeNamespace from narwhals.dtypes import DType + from narwhals.typing import Into1DArray from narwhals.typing import _1DArray from narwhals.typing import _AnyDArray - from narwhals.typing import _NumpyScalar from narwhals.utils import Version from narwhals.utils import _FullContext @@ -194,9 +194,7 @@ def _from_iterable( ) @classmethod - def from_numpy( - cls, data: _1DArray | _NumpyScalar, /, *, context: _FullContext - ) -> Self: + def from_numpy(cls, data: Into1DArray, /, *, context: _FullContext) -> Self: implementation = context._implementation if implementation.is_pandas_like(): arr = data if is_numpy_array_1d(data) else [data] diff --git a/narwhals/_polars/series.py b/narwhals/_polars/series.py index 8f0b0454c6..453476505b 100644 --- a/narwhals/_polars/series.py +++ b/narwhals/_polars/series.py @@ -27,8 +27,8 @@ from narwhals._polars.expr import PolarsExpr from narwhals._polars.namespace import PolarsNamespace from narwhals.dtypes import DType + from narwhals.typing import Into1DArray from narwhals.typing import _1DArray - from narwhals.typing import _NumpyScalar from narwhals.utils import Version from narwhals.utils import _FullContext @@ -75,9 +75,7 @@ def 
_change_version(self: Self, version: Version) -> Self: ) @classmethod - def from_numpy( - cls, data: _1DArray | _NumpyScalar, /, *, context: _FullContext - ) -> Self: + def from_numpy(cls, data: Into1DArray, /, *, context: _FullContext) -> Self: return cls( pl.Series(data if is_numpy_array_1d(data) else [data]), backend_version=context._backend_version, diff --git a/narwhals/typing.py b/narwhals/typing.py index 6d1e43e547..d739434c56 100644 --- a/narwhals/typing.py +++ b/narwhals/typing.py @@ -200,10 +200,12 @@ def __native_namespace__(self) -> ModuleType: ... _ShapeT = TypeVar("_ShapeT", bound="tuple[int, ...]") _NDArray: TypeAlias = "np.ndarray[_ShapeT, Any]" -_1DArray: TypeAlias = "_NDArray[tuple[int]]" # noqa: PYI042, PYI047 +_1DArray: TypeAlias = "_NDArray[tuple[int]]" # noqa: PYI042 _2DArray: TypeAlias = "_NDArray[tuple[int, int]]" # noqa: PYI042, PYI047 _AnyDArray: TypeAlias = "_NDArray[tuple[int, ...]]" # noqa: PYI047 -_NumpyScalar: TypeAlias = "np.generic[Any]" # noqa: PYI047 +_NumpyScalar: TypeAlias = "np.generic[Any]" +Into1DArray: TypeAlias = "_1DArray | _NumpyScalar" +"""A 1-dimensional `numpy.ndarray` or scalar that can be converted into one.""" class DTypes: From 3cc3a1cf5b702d0b06f32162403a1e04dde4aef2 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Thu, 13 Mar 2025 15:47:36 +0000 Subject: [PATCH 13/16] refactor: Reuse `.__array__` for `PolarsSeries.to_numpy` - We've already got the compat handled there - `polars` handles the rest in https://github.com/pola-rs/polars/blob/889a2a7a57be5da432b6fa854ab698bbaf1b02ff/py-polars/polars/series/series.py#L1357-L1399 --- narwhals/_polars/series.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/narwhals/_polars/series.py b/narwhals/_polars/series.py index 453476505b..e5f06ddaf3 100644 --- a/narwhals/_polars/series.py +++ b/narwhals/_polars/series.py @@ -174,6 +174,9 @@ def replace_strict( raise NotImplementedError(msg) return 
self._from_native_series(ser.replace_strict(old, new, return_dtype=dtype)) + def to_numpy(self, dtype: Any = None, *, copy: bool | None = None) -> _1DArray: + return self.__array__(dtype, copy=copy) + def __array__(self: Self, dtype: Any, *, copy: bool | None) -> _1DArray: if self._backend_version < (0, 20, 29): return self._native_series.__array__(dtype=dtype) @@ -587,10 +590,6 @@ def hist( def to_polars(self: Self) -> pl.Series: return self._native_series - # TODO @dangotbanned: review converting `copy` to a version compat argument - def to_numpy(self, dtype: Any = None, *, copy: bool | None = None) -> _1DArray: - return self.native.to_numpy() - @property def dt(self: Self) -> PolarsSeriesDateTimeNamespace: return PolarsSeriesDateTimeNamespace(self) From 1f67693055251e0d8cd2b6530bf1b5a1026dba23 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Thu, 13 Mar 2025 16:06:46 +0000 Subject: [PATCH 14/16] chore: force github to let me start a thread --- narwhals/dataframe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/narwhals/dataframe.py b/narwhals/dataframe.py index fa7f19fa6f..c7d4b1164e 100644 --- a/narwhals/dataframe.py +++ b/narwhals/dataframe.py @@ -432,7 +432,7 @@ def _extract_compliant(self: Self, arg: Any) -> Any: if isinstance(arg, Series): return arg._compliant_series._to_expr() if isinstance(arg, Expr): - return arg._to_compliant_expr(self.__narwhals_namespace__()) + return arg._to_compliant_expr(self.__narwhals_namespace__()) # comment if isinstance(arg, str): return plx.col(arg) if get_polars() is not None and "polars" in str(type(arg)): # pragma: no cover From 333bc677bc0872c667dc2f1a83cdfcb5bb7decd6 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Thu, 13 Mar 2025 20:46:09 +0000 Subject: [PATCH 15/16] refactor: remove uncovered `Implementation` check Resolves https://github.com/narwhals-dev/narwhals/pull/2196#discussion_r1994199188
--- narwhals/_pandas_like/series.py | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/narwhals/_pandas_like/series.py b/narwhals/_pandas_like/series.py index bc6c4af1c6..a62abaff3d 100644 --- a/narwhals/_pandas_like/series.py +++ b/narwhals/_pandas_like/series.py @@ -196,19 +196,13 @@ def _from_iterable( @classmethod def from_numpy(cls, data: Into1DArray, /, *, context: _FullContext) -> Self: implementation = context._implementation - if implementation.is_pandas_like(): - arr = data if is_numpy_array_1d(data) else [data] - return cls( - implementation.to_native_namespace().Series(arr, name=""), - implementation=implementation, - backend_version=context._backend_version, - version=context._version, - ) - else: # pragma: no cover - from narwhals._pandas_like.utils import PANDAS_LIKE_IMPLEMENTATION - - msg = f"Expected pandas-like implementation ({PANDAS_LIKE_IMPLEMENTATION}), found {implementation}" - raise TypeError(msg) + arr = data if is_numpy_array_1d(data) else [data] + return cls( + implementation.to_native_namespace().Series(arr, name=""), + implementation=implementation, + backend_version=context._backend_version, + version=context._version, + ) def __len__(self: Self) -> int: return len(self.native) From c033c71d820985647b2e9d99a2ac37c71bd7e715 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Sat, 15 Mar 2025 15:15:01 +0000 Subject: [PATCH 16/16] remove comment comment https://github.com/narwhals-dev/narwhals/pull/2196#discussion_r1996869417 --- narwhals/dataframe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/narwhals/dataframe.py b/narwhals/dataframe.py index 4246a57998..9b7ddc7c5a 100644 --- a/narwhals/dataframe.py +++ b/narwhals/dataframe.py @@ -429,7 +429,7 @@ def _extract_compliant(self: Self, arg: Any) -> Any: if isinstance(arg, Series): return arg._compliant_series._to_expr() if isinstance(arg, Expr): - return 
arg._to_compliant_expr(self.__narwhals_namespace__()) # comment + return arg._to_compliant_expr(self.__narwhals_namespace__()) if isinstance(arg, str): return plx.col(arg) if get_polars() is not None and "polars" in str(type(arg)): # pragma: no cover