Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
4b812d3
feat(typing): Add `NumpyConvertible` protocol
dangotbanned Mar 12, 2025
18b07f9
feat(typing): Extend `CompliantSeries` w/ `NumpyConvertible`
dangotbanned Mar 12, 2025
1401aa1
fix(typing): Add missing args for `ArrowSeries.to_numpy`
dangotbanned Mar 12, 2025
d02d83f
feat: add `ArrowSeries.from_numpy`
dangotbanned Mar 12, 2025
9c81781
feat: add `PandasLikeSeries.from_numpy`
dangotbanned Mar 12, 2025
b575e37
feat: add `PolarsSeries.from_numpy`
dangotbanned Mar 12, 2025
48c8179
feat(DRAFT): add `PolarsSeries.to_numpy`
dangotbanned Mar 12, 2025
f5405b4
fix: resolve circular import
dangotbanned Mar 12, 2025
2a86bbb
refactor: Replace all `_create_compliant_series`
dangotbanned Mar 12, 2025
ca8fcdf
refactor: Move `map_batches` up to `EagerExpr`
dangotbanned Mar 12, 2025
d33f1ae
coverage
dangotbanned Mar 12, 2025
9af9228
refactor(typing): Use `Into1DArray` alias
dangotbanned Mar 13, 2025
cf0ae31
Merge remote-tracking branch 'upstream/main' into series-from-numpy
dangotbanned Mar 13, 2025
3cc3a1c
refactor: Reuse `.__array__` for `PolarsSeries.to_numpy`
dangotbanned Mar 13, 2025
1f67693
chore: force github to let me start a thread
dangotbanned Mar 13, 2025
82be657
Merge branch 'main' into series-from-numpy
dangotbanned Mar 13, 2025
333bc67
refactor: remove uncovered `Implementation` check
dangotbanned Mar 13, 2025
6ebbad2
Merge branch 'main' into series-from-numpy
dangotbanned Mar 14, 2025
4176a67
Merge branch 'main' into series-from-numpy
dangotbanned Mar 14, 2025
adb6b7a
Merge branch 'main' into series-from-numpy
dangotbanned Mar 14, 2025
6a5ed1d
Merge remote-tracking branch 'upstream/main' into series-from-numpy
dangotbanned Mar 15, 2025
670345c
Merge branch 'main' into series-from-numpy
dangotbanned Mar 15, 2025
c033c71
remove comment comment
dangotbanned Mar 15, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 0 additions & 41 deletions narwhals/_arrow/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,6 @@
from narwhals._expression_parsing import ExprKind
from narwhals._expression_parsing import evaluate_output_names_and_aliases
from narwhals._expression_parsing import is_scalar_like
from narwhals.dependencies import get_numpy
from narwhals.dependencies import is_numpy_array
from narwhals.exceptions import ColumnNotFoundError
from narwhals.utils import Implementation
from narwhals.utils import generate_temporary_column_name
Expand All @@ -25,7 +23,6 @@

from narwhals._arrow.dataframe import ArrowDataFrame
from narwhals._arrow.namespace import ArrowNamespace
from narwhals.dtypes import DType
from narwhals.utils import Version
from narwhals.utils import _FullContext

Expand Down Expand Up @@ -203,44 +200,6 @@ def func(df: ArrowDataFrame) -> Sequence[ArrowSeries]:
version=self._version,
)

def map_batches(
    self: Self,
    function: Callable[[Any], Any],
    return_dtype: DType | type[DType] | None,
) -> Self:
    """Return a new expression that applies ``function`` to each evaluated series.

    The type of the *first* result decides how all results are handled:
    numpy arrays are wrapped into compliant series as-is, numpy scalars are
    wrapped as one-element series, and anything else is passed through
    untouched. Wrapped results take the name of their input series. When
    ``return_dtype`` is given, every result is cast to it.
    """

    def func(df: ArrowDataFrame) -> list[ArrowSeries]:
        inputs = self._call(df)
        names = [series.name for series in inputs]
        outputs = [function(series) for series in inputs]
        first = outputs[0]

        def to_series(values: Any, name: str) -> ArrowSeries:
            # Wrap raw values into a compliant series carrying the input's name.
            namespace = df.__narwhals_namespace__()
            return namespace._create_compliant_series(values).alias(name)

        if is_numpy_array(first):
            outputs = [to_series(array, name) for array, name in zip(outputs, names)]
        else:
            np = get_numpy()
            if np is not None and np.isscalar(first):
                # Scalars are wrapped in a list so they become length-1 series.
                outputs = [
                    to_series([scalar], name)
                    for scalar, name in zip(outputs, names)
                ]

        if return_dtype is None:
            return outputs
        return [series.cast(return_dtype) for series in outputs]

    new_function_name = self._function_name + "->map_batches"
    new_depth = self._depth + 1
    return self.__class__(
        func,
        depth=new_depth,
        function_name=new_function_name,
        evaluate_output_names=self._evaluate_output_names,
        alias_output_names=self._alias_output_names,
        backend_version=self._backend_version,
        version=self._version,
    )

def cum_count(self: Self, *, reverse: bool) -> Self:
    """Build the expression by reusing the series method named ``cum_count``.

    ``reverse`` is forwarded unchanged as a keyword argument.
    """
    method_name = "cum_count"
    return self._reuse_series(method_name, reverse=reverse)

Expand Down
3 changes: 0 additions & 3 deletions narwhals/_arrow/namespace.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,6 @@ def _expr(self) -> type[ArrowExpr]:
def _series(self) -> type[ArrowSeries]:
return ArrowSeries

def _create_compliant_series(self: Self, value: Any) -> ArrowSeries:
    """Wrap ``value`` in an unnamed (``name=""``) series of this namespace's series type."""
    series_type = self._series
    return series_type._from_iterable(value, name="", context=self)

# --- not in spec ---
def __init__(
self: Self, *, backend_version: tuple[int, ...], version: Version
Expand Down
10 changes: 9 additions & 1 deletion narwhals/_arrow/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
from narwhals._arrow.utils import nulls_like
from narwhals._arrow.utils import pad_series
from narwhals._compliant import EagerSeries
from narwhals.dependencies import is_numpy_array_1d
from narwhals.exceptions import InvalidOperationError
from narwhals.utils import Implementation
from narwhals.utils import generate_temporary_column_name
Expand All @@ -52,6 +53,7 @@
from narwhals._arrow.typing import _AsPyType
from narwhals._arrow.typing import _BasicDataType
from narwhals.dtypes import DType
from narwhals.typing import Into1DArray
from narwhals.typing import _1DArray
from narwhals.typing import _2DArray
from narwhals.utils import Version
Expand Down Expand Up @@ -156,6 +158,12 @@ def _from_scalar(self, value: Any) -> Self:
value = value.as_py()
return super()._from_scalar(value)

@classmethod
def from_numpy(cls, data: Into1DArray, /, *, context: _FullContext) -> Self:
    """Create an unnamed series from ``data``.

    A 1D numpy array is used directly; any other value is wrapped in a
    one-element list first, so it produces a length-1 series.
    """
    if is_numpy_array_1d(data):
        values = data
    else:
        values = [data]
    return cls._from_iterable(values, name="", context=context)

def __narwhals_namespace__(self: Self) -> ArrowNamespace:
from narwhals._arrow.namespace import ArrowNamespace

Expand Down Expand Up @@ -437,7 +445,7 @@ def to_list(self: Self) -> list[Any]:
def __array__(self: Self, dtype: Any = None, *, copy: bool | None = None) -> _1DArray:
    """Array-protocol hook: forward ``dtype`` and ``copy`` to the native object's ``__array__``."""
    native = self.native
    return native.__array__(dtype=dtype, copy=copy)

def to_numpy(self: Self) -> _1DArray:
def to_numpy(self: Self, dtype: Any = None, *, copy: bool | None = None) -> _1DArray:
    """Convert the series to a numpy array.

    Arguments:
        dtype: Requested numpy dtype, or ``None`` to keep the native conversion's dtype.
        copy: Copy behaviour forwarded to ``__array__``, or ``None`` for the default.

    Returns:
        The values of this series as a numpy array.
    """
    if dtype is None and copy is None:
        # Default call: keep the exact native conversion used previously.
        return self.native.to_numpy()
    # The native `to_numpy()` call above forwards neither argument, so honour
    # explicit requests through the array protocol instead of dropping them.
    return self.__array__(dtype, copy=copy)

def alias(self: Self, name: str) -> Self:
Expand Down
34 changes: 34 additions & 0 deletions narwhals/_compliant/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@
from narwhals._compliant.typing import EagerSeriesT
from narwhals._compliant.typing import NativeExprT_co
from narwhals._expression_parsing import evaluate_output_names_and_aliases
from narwhals.dependencies import get_numpy
from narwhals.dependencies import is_numpy_array
from narwhals.dtypes import DType
from narwhals.utils import _ExprNamespace
from narwhals.utils import deprecated
Expand Down Expand Up @@ -760,6 +762,38 @@ def rolling_var(
ddof=ddof,
)

def map_batches(
    self: Self,
    function: Callable[[Any], Any],
    return_dtype: DType | type[DType] | None,
) -> Self:
    """Return a new expression that applies ``function`` to each evaluated series.

    If the *first* result is a numpy array or a numpy scalar, every result is
    converted back into a series through the namespace's series
    ``from_numpy`` constructor and given the name of its input series;
    otherwise results are passed through untouched. When ``return_dtype`` is
    given, every result is cast to it.
    """

    def func(df: EagerDataFrameT) -> Sequence[EagerSeriesT]:
        inputs = self(df)
        names = [series.name for series in inputs]
        outputs = [function(series) for series in inputs]
        first = outputs[0]

        needs_wrapping = is_numpy_array(first)
        if not needs_wrapping:
            # Only consult numpy when the cheaper array check did not match.
            np = get_numpy()
            needs_wrapping = np is not None and np.isscalar(first)

        if needs_wrapping:
            series_type = self.__narwhals_namespace__()._series
            outputs = [
                series_type.from_numpy(array, context=self).alias(name)
                for array, name in zip(outputs, names)
            ]

        if return_dtype is None:
            return outputs
        return [series.cast(return_dtype) for series in outputs]

    new_function_name = self._function_name + "->map_batches"
    new_depth = self._depth + 1
    return self._from_callable(
        func,
        depth=new_depth,
        function_name=new_function_name,
        evaluate_output_names=self._evaluate_output_names,
        alias_output_names=self._alias_output_names,
        context=self,
    )

@property
def cat(self) -> EagerExprCatNamespace[Self]:
    """Return an ``EagerExprCatNamespace`` bound to this expression."""
    namespace = EagerExprCatNamespace(self)
    return namespace
Expand Down
11 changes: 0 additions & 11 deletions narwhals/_compliant/namespace.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
from narwhals._compliant.typing import EagerDataFrameT
from narwhals._compliant.typing import EagerExprT
from narwhals._compliant.typing import EagerSeriesT_co
from narwhals.utils import deprecated
from narwhals.utils import exclude_column_names
from narwhals.utils import get_column_names
from narwhals.utils import passthrough_column_names
Expand Down Expand Up @@ -85,13 +84,3 @@ class EagerNamespace(
):
@property
def _series(self) -> type[EagerSeriesT_co]: ...

@deprecated(
"Internally used for `numpy.ndarray` -> `CompliantSeries`\n"
"Also referenced in untyped `nw.dataframe.DataFrame._extract_compliant`\n"
"See Also:\n"
" - https://github.com/narwhals-dev/narwhals/pull/2149#discussion_r1986283345\n"
" - https://github.com/narwhals-dev/narwhals/issues/2116\n"
" - https://github.com/narwhals-dev/narwhals/pull/2169"
)
def _create_compliant_series(self, value: Any) -> EagerSeriesT_co: ...
10 changes: 9 additions & 1 deletion narwhals/_compliant/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
from typing import Protocol
from typing import TypeVar

from narwhals._translate import NumpyConvertible

if TYPE_CHECKING:
from typing_extensions import Self

Expand All @@ -14,7 +16,9 @@
from narwhals._compliant.namespace import CompliantNamespace # noqa: F401
from narwhals._compliant.namespace import EagerNamespace
from narwhals.dtypes import DType
from narwhals.typing import Into1DArray
from narwhals.typing import NativeSeries
from narwhals.typing import _1DArray # noqa: F401
from narwhals.utils import Implementation
from narwhals.utils import Version
from narwhals.utils import _FullContext
Expand All @@ -24,7 +28,7 @@
NativeSeriesT_co = TypeVar("NativeSeriesT_co", bound="NativeSeries", covariant=True)


class CompliantSeries(Protocol):
class CompliantSeries(NumpyConvertible["_1DArray", "Into1DArray"], Protocol):
@property
def dtype(self) -> DType: ...
@property
Expand All @@ -36,6 +40,8 @@ def alias(self, name: str) -> Self: ...
def __narwhals_namespace__(self) -> Any: ... # CompliantNamespace[Any, Self]: ...
def _from_native_series(self, series: Any) -> Self: ...
def _to_expr(self) -> Any: ... # CompliantExpr[Any, Self]: ...
@classmethod
def from_numpy(cls, data: Into1DArray, /, *, context: _FullContext) -> Self: ...
Comment on lines +43 to +44
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Linking this back to nw.functions.new_series, we might wanna have this as:

    @classmethod
    def from_numpy(
        cls,
        data: Into1DArray,
        /,
        *,
        context: _FullContext,
        name: str = "",
        dtype: DType | type[DType] | None = None,
    ) -> Self: ...

def new_series(
name: str,
values: Any,
dtype: DType | type[DType] | None = None,
*,
native_namespace: ModuleType,
) -> Series[Any]:



class EagerSeries(CompliantSeries, Protocol[NativeSeriesT_co]):
Expand All @@ -60,3 +66,5 @@ def __narwhals_namespace__(self) -> EagerNamespace[Any, Self, Any]: ...

def _to_expr(self) -> EagerExpr[Any, Any]:
    """Turn this series into an expression via the namespace's ``_expr._from_series``."""
    expr_type = self.__narwhals_namespace__()._expr
    return expr_type._from_series(self)  # type: ignore[no-any-return]

# Protocol stub: declares that eager series provide `cast` to a narwhals dtype
# (instance or class). `EagerExpr.map_batches` calls it on each result when a
# `return_dtype` is supplied.
def cast(self, dtype: DType | type[DType]) -> Self: ...
5 changes: 5 additions & 0 deletions narwhals/_compliant/typing.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from narwhals._compliant.expr import CompliantExpr
from narwhals._compliant.expr import EagerExpr
from narwhals._compliant.expr import NativeExpr
from narwhals._compliant.namespace import EagerNamespace
from narwhals._compliant.series import CompliantSeries
from narwhals._compliant.series import EagerSeries

Expand Down Expand Up @@ -48,5 +49,9 @@
EagerSeriesT = TypeVar("EagerSeriesT", bound="EagerSeries[Any]")
EagerSeriesT_co = TypeVar("EagerSeriesT_co", bound="EagerSeries[Any]", covariant=True)
EagerExprT = TypeVar("EagerExprT", bound="EagerExpr[Any, Any]")
EagerNamespaceAny: TypeAlias = (
"EagerNamespace[EagerDataFrame[Any], EagerSeries[Any], EagerExpr[Any, Any]]"
)

AliasNames: TypeAlias = Callable[[Sequence[str]], Sequence[str]]
AliasName: TypeAlias = Callable[[str], str]
5 changes: 4 additions & 1 deletion narwhals/_expression_parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from typing import Literal
from typing import Sequence
from typing import TypeVar
from typing import cast

from narwhals.dependencies import is_narwhals_series
from narwhals.dependencies import is_numpy_array
Expand All @@ -27,6 +28,7 @@
from narwhals._compliant import CompliantExprT
from narwhals._compliant import CompliantFrameT
from narwhals._compliant import CompliantNamespace
from narwhals._compliant.typing import EagerNamespaceAny
from narwhals.expr import Expr
from narwhals.typing import CompliantDataFrame
from narwhals.typing import CompliantLazyFrame
Expand Down Expand Up @@ -100,7 +102,8 @@ def extract_compliant(
if is_narwhals_series(other):
return other._compliant_series._to_expr()
if is_numpy_array(other):
return plx._create_compliant_series(other)._to_expr() # type: ignore[attr-defined]
ns = cast("EagerNamespaceAny", plx)
return ns._series.from_numpy(other, context=ns)._to_expr()
return other


Expand Down
13 changes: 5 additions & 8 deletions narwhals/_pandas_like/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
from narwhals._pandas_like.utils import align_series_full_broadcast
from narwhals._pandas_like.utils import check_column_names_are_unique
from narwhals._pandas_like.utils import convert_str_slice_to_int_slice
from narwhals._pandas_like.utils import create_compliant_series
from narwhals._pandas_like.utils import extract_dataframe_comparand
from narwhals._pandas_like.utils import horizontal_concat
from narwhals._pandas_like.utils import native_to_narwhals_dtype
Expand Down Expand Up @@ -433,16 +432,14 @@ def estimated_size(self: Self, unit: SizeUnit) -> int | float:
return scale_bytes(sz, unit=unit)

def with_row_index(self: Self, name: str) -> Self:
row_index = create_compliant_series(
range(len(self._native_frame)),
index=self._native_frame.index,
implementation=self._implementation,
backend_version=self._backend_version,
version=self._version,
frame = self._native_frame
namespace = self.__narwhals_namespace__()
row_index = namespace._series._from_iterable(
range(len(frame)), name="", context=self, index=frame.index
).alias(name)
return self._from_native_frame(
horizontal_concat(
[row_index._native_series, self._native_frame],
[row_index.native, frame],
implementation=self._implementation,
backend_version=self._backend_version,
)
Expand Down
36 changes: 0 additions & 36 deletions narwhals/_pandas_like/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,6 @@
from narwhals._expression_parsing import is_elementary_expression
from narwhals._pandas_like.group_by import AGGREGATIONS_TO_PANDAS_EQUIVALENT
from narwhals._pandas_like.series import PandasLikeSeries
from narwhals.dependencies import get_numpy
from narwhals.dependencies import is_numpy_array
from narwhals.exceptions import ColumnNotFoundError
from narwhals.utils import generate_temporary_column_name

Expand All @@ -23,7 +21,6 @@

from narwhals._pandas_like.dataframe import PandasLikeDataFrame
from narwhals._pandas_like.namespace import PandasLikeNamespace
from narwhals.dtypes import DType
from narwhals.utils import Implementation
from narwhals.utils import Version
from narwhals.utils import _FullContext
Expand Down Expand Up @@ -299,39 +296,6 @@ def func(df: PandasLikeDataFrame) -> Sequence[PandasLikeSeries]:
version=self._version,
)

def map_batches(
    self: Self,
    function: Callable[[Any], Any],
    return_dtype: DType | type[DType] | None,
) -> Self:
    """Return a new expression that applies ``function`` to each evaluated series.

    If the *first* result is a numpy array or a numpy scalar, every result is
    wrapped into a compliant series through the dataframe's namespace and
    given the name of its input series; otherwise results are passed through
    untouched. When ``return_dtype`` is given, every result is cast to it.
    """

    def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]:
        inputs = self._call(df)
        names = [series.name for series in inputs]
        outputs = [function(series) for series in inputs]
        first = outputs[0]

        needs_wrapping = is_numpy_array(first)
        if not needs_wrapping:
            # Only consult numpy when the cheaper array check did not match.
            np = get_numpy()
            needs_wrapping = np is not None and np.isscalar(first)

        if needs_wrapping:
            wrapped = []
            for array, name in zip(outputs, names):
                namespace = df.__narwhals_namespace__()
                wrapped.append(namespace._create_compliant_series(array).alias(name))
            outputs = wrapped

        if return_dtype is None:
            return outputs
        return [series.cast(return_dtype) for series in outputs]

    new_function_name = self._function_name + "->map_batches"
    new_depth = self._depth + 1
    return self.__class__(
        func,
        depth=new_depth,
        function_name=new_function_name,
        evaluate_output_names=self._evaluate_output_names,
        alias_output_names=self._alias_output_names,
        implementation=self._implementation,
        backend_version=self._backend_version,
        version=self._version,
    )

def cum_count(self: Self, *, reverse: bool) -> Self:
    """Build the expression by reusing the series method named ``cum_count``.

    ``reverse`` is forwarded inside the ``call_kwargs`` mapping.
    """
    forwarded = {"reverse": reverse}
    return self._reuse_series("cum_count", call_kwargs=forwarded)

Expand Down
9 changes: 0 additions & 9 deletions narwhals/_pandas_like/namespace.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
from narwhals._pandas_like.selectors import PandasSelectorNamespace
from narwhals._pandas_like.series import PandasLikeSeries
from narwhals._pandas_like.utils import align_series_full_broadcast
from narwhals._pandas_like.utils import create_compliant_series
from narwhals._pandas_like.utils import diagonal_concat
from narwhals._pandas_like.utils import extract_dataframe_comparand
from narwhals._pandas_like.utils import horizontal_concat
Expand Down Expand Up @@ -61,14 +60,6 @@ def __init__(
self._backend_version = backend_version
self._version = version

def _create_compliant_series(self: Self, value: Any) -> PandasLikeSeries:
    """Wrap ``value`` via ``create_compliant_series`` using this namespace's settings.

    The namespace's implementation, backend version and narwhals version are
    forwarded as keyword arguments.
    """
    settings = {
        "implementation": self._implementation,
        "backend_version": self._backend_version,
        "version": self._version,
    }
    return create_compliant_series(value, **settings)

# --- selection ---
def lit(self: Self, value: Any, dtype: DType | None) -> PandasLikeExpr:
def _lit_pandas_series(df: PandasLikeDataFrame) -> PandasLikeSeries:
Expand Down
Loading
Loading