diff --git a/narwhals/_arrow/dataframe.py b/narwhals/_arrow/dataframe.py index 6d7cd7a87a..1b885631fe 100644 --- a/narwhals/_arrow/dataframe.py +++ b/narwhals/_arrow/dataframe.py @@ -17,8 +17,8 @@ from narwhals._arrow.utils import extract_dataframe_comparand from narwhals._arrow.utils import native_to_narwhals_dtype from narwhals._arrow.utils import select_rows +from narwhals._compliant import EagerDataFrame from narwhals._expression_parsing import ExprKind -from narwhals._expression_parsing import evaluate_into_exprs from narwhals.dependencies import is_numpy_array_1d from narwhals.utils import Implementation from narwhals.utils import Version @@ -71,7 +71,7 @@ from narwhals.typing import CompliantLazyFrame -class ArrowDataFrame(CompliantDataFrame["ArrowSeries"], CompliantLazyFrame): +class ArrowDataFrame(EagerDataFrame["ArrowSeries"], CompliantLazyFrame): # --- not in the spec --- def __init__( self: Self, @@ -360,7 +360,7 @@ def aggregate(self: ArrowDataFrame, *exprs: ArrowExpr) -> ArrowDataFrame: return self.select(*exprs) def select(self: ArrowDataFrame, *exprs: ArrowExpr) -> ArrowDataFrame: - new_series = evaluate_into_exprs(self, *exprs) + new_series = self._evaluate_into_exprs(*exprs) if not new_series: # return empty dataframe, like Polars does return self._from_native_frame( @@ -373,7 +373,7 @@ def select(self: ArrowDataFrame, *exprs: ArrowExpr) -> ArrowDataFrame: def with_columns(self: ArrowDataFrame, *exprs: ArrowExpr) -> ArrowDataFrame: native_frame = self._native_frame - new_columns = evaluate_into_exprs(self, *exprs) + new_columns = self._evaluate_into_exprs(*exprs) length = len(self) columns = self.columns @@ -560,7 +560,7 @@ def filter( mask_native: Mask | ArrowChunkedArray = predicate else: # `[0]` is safe as the predicate's expression only returns a single column - mask_native = evaluate_into_exprs(self, predicate)[0]._native_series + mask_native = self._evaluate_into_exprs(predicate)[0]._native_series return self._from_native_frame( self._native_frame.filter(mask_native), # pyright: ignore[reportArgumentType] validate_column_names=False, @@ -777,7 +777,7 @@ def unique( keep_idx = self.simple_select(*subset).is_unique() plx = self.__narwhals_namespace__() - return self.filter(plx._create_expr_from_series(keep_idx)) + return self.filter(plx._expr._from_series(keep_idx)) def gather_every(self: Self, n: int, offset: int) -> Self: return self._from_native_frame( diff --git a/narwhals/_arrow/expr.py b/narwhals/_arrow/expr.py index 805c977f75..360132e80b 100644 --- a/narwhals/_arrow/expr.py +++ b/narwhals/_arrow/expr.py @@ -4,26 +4,18 @@ from typing import Any from typing import Callable from typing import Literal -from typing import Mapping from typing import Sequence import pyarrow.compute as pc -from narwhals._arrow.expr_cat import ArrowExprCatNamespace -from narwhals._arrow.expr_dt import ArrowExprDateTimeNamespace -from narwhals._arrow.expr_list import ArrowExprListNamespace -from narwhals._arrow.expr_name import ArrowExprNameNamespace -from narwhals._arrow.expr_str import ArrowExprStringNamespace -from narwhals._arrow.expr_struct import ArrowExprStructNamespace from narwhals._arrow.series import ArrowSeries +from narwhals._compliant import EagerExpr from narwhals._expression_parsing import ExprKind from narwhals._expression_parsing import evaluate_output_names_and_aliases from narwhals._expression_parsing import is_scalar_like -from narwhals._expression_parsing import reuse_series_implementation from narwhals.dependencies import get_numpy from narwhals.dependencies import is_numpy_array from narwhals.exceptions import ColumnNotFoundError -from narwhals.typing import CompliantExpr from narwhals.utils import Implementation from narwhals.utils import generate_temporary_column_name from narwhals.utils import not_implemented @@ -35,9 +27,10 @@ from narwhals._arrow.namespace import ArrowNamespace from narwhals.dtypes import DType from narwhals.utils import Version + from narwhals.utils import _FullContext -class ArrowExpr(CompliantExpr["ArrowDataFrame", ArrowSeries]): +class ArrowExpr(EagerExpr["ArrowDataFrame", ArrowSeries]): _implementation: Implementation = Implementation.PYARROW def __init__( @@ -51,6 +44,7 @@ def __init__( backend_version: tuple[int, ...], version: Version, call_kwargs: dict[str, Any] | None = None, + implementation: Implementation | None = None, ) -> None: self._call = call self._depth = depth @@ -62,34 +56,6 @@ def __init__( self._version = version self._call_kwargs = call_kwargs or {} - def __repr__(self: Self) -> str: # pragma: no cover - return f"ArrowExpr(depth={self._depth}, function_name={self._function_name}, " - - def __call__(self: Self, df: ArrowDataFrame) -> Sequence[ArrowSeries]: - return self._call(df) - - def broadcast(self, kind: Literal[ExprKind.AGGREGATION, ExprKind.LITERAL]) -> Self: - # Mark the resulting ArrowSeries with `_broadcast = True`. - # Then, when extracting native objects, `extract_native` will - # know what to do. - def func(df: ArrowDataFrame) -> list[ArrowSeries]: - results = [] - for result in self(df): - result._broadcast = True - results.append(result) - return results - - return self.__class__( - func, - depth=self._depth, - function_name=self._function_name, - evaluate_output_names=self._evaluate_output_names, - alias_output_names=self._alias_output_names, - backend_version=self._backend_version, - version=self._version, - call_kwargs=self._call_kwargs, - ) - @classmethod def from_column_names( cls: type[Self], @@ -97,8 +63,7 @@ def from_column_names( /, *, function_name: str, - backend_version: tuple[int, ...], - version: Version, + context: _FullContext, ) -> Self: def func(df: ArrowDataFrame) -> list[ArrowSeries]: try: @@ -125,16 +90,13 @@ def func(df: ArrowDataFrame) -> list[ArrowSeries]: function_name=function_name, evaluate_output_names=evaluate_column_names, alias_output_names=None, - backend_version=backend_version, - version=version, + backend_version=context._backend_version, + version=context._version, ) @classmethod def from_column_indices( - cls: type[Self], - *column_indices: int, - backend_version: tuple[int, ...], - version: Version, + cls: type[Self], *column_indices: int, context: _FullContext ) -> Self: from narwhals._arrow.series import ArrowSeries @@ -155,8 +117,8 @@ def func(df: ArrowDataFrame) -> list[ArrowSeries]: function_name="nth", evaluate_output_names=lambda df: [df.columns[i] for i in column_indices], alias_output_names=None, - backend_version=backend_version, - version=version, + backend_version=context._backend_version, + version=context._version, ) def __narwhals_namespace__(self: Self) -> ArrowNamespace: @@ -168,264 +130,16 @@ def __narwhals_namespace__(self: Self) -> ArrowNamespace: def __narwhals_expr__(self: Self) -> None: ... - def __eq__(self: Self, other: ArrowExpr | Any) -> Self: # type: ignore[override] - return reuse_series_implementation(self, "__eq__", other=other) - - def __ne__(self: Self, other: ArrowExpr | Any) -> Self: # type: ignore[override] - return reuse_series_implementation(self, "__ne__", other=other) - - def __ge__(self: Self, other: ArrowExpr | Any) -> Self: - return reuse_series_implementation(self, "__ge__", other=other) - - def __gt__(self: Self, other: ArrowExpr | Any) -> Self: - return reuse_series_implementation(self, "__gt__", other=other) - - def __le__(self: Self, other: ArrowExpr | Any) -> Self: - return reuse_series_implementation(self, "__le__", other=other) - - def __lt__(self: Self, other: ArrowExpr | Any) -> Self: - return reuse_series_implementation(self, "__lt__", other=other) - - def __and__(self: Self, other: ArrowExpr | bool | Any) -> Self: - return reuse_series_implementation(self, "__and__", other=other) - - def __or__(self: Self, other: ArrowExpr | bool | Any) -> Self: - return reuse_series_implementation(self, "__or__", other=other) - - def __add__(self: Self, other: ArrowExpr | Any) -> Self: - return reuse_series_implementation(self, "__add__", other=other) - - def __sub__(self: Self, other: ArrowExpr | Any) -> Self: - return reuse_series_implementation(self, "__sub__", other=other) - - def __rsub__(self: Self, other: ArrowExpr | Any) -> Self: - return reuse_series_implementation(self.alias("literal"), "__rsub__", other=other) - - def __mul__(self: Self, other: ArrowExpr | Any) -> Self: - return reuse_series_implementation(self, "__mul__", other=other) - - def __pow__(self: Self, other: ArrowExpr | Any) -> Self: - return reuse_series_implementation(self, "__pow__", other=other) - - def __rpow__(self: Self, other: ArrowExpr | Any) -> Self: - return reuse_series_implementation(self.alias("literal"), "__rpow__", other=other) - - def __floordiv__(self: Self, other: ArrowExpr | Any) -> Self: - return reuse_series_implementation(self, "__floordiv__", other=other) - - def __rfloordiv__(self: Self, other: ArrowExpr | Any) -> Self: - return reuse_series_implementation( - self.alias("literal"), "__rfloordiv__", other=other - ) - - def __truediv__(self: Self, other: ArrowExpr | Any) -> Self: - return reuse_series_implementation(self, "__truediv__", other=other) - - def __rtruediv__(self: Self, other: ArrowExpr | Any) -> Self: - return reuse_series_implementation( - self.alias("literal"), "__rtruediv__", other=other - ) - - def __mod__(self: Self, other: ArrowExpr | Any) -> Self: - return reuse_series_implementation(self, "__mod__", other=other) - - def __rmod__(self: Self, other: ArrowExpr | Any) -> Self: - return reuse_series_implementation(self.alias("literal"), "__rmod__", other=other) - - def __invert__(self: Self) -> Self: - return reuse_series_implementation(self, "__invert__") - - def len(self: Self) -> Self: - return reuse_series_implementation(self, "len", returns_scalar=True) - - def filter(self: Self, *predicates: ArrowExpr) -> Self: - plx = self.__narwhals_namespace__() - predicate = plx.all_horizontal(*predicates) - return reuse_series_implementation(self, "filter", predicate=predicate) - - def mean(self: Self) -> Self: - return reuse_series_implementation(self, "mean", returns_scalar=True) - - def median(self: Self) -> Self: - return reuse_series_implementation(self, "median", returns_scalar=True) - - def count(self: Self) -> Self: - return reuse_series_implementation(self, "count", returns_scalar=True) - - def n_unique(self: Self) -> Self: - return reuse_series_implementation(self, "n_unique", returns_scalar=True) - - def std(self: Self, ddof: int) -> Self: - return reuse_series_implementation( - self, "std", call_kwargs={"ddof": ddof}, returns_scalar=True - ) - - def var(self: Self, ddof: int) -> Self: - return reuse_series_implementation( - self, "var", call_kwargs={"ddof": ddof}, returns_scalar=True - ) - - def skew(self: Self) -> Self: - return reuse_series_implementation(self, "skew", returns_scalar=True) - - def cast(self: Self, dtype: DType | type[DType]) -> Self: - return reuse_series_implementation(self, "cast", dtype=dtype) - - def abs(self: Self) -> Self: - return reuse_series_implementation(self, "abs") - - def diff(self: Self) -> Self: - return reuse_series_implementation(self, "diff") + def _reuse_series_extra_kwargs( + self, *, returns_scalar: bool = False + ) -> dict[str, Any]: + return {"_return_py_scalar": False} if returns_scalar else {} def cum_sum(self: Self, *, reverse: bool) -> Self: - return reuse_series_implementation(self, "cum_sum", reverse=reverse) - - def round(self: Self, decimals: int) -> Self: - return reuse_series_implementation(self, "round", decimals=decimals) - - def any(self: Self) -> Self: - return reuse_series_implementation(self, "any", returns_scalar=True) - - def min(self: Self) -> Self: - return reuse_series_implementation(self, "min", returns_scalar=True) - - def max(self: Self) -> Self: - return reuse_series_implementation(self, "max", returns_scalar=True) - - def arg_min(self: Self) -> Self: - return reuse_series_implementation(self, "arg_min", returns_scalar=True) - - def arg_max(self: Self) -> Self: - return reuse_series_implementation(self, "arg_max", returns_scalar=True) - - def all(self: Self) -> Self: - return reuse_series_implementation(self, "all", returns_scalar=True) - - def sum(self: Self) -> Self: - return reuse_series_implementation(self, "sum", returns_scalar=True) - - def drop_nulls(self: Self) -> Self: - return reuse_series_implementation(self, "drop_nulls") + return self._reuse_series("cum_sum", reverse=reverse) def shift(self: Self, n: int) -> Self: - return reuse_series_implementation(self, "shift", n=n) - - def alias(self: Self, name: str) -> Self: - def alias_output_names(names: Sequence[str]) -> Sequence[str]: - if len(names) != 1: - msg = f"Expected function with single output, found output names: {names}" - raise ValueError(msg) - return [name] - - # Define this one manually, so that we can - # override `output_names` and not increase depth - return self.__class__( - lambda df: [series.alias(name) for series in self._call(df)], - depth=self._depth, - function_name=self._function_name, - evaluate_output_names=self._evaluate_output_names, - alias_output_names=alias_output_names, - backend_version=self._backend_version, - version=self._version, - call_kwargs=self._call_kwargs, - ) - - def null_count(self: Self) -> Self: - return reuse_series_implementation(self, "null_count", returns_scalar=True) - - def is_null(self: Self) -> Self: - return reuse_series_implementation(self, "is_null") - - def is_nan(self: Self) -> Self: - return reuse_series_implementation(self, "is_nan") - - def head(self: Self, n: int) -> Self: - return reuse_series_implementation(self, "head", n=n) - - def tail(self: Self, n: int) -> Self: - return reuse_series_implementation(self, "tail", n=n) - - def is_in(self: Self, other: ArrowExpr | Any) -> Self: - return reuse_series_implementation(self, "is_in", other=other) - - def arg_true(self: Self) -> Self: - return reuse_series_implementation(self, "arg_true") - - def sample( - self: Self, - n: int | None, - *, - fraction: float | None, - with_replacement: bool, - seed: int | None, - ) -> Self: - return reuse_series_implementation( - self, - "sample", - n=n, - fraction=fraction, - with_replacement=with_replacement, - seed=seed, - ) - - def fill_null( - self: Self, - value: Self | Any | None, - strategy: Literal["forward", "backward"] | None, - limit: int | None, - ) -> Self: - return reuse_series_implementation( - self, "fill_null", value=value, strategy=strategy, limit=limit - ) - - def is_unique(self: Self) -> Self: - return reuse_series_implementation(self, "is_unique") - - def is_first_distinct(self: Self) -> Self: - return reuse_series_implementation(self, "is_first_distinct") - - def is_last_distinct(self: Self) -> Self: - return reuse_series_implementation(self, "is_last_distinct") - - def unique(self: Self) -> Self: - return reuse_series_implementation(self, "unique", maintain_order=False) - - def replace_strict( - self: Self, - old: Sequence[Any] | Mapping[Any, Any], - new: Sequence[Any], - *, - return_dtype: DType | type[DType] | None, - ) -> Self: - return reuse_series_implementation( - self, "replace_strict", old=old, new=new, return_dtype=return_dtype - ) - - def sort(self: Self, *, descending: bool, nulls_last: bool) -> Self: - return reuse_series_implementation( - self, "sort", descending=descending, nulls_last=nulls_last - ) - - def quantile( - self: Self, - quantile: float, - interpolation: Literal["nearest", "higher", "lower", "midpoint", "linear"], - ) -> Self: - return reuse_series_implementation( - self, - "quantile", - returns_scalar=True, - quantile=quantile, - interpolation=interpolation, - ) - - def gather_every(self: Self, n: int, offset: int) -> Self: - return reuse_series_implementation(self, "gather_every", n=n, offset=offset) - - def clip(self: Self, lower_bound: Any | None, upper_bound: Any | None) -> Self: - return reuse_series_implementation( - self, "clip", lower_bound=lower_bound, upper_bound=upper_bound - ) + return self._reuse_series("shift", n=n) def over( self: Self, @@ -489,9 +203,6 @@ def func(df: ArrowDataFrame) -> Sequence[ArrowSeries]: version=self._version, ) - def mode(self: Self) -> Self: - return reuse_series_implementation(self, "mode") - def map_batches( self: Self, function: Callable[[Any], Any], @@ -530,80 +241,17 @@ def func(df: ArrowDataFrame) -> list[ArrowSeries]: version=self._version, ) - def is_finite(self: Self) -> Self: - return reuse_series_implementation(self, "is_finite") - def cum_count(self: Self, *, reverse: bool) -> Self: - return reuse_series_implementation(self, "cum_count", reverse=reverse) + return self._reuse_series("cum_count", reverse=reverse) def cum_min(self: Self, *, reverse: bool) -> Self: - return reuse_series_implementation(self, "cum_min", reverse=reverse) + return self._reuse_series("cum_min", reverse=reverse) def cum_max(self: Self, *, reverse: bool) -> Self: - return reuse_series_implementation(self, "cum_max", reverse=reverse) + return self._reuse_series("cum_max", reverse=reverse) def cum_prod(self: Self, *, reverse: bool) -> Self: - return reuse_series_implementation(self, "cum_prod", reverse=reverse) - - def rolling_sum( - self: Self, window_size: int, *, min_samples: int, center: bool - ) -> Self: - return reuse_series_implementation( - self, - "rolling_sum", - window_size=window_size, - min_samples=min_samples, - center=center, - ) - - def rolling_mean( - self: Self, - window_size: int, - *, - min_samples: int, - center: bool, - ) -> Self: - return reuse_series_implementation( - self, - "rolling_mean", - window_size=window_size, - min_samples=min_samples, - center=center, - ) - - def rolling_var( - self: Self, - window_size: int, - *, - min_samples: int, - center: bool, - ddof: int, - ) -> Self: - return reuse_series_implementation( - self, - "rolling_var", - window_size=window_size, - min_samples=min_samples, - center=center, - ddof=ddof, - ) - - def rolling_std( - self: Self, - window_size: int, - *, - min_samples: int, - center: bool, - ddof: int, - ) -> Self: - return reuse_series_implementation( - self, - "rolling_std", - window_size=window_size, - min_samples=min_samples, - center=center, - ddof=ddof, - ) + return self._reuse_series("cum_prod", reverse=reverse) def rank( self: Self, @@ -611,32 +259,6 @@ def rank( *, descending: bool, ) -> Self: - return reuse_series_implementation( - self, "rank", method=method, descending=descending - ) + return self._reuse_series("rank", method=method, descending=descending) ewm_mean = not_implemented() - - @property - def dt(self: Self) -> ArrowExprDateTimeNamespace: - return ArrowExprDateTimeNamespace(self) - - @property - def str(self: Self) -> ArrowExprStringNamespace: - return ArrowExprStringNamespace(self) - - @property - def cat(self: Self) -> ArrowExprCatNamespace: - return ArrowExprCatNamespace(self) - - @property - def name(self: Self) -> ArrowExprNameNamespace: - return ArrowExprNameNamespace(self) - - @property - def list(self: Self) -> ArrowExprListNamespace: - return ArrowExprListNamespace(self) - - @property - def struct(self: Self) -> ArrowExprStructNamespace: - return ArrowExprStructNamespace(self) diff --git a/narwhals/_arrow/expr_cat.py b/narwhals/_arrow/expr_cat.py deleted file mode 100644 index dbea11c59f..0000000000 --- a/narwhals/_arrow/expr_cat.py +++ /dev/null @@ -1,18 +0,0 @@ -from __future__ import annotations - -from typing import TYPE_CHECKING - -from narwhals._arrow.utils import ArrowExprNamespace -from narwhals._expression_parsing import reuse_series_namespace_implementation - -if TYPE_CHECKING: - from typing_extensions import Self - - from narwhals._arrow.expr import ArrowExpr - - -class ArrowExprCatNamespace(ArrowExprNamespace): - def get_categories(self: Self) -> ArrowExpr: - return reuse_series_namespace_implementation( - self.compliant, "cat", "get_categories" - ) diff --git a/narwhals/_arrow/expr_dt.py b/narwhals/_arrow/expr_dt.py deleted file mode 100644 index a090d74920..0000000000 --- a/narwhals/_arrow/expr_dt.py +++ /dev/null @@ -1,95 +0,0 @@ -from __future__ import annotations - -from typing import TYPE_CHECKING - -from narwhals._arrow.utils import ArrowExprNamespace -from narwhals._expression_parsing import reuse_series_namespace_implementation - -if TYPE_CHECKING: - from typing_extensions import Self - - from narwhals._arrow.expr import ArrowExpr - from narwhals.typing import TimeUnit - - -class ArrowExprDateTimeNamespace(ArrowExprNamespace): - def to_string(self: Self, format: str) -> ArrowExpr: # noqa: A002 - return reuse_series_namespace_implementation( - self.compliant, "dt", "to_string", format=format - ) - - def replace_time_zone(self: Self, time_zone: str | None) -> ArrowExpr: - return reuse_series_namespace_implementation( - self.compliant, "dt", "replace_time_zone", time_zone=time_zone - ) - - def convert_time_zone(self: Self, time_zone: str) -> ArrowExpr: - return reuse_series_namespace_implementation( - self.compliant, "dt", "convert_time_zone", time_zone=time_zone - ) - - def timestamp(self: Self, time_unit: TimeUnit) -> ArrowExpr: - return reuse_series_namespace_implementation( - self.compliant, "dt", "timestamp", time_unit=time_unit - ) - - def date(self: Self) -> ArrowExpr: - return reuse_series_namespace_implementation(self.compliant, "dt", "date") - - def year(self: Self) -> ArrowExpr: - return reuse_series_namespace_implementation(self.compliant, "dt", "year") - - def month(self: Self) -> ArrowExpr: - return reuse_series_namespace_implementation(self.compliant, "dt", "month") - - def day(self: Self) -> ArrowExpr: - return reuse_series_namespace_implementation(self.compliant, "dt", "day") - - def hour(self: Self) -> ArrowExpr: - return reuse_series_namespace_implementation(self.compliant, "dt", "hour") - - def minute(self: Self) -> ArrowExpr: - return reuse_series_namespace_implementation(self.compliant, "dt", "minute") - - def second(self: Self) -> ArrowExpr: - return reuse_series_namespace_implementation(self.compliant, "dt", "second") - - def millisecond(self: Self) -> ArrowExpr: - return reuse_series_namespace_implementation(self.compliant, "dt", "millisecond") - - def microsecond(self: Self) -> ArrowExpr: - return reuse_series_namespace_implementation(self.compliant, "dt", "microsecond") - - def nanosecond(self: Self) -> ArrowExpr: - return reuse_series_namespace_implementation(self.compliant, "dt", "nanosecond") - - def ordinal_day(self: Self) -> ArrowExpr: - return reuse_series_namespace_implementation(self.compliant, "dt", "ordinal_day") - - def weekday(self: Self) -> ArrowExpr: - return reuse_series_namespace_implementation(self.compliant, "dt", "weekday") - - def total_minutes(self: Self) -> ArrowExpr: - return reuse_series_namespace_implementation( - self.compliant, "dt", "total_minutes" - ) - - def total_seconds(self: Self) -> ArrowExpr: - return reuse_series_namespace_implementation( - self.compliant, "dt", "total_seconds" - ) - - def total_milliseconds(self: Self) -> ArrowExpr: - return reuse_series_namespace_implementation( - self.compliant, "dt", "total_milliseconds" - ) - - def total_microseconds(self: Self) -> ArrowExpr: - return reuse_series_namespace_implementation( - self.compliant, "dt", "total_microseconds" - ) - - def total_nanoseconds(self: Self) -> ArrowExpr: - return reuse_series_namespace_implementation( - self.compliant, "dt", "total_nanoseconds" - ) diff --git a/narwhals/_arrow/expr_list.py b/narwhals/_arrow/expr_list.py deleted file mode 100644 index 14c81f5935..0000000000 --- a/narwhals/_arrow/expr_list.py +++ /dev/null @@ -1,16 +0,0 @@ -from __future__ import annotations - -from typing import TYPE_CHECKING - -from narwhals._arrow.utils import ArrowExprNamespace -from narwhals._expression_parsing import reuse_series_namespace_implementation - -if TYPE_CHECKING: - from typing_extensions import Self - - from narwhals._arrow.expr import ArrowExpr - - -class ArrowExprListNamespace(ArrowExprNamespace): - def len(self: Self) -> ArrowExpr: - return reuse_series_namespace_implementation(self.compliant, "list", "len") diff --git a/narwhals/_arrow/expr_name.py b/narwhals/_arrow/expr_name.py deleted file mode 100644 index 2f8b368466..0000000000 --- a/narwhals/_arrow/expr_name.py +++ /dev/null @@ -1,81 +0,0 @@ -from __future__ import annotations - -from typing import TYPE_CHECKING -from typing import Callable -from typing import Sequence - -from narwhals._arrow.utils import ArrowExprNamespace - -if TYPE_CHECKING: - from typing_extensions import Self - - from narwhals._arrow.expr import ArrowExpr - - -class ArrowExprNameNamespace(ArrowExprNamespace): - def keep(self: Self) -> ArrowExpr: - return self._from_colname_func_and_alias_output_names( - name_mapping_func=lambda name: name, - alias_output_names=None, - ) - - def map(self: Self, function: Callable[[str], str]) -> ArrowExpr: - return self._from_colname_func_and_alias_output_names( - name_mapping_func=function, - alias_output_names=lambda output_names: [ - function(name) for name in output_names - ], - ) - - def prefix(self: Self, prefix: str) -> ArrowExpr: - return self._from_colname_func_and_alias_output_names( - name_mapping_func=lambda name: f"{prefix}{name}", - alias_output_names=lambda output_names: [ - f"{prefix}{output_name}" for output_name in output_names - ], - ) - - def suffix(self: Self, suffix: str) -> ArrowExpr: - return self._from_colname_func_and_alias_output_names( - name_mapping_func=lambda name: f"{name}{suffix}", - alias_output_names=lambda output_names: [ - f"{output_name}{suffix}" for output_name in output_names - ], - ) - - def to_lowercase(self: Self) -> ArrowExpr: - return self._from_colname_func_and_alias_output_names( - name_mapping_func=str.lower, - alias_output_names=lambda output_names: [ - name.lower() for name in output_names - ], - ) - - def to_uppercase(self: Self) -> ArrowExpr: - return self._from_colname_func_and_alias_output_names( - name_mapping_func=str.upper, - alias_output_names=lambda output_names: [ - name.upper() for name in output_names - ], - ) - - def _from_colname_func_and_alias_output_names( - self: Self, - name_mapping_func: Callable[[str], str], - alias_output_names: Callable[[Sequence[str]], Sequence[str]] | None, - ) -> ArrowExpr: - return self.compliant.__class__( - call=lambda df: [ - series.alias(name_mapping_func(name)) - for series, name in zip( - self.compliant._call(df), self.compliant._evaluate_output_names(df) - ) - ], - depth=self.compliant._depth, - function_name=self.compliant._function_name, - evaluate_output_names=self.compliant._evaluate_output_names, - alias_output_names=alias_output_names, - backend_version=self.compliant._backend_version, - version=self.compliant._version, - call_kwargs=self.compliant._call_kwargs, - ) diff --git a/narwhals/_arrow/expr_str.py b/narwhals/_arrow/expr_str.py deleted file mode 100644 index fbabfb551f..0000000000 --- a/narwhals/_arrow/expr_str.py +++ /dev/null @@ -1,84 +0,0 @@ -from __future__ import annotations - -from typing import TYPE_CHECKING - -from narwhals._arrow.utils import ArrowExprNamespace -from narwhals._expression_parsing import reuse_series_namespace_implementation - -if TYPE_CHECKING: - from typing_extensions import Self - - from narwhals._arrow.expr import ArrowExpr - - -class ArrowExprStringNamespace(ArrowExprNamespace): - def len_chars(self: Self) -> ArrowExpr: - return reuse_series_namespace_implementation(self.compliant, "str", "len_chars") - - def replace( - self: Self, pattern: str, value: str, *, literal: bool, n: int - ) -> ArrowExpr: - return reuse_series_namespace_implementation( - self.compliant, - "str", - "replace", - pattern=pattern, - value=value, - literal=literal, - n=n, - ) - - def replace_all(self: Self, pattern: str, value: str, *, literal: bool) -> ArrowExpr: - return reuse_series_namespace_implementation( - self.compliant, - "str", - "replace_all", - pattern=pattern, - value=value, - literal=literal, - ) - - def strip_chars(self: Self, characters: str | None) -> ArrowExpr: - return reuse_series_namespace_implementation( - self.compliant, "str", "strip_chars", characters=characters - ) - - def starts_with(self: Self, prefix: str) -> ArrowExpr: - return reuse_series_namespace_implementation( - self.compliant, "str", "starts_with", prefix=prefix - ) - - def ends_with(self: Self, suffix: str) -> ArrowExpr: - return reuse_series_namespace_implementation( - self.compliant, "str", "ends_with", suffix=suffix - ) - - def contains(self: Self, pattern: str, *, literal: bool) -> ArrowExpr: - return reuse_series_namespace_implementation( - self.compliant, "str", "contains", pattern=pattern, literal=literal - ) - - def slice(self: Self, offset: int, length: int | None) -> ArrowExpr: - return reuse_series_namespace_implementation( - self.compliant, "str", "slice", offset=offset, length=length - ) - - def split(self: Self, by: str) -> ArrowExpr: - return reuse_series_namespace_implementation( - self._compliant_expr, "str", "split", by=by - ) - - def to_datetime(self: Self, format: str | None) -> ArrowExpr: # noqa: A002 - return reuse_series_namespace_implementation( - self.compliant, "str", "to_datetime", format=format - ) - - def to_uppercase(self: Self) -> ArrowExpr: - return reuse_series_namespace_implementation( - self.compliant, "str", "to_uppercase" - ) - - def to_lowercase(self: Self) -> ArrowExpr: - return reuse_series_namespace_implementation( - self.compliant, "str", "to_lowercase" - ) diff --git a/narwhals/_arrow/expr_struct.py b/narwhals/_arrow/expr_struct.py deleted file mode 100644 index 4d4f1863d2..0000000000 --- a/narwhals/_arrow/expr_struct.py +++ /dev/null @@ -1,23 +0,0 @@ -from __future__ import annotations - -from typing import TYPE_CHECKING - -from narwhals._expression_parsing import reuse_series_namespace_implementation - -if TYPE_CHECKING: - from typing_extensions import Self - - from narwhals._arrow.expr import ArrowExpr - - -class ArrowExprStructNamespace: - def __init__(self: Self, expr: ArrowExpr) -> None: - self._compliant_expr = expr - - def field(self: Self, name: str) -> ArrowExpr: - return reuse_series_namespace_implementation( - self._compliant_expr, - "struct", - "field", - name=name, - ).alias(name) diff --git a/narwhals/_arrow/namespace.py b/narwhals/_arrow/namespace.py index 7d80310352..22e2e56184 100644 --- a/narwhals/_arrow/namespace.py +++ b/narwhals/_arrow/namespace.py @@ -12,7 +12,6 @@ from typing import Literal from typing import Sequence -import pyarrow as pa import pyarrow.compute as pc from narwhals._arrow.dataframe import ArrowDataFrame @@ -25,9 +24,9 @@ from narwhals._arrow.utils import horizontal_concat from narwhals._arrow.utils import nulls_like from narwhals._arrow.utils import vertical_concat +from narwhals._compliant import EagerNamespace from narwhals._expression_parsing import combine_alias_output_names from narwhals._expression_parsing import combine_evaluate_output_names -from narwhals.typing import CompliantNamespace from narwhals.utils import Implementation from narwhals.utils import exclude_column_names from narwhals.utils import get_column_names @@ -48,66 +47,17 @@ _Scalar: TypeAlias = Any -class ArrowNamespace(CompliantNamespace[ArrowDataFrame, ArrowSeries]): - def _create_expr_from_callable( - self: Self, - func: Callable[[ArrowDataFrame], Sequence[ArrowSeries]], - *, - depth: int, - function_name: str, - evaluate_output_names: Callable[[ArrowDataFrame], Sequence[str]], - alias_output_names: Callable[[Sequence[str]], Sequence[str]] | None, - call_kwargs: dict[str, Any] | None = None, - ) -> ArrowExpr: - from narwhals._arrow.expr import ArrowExpr - - return ArrowExpr( - func, - depth=depth, - function_name=function_name, - evaluate_output_names=evaluate_output_names, - alias_output_names=alias_output_names, - backend_version=self._backend_version, - version=self._version, - call_kwargs=call_kwargs, - ) - - def _create_expr_from_series(self: Self, series: ArrowSeries) -> ArrowExpr: - from narwhals._arrow.expr import ArrowExpr - - return ArrowExpr( - lambda _df: [series], - depth=0, - function_name="series", - evaluate_output_names=lambda _df: [series.name], - alias_output_names=None, - backend_version=self._backend_version, - version=self._version, - ) - - def _create_series_from_scalar( - self: Self, value: Any, *, reference_series: ArrowSeries - ) -> ArrowSeries: - from narwhals._arrow.series import ArrowSeries +class ArrowNamespace(EagerNamespace[ArrowDataFrame, ArrowSeries, ArrowExpr]): + @property + def _expr(self) -> type[ArrowExpr]: + return ArrowExpr - if self._backend_version < (13,) and hasattr(value, "as_py"): - value = value.as_py() - return ArrowSeries._from_iterable( - [value], - name=reference_series.name, - backend_version=self._backend_version, - version=self._version, - ) + @property + def _series(self) -> type[ArrowSeries]: + return ArrowSeries def _create_compliant_series(self: Self, value: Any) -> ArrowSeries: - from narwhals._arrow.series import ArrowSeries - - return ArrowSeries( - native_series=pa.chunked_array([value]), - name="", - backend_version=self._backend_version, - version=self._version, - ) + return self._series._from_iterable(value, name="", context=self) # --- not in spec --- def __init__( @@ -119,39 +69,26 @@ def __init__( # --- selection --- def col(self: Self, *column_names: str) -> ArrowExpr: - from narwhals._arrow.expr import ArrowExpr - - return ArrowExpr.from_column_names( - passthrough_column_names(column_names), - function_name="col", - backend_version=self._backend_version, - version=self._version, + return self._expr.from_column_names( + passthrough_column_names(column_names), function_name="col", context=self ) def exclude(self: Self, excluded_names: Container[str]) -> ArrowExpr: - return ArrowExpr.from_column_names( + return self._expr.from_column_names( partial(exclude_column_names, names=excluded_names), function_name="exclude", - backend_version=self._backend_version, - version=self._version, + context=self, ) def nth(self: Self, *column_indices: int) -> ArrowExpr: - from narwhals._arrow.expr import ArrowExpr - - return ArrowExpr.from_column_indices( - *column_indices, backend_version=self._backend_version, version=self._version - ) + return self._expr.from_column_indices(*column_indices, context=self) def len(self: Self) -> ArrowExpr: # coverage bug? this is definitely hit - return ArrowExpr( # pragma: no cover + return self._expr( # pragma: no cover lambda df: [ ArrowSeries._from_iterable( - [len(df._native_frame)], - name="len", - backend_version=self._backend_version, - version=self._version, + [len(df._native_frame)], name="len", context=self ) ], depth=0, @@ -163,26 +100,20 @@ def len(self: Self) -> ArrowExpr: ) def all(self: Self) -> ArrowExpr: - return ArrowExpr.from_column_names( - get_column_names, - function_name="all", - backend_version=self._backend_version, - version=self._version, + return self._expr.from_column_names( + get_column_names, function_name="all", context=self ) def lit(self: Self, value: Any, dtype: DType | None) -> ArrowExpr: def _lit_arrow_series(_: ArrowDataFrame) -> ArrowSeries: arrow_series = ArrowSeries._from_iterable( - data=[value], - name="literal", - backend_version=self._backend_version, - version=self._version, + data=[value], name="literal", context=self ) if dtype: return arrow_series.cast(dtype) return arrow_series - return ArrowExpr( + return self._expr( lambda df: [_lit_arrow_series(df)], depth=0, function_name="lit", @@ -197,12 +128,13 @@ def func(df: ArrowDataFrame) -> list[ArrowSeries]: series = chain.from_iterable(expr(df) for expr in exprs) return [reduce(operator.and_, align_series_full_broadcast(*series))] - return self._create_expr_from_callable( + return self._expr._from_callable( func=func, depth=max(x._depth for x in exprs) + 1, function_name="all_horizontal", evaluate_output_names=combine_evaluate_output_names(*exprs), alias_output_names=combine_alias_output_names(*exprs), + context=self, ) def any_horizontal(self: Self, *exprs: ArrowExpr) -> ArrowExpr: @@ -210,12 +142,13 @@ def func(df: ArrowDataFrame) -> list[ArrowSeries]: series = chain.from_iterable(expr(df) for expr in exprs) return [reduce(operator.or_, align_series_full_broadcast(*series))] - return self._create_expr_from_callable( + return self._expr._from_callable( func=func, depth=max(x._depth for x in exprs) + 1, function_name="any_horizontal", evaluate_output_names=combine_evaluate_output_names(*exprs), alias_output_names=combine_alias_output_names(*exprs), + context=self, ) def sum_horizontal(self: Self, *exprs: ArrowExpr) -> ArrowExpr: @@ -224,12 +157,13 @@ def func(df: ArrowDataFrame) -> list[ArrowSeries]: series = (s.fill_null(0, strategy=None, limit=None) for s in it) return [reduce(operator.add, align_series_full_broadcast(*series))] - return self._create_expr_from_callable( + return self._expr._from_callable( func=func, depth=max(x._depth for x in exprs) + 1, function_name="sum_horizontal", evaluate_output_names=combine_evaluate_output_names(*exprs), alias_output_names=combine_alias_output_names(*exprs), + context=self, ) def mean_horizontal(self: Self, *exprs: ArrowExpr) -> IntoArrowExpr: @@ -245,12 +179,13 @@ def func(df: ArrowDataFrame) -> list[ArrowSeries]: ) return [reduce(operator.add, series) / reduce(operator.add, non_na)] - return self._create_expr_from_callable( + return self._expr._from_callable( func=func, depth=max(x._depth for x in exprs) + 1, function_name="mean_horizontal", evaluate_output_names=combine_evaluate_output_names(*exprs), alias_output_names=combine_alias_output_names(*exprs), + context=self, ) def min_horizontal(self: Self, *exprs: ArrowExpr) -> ArrowExpr: @@ -273,12 +208,13 @@ def func(df: ArrowDataFrame) -> list[ArrowSeries]: ) ] - return self._create_expr_from_callable( + return self._expr._from_callable( func=func, depth=max(x._depth for x in exprs) + 1, function_name="min_horizontal", evaluate_output_names=combine_evaluate_output_names(*exprs), alias_output_names=combine_alias_output_names(*exprs), + context=self, ) def max_horizontal(self: Self, *exprs: ArrowExpr) -> ArrowExpr: @@ -302,12 +238,13 @@ def func(df: ArrowDataFrame) -> list[ArrowSeries]: ) ] - return self._create_expr_from_callable( + return self._expr._from_callable( func=func, depth=max(x._depth for x in exprs) + 1, function_name="max_horizontal", evaluate_output_names=combine_evaluate_output_names(*exprs), alias_output_names=combine_alias_output_names(*exprs), + context=self, ) def concat( @@ -373,12 +310,13 @@ def func(df: ArrowDataFrame) -> list[ArrowSeries]: ) ] - return self._create_expr_from_callable( + return self._expr._from_callable( func=func, depth=max(x._depth for x in exprs) + 1, function_name="concat_str", evaluate_output_names=combine_evaluate_output_names(*exprs), alias_output_names=combine_alias_output_names(*exprs), + context=self, ) @@ -399,16 +337,13 @@ def __init__( self._version = version def __call__(self: Self, df: ArrowDataFrame) -> Sequence[ArrowSeries]: - plx = df.__narwhals_namespace__() condition = self._condition(df)[0] condition_native = condition._native_series if isinstance(self._then_value, ArrowExpr): value_series = self._then_value(df)[0] else: - value_series = plx._create_series_from_scalar( - self._then_value, reference_series=condition.alias("literal") - ) + value_series = condition.alias("literal")._from_scalar(self._then_value) value_series._broadcast = True value_series_native = extract_dataframe_comparand( len(df), value_series, self._backend_version @@ -466,6 +401,7 @@ def __init__( backend_version: tuple[int, ...], version: Version, call_kwargs: dict[str, Any] | None = None, + implementation: Implementation | None = None, ) -> None: self._backend_version = backend_version self._version = version diff --git a/narwhals/_arrow/selectors.py b/narwhals/_arrow/selectors.py index d9c74be112..4cf50535db 100644 --- a/narwhals/_arrow/selectors.py +++ b/narwhals/_arrow/selectors.py @@ -3,16 +3,16 @@ from typing import TYPE_CHECKING from narwhals._arrow.expr import ArrowExpr -from narwhals._selectors import CompliantSelector -from narwhals._selectors import EagerSelectorNamespace +from narwhals._compliant import CompliantSelector +from narwhals._compliant import EagerSelectorNamespace if TYPE_CHECKING: from typing_extensions import Self from narwhals._arrow.dataframe import ArrowDataFrame from narwhals._arrow.series import ArrowSeries - from narwhals._selectors import EvalNames - from narwhals._selectors import EvalSeries + from narwhals._compliant import EvalNames + from narwhals._compliant import EvalSeries from narwhals.utils import _FullContext diff --git a/narwhals/_arrow/series.py b/narwhals/_arrow/series.py index 0aa2593ab2..dbb359b9db 100644 --- a/narwhals/_arrow/series.py +++ b/narwhals/_arrow/series.py @@ -26,10 +26,9 @@ from narwhals._arrow.utils import native_to_narwhals_dtype from narwhals._arrow.utils import nulls_like from narwhals._arrow.utils import pad_series +from narwhals._compliant import EagerSeries from narwhals.exceptions import InvalidOperationError -from narwhals.typing import CompliantSeries from narwhals.utils import Implementation -from narwhals.utils import _StoresNative from narwhals.utils import generate_temporary_column_name from narwhals.utils import import_dtypes_module from narwhals.utils import validate_backend_version @@ -56,6 +55,7 @@ from narwhals.typing import _1DArray from narwhals.typing import _2DArray from narwhals.utils import Version + from narwhals.utils import _FullContext # TODO @dangotbanned: move into `_arrow.utils` @@ -96,7 +96,7 @@ def maybe_extract_py_scalar(value: Any, return_py_scalar: bool) -> Any: # noqa: return value -class ArrowSeries(CompliantSeries, _StoresNative["ArrowChunkedArray"]): +class ArrowSeries(EagerSeries["ArrowChunkedArray"]): def __init__( self: Self, native_series: ArrowChunkedArray, @@ -113,6 +113,10 @@ def __init__( validate_backend_version(self._implementation, self._backend_version) self._broadcast = False + @property + def native(self) -> ArrowChunkedArray: + return self._native_series + def _change_version(self: Self, version: Version) -> Self: return self.__class__( self.native, @@ -138,16 +142,20 @@ def _from_iterable( data: Iterable[Any], name: str, *, - backend_version: tuple[int, ...], - version: Version, + context: _FullContext, ) -> Self: return cls( chunked_array([data]), name=name, - backend_version=backend_version, - version=version, + backend_version=context._backend_version, + version=context._version, ) + def _from_scalar(self, value: Any) -> Self: + if self._backend_version < (13,) and hasattr(value, "as_py"): + value = value.as_py() + return super()._from_scalar(value) + def __narwhals_namespace__(self: Self) -> ArrowNamespace: from narwhals._arrow.namespace import ArrowNamespace @@ -268,10 +276,6 @@ def __invert__(self: Self) -> Self: def _type(self: Self) -> pa.DataType: return self.native.type - @property - def native(self) -> ArrowChunkedArray: - return self._native_series - def len(self: Self, *, _return_py_scalar: bool = True) -> int: return maybe_extract_py_scalar(len(self.native), _return_py_scalar) @@ -557,12 +561,7 @@ def arg_true(self: Self) -> Self: import numpy as np # ignore-banned-import res = np.flatnonzero(self.native) - return self._from_iterable( - res, - name=self.name, - backend_version=self._backend_version, - version=self._version, - ) + return self._from_iterable(res, name=self.name, context=self) def item(self: Self, index: int | None = None) -> Any: if index is None: diff --git a/narwhals/_arrow/utils.py b/narwhals/_arrow/utils.py index bfc617cee9..a1fd03f495 100644 --- a/narwhals/_arrow/utils.py +++ b/narwhals/_arrow/utils.py @@ -12,7 +12,6 @@ import pyarrow as pa import pyarrow.compute as pc -from narwhals.utils import _ExprNamespace from narwhals.utils import _SeriesNamespace from narwhals.utils import import_dtypes_module from narwhals.utils import isinstance_or_issubclass @@ -24,7 +23,6 @@ from typing_extensions import TypeAlias from typing_extensions import TypeIs - from narwhals._arrow.expr import ArrowExpr from narwhals._arrow.series import ArrowSeries from narwhals._arrow.typing import ArrowArray from narwhals._arrow.typing import ArrowChunkedArray @@ -548,8 +546,3 @@ def pad_series( class ArrowSeriesNamespace(_SeriesNamespace["ArrowSeries", "ArrowChunkedArray"]): def __init__(self: Self, series: ArrowSeries, /) -> None: self._compliant_series = series - - -class ArrowExprNamespace(_ExprNamespace["ArrowExpr"]): - def __init__(self: Self, expr: ArrowExpr, /) -> None: - self._compliant_expr = expr diff --git a/narwhals/_compliant/__init__.py b/narwhals/_compliant/__init__.py new file mode 100644 index 0000000000..c4dbdcd9ef --- /dev/null +++ b/narwhals/_compliant/__init__.py @@ -0,0 +1,49 @@ +from __future__ import annotations + +from narwhals._compliant.dataframe import CompliantDataFrame +from narwhals._compliant.dataframe import CompliantLazyFrame +from narwhals._compliant.dataframe import EagerDataFrame +from narwhals._compliant.expr import CompliantExpr +from narwhals._compliant.expr import EagerExpr +from narwhals._compliant.expr import LazyExpr +from narwhals._compliant.namespace import CompliantNamespace +from narwhals._compliant.namespace import EagerNamespace +from narwhals._compliant.selectors import CompliantSelector +from narwhals._compliant.selectors import CompliantSelectorNamespace +from narwhals._compliant.selectors import EagerSelectorNamespace +from narwhals._compliant.selectors import EvalNames +from narwhals._compliant.selectors import EvalSeries +from narwhals._compliant.selectors import LazySelectorNamespace +from narwhals._compliant.series import CompliantSeries +from narwhals._compliant.series import EagerSeries +from narwhals._compliant.typing import CompliantFrameT +from narwhals._compliant.typing import CompliantSeriesOrNativeExprT_co +from narwhals._compliant.typing import CompliantSeriesT_co +from narwhals._compliant.typing import EagerDataFrameT +from narwhals._compliant.typing import EagerSeriesT +from narwhals._compliant.typing import IntoCompliantExpr + +__all__ = [ + "CompliantDataFrame", + "CompliantExpr", + "CompliantFrameT", + "CompliantLazyFrame", + "CompliantNamespace", + "CompliantSelector", + "CompliantSelectorNamespace", + "CompliantSeries", + "CompliantSeriesOrNativeExprT_co", + "CompliantSeriesT_co", + "EagerDataFrame", + "EagerDataFrameT", + "EagerExpr", + "EagerNamespace", + "EagerSelectorNamespace", + "EagerSeries", + "EagerSeriesT", + "EvalNames", + "EvalSeries", + "IntoCompliantExpr", + "LazyExpr", + "LazySelectorNamespace", +] diff --git a/narwhals/_compliant/any_namespace.py b/narwhals/_compliant/any_namespace.py new file mode 100644 index 0000000000..d67b0a434c --- /dev/null +++ b/narwhals/_compliant/any_namespace.py @@ -0,0 +1,87 @@ +"""`Expr` and `Series` namespace accessor protocols.""" + +from __future__ import annotations + +from typing import TYPE_CHECKING +from typing import Protocol + +from narwhals.utils import CompliantT_co +from narwhals.utils import _StoresCompliant + +if TYPE_CHECKING: + from typing import Callable + + from narwhals.typing import TimeUnit + +__all__ = [ + "CatNamespace", + "DateTimeNamespace", + "ListNamespace", + "NameNamespace", + "StringNamespace", + "StructNamespace", +] + + +class CatNamespace(_StoresCompliant[CompliantT_co], Protocol[CompliantT_co]): + def get_categories(self) -> CompliantT_co: ... + + +class DateTimeNamespace(_StoresCompliant[CompliantT_co], Protocol[CompliantT_co]): + def to_string(self, format: str) -> CompliantT_co: ... # noqa: A002 + def replace_time_zone(self, time_zone: str | None) -> CompliantT_co: ... + def convert_time_zone(self, time_zone: str) -> CompliantT_co: ... + def timestamp(self, time_unit: TimeUnit) -> CompliantT_co: ... + def date(self) -> CompliantT_co: ... + def year(self) -> CompliantT_co: ... + def month(self) -> CompliantT_co: ... + def day(self) -> CompliantT_co: ... + def hour(self) -> CompliantT_co: ... + def minute(self) -> CompliantT_co: ... + def second(self) -> CompliantT_co: ... + def millisecond(self) -> CompliantT_co: ... + def microsecond(self) -> CompliantT_co: ... + def nanosecond(self) -> CompliantT_co: ... + def ordinal_day(self) -> CompliantT_co: ... + def weekday(self) -> CompliantT_co: ... + def total_minutes(self) -> CompliantT_co: ... + def total_seconds(self) -> CompliantT_co: ... + def total_milliseconds(self) -> CompliantT_co: ... + def total_microseconds(self) -> CompliantT_co: ... + def total_nanoseconds(self) -> CompliantT_co: ... + + +class ListNamespace(_StoresCompliant[CompliantT_co], Protocol[CompliantT_co]): + def len(self) -> CompliantT_co: ... + + +class NameNamespace(_StoresCompliant[CompliantT_co], Protocol[CompliantT_co]): + def keep(self) -> CompliantT_co: ... + def map(self, function: Callable[[str], str]) -> CompliantT_co: ... + def prefix(self, prefix: str) -> CompliantT_co: ... + def suffix(self, suffix: str) -> CompliantT_co: ... + def to_lowercase(self) -> CompliantT_co: ... + def to_uppercase(self) -> CompliantT_co: ... + + +class StringNamespace(_StoresCompliant[CompliantT_co], Protocol[CompliantT_co]): + def len_chars(self) -> CompliantT_co: ... + def replace( + self, pattern: str, value: str, *, literal: bool, n: int + ) -> CompliantT_co: ... + def replace_all( + self, pattern: str, value: str, *, literal: bool + ) -> CompliantT_co: ... + def strip_chars(self, characters: str | None) -> CompliantT_co: ... + def starts_with(self, prefix: str) -> CompliantT_co: ... + def ends_with(self, suffix: str) -> CompliantT_co: ... + def contains(self, pattern: str, *, literal: bool) -> CompliantT_co: ... + def slice(self, offset: int, length: int | None) -> CompliantT_co: ... + def split(self, by: str) -> CompliantT_co: ... + def to_datetime(self, format: str | None) -> CompliantT_co: ... # noqa: A002 + def to_lowercase(self) -> CompliantT_co: ... + def to_uppercase(self) -> CompliantT_co: ... + + +class StructNamespace(_StoresCompliant[CompliantT_co], Protocol[CompliantT_co]): + def field(self, name: str) -> CompliantT_co: ... diff --git a/narwhals/_compliant/dataframe.py b/narwhals/_compliant/dataframe.py new file mode 100644 index 0000000000..ed1d83b998 --- /dev/null +++ b/narwhals/_compliant/dataframe.py @@ -0,0 +1,108 @@ +from __future__ import annotations + +from itertools import chain +from typing import TYPE_CHECKING +from typing import Any +from typing import Iterator +from typing import Mapping +from typing import Protocol +from typing import Sequence +from typing import TypeVar + +from narwhals._compliant.typing import CompliantSeriesT_co +from narwhals._compliant.typing import EagerSeriesT +from narwhals._expression_parsing import evaluate_output_names_and_aliases + +if TYPE_CHECKING: + from typing_extensions import Self + from typing_extensions import TypeIs + + from narwhals._compliant.expr import EagerExpr + from narwhals.dtypes import DType + +__all__ = ["CompliantDataFrame", "CompliantLazyFrame", "EagerDataFrame"] + +T = TypeVar("T") + + +class CompliantDataFrame(Protocol[CompliantSeriesT_co]): + def __narwhals_dataframe__(self) -> Self: ... + def __narwhals_namespace__(self) -> Any: ... + def simple_select( + self, *column_names: str + ) -> Self: ... # `select` where all args are column names. + def aggregate(self, *exprs: Any) -> Self: # pragma: no cover + ... # `select` where all args are aggregations or literals + # (so, no broadcasting is necessary). + + @property + def columns(self) -> Sequence[str]: ... + @property + def schema(self) -> Mapping[str, DType]: ... + def get_column(self, name: str) -> CompliantSeriesT_co: ... + def iter_columns(self) -> Iterator[CompliantSeriesT_co]: ... + + +class CompliantLazyFrame(Protocol): + def __narwhals_lazyframe__(self) -> Self: ... + def __narwhals_namespace__(self) -> Any: ... + def simple_select( + self, *column_names: str + ) -> Self: ... # `select` where all args are column names. + def aggregate(self, *exprs: Any) -> Self: # pragma: no cover + ... # `select` where all args are aggregations or literals + # (so, no broadcasting is necessary). + + @property + def columns(self) -> Sequence[str]: ... + @property + def schema(self) -> Mapping[str, DType]: ... + def _iter_columns(self) -> Iterator[Any]: ... + + +class EagerDataFrame(CompliantDataFrame[EagerSeriesT], Protocol[EagerSeriesT]): + def _maybe_evaluate_expr( + self, expr: EagerExpr[Self, EagerSeriesT] | T, / + ) -> EagerSeriesT | T: + if is_eager_expr(expr): + result: Sequence[EagerSeriesT] = expr(self) + if len(result) > 1: + msg = ( + "Multi-output expressions (e.g. `nw.all()` or `nw.col('a', 'b')`) " + "are not supported in this context" + ) + raise ValueError(msg) + return result[0] + return expr + + def _evaluate_into_exprs( + self, *exprs: EagerExpr[Self, EagerSeriesT] + ) -> Sequence[EagerSeriesT]: + return list(chain.from_iterable(self._evaluate_into_expr(expr) for expr in exprs)) + + def _evaluate_into_expr( + self, expr: EagerExpr[Self, EagerSeriesT], / + ) -> Sequence[EagerSeriesT]: + """Return list of raw columns. + + For eager backends we alias operations at each step. + + As a safety precaution, here we can check that the expected result names match those + we were expecting from the various `evaluate_output_names` / `alias_output_names` calls. + + Note that for PySpark / DuckDB, we are less free to liberally set aliases whenever we want. + """ + _, aliases = evaluate_output_names_and_aliases(expr, self, []) + result = expr(self) + if list(aliases) != [s.name for s in result]: + msg = f"Safety assertion failed, expected {aliases}, got {result}" + raise AssertionError(msg) + return result + + +# NOTE: `mypy` is requiring the gymnastics here and is very fragile +# DON'T CHANGE THIS or `EagerDataFrame._maybe_evaluate_expr` +def is_eager_expr( + obj: EagerExpr[Any, EagerSeriesT] | Any, +) -> TypeIs[EagerExpr[Any, EagerSeriesT]]: + return hasattr(obj, "__narwhals_expr__") diff --git a/narwhals/_compliant/expr.py b/narwhals/_compliant/expr.py new file mode 100644 index 0000000000..30e9bfa0a7 --- /dev/null +++ b/narwhals/_compliant/expr.py @@ -0,0 +1,1009 @@ +from __future__ import annotations + +import sys +from functools import partial +from operator import methodcaller +from typing import TYPE_CHECKING +from typing import Any +from typing import Callable +from typing import Generic +from typing import Literal +from typing import Mapping +from typing import Protocol +from typing import Sequence + +from narwhals._compliant.any_namespace import CatNamespace +from narwhals._compliant.any_namespace import DateTimeNamespace +from narwhals._compliant.any_namespace import ListNamespace +from narwhals._compliant.any_namespace import NameNamespace +from narwhals._compliant.any_namespace import StringNamespace +from narwhals._compliant.any_namespace import StructNamespace +from narwhals._compliant.namespace import CompliantNamespace +from narwhals._compliant.typing import AliasName +from narwhals._compliant.typing import AliasNames +from narwhals._compliant.typing import CompliantFrameT +from narwhals._compliant.typing import CompliantLazyFrameT +from narwhals._compliant.typing import CompliantSeriesOrNativeExprT_co +from narwhals._compliant.typing import EagerDataFrameT +from narwhals._compliant.typing import EagerExprT +from narwhals._compliant.typing import EagerSeriesT +from narwhals._compliant.typing import NativeExprT_co +from narwhals._expression_parsing import evaluate_output_names_and_aliases +from narwhals.dtypes import DType +from narwhals.utils import _ExprNamespace +from narwhals.utils import deprecated +from narwhals.utils import not_implemented +from narwhals.utils import unstable + +if not TYPE_CHECKING: # pragma: no cover + if sys.version_info >= (3, 9): + from typing import Protocol as Protocol38 + else: + from typing import Generic as Protocol38 +else: # pragma: no cover + # TODO @dangotbanned: Remove after dropping `3.8` (#2084) + # - https://github.com/narwhals-dev/narwhals/pull/2064#discussion_r1965921386 + from typing import Protocol as Protocol38 + +if TYPE_CHECKING: + from typing import Mapping + + from typing_extensions import Self + + from narwhals._compliant.namespace import CompliantNamespace + from narwhals._compliant.namespace import EagerNamespace + from narwhals._compliant.series import CompliantSeries + from narwhals._expression_parsing import ExprKind + from narwhals.dtypes import DType + from narwhals.typing import TimeUnit + from narwhals.utils import Implementation + from narwhals.utils import Version + from narwhals.utils import _FullContext + +__all__ = ["CompliantExpr", "EagerExpr", "LazyExpr", "NativeExpr"] + + +class NativeExpr(Protocol): + """An `Expr`-like object from a package with [Lazy-only support](https://narwhals-dev.github.io/narwhals/extending/#levels-of-support). + + Protocol members are chosen *purely* for matching statically - as they + are common to all currently supported packages. + """ + + def between(self, *args: Any, **kwds: Any) -> Any: ... + def isin(self, *args: Any, **kwds: Any) -> Any: ... + + +class CompliantExpr(Protocol38[CompliantFrameT, CompliantSeriesOrNativeExprT_co]): + _implementation: Implementation + _backend_version: tuple[int, ...] + _version: Version + _evaluate_output_names: Callable[[CompliantFrameT], Sequence[str]] + _alias_output_names: Callable[[Sequence[str]], Sequence[str]] | None + _depth: int + _function_name: str + + def __call__( + self, df: CompliantFrameT + ) -> Sequence[CompliantSeriesOrNativeExprT_co]: ... + def __narwhals_expr__(self) -> None: ... + def __narwhals_namespace__( + self, + ) -> CompliantNamespace[CompliantFrameT, CompliantSeriesOrNativeExprT_co]: ... + def is_null(self) -> Self: ... + def abs(self) -> Self: ... + def all(self) -> Self: ... + def any(self) -> Self: ... + def alias(self, name: str) -> Self: ... + def cast(self, dtype: DType | type[DType]) -> Self: ... + def count(self) -> Self: ... + def min(self) -> Self: ... + def max(self) -> Self: ... + def arg_min(self) -> Self: ... + def arg_max(self) -> Self: ... + def arg_true(self) -> Self: ... + def mean(self) -> Self: ... + def sum(self) -> Self: ... + def median(self) -> Self: ... + def skew(self) -> Self: ... + def std(self, *, ddof: int) -> Self: ... + def var(self, *, ddof: int) -> Self: ... + def n_unique(self) -> Self: ... + def null_count(self) -> Self: ... + def drop_nulls(self) -> Self: ... + def fill_null( + self, + value: Any | None, + strategy: Literal["forward", "backward"] | None, + limit: int | None, + ) -> Self: ... + def diff(self) -> Self: ... + def unique(self) -> Self: ... + def len(self) -> Self: ... + def round(self, decimals: int) -> Self: ... + def mode(self) -> Self: ... + def head(self, n: int) -> Self: ... + def tail(self, n: int) -> Self: ... + def shift(self, n: int) -> Self: ... + def is_finite(self) -> Self: ... + def is_nan(self) -> Self: ... + def is_unique(self) -> Self: ... + def is_first_distinct(self) -> Self: ... + def is_last_distinct(self) -> Self: ... + def cum_sum(self, *, reverse: bool) -> Self: ... + def cum_count(self, *, reverse: bool) -> Self: ... + def cum_min(self, *, reverse: bool) -> Self: ... + def cum_max(self, *, reverse: bool) -> Self: ... + def cum_prod(self, *, reverse: bool) -> Self: ... + def is_in(self, other: Any) -> Self: ... + def sort(self, *, descending: bool, nulls_last: bool) -> Self: ... + def rank( + self, + method: Literal["average", "min", "max", "dense", "ordinal"], + *, + descending: bool, + ) -> Self: ... + def replace_strict( + self, + old: Sequence[Any] | Mapping[Any, Any], + new: Sequence[Any], + *, + return_dtype: DType | type[DType] | None, + ) -> Self: ... + def over( + self: Self, keys: Sequence[str], kind: ExprKind, order_by: Sequence[str] | None + ) -> Self: ... + def sample( + self, + n: int | None, + *, + fraction: float | None, + with_replacement: bool, + seed: int | None, + ) -> Self: ... + def quantile( + self, + quantile: float, + interpolation: Literal["nearest", "higher", "lower", "midpoint", "linear"], + ) -> Self: ... + def map_batches( + self, + function: Callable[[CompliantSeries], CompliantExpr[Any, Any]], + return_dtype: DType | type[DType] | None, + ) -> Self: ... + + @property + def str(self) -> Any: ... + @property + def name(self) -> Any: ... + @property + def dt(self) -> Any: ... + @property + def cat(self) -> Any: ... + @property + def list(self) -> Any: ... + @property + def struct(self) -> Any: ... + + @unstable + def ewm_mean( + self, + *, + com: float | None, + span: float | None, + half_life: float | None, + alpha: float | None, + adjust: bool, + min_samples: int, + ignore_nulls: bool, + ) -> Self: ... + + @unstable + def rolling_sum( + self, + window_size: int, + *, + min_samples: int, + center: bool, + ) -> Self: ... + + @unstable + def rolling_mean( + self, + window_size: int, + *, + min_samples: int, + center: bool, + ) -> Self: ... + + @unstable + def rolling_var( + self, + window_size: int, + *, + min_samples: int, + center: bool, + ddof: int, + ) -> Self: ... + + @unstable + def rolling_std( + self, + window_size: int, + *, + min_samples: int, + center: bool, + ddof: int, + ) -> Self: ... + + @deprecated("Since `1.22.0`") + def gather_every(self, n: int, offset: int) -> Self: ... + def __and__(self, other: Any) -> Self: ... + def __or__(self, other: Any) -> Self: ... + def __add__(self, other: Any) -> Self: ... + def __sub__(self, other: Any) -> Self: ... + def __mul__(self, other: Any) -> Self: ... + def __floordiv__(self, other: Any) -> Self: ... + def __truediv__(self, other: Any) -> Self: ... + def __mod__(self, other: Any) -> Self: ... + def __pow__(self, other: Any) -> Self: ... + def __gt__(self, other: Any) -> Self: ... + def __ge__(self, other: Any) -> Self: ... + def __lt__(self, other: Any) -> Self: ... + def __le__(self, other: Any) -> Self: ... + def __invert__(self) -> Self: ... + def broadcast( + self, kind: Literal[ExprKind.AGGREGATION, ExprKind.LITERAL] + ) -> Self: ... + + +class EagerExpr( + CompliantExpr[EagerDataFrameT, EagerSeriesT], + Protocol38[EagerDataFrameT, EagerSeriesT], +): + _call: Callable[[EagerDataFrameT], Sequence[EagerSeriesT]] + _depth: int + _function_name: str + _evaluate_output_names: Any + _alias_output_names: Any + _call_kwargs: dict[str, Any] + + def __init__( + self: Self, + call: Callable[[EagerDataFrameT], Sequence[EagerSeriesT]], + *, + depth: int, + function_name: str, + evaluate_output_names: Callable[[EagerDataFrameT], Sequence[str]], + alias_output_names: Callable[[Sequence[str]], Sequence[str]] | None, + implementation: Implementation, + backend_version: tuple[int, ...], + version: Version, + call_kwargs: dict[str, Any] | None = None, + ) -> None: ... + + def __call__(self, df: EagerDataFrameT) -> Sequence[EagerSeriesT]: + return self._call(df) + + def __repr__(self) -> str: # pragma: no cover + return f"{type(self).__name__}(depth={self._depth}, function_name={self._function_name})" + + def __narwhals_namespace__( + self, + ) -> EagerNamespace[EagerDataFrameT, EagerSeriesT, Self]: ... + def __narwhals_expr__(self) -> None: ... + + @classmethod + def _from_callable( + cls, + func: Callable[[EagerDataFrameT], Sequence[EagerSeriesT]], + *, + depth: int, + function_name: str, + evaluate_output_names: Callable[[EagerDataFrameT], Sequence[str]], + alias_output_names: Callable[[Sequence[str]], Sequence[str]] | None, + context: _FullContext, + call_kwargs: dict[str, Any] | None = None, + ) -> Self: + return cls( + func, + depth=depth, + function_name=function_name, + evaluate_output_names=evaluate_output_names, + alias_output_names=alias_output_names, + implementation=context._implementation, + backend_version=context._backend_version, + version=context._version, + call_kwargs=call_kwargs, + ) + + @classmethod + def _from_series(cls, series: EagerSeriesT) -> Self: + return cls( + lambda _df: [series], + depth=0, + function_name="series", + evaluate_output_names=lambda _df: [series.name], + alias_output_names=None, + implementation=series._implementation, + backend_version=series._backend_version, + version=series._version, + ) + + @classmethod + def from_column_names( + cls, + evaluate_column_names: Callable[[EagerDataFrameT], Sequence[str]], + /, + *, + function_name: str, + context: _FullContext, + ) -> Self: ... + @classmethod + def from_column_indices( + cls, + *column_indices: int, + context: _FullContext, + ) -> Self: ... + + def _reuse_series( + self: Self, + method_name: str, + *, + returns_scalar: bool = False, + call_kwargs: dict[str, Any] | None = None, + **expressifiable_args: Any, + ) -> Self: + """Reuse Series implementation for expression. + + If Series.foo is already defined, and we'd like Expr.foo to be the same, we can + leverage this method to do that for us. + + Arguments: + method_name: name of method. + returns_scalar: whether the Series version returns a scalar. In this case, + the expression version should return a 1-row Series. + call_kwargs: non-expressifiable args which we may need to reuse in `agg` or `over`, + such as `ddof` for `std` and `var`. + expressifiable_args: keyword arguments to pass to function, which may + be expressifiable (e.g. `nw.col('a').is_between(3, nw.col('b')))`). + """ + func = partial( + self._reuse_series_inner, + method_name=method_name, + returns_scalar=returns_scalar, + call_kwargs=call_kwargs or {}, + expressifiable_args=expressifiable_args, + ) + return self._from_callable( + func, + depth=self._depth + 1, + function_name=f"{self._function_name}->{method_name}", + evaluate_output_names=self._evaluate_output_names, + alias_output_names=self._alias_output_names, + call_kwargs=call_kwargs, + context=self, + ) + + # For PyArrow.Series, we return Python Scalars (like Polars does) instead of PyArrow Scalars. + # However, when working with expressions, we keep everything PyArrow-native. + def _reuse_series_extra_kwargs( + self, *, returns_scalar: bool = False + ) -> dict[str, Any]: + return {} + + def _reuse_series_inner( + self, + df: EagerDataFrameT, + *, + method_name: str, + returns_scalar: bool, + call_kwargs: dict[str, Any], + expressifiable_args: dict[str, Any], + ) -> Sequence[EagerSeriesT]: + kwargs = { + **call_kwargs, + **{ + arg_name: df._maybe_evaluate_expr(arg_value) + for arg_name, arg_value in expressifiable_args.items() + }, + } + method = methodcaller( + method_name, + **self._reuse_series_extra_kwargs(returns_scalar=returns_scalar), + **kwargs, + ) + out: Sequence[EagerSeriesT] = [ + series._from_scalar(method(series)) if returns_scalar else method(series) + for series in self(df) + ] + _, aliases = evaluate_output_names_and_aliases(self, df, []) + if [s.name for s in out] != list(aliases): # pragma: no cover + msg = ( + f"Safety assertion failed, please report a bug to https://github.com/narwhals-dev/narwhals/issues\n" + f"Expression aliases: {aliases}\n" + f"Series names: {[s.name for s in out]}" + ) + raise AssertionError(msg) + return out + + def _reuse_series_namespace( + self: Self, + series_namespace: Literal["cat", "dt", "list", "name", "str", "struct"], + method_name: str, + **kwargs: Any, + ) -> Self: + """Reuse Series implementation for expression. + + Just like `_reuse_series`, but for e.g. `Expr.dt.foo` instead + of `Expr.foo`. + + Arguments: + series_namespace: The Series namespace. + method_name: name of method, within `series_namespace`. + kwargs: keyword arguments to pass to function. + """ + return self._from_callable( + lambda df: [ + getattr(getattr(series, series_namespace), method_name)(**kwargs) + for series in self(df) + ], + depth=self._depth + 1, + function_name=f"{self._function_name}->{series_namespace}.{method_name}", + evaluate_output_names=self._evaluate_output_names, + alias_output_names=self._alias_output_names, + call_kwargs={**self._call_kwargs, **kwargs}, + context=self, + ) + + def broadcast(self, kind: Literal[ExprKind.AGGREGATION, ExprKind.LITERAL]) -> Self: + # Mark the resulting Series with `_broadcast = True`. + # Then, when extracting native objects, `extract_native` will + # know what to do. + def func(df: EagerDataFrameT) -> list[EagerSeriesT]: + results = [] + for result in self(df): + result._broadcast = True + results.append(result) + return results + + return type(self)( + func, + depth=self._depth, + function_name=self._function_name, + evaluate_output_names=self._evaluate_output_names, + alias_output_names=self._alias_output_names, + backend_version=self._backend_version, + implementation=self._implementation, + version=self._version, + call_kwargs=self._call_kwargs, + ) + + def cast(self, dtype: DType | type[DType]) -> Self: + return self._reuse_series("cast", dtype=dtype) + + def __eq__(self, other: Self | Any) -> Self: # type: ignore[override] + return self._reuse_series("__eq__", other=other) + + def __ne__(self, other: Self | Any) -> Self: # type: ignore[override] + return self._reuse_series("__ne__", other=other) + + def __ge__(self, other: Self | Any) -> Self: + return self._reuse_series("__ge__", other=other) + + def __gt__(self, other: Self | Any) -> Self: + return self._reuse_series("__gt__", other=other) + + def __le__(self, other: Self | Any) -> Self: + return self._reuse_series("__le__", other=other) + + def __lt__(self, other: Self | Any) -> Self: + return self._reuse_series("__lt__", other=other) + + def __and__(self, other: Self | bool | Any) -> Self: + return self._reuse_series("__and__", other=other) + + def __or__(self, other: Self | bool | Any) -> Self: + return self._reuse_series("__or__", other=other) + + def __add__(self, other: Self | Any) -> Self: + return self._reuse_series("__add__", other=other) + + def __sub__(self, other: Self | Any) -> Self: + return self._reuse_series("__sub__", other=other) + + def __rsub__(self, other: Self | Any) -> Self: + return self.alias("literal")._reuse_series("__rsub__", other=other) + + def __mul__(self, other: Self | Any) -> Self: + return self._reuse_series("__mul__", other=other) + + def __truediv__(self, other: Self | Any) -> Self: + return self._reuse_series("__truediv__", other=other) + + def __rtruediv__(self, other: Self | Any) -> Self: + return self.alias("literal")._reuse_series("__rtruediv__", other=other) + + def __floordiv__(self, other: Self | Any) -> Self: + return self._reuse_series("__floordiv__", other=other) + + def __rfloordiv__(self, other: Self | Any) -> Self: + return self.alias("literal")._reuse_series("__rfloordiv__", other=other) + + def __pow__(self, other: Self | Any) -> Self: + return self._reuse_series("__pow__", other=other) + + def __rpow__(self, other: Self | Any) -> Self: + return self.alias("literal")._reuse_series("__rpow__", other=other) + + def __mod__(self, other: Self | Any) -> Self: + return self._reuse_series("__mod__", other=other) + + def __rmod__(self, other: Self | Any) -> Self: + return self.alias("literal")._reuse_series("__rmod__", other=other) + + # Unary + def __invert__(self) -> Self: + return self._reuse_series("__invert__") + + # Reductions + def null_count(self) -> Self: + return self._reuse_series("null_count", returns_scalar=True) + + def n_unique(self) -> Self: + return self._reuse_series("n_unique", returns_scalar=True) + + def sum(self) -> Self: + return self._reuse_series("sum", returns_scalar=True) + + def count(self) -> Self: + return self._reuse_series("count", returns_scalar=True) + + def mean(self) -> Self: + return self._reuse_series("mean", returns_scalar=True) + + def median(self) -> Self: + return self._reuse_series("median", returns_scalar=True) + + def std(self, *, ddof: int) -> Self: + return self._reuse_series("std", returns_scalar=True, call_kwargs={"ddof": ddof}) + + def var(self, *, ddof: int) -> Self: + return self._reuse_series("var", returns_scalar=True, call_kwargs={"ddof": ddof}) + + def skew(self) -> Self: + return self._reuse_series("skew", returns_scalar=True) + + def any(self) -> Self: + return self._reuse_series("any", returns_scalar=True) + + def all(self) -> Self: + return self._reuse_series("all", returns_scalar=True) + + def max(self) -> Self: + return self._reuse_series("max", returns_scalar=True) + + def min(self) -> Self: + return self._reuse_series("min", returns_scalar=True) + + def arg_min(self) -> Self: + return self._reuse_series("arg_min", returns_scalar=True) + + def arg_max(self) -> Self: + return self._reuse_series("arg_max", returns_scalar=True) + + # Other + + def clip(self, lower_bound: Any, upper_bound: Any) -> Self: + return self._reuse_series( + "clip", lower_bound=lower_bound, upper_bound=upper_bound + ) + + def is_null(self) -> Self: + return self._reuse_series("is_null") + + def is_nan(self) -> Self: + return self._reuse_series("is_nan") + + def fill_null( + self, + value: Any | None, + strategy: Literal["forward", "backward"] | None, + limit: int | None, + ) -> Self: + return self._reuse_series( + "fill_null", value=value, strategy=strategy, limit=limit + ) + + def is_in(self, other: Any) -> Self: + return self._reuse_series("is_in", other=other) + + def arg_true(self) -> Self: + return self._reuse_series("arg_true") + + def filter(self, *predicates: Self) -> Self: + plx = self.__narwhals_namespace__() + predicate = plx.all_horizontal(*predicates) + return self._reuse_series("filter", predicate=predicate) + + def drop_nulls(self) -> Self: + return self._reuse_series("drop_nulls") + + def replace_strict( + self, + old: Sequence[Any] | Mapping[Any, Any], + new: Sequence[Any], + *, + return_dtype: DType | type[DType] | None, + ) -> Self: + return self._reuse_series( + "replace_strict", old=old, new=new, return_dtype=return_dtype + ) + + def sort(self, *, descending: bool, nulls_last: bool) -> Self: + return self._reuse_series("sort", descending=descending, nulls_last=nulls_last) + + def abs(self) -> Self: + return self._reuse_series("abs") + + def unique(self) -> Self: + return self._reuse_series("unique", maintain_order=False) + + def diff(self) -> Self: + return self._reuse_series("diff") + + def sample( + self, + n: int | None, + *, + fraction: float | None, + with_replacement: bool, + seed: int | None, + ) -> Self: + return self._reuse_series( + "sample", n=n, fraction=fraction, with_replacement=with_replacement, seed=seed + ) + + def alias(self: Self, name: str) -> Self: + def alias_output_names(names: Sequence[str]) -> Sequence[str]: + if len(names) != 1: + msg = f"Expected function with single output, found output names: {names}" + raise ValueError(msg) + return [name] + + # Define this one manually, so that we can + # override `output_names` and not increase depth + return type(self)( + lambda df: [series.alias(name) for series in self(df)], + depth=self._depth, + function_name=self._function_name, + evaluate_output_names=self._evaluate_output_names, + alias_output_names=alias_output_names, + backend_version=self._backend_version, + implementation=self._implementation, + version=self._version, + call_kwargs=self._call_kwargs, + ) + + def is_unique(self) -> Self: + return self._reuse_series("is_unique") + + def is_first_distinct(self) -> Self: + return self._reuse_series("is_first_distinct") + + def is_last_distinct(self) -> Self: + return self._reuse_series("is_last_distinct") + + def quantile( + self, + quantile: float, + interpolation: Literal["nearest", "higher", "lower", "midpoint", "linear"], + ) -> Self: + return self._reuse_series( + "quantile", + quantile=quantile, + interpolation=interpolation, + returns_scalar=True, + ) + + def head(self, n: int) -> Self: + return self._reuse_series("head", n=n) + + def tail(self, n: int) -> Self: + return self._reuse_series("tail", n=n) + + def round(self, decimals: int) -> Self: + return self._reuse_series("round", decimals=decimals) + + def len(self) -> Self: + return self._reuse_series("len", returns_scalar=True) + + def gather_every(self, n: int, offset: int) -> Self: + return self._reuse_series("gather_every", n=n, offset=offset) + + def mode(self) -> Self: + return self._reuse_series("mode") + + def is_finite(self) -> Self: + return self._reuse_series("is_finite") + + def rolling_mean( + self, window_size: int, *, min_samples: int | None, center: bool + ) -> Self: + return self._reuse_series( + "rolling_mean", + window_size=window_size, + min_samples=min_samples, + center=center, + ) + + def rolling_std( + self, window_size: int, *, min_samples: int | None, center: bool, ddof: int + ) -> Self: + return self._reuse_series( + "rolling_std", + window_size=window_size, + min_samples=min_samples, + center=center, + ddof=ddof, + ) + + def rolling_sum(self, window_size: int, *, min_samples: int, center: bool) -> Self: + return self._reuse_series( + "rolling_sum", window_size=window_size, min_samples=min_samples, center=center + ) + + def rolling_var( + self, window_size: int, *, min_samples: int | None, center: bool, ddof: int + ) -> Self: + return self._reuse_series( + "rolling_var", + window_size=window_size, + min_samples=min_samples, + center=center, + ddof=ddof, + ) + + @property + def cat(self) -> EagerExprCatNamespace[Self]: + return EagerExprCatNamespace(self) + + @property + def dt(self) -> EagerExprDateTimeNamespace[Self]: + return EagerExprDateTimeNamespace(self) + + @property + def list(self) -> EagerExprListNamespace[Self]: + return EagerExprListNamespace(self) + + @property + def name(self) -> EagerExprNameNamespace[Self]: + return EagerExprNameNamespace(self) + + @property + def str(self) -> EagerExprStringNamespace[Self]: + return EagerExprStringNamespace(self) + + @property + def struct(self) -> EagerExprStructNamespace[Self]: + return EagerExprStructNamespace(self) + + +class LazyExpr( + CompliantExpr[CompliantLazyFrameT, NativeExprT_co], + Protocol38[CompliantLazyFrameT, NativeExprT_co], +): + arg_min: not_implemented = not_implemented() + arg_max: not_implemented = not_implemented() + arg_true: not_implemented = not_implemented() + head: not_implemented = not_implemented() + tail: not_implemented = not_implemented() + mode: not_implemented = not_implemented() + sort: not_implemented = not_implemented() + rank: not_implemented = not_implemented() + sample: not_implemented = not_implemented() + map_batches: not_implemented = not_implemented() + ewm_mean: not_implemented = not_implemented() + rolling_mean: not_implemented = not_implemented() + rolling_var: not_implemented = not_implemented() + rolling_std: not_implemented = not_implemented() + gather_every: not_implemented = not_implemented() + replace_strict: not_implemented = not_implemented() + cat: not_implemented = not_implemented() # pyright: ignore[reportAssignmentType] + + +class EagerExprNamespace(_ExprNamespace[EagerExprT], Generic[EagerExprT]): + def __init__(self, expr: EagerExprT, /) -> None: + self._compliant_expr = expr + + +class EagerExprCatNamespace( + EagerExprNamespace[EagerExprT], CatNamespace[EagerExprT], Generic[EagerExprT] +): + def get_categories(self) -> EagerExprT: + return self.compliant._reuse_series_namespace("cat", "get_categories") + + +class EagerExprDateTimeNamespace( + EagerExprNamespace[EagerExprT], DateTimeNamespace[EagerExprT], Generic[EagerExprT] +): + def to_string(self, format: str) -> EagerExprT: # noqa: A002 + return self.compliant._reuse_series_namespace("dt", "to_string", format=format) + + def replace_time_zone(self, time_zone: str | None) -> EagerExprT: + return self.compliant._reuse_series_namespace( + "dt", "replace_time_zone", time_zone=time_zone + ) + + def convert_time_zone(self, time_zone: str) -> EagerExprT: + return self.compliant._reuse_series_namespace( + "dt", "convert_time_zone", time_zone=time_zone + ) + + def timestamp(self, time_unit: TimeUnit) -> EagerExprT: + return self.compliant._reuse_series_namespace( + "dt", "timestamp", time_unit=time_unit + ) + + def date(self) -> EagerExprT: + return self.compliant._reuse_series_namespace("dt", "date") + + def year(self) -> EagerExprT: + return self.compliant._reuse_series_namespace("dt", "year") + + def month(self) -> EagerExprT: + return self.compliant._reuse_series_namespace("dt", "month") + + def day(self) -> EagerExprT: + return self.compliant._reuse_series_namespace("dt", "day") + + def hour(self) -> EagerExprT: + return self.compliant._reuse_series_namespace("dt", "hour") + + def minute(self) -> EagerExprT: + return self.compliant._reuse_series_namespace("dt", "minute") + + def second(self) -> EagerExprT: + return self.compliant._reuse_series_namespace("dt", "second") + + def millisecond(self) -> EagerExprT: + return self.compliant._reuse_series_namespace("dt", "millisecond") + + def microsecond(self) -> EagerExprT: + return self.compliant._reuse_series_namespace("dt", "microsecond") + + def nanosecond(self) -> EagerExprT: + return self.compliant._reuse_series_namespace("dt", "nanosecond") + + def ordinal_day(self) -> EagerExprT: + return self.compliant._reuse_series_namespace("dt", "ordinal_day") + + def weekday(self) -> EagerExprT: + return self.compliant._reuse_series_namespace("dt", "weekday") + + def total_minutes(self) -> EagerExprT: + return self.compliant._reuse_series_namespace("dt", "total_minutes") + + def total_seconds(self) -> EagerExprT: + return self.compliant._reuse_series_namespace("dt", "total_seconds") + + def total_milliseconds(self) -> EagerExprT: + return self.compliant._reuse_series_namespace("dt", "total_milliseconds") + + def total_microseconds(self) -> EagerExprT: + return self.compliant._reuse_series_namespace("dt", "total_microseconds") + + def total_nanoseconds(self) -> EagerExprT: + return self.compliant._reuse_series_namespace("dt", "total_nanoseconds") + + +class EagerExprListNamespace( + EagerExprNamespace[EagerExprT], ListNamespace[EagerExprT], Generic[EagerExprT] +): + def len(self) -> EagerExprT: + return self.compliant._reuse_series_namespace("list", "len") + + +class EagerExprNameNamespace( + EagerExprNamespace[EagerExprT], NameNamespace[EagerExprT], Generic[EagerExprT] +): + def keep(self) -> EagerExprT: + return self._from_callable(lambda name: name, alias=False) + + def map(self, function: AliasName) -> EagerExprT: + return self._from_callable(function) + + def prefix(self, prefix: str) -> EagerExprT: + return self._from_callable(lambda name: f"{prefix}{name}") + + def suffix(self, suffix: str) -> EagerExprT: + return self._from_callable(lambda name: f"{name}{suffix}") + + def to_lowercase(self) -> EagerExprT: + return self._from_callable(str.lower) + + def to_uppercase(self) -> EagerExprT: + return self._from_callable(str.upper) + + @staticmethod + def _alias_output_names(func: AliasName, /) -> AliasNames: + def fn(output_names: Sequence[str], /) -> Sequence[str]: + return [func(name) for name in output_names] + + return fn + + def _from_callable(self, func: AliasName, /, *, alias: bool = True) -> EagerExprT: + expr = self.compliant + return type(expr)( + lambda df: [ + series.alias(func(name)) + for series, name in zip(expr(df), expr._evaluate_output_names(df)) + ], + depth=expr._depth, + function_name=expr._function_name, + evaluate_output_names=expr._evaluate_output_names, + alias_output_names=self._alias_output_names(func) if alias else None, + backend_version=expr._backend_version, + implementation=expr._implementation, + version=expr._version, + call_kwargs=expr._call_kwargs, + ) + + +class EagerExprStringNamespace( + EagerExprNamespace[EagerExprT], StringNamespace[EagerExprT], Generic[EagerExprT] +): + def len_chars(self) -> EagerExprT: + return self.compliant._reuse_series_namespace("str", "len_chars") + + def replace(self, pattern: str, value: str, *, literal: bool, n: int) -> EagerExprT: + return self.compliant._reuse_series_namespace( + "str", "replace", pattern=pattern, value=value, literal=literal, n=n + ) + + def replace_all(self, pattern: str, value: str, *, literal: bool) -> EagerExprT: + return self.compliant._reuse_series_namespace( + "str", "replace_all", pattern=pattern, value=value, literal=literal + ) + + def strip_chars(self, characters: str | None) -> EagerExprT: + return self.compliant._reuse_series_namespace( + "str", "strip_chars", characters=characters + ) + + def starts_with(self, prefix: str) -> EagerExprT: + return self.compliant._reuse_series_namespace("str", "starts_with", prefix=prefix) + + def ends_with(self, suffix: str) -> EagerExprT: + return self.compliant._reuse_series_namespace("str", "ends_with", suffix=suffix) + + def contains(self, pattern: str, *, literal: bool) -> EagerExprT: + return self.compliant._reuse_series_namespace( + "str", "contains", pattern=pattern, literal=literal + ) + + def slice(self, offset: int, length: int | None) -> EagerExprT: + return self.compliant._reuse_series_namespace( + "str", "slice", offset=offset, length=length + ) + + def split(self, by: str) -> EagerExprT: + return self.compliant._reuse_series_namespace("str", "split", by=by) + + def to_datetime(self, format: str | None) -> EagerExprT: # noqa: A002 + return self.compliant._reuse_series_namespace("str", "to_datetime", format=format) + + def to_lowercase(self) -> EagerExprT: + return self.compliant._reuse_series_namespace("str", "to_lowercase") + + def to_uppercase(self) -> EagerExprT: + return self.compliant._reuse_series_namespace("str", "to_uppercase") + + +class EagerExprStructNamespace( + EagerExprNamespace[EagerExprT], StructNamespace[EagerExprT], Generic[EagerExprT] +): + def field(self, name: str) -> EagerExprT: + return self.compliant._reuse_series_namespace("struct", "field", name=name).alias( + name + ) diff --git a/narwhals/_compliant/namespace.py b/narwhals/_compliant/namespace.py new file mode 100644 index 0000000000..688f2770c2 --- /dev/null +++ b/narwhals/_compliant/namespace.py @@ -0,0 +1,51 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING +from typing import Any +from typing import Protocol + +from narwhals._compliant.typing import CompliantFrameT +from narwhals._compliant.typing import CompliantSeriesOrNativeExprT_co +from narwhals._compliant.typing import EagerDataFrameT +from narwhals._compliant.typing import EagerExprT +from narwhals._compliant.typing import EagerSeriesT_co +from narwhals.utils import deprecated + +if TYPE_CHECKING: + from narwhals._compliant.expr import CompliantExpr + from narwhals._compliant.selectors import CompliantSelectorNamespace + from narwhals.dtypes import DType + +__all__ = ["CompliantNamespace", "EagerNamespace"] + + +class CompliantNamespace(Protocol[CompliantFrameT, CompliantSeriesOrNativeExprT_co]): + def col( + self, *column_names: str + ) -> CompliantExpr[CompliantFrameT, CompliantSeriesOrNativeExprT_co]: ... + def lit( + self, value: Any, dtype: DType | None + ) -> CompliantExpr[CompliantFrameT, CompliantSeriesOrNativeExprT_co]: ... + @property + def selectors(self) -> CompliantSelectorNamespace[Any, Any]: ... + + +class EagerNamespace( + CompliantNamespace[EagerDataFrameT, EagerSeriesT_co], + Protocol[EagerDataFrameT, EagerSeriesT_co, EagerExprT], +): + @property + def _expr(self) -> type[EagerExprT]: ... + @property + def _series(self) -> type[EagerSeriesT_co]: ... + def all_horizontal(self, *exprs: EagerExprT) -> EagerExprT: ... + + @deprecated( + "Internally used for `numpy.ndarray` -> `CompliantSeries`\n" + "Also referenced in untyped `nw.dataframe.DataFrame._extract_compliant`\n" + "See Also:\n" + " - https://github.com/narwhals-dev/narwhals/pull/2149#discussion_r1986283345\n" + " - https://github.com/narwhals-dev/narwhals/issues/2116\n" + " - https://github.com/narwhals-dev/narwhals/pull/2169" + ) + def _create_compliant_series(self, value: Any) -> EagerSeriesT_co: ... diff --git a/narwhals/_selectors.py b/narwhals/_compliant/selectors.py similarity index 69% rename from narwhals/_selectors.py rename to narwhals/_compliant/selectors.py index 639b0eb740..969d4f460c 100644 --- a/narwhals/_selectors.py +++ b/narwhals/_compliant/selectors.py @@ -14,7 +14,7 @@ from typing import TypeVar from typing import overload -from narwhals.typing import CompliantExpr +from narwhals._compliant.expr import CompliantExpr from narwhals.utils import _parse_time_unit_and_time_zone from narwhals.utils import dtype_matches_time_unit_and_time_zone from narwhals.utils import get_column_names @@ -43,59 +43,65 @@ from typing_extensions import TypeAlias from typing_extensions import TypeIs + from narwhals._compliant.dataframe import CompliantDataFrame + from narwhals._compliant.dataframe import CompliantLazyFrame + from narwhals._compliant.expr import NativeExpr + from narwhals._compliant.series import CompliantSeries from narwhals.dtypes import DType - from narwhals.typing import CompliantDataFrame - from narwhals.typing import CompliantLazyFrame - from narwhals.typing import CompliantSeries from narwhals.typing import TimeUnit from narwhals.utils import Implementation from narwhals.utils import Version +__all__ = [ + "CompliantSelector", + "CompliantSelectorNamespace", + "EagerSelectorNamespace", + "EvalNames", + "EvalSeries", + "LazySelectorNamespace", +] + +SeriesOrExprT = TypeVar("SeriesOrExprT", bound="CompliantSeries | NativeExpr") SeriesT = TypeVar("SeriesT", bound="CompliantSeries") +ExprT = TypeVar("ExprT", bound="NativeExpr") FrameT = TypeVar("FrameT", bound="CompliantDataFrame[Any] | CompliantLazyFrame") DataFrameT = TypeVar("DataFrameT", bound="CompliantDataFrame[Any]") LazyFrameT = TypeVar("LazyFrameT", bound="CompliantLazyFrame") SelectorOrExpr: TypeAlias = ( - "CompliantSelector[FrameT, SeriesT] | CompliantExpr[FrameT, SeriesT]" + "CompliantSelector[FrameT, SeriesOrExprT] | CompliantExpr[FrameT, SeriesOrExprT]" ) -EvalSeries: TypeAlias = Callable[[FrameT], Sequence[SeriesT]] +EvalSeries: TypeAlias = Callable[[FrameT], Sequence[SeriesOrExprT]] EvalNames: TypeAlias = Callable[[FrameT], Sequence[str]] -class CompliantSelectorNamespace(Protocol[FrameT, SeriesT]): +class CompliantSelectorNamespace(Protocol[FrameT, SeriesOrExprT]): _implementation: Implementation _backend_version: tuple[int, ...] _version: Version def _selector( self, - call: EvalSeries[FrameT, SeriesT], + call: EvalSeries[FrameT, SeriesOrExprT], evaluate_output_names: EvalNames[FrameT], /, - ) -> CompliantSelector[FrameT, SeriesT]: ... + ) -> CompliantSelector[FrameT, SeriesOrExprT]: ... - def _iter_columns(self, df: FrameT, /) -> Iterator[SeriesT]: ... + def _iter_columns(self, df: FrameT, /) -> Iterator[SeriesOrExprT]: ... - def _iter_schema(self, df: FrameT, /) -> Iterator[tuple[str, DType]]: - for ser in self._iter_columns(df): - yield ser.name, ser.dtype + def _iter_schema(self, df: FrameT, /) -> Iterator[tuple[str, DType]]: ... - def _iter_columns_dtypes(self, df: FrameT, /) -> Iterator[tuple[SeriesT, DType]]: - # NOTE: Defined to be overridden for lazy - # - Their `SeriesT` is a **native** object - # - `.dtype` won't return a `nw.DType` (or maybe anything) for lazy backends - # - See (https://github.com/narwhals-dev/narwhals/issues/2044) - for ser in self._iter_columns(df): - yield ser, ser.dtype + def _iter_columns_dtypes( + self, df: FrameT, / + ) -> Iterator[tuple[SeriesOrExprT, DType]]: ... - def _iter_columns_names(self, df: FrameT, /) -> Iterator[tuple[SeriesT, str]]: + def _iter_columns_names(self, df: FrameT, /) -> Iterator[tuple[SeriesOrExprT, str]]: yield from zip(self._iter_columns(df), df.columns) def _is_dtype( - self: CompliantSelectorNamespace[FrameT, SeriesT], dtype: type[DType], / - ) -> CompliantSelector[FrameT, SeriesT]: - def series(df: FrameT) -> Sequence[SeriesT]: + self: CompliantSelectorNamespace[FrameT, SeriesOrExprT], dtype: type[DType], / + ) -> CompliantSelector[FrameT, SeriesOrExprT]: + def series(df: FrameT) -> Sequence[SeriesOrExprT]: return [ ser for ser, tp in self._iter_columns_dtypes(df) if isinstance(tp, dtype) ] @@ -107,8 +113,8 @@ def names(df: FrameT) -> Sequence[str]: def by_dtype( self: Self, dtypes: Collection[DType | type[DType]] - ) -> CompliantSelector[FrameT, SeriesT]: - def series(df: FrameT) -> Sequence[SeriesT]: + ) -> CompliantSelector[FrameT, SeriesOrExprT]: + def series(df: FrameT) -> Sequence[SeriesOrExprT]: return [ser for ser, tp in self._iter_columns_dtypes(df) if tp in dtypes] def names(df: FrameT) -> Sequence[str]: @@ -116,10 +122,10 @@ def names(df: FrameT) -> Sequence[str]: return self._selector(series, names) - def matches(self: Self, pattern: str) -> CompliantSelector[FrameT, SeriesT]: + def matches(self: Self, pattern: str) -> CompliantSelector[FrameT, SeriesOrExprT]: p = re.compile(pattern) - def series(df: FrameT) -> Sequence[SeriesT]: + def series(df: FrameT) -> Sequence[SeriesOrExprT]: if is_compliant_dataframe(df) and not self._implementation.is_duckdb(): return [df.get_column(col) for col in df.columns if p.search(col)] @@ -130,8 +136,8 @@ def names(df: FrameT) -> Sequence[str]: return self._selector(series, names) - def numeric(self: Self) -> CompliantSelector[FrameT, SeriesT]: - def series(df: FrameT) -> Sequence[SeriesT]: + def numeric(self: Self) -> CompliantSelector[FrameT, SeriesOrExprT]: + def series(df: FrameT) -> Sequence[SeriesOrExprT]: return [ser for ser, tp in self._iter_columns_dtypes(df) if tp.is_numeric()] def names(df: FrameT) -> Sequence[str]: @@ -139,17 +145,17 @@ def names(df: FrameT) -> Sequence[str]: return self._selector(series, names) - def categorical(self: Self) -> CompliantSelector[FrameT, SeriesT]: + def categorical(self: Self) -> CompliantSelector[FrameT, SeriesOrExprT]: return self._is_dtype(import_dtypes_module(self._version).Categorical) - def string(self: Self) -> CompliantSelector[FrameT, SeriesT]: + def string(self: Self) -> CompliantSelector[FrameT, SeriesOrExprT]: return self._is_dtype(import_dtypes_module(self._version).String) - def boolean(self: Self) -> CompliantSelector[FrameT, SeriesT]: + def boolean(self: Self) -> CompliantSelector[FrameT, SeriesOrExprT]: return self._is_dtype(import_dtypes_module(self._version).Boolean) - def all(self: Self) -> CompliantSelector[FrameT, SeriesT]: - def series(df: FrameT) -> Sequence[SeriesT]: + def all(self: Self) -> CompliantSelector[FrameT, SeriesOrExprT]: + def series(df: FrameT) -> Sequence[SeriesOrExprT]: return list(self._iter_columns(df)) return self._selector(series, get_column_names) @@ -158,7 +164,7 @@ def datetime( self: Self, time_unit: TimeUnit | Iterable[TimeUnit] | None, time_zone: str | timezone | Iterable[str | timezone | None] | None, - ) -> CompliantSelector[FrameT, SeriesT]: + ) -> CompliantSelector[FrameT, SeriesOrExprT]: time_units, time_zones = _parse_time_unit_and_time_zone(time_unit, time_zone) matches = partial( dtype_matches_time_unit_and_time_zone, @@ -167,7 +173,7 @@ def datetime( time_zones=time_zones, ) - def series(df: FrameT) -> Sequence[SeriesT]: + def series(df: FrameT) -> Sequence[SeriesOrExprT]: return [ser for ser, tp in self._iter_columns_dtypes(df) if matches(tp)] def names(df: FrameT) -> Sequence[str]: @@ -177,49 +183,61 @@ def names(df: FrameT) -> Sequence[str]: class EagerSelectorNamespace( - CompliantSelectorNamespace[DataFrameT, SeriesT], Protocol[DataFrameT, SeriesT] + CompliantSelectorNamespace[DataFrameT, SeriesT], + Protocol[DataFrameT, SeriesT], ): + def _iter_schema(self, df: DataFrameT, /) -> Iterator[tuple[str, DType]]: + for ser in self._iter_columns(df): + yield ser.name, ser.dtype + def _iter_columns(self, df: DataFrameT, /) -> Iterator[SeriesT]: yield from df.iter_columns() + def _iter_columns_dtypes(self, df: DataFrameT, /) -> Iterator[tuple[SeriesT, DType]]: + for ser in self._iter_columns(df): + yield ser, ser.dtype + class LazySelectorNamespace( - CompliantSelectorNamespace[LazyFrameT, SeriesT], Protocol[LazyFrameT, SeriesT] + CompliantSelectorNamespace[LazyFrameT, ExprT], + Protocol[LazyFrameT, ExprT], ): def _iter_schema(self, df: LazyFrameT) -> Iterator[tuple[str, DType]]: yield from df.schema.items() - def _iter_columns(self, df: LazyFrameT) -> Iterator[SeriesT]: + def _iter_columns(self, df: LazyFrameT) -> Iterator[ExprT]: yield from df._iter_columns() - def _iter_columns_dtypes(self, df: LazyFrameT, /) -> Iterator[tuple[SeriesT, DType]]: + def _iter_columns_dtypes(self, df: LazyFrameT, /) -> Iterator[tuple[ExprT, DType]]: yield from zip(self._iter_columns(df), df.schema.values()) -class CompliantSelector(CompliantExpr[FrameT, SeriesT], Protocol[FrameT, SeriesT]): +class CompliantSelector( + CompliantExpr[FrameT, SeriesOrExprT], Protocol[FrameT, SeriesOrExprT] +): @property - def selectors(self) -> CompliantSelectorNamespace[FrameT, SeriesT]: + def selectors(self) -> CompliantSelectorNamespace[FrameT, SeriesOrExprT]: return self.__narwhals_namespace__().selectors - def _to_expr(self: Self) -> CompliantExpr[FrameT, SeriesT]: ... + def _to_expr(self: Self) -> CompliantExpr[FrameT, SeriesOrExprT]: ... def _is_selector( - self: Self, other: Self | CompliantExpr[FrameT, SeriesT] - ) -> TypeIs[CompliantSelector[FrameT, SeriesT]]: + self: Self, other: Self | CompliantExpr[FrameT, SeriesOrExprT] + ) -> TypeIs[CompliantSelector[FrameT, SeriesOrExprT]]: return isinstance(other, type(self)) @overload def __sub__(self: Self, other: Self) -> Self: ... @overload def __sub__( - self: Self, other: CompliantExpr[FrameT, SeriesT] - ) -> CompliantExpr[FrameT, SeriesT]: ... + self: Self, other: CompliantExpr[FrameT, SeriesOrExprT] + ) -> CompliantExpr[FrameT, SeriesOrExprT]: ... def __sub__( - self: Self, other: SelectorOrExpr[FrameT, SeriesT] - ) -> SelectorOrExpr[FrameT, SeriesT]: + self: Self, other: SelectorOrExpr[FrameT, SeriesOrExprT] + ) -> SelectorOrExpr[FrameT, SeriesOrExprT]: if self._is_selector(other): - def series(df: FrameT) -> Sequence[SeriesT]: + def series(df: FrameT) -> Sequence[SeriesOrExprT]: lhs_names, rhs_names = _eval_lhs_rhs(df, self, other) return [ x for x, name in zip(self(df), lhs_names) if name not in rhs_names @@ -237,14 +255,14 @@ def names(df: FrameT) -> Sequence[str]: def __or__(self: Self, other: Self) -> Self: ... @overload def __or__( - self: Self, other: CompliantExpr[FrameT, SeriesT] - ) -> CompliantExpr[FrameT, SeriesT]: ... + self: Self, other: CompliantExpr[FrameT, SeriesOrExprT] + ) -> CompliantExpr[FrameT, SeriesOrExprT]: ... def __or__( - self: Self, other: SelectorOrExpr[FrameT, SeriesT] - ) -> SelectorOrExpr[FrameT, SeriesT]: + self: Self, other: SelectorOrExpr[FrameT, SeriesOrExprT] + ) -> SelectorOrExpr[FrameT, SeriesOrExprT]: if self._is_selector(other): - def names(df: FrameT) -> Sequence[SeriesT]: + def names(df: FrameT) -> Sequence[SeriesOrExprT]: lhs_names, rhs_names = _eval_lhs_rhs(df, self, other) return [ *(x for x, name in zip(self(df), lhs_names) if name not in rhs_names), @@ -263,14 +281,14 @@ def series(df: FrameT) -> Sequence[str]: def __and__(self: Self, other: Self) -> Self: ... @overload def __and__( - self: Self, other: CompliantExpr[FrameT, SeriesT] - ) -> CompliantExpr[FrameT, SeriesT]: ... + self: Self, other: CompliantExpr[FrameT, SeriesOrExprT] + ) -> CompliantExpr[FrameT, SeriesOrExprT]: ... def __and__( - self: Self, other: SelectorOrExpr[FrameT, SeriesT] - ) -> SelectorOrExpr[FrameT, SeriesT]: + self: Self, other: SelectorOrExpr[FrameT, SeriesOrExprT] + ) -> SelectorOrExpr[FrameT, SeriesOrExprT]: if self._is_selector(other): - def series(df: FrameT) -> Sequence[SeriesT]: + def series(df: FrameT) -> Sequence[SeriesOrExprT]: lhs_names, rhs_names = _eval_lhs_rhs(df, self, other) return [x for x, name in zip(self(df), lhs_names) if name in rhs_names] @@ -282,7 +300,7 @@ def names(df: FrameT) -> Sequence[str]: else: return self._to_expr() & other - def __invert__(self: Self) -> CompliantSelector[FrameT, SeriesT]: + def __invert__(self: Self) -> CompliantSelector[FrameT, SeriesOrExprT]: return self.selectors.all() - self # type: ignore[no-any-return] def __repr__(self: Self) -> str: # pragma: no cover diff --git a/narwhals/_compliant/series.py b/narwhals/_compliant/series.py new file mode 100644 index 0000000000..e6be377046 --- /dev/null +++ b/narwhals/_compliant/series.py @@ -0,0 +1,62 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING +from typing import Any +from typing import Iterable +from typing import Protocol +from typing import TypeVar + +if TYPE_CHECKING: + from typing_extensions import Self + + from narwhals._compliant.expr import CompliantExpr # noqa: F401 + from narwhals._compliant.expr import EagerExpr + from narwhals._compliant.namespace import CompliantNamespace # noqa: F401 + from narwhals._compliant.namespace import EagerNamespace + from narwhals.dtypes import DType + from narwhals.typing import NativeSeries + from narwhals.utils import Implementation + from narwhals.utils import Version + from narwhals.utils import _FullContext + +__all__ = ["CompliantSeries", "EagerSeries"] + +NativeSeriesT_co = TypeVar("NativeSeriesT_co", bound="NativeSeries", covariant=True) + + +class CompliantSeries(Protocol): + @property + def dtype(self) -> DType: ... + @property + def name(self) -> str: ... + @property + def native(self) -> Any: ... + def __narwhals_series__(self) -> CompliantSeries: ... + def alias(self, name: str) -> Self: ... + def __narwhals_namespace__(self) -> Any: ... # CompliantNamespace[Any, Self]: ... + def _from_native_series(self, series: Any) -> Self: ... + def _to_expr(self) -> Any: ... # CompliantExpr[Any, Self]: ... + + +class EagerSeries(CompliantSeries, Protocol[NativeSeriesT_co]): + _native_series: Any + _implementation: Implementation + _backend_version: tuple[int, ...] + _version: Version + _broadcast: bool + + @property + def native(self) -> NativeSeriesT_co: ... + + def _from_scalar(self, value: Any) -> Self: + return self._from_iterable([value], name=self.name, context=self) + + @classmethod + def _from_iterable( + cls: type[Self], data: Iterable[Any], name: str, *, context: _FullContext + ) -> Self: ... + + def __narwhals_namespace__(self) -> EagerNamespace[Any, Self, Any]: ... + + def _to_expr(self) -> EagerExpr[Any, Any]: + return self.__narwhals_namespace__()._expr._from_series(self) # type: ignore[no-any-return] diff --git a/narwhals/_compliant/typing.py b/narwhals/_compliant/typing.py new file mode 100644 index 0000000000..2513097a50 --- /dev/null +++ b/narwhals/_compliant/typing.py @@ -0,0 +1,51 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING +from typing import Any +from typing import Callable +from typing import Sequence +from typing import TypeVar + +if TYPE_CHECKING: + from typing_extensions import TypeAlias + + from narwhals._compliant.dataframe import CompliantDataFrame + from narwhals._compliant.dataframe import CompliantLazyFrame + from narwhals._compliant.dataframe import EagerDataFrame + from narwhals._compliant.expr import CompliantExpr + from narwhals._compliant.expr import EagerExpr + from narwhals._compliant.expr import NativeExpr + from narwhals._compliant.series import CompliantSeries + from narwhals._compliant.series import EagerSeries + +__all__ = [ + "AliasName", + "AliasNames", + "CompliantDataFrameT", + "CompliantFrameT", + "CompliantLazyFrameT", + "CompliantSeriesT_co", + "IntoCompliantExpr", +] +NativeExprT_co = TypeVar("NativeExprT_co", bound="NativeExpr", covariant=True) +CompliantSeriesT_co = TypeVar( + "CompliantSeriesT_co", bound="CompliantSeries", covariant=True +) +CompliantSeriesOrNativeExprT_co = TypeVar( + "CompliantSeriesOrNativeExprT_co", + bound="CompliantSeries | NativeExpr", + covariant=True, +) +CompliantFrameT = TypeVar( + "CompliantFrameT", bound="CompliantDataFrame[Any] | CompliantLazyFrame" +) +CompliantDataFrameT = TypeVar("CompliantDataFrameT", bound="CompliantDataFrame[Any]") +CompliantLazyFrameT = TypeVar("CompliantLazyFrameT", bound="CompliantLazyFrame") +IntoCompliantExpr: TypeAlias = "CompliantExpr[CompliantFrameT, CompliantSeriesOrNativeExprT_co] | CompliantSeriesOrNativeExprT_co" + +EagerDataFrameT = TypeVar("EagerDataFrameT", bound="EagerDataFrame[Any]") +EagerSeriesT = TypeVar("EagerSeriesT", bound="EagerSeries[Any]") +EagerSeriesT_co = TypeVar("EagerSeriesT_co", bound="EagerSeries[Any]", covariant=True) +EagerExprT = TypeVar("EagerExprT", bound="EagerExpr[Any, Any]") +AliasNames: TypeAlias = Callable[[Sequence[str]], Sequence[str]] +AliasName: TypeAlias = Callable[[str], str] diff --git a/narwhals/_dask/expr.py b/narwhals/_dask/expr.py index 82ed345ce8..a5f673f9d1 100644 --- a/narwhals/_dask/expr.py +++ b/narwhals/_dask/expr.py @@ -8,6 +8,7 @@ from typing import Literal from typing import Sequence +from narwhals._compliant import LazyExpr from narwhals._dask.expr_dt import DaskExprDateTimeNamespace from narwhals._dask.expr_name import DaskExprNameNamespace from narwhals._dask.expr_str import DaskExprStringNamespace @@ -20,7 +21,6 @@ from narwhals._pandas_like.utils import native_to_narwhals_dtype from narwhals.exceptions import ColumnNotFoundError from narwhals.exceptions import InvalidOperationError -from narwhals.typing import CompliantExpr from narwhals.utils import Implementation from narwhals.utils import generate_temporary_column_name from narwhals.utils import not_implemented @@ -41,7 +41,7 @@ from narwhals.utils import Version -class DaskExpr(CompliantExpr["DaskLazyFrame", "dx.Series"]): # pyright: ignore[reportInvalidTypeArguments] (#2044) +class DaskExpr(LazyExpr["DaskLazyFrame", "dx.Series"]): _implementation: Implementation = Implementation.DASK def __init__( @@ -633,23 +633,5 @@ def dt(self: Self) -> DaskExprDateTimeNamespace: def name(self: Self) -> DaskExprNameNamespace: return DaskExprNameNamespace(self) - arg_min = not_implemented() - arg_max = not_implemented() - arg_true = not_implemented() - head = not_implemented() - tail = not_implemented() - mode = not_implemented() - sort = not_implemented() - rank = not_implemented() - sample = not_implemented() - map_batches = not_implemented() - ewm_mean = not_implemented() - rolling_mean = not_implemented() - rolling_var = not_implemented() - rolling_std = not_implemented() - gather_every = not_implemented() - replace_strict = not_implemented() - - cat = not_implemented() # pyright: ignore[reportAssignmentType] list = not_implemented() # pyright: ignore[reportAssignmentType] struct = not_implemented() # pyright: ignore[reportAssignmentType] diff --git a/narwhals/_dask/namespace.py b/narwhals/_dask/namespace.py index eddbe7925f..10744d6bb0 100644 --- a/narwhals/_dask/namespace.py +++ b/narwhals/_dask/namespace.py @@ -14,6 +14,7 @@ import dask.dataframe as dd import pandas as pd +from narwhals._compliant import CompliantNamespace from narwhals._dask.dataframe import DaskLazyFrame from narwhals._dask.expr import DaskExpr from narwhals._dask.selectors import DaskSelectorNamespace @@ -24,7 +25,6 @@ from narwhals._dask.utils import validate_comparand from narwhals._expression_parsing import combine_alias_output_names from narwhals._expression_parsing import combine_evaluate_output_names -from narwhals.typing import CompliantNamespace from narwhals.utils import Implementation from narwhals.utils import exclude_column_names from narwhals.utils import get_column_names @@ -42,7 +42,7 @@ import dask_expr as dx -class DaskNamespace(CompliantNamespace[DaskLazyFrame, "dx.Series"]): # pyright: ignore[reportInvalidTypeArguments] (#2044) +class DaskNamespace(CompliantNamespace[DaskLazyFrame, "dx.Series"]): _implementation: Implementation = Implementation.DASK @property diff --git a/narwhals/_dask/selectors.py b/narwhals/_dask/selectors.py index 9533721d49..1d3b311352 100644 --- a/narwhals/_dask/selectors.py +++ b/narwhals/_dask/selectors.py @@ -2,9 +2,9 @@ from typing import TYPE_CHECKING +from narwhals._compliant import CompliantSelector +from narwhals._compliant import LazySelectorNamespace from narwhals._dask.expr import DaskExpr -from narwhals._selectors import CompliantSelector -from narwhals._selectors import LazySelectorNamespace if TYPE_CHECKING: try: @@ -14,9 +14,9 @@ from typing_extensions import Self + from narwhals._compliant import EvalNames + from narwhals._compliant import EvalSeries from narwhals._dask.dataframe import DaskLazyFrame - from narwhals._selectors import EvalNames - from narwhals._selectors import EvalSeries from narwhals.utils import _FullContext diff --git a/narwhals/_duckdb/expr.py b/narwhals/_duckdb/expr.py index 6a6104f893..6dc21b8ffc 100644 --- a/narwhals/_duckdb/expr.py +++ b/narwhals/_duckdb/expr.py @@ -14,6 +14,7 @@ from duckdb import FunctionExpression from duckdb.typing import DuckDBPyType +from narwhals._compliant import LazyExpr from narwhals._duckdb.expr_dt import DuckDBExprDateTimeNamespace from narwhals._duckdb.expr_list import DuckDBExprListNamespace from narwhals._duckdb.expr_name import DuckDBExprNameNamespace @@ -23,7 +24,6 @@ from narwhals._duckdb.utils import maybe_evaluate_expr from narwhals._duckdb.utils import narwhals_to_native_dtype from narwhals._expression_parsing import ExprKind -from narwhals.typing import CompliantExpr from narwhals.utils import Implementation from narwhals.utils import not_implemented @@ -37,7 +37,7 @@ from narwhals.utils import Version -class DuckDBExpr(CompliantExpr["DuckDBLazyFrame", "duckdb.Expression"]): # type: ignore[type-var] +class DuckDBExpr(LazyExpr["DuckDBLazyFrame", "duckdb.Expression"]): _implementation = Implementation.DUCKDB _depth = 0 # Unused, just for compatibility with CompliantExpr @@ -489,22 +489,6 @@ def list(self: Self) -> DuckDBExprListNamespace: def struct(self: Self) -> DuckDBExprStructNamespace: return DuckDBExprStructNamespace(self) - arg_min = not_implemented() - arg_max = not_implemented() - arg_true = not_implemented() - head = not_implemented() - tail = not_implemented() - mode = not_implemented() - sort = not_implemented() - rank = not_implemented() - sample = not_implemented() - map_batches = not_implemented() - ewm_mean = not_implemented() - rolling_sum = not_implemented() - rolling_mean = not_implemented() - rolling_var = not_implemented() - rolling_std = not_implemented() - gather_every = not_implemented() drop_nulls = not_implemented() diff = not_implemented() unique = not_implemented() @@ -517,7 +501,5 @@ def struct(self: Self) -> DuckDBExprStructNamespace: cum_min = not_implemented() cum_max = not_implemented() cum_prod = not_implemented() - replace_strict = not_implemented() over = not_implemented() - - cat = not_implemented() # pyright: ignore[reportAssignmentType] + rolling_sum = not_implemented() diff --git a/narwhals/_duckdb/namespace.py b/narwhals/_duckdb/namespace.py index f8780fbc44..f245c74e27 100644 --- a/narwhals/_duckdb/namespace.py +++ b/narwhals/_duckdb/namespace.py @@ -16,6 +16,7 @@ from duckdb.typing import BIGINT from duckdb.typing import VARCHAR +from narwhals._compliant import CompliantNamespace from narwhals._duckdb.expr import DuckDBExpr from narwhals._duckdb.selectors import DuckDBSelectorNamespace from narwhals._duckdb.utils import lit @@ -23,7 +24,6 @@ from narwhals._duckdb.utils import narwhals_to_native_dtype from narwhals._expression_parsing import combine_alias_output_names from narwhals._expression_parsing import combine_evaluate_output_names -from narwhals.typing import CompliantNamespace from narwhals.utils import Implementation from narwhals.utils import exclude_column_names from narwhals.utils import get_column_names @@ -38,7 +38,7 @@ from narwhals.utils import Version -class DuckDBNamespace(CompliantNamespace["DuckDBLazyFrame", "duckdb.Expression"]): # type: ignore[type-var] +class DuckDBNamespace(CompliantNamespace["DuckDBLazyFrame", "duckdb.Expression"]): _implementation: Implementation = Implementation.DUCKDB def __init__( diff --git a/narwhals/_duckdb/selectors.py b/narwhals/_duckdb/selectors.py index 0e54fd3c76..77b5bc2d15 100644 --- a/narwhals/_duckdb/selectors.py +++ b/narwhals/_duckdb/selectors.py @@ -2,26 +2,26 @@ from typing import TYPE_CHECKING +from narwhals._compliant import CompliantSelector +from narwhals._compliant import LazySelectorNamespace from narwhals._duckdb.expr import DuckDBExpr -from narwhals._selectors import CompliantSelector -from narwhals._selectors import LazySelectorNamespace if TYPE_CHECKING: import duckdb from typing_extensions import Self + from narwhals._compliant import EvalNames + from narwhals._compliant import EvalSeries from narwhals._duckdb.dataframe import DuckDBLazyFrame - from narwhals._selectors import EvalNames - from narwhals._selectors import EvalSeries from narwhals.utils import _FullContext class DuckDBSelectorNamespace( - LazySelectorNamespace["DuckDBLazyFrame", "duckdb.Expression"] # type: ignore[type-var] + LazySelectorNamespace["DuckDBLazyFrame", "duckdb.Expression"] ): def _selector( self, - call: EvalSeries[DuckDBLazyFrame, duckdb.Expression], # type: ignore[type-var] + call: EvalSeries[DuckDBLazyFrame, duckdb.Expression], evaluate_output_names: EvalNames[DuckDBLazyFrame], /, ) -> DuckDBSelector: @@ -41,8 +41,7 @@ def __init__(self: Self, context: _FullContext, /) -> None: class DuckDBSelector( # type: ignore[misc] - CompliantSelector["DuckDBLazyFrame", "duckdb.Expression"], # type: ignore[type-var] - DuckDBExpr, + CompliantSelector["DuckDBLazyFrame", "duckdb.Expression"], DuckDBExpr ): def _to_expr(self: Self) -> DuckDBExpr: return DuckDBExpr( diff --git a/narwhals/_expression_parsing.py b/narwhals/_expression_parsing.py index 2f1920e18c..f5d091c4eb 100644 --- a/narwhals/_expression_parsing.py +++ b/narwhals/_expression_parsing.py @@ -12,35 +12,27 @@ from typing import Literal from typing import Sequence from typing import TypeVar -from typing import overload from narwhals.dependencies import is_narwhals_series from narwhals.dependencies import is_numpy_array from narwhals.exceptions import LengthChangingExprError from narwhals.exceptions import ShapeError -from narwhals.utils import Implementation from narwhals.utils import is_compliant_expr if TYPE_CHECKING: from typing_extensions import Never from typing_extensions import TypeIs - from narwhals._arrow.expr import ArrowExpr - from narwhals._pandas_like.expr import PandasLikeExpr + from narwhals._compliant import CompliantExpr + from narwhals._compliant import CompliantFrameT + from narwhals._compliant import CompliantNamespace + from narwhals._compliant import CompliantSeriesOrNativeExprT_co from narwhals.expr import Expr from narwhals.typing import CompliantDataFrame - from narwhals.typing import CompliantExpr - from narwhals.typing import CompliantFrameT from narwhals.typing import CompliantLazyFrame - from narwhals.typing import CompliantNamespace - from narwhals.typing import CompliantSeries - from narwhals.typing import CompliantSeriesT_co from narwhals.typing import IntoExpr from narwhals.typing import _1DArray - PandasLikeExprT = TypeVar("PandasLikeExprT", bound=PandasLikeExpr) - ArrowExprT = TypeVar("ArrowExprT", bound=ArrowExpr) - T = TypeVar("T") @@ -51,188 +43,6 @@ def is_expr(obj: Any) -> TypeIs[Expr]: return isinstance(obj, Expr) -def evaluate_into_expr( - df: CompliantFrameT, expr: CompliantExpr[CompliantFrameT, CompliantSeriesT_co] -) -> Sequence[CompliantSeriesT_co]: - """Return list of raw columns. - - This is only use for eager backends (pandas, PyArrow), where we - alias operations at each step. As a safety precaution, here we - can check that the expected result names match those we were - expecting from the various `evaluate_output_names` / `alias_output_names` - calls. Note that for PySpark / DuckDB, we are less free to liberally - set aliases whenever we want. - """ - _, aliases = evaluate_output_names_and_aliases(expr, df, []) - result = expr(df) - if list(aliases) != [s.name for s in result]: # pragma: no cover - msg = f"Safety assertion failed, expected {aliases}, got {result}" - raise AssertionError(msg) - return result - - -def evaluate_into_exprs( - df: CompliantFrameT, - /, - *exprs: CompliantExpr[CompliantFrameT, CompliantSeriesT_co], -) -> list[CompliantSeriesT_co]: - """Evaluate each expr into Series.""" - return list(chain.from_iterable(evaluate_into_expr(df, expr) for expr in exprs)) - - -@overload -def maybe_evaluate_expr( - df: CompliantFrameT, expr: CompliantExpr[CompliantFrameT, CompliantSeriesT_co] -) -> CompliantSeriesT_co: ... - - -@overload -def maybe_evaluate_expr(df: CompliantDataFrame[Any], expr: T) -> T: ... - - -def maybe_evaluate_expr( - df: Any, expr: CompliantExpr[Any, CompliantSeriesT_co] | T -) -> CompliantSeriesT_co | T: - """Evaluate `expr` if it's an expression, otherwise return it as is.""" - if is_compliant_expr(expr): - result: Sequence[CompliantSeriesT_co] = expr(df) - if len(result) > 1: - msg = "Multi-output expressions (e.g. `nw.all()` or `nw.col('a', 'b')`) are not supported in this context" - raise ValueError(msg) - return result[0] - return expr - - -@overload -def reuse_series_implementation( - expr: PandasLikeExprT, - attr: str, - *, - returns_scalar: bool = False, - **kwargs: Any, -) -> PandasLikeExprT: ... - - -@overload -def reuse_series_implementation( - expr: ArrowExprT, - attr: str, - *, - returns_scalar: bool = False, - **kwargs: Any, -) -> ArrowExprT: ... - - -def reuse_series_implementation( - expr: ArrowExprT | PandasLikeExprT, - attr: str, - *, - returns_scalar: bool = False, - call_kwargs: dict[str, Any] | None = None, - **expressifiable_args: Any, -) -> ArrowExprT | PandasLikeExprT: - """Reuse Series implementation for expression. - - If Series.foo is already defined, and we'd like Expr.foo to be the same, we can - leverage this method to do that for us. - - Arguments: - expr: expression object. - attr: name of method. - returns_scalar: whether the Series version returns a scalar. In this case, - the expression version should return a 1-row Series. - call_kwargs: non-expressifiable args which we may need to reuse in `agg` or `over`, - such as `ddof` for `std` and `var`. - expressifiable_args: keyword arguments to pass to function, which may - be expressifiable (e.g. `nw.col('a').is_between(3, nw.col('b')))`). - """ - plx = expr.__narwhals_namespace__() - - def func(df: CompliantDataFrame[Any]) -> Sequence[CompliantSeries]: - _kwargs = { - **(call_kwargs or {}), - **{ - arg_name: maybe_evaluate_expr(df, arg_value) - for arg_name, arg_value in expressifiable_args.items() - }, - } - - # For PyArrow.Series, we return Python Scalars (like Polars does) instead of PyArrow Scalars. - # However, when working with expressions, we keep everything PyArrow-native. - extra_kwargs = ( - {"_return_py_scalar": False} - if returns_scalar and expr._implementation is Implementation.PYARROW - else {} - ) - - out: list[CompliantSeries] = [ - plx._create_series_from_scalar( - getattr(series, attr)(**extra_kwargs, **_kwargs), - reference_series=series, # type: ignore[arg-type] - ) - if returns_scalar - else getattr(series, attr)(**_kwargs) - for series in expr(df) # type: ignore[arg-type] - ] - _, aliases = evaluate_output_names_and_aliases(expr, df, []) - if [s.name for s in out] != list(aliases): # pragma: no cover - msg = ( - f"Safety assertion failed, please report a bug to https://github.com/narwhals-dev/narwhals/issues\n" - f"Expression aliases: {aliases}\n" - f"Series names: {[s.name for s in out]}" - ) - raise AssertionError(msg) - return out - - return plx._create_expr_from_callable( # type: ignore[return-value] - func, # type: ignore[arg-type] - depth=expr._depth + 1, - function_name=f"{expr._function_name}->{attr}", - evaluate_output_names=expr._evaluate_output_names, # type: ignore[arg-type] - alias_output_names=expr._alias_output_names, - call_kwargs=call_kwargs, - ) - - -@overload -def reuse_series_namespace_implementation( - expr: ArrowExprT, series_namespace: str, attr: str, **kwargs: Any -) -> ArrowExprT: ... -@overload -def reuse_series_namespace_implementation( - expr: PandasLikeExprT, series_namespace: str, attr: str, **kwargs: Any -) -> PandasLikeExprT: ... -def reuse_series_namespace_implementation( - expr: ArrowExprT | PandasLikeExprT, - series_namespace: str, - attr: str, - **kwargs: Any, -) -> ArrowExprT | PandasLikeExprT: - """Reuse Series implementation for expression. - - Just like `reuse_series_implementation`, but for e.g. `Expr.dt.foo` instead - of `Expr.foo`. - - Arguments: - expr: expression object. - series_namespace: The Series namespace (e.g. `dt`, `cat`, `str`, `list`, `name`) - attr: name of method. - kwargs: keyword arguments to pass to function. - """ - plx = expr.__narwhals_namespace__() - - return plx._create_expr_from_callable( # type: ignore[return-value] - lambda df: [ - getattr(getattr(series, series_namespace), attr)(**kwargs) - for series in expr(df) # type: ignore[arg-type] - ], - depth=expr._depth + 1, - function_name=f"{expr._function_name}->{series_namespace}.{attr}", - evaluate_output_names=expr._evaluate_output_names, # type: ignore[arg-type] - alias_output_names=expr._alias_output_names, - ) - - def is_elementary_expression(expr: CompliantExpr[Any, Any]) -> bool: """Check if expr is elementary. @@ -281,20 +91,19 @@ def alias_output_names(names: Sequence[str]) -> Sequence[str]: def extract_compliant( - plx: CompliantNamespace[CompliantFrameT, CompliantSeriesT_co], + plx: CompliantNamespace[CompliantFrameT, CompliantSeriesOrNativeExprT_co], other: Any, *, str_as_lit: bool, -) -> CompliantExpr[CompliantFrameT, CompliantSeriesT_co] | object: +) -> CompliantExpr[CompliantFrameT, CompliantSeriesOrNativeExprT_co] | object: if is_expr(other): return other._to_compliant_expr(plx) if isinstance(other, str) and not str_as_lit: return plx.col(other) if is_narwhals_series(other): - return plx._create_expr_from_series(other._compliant_series) # type: ignore[attr-defined] + return other._compliant_series._to_expr() if is_numpy_array(other): - series = plx._create_compliant_series(other) # type: ignore[attr-defined] - return plx._create_expr_from_series(series) # type: ignore[attr-defined] + return plx._create_compliant_series(other)._to_expr() # type: ignore[attr-defined] return other diff --git a/narwhals/_pandas_like/dataframe.py b/narwhals/_pandas_like/dataframe.py index e958575461..d3e57bae89 100644 --- a/narwhals/_pandas_like/dataframe.py +++ b/narwhals/_pandas_like/dataframe.py @@ -10,7 +10,7 @@ import numpy as np -from narwhals._expression_parsing import evaluate_into_exprs +from narwhals._compliant import EagerDataFrame from narwhals._pandas_like.series import PANDAS_TO_NUMPY_DTYPE_MISSING from narwhals._pandas_like.series import PandasLikeSeries from narwhals._pandas_like.utils import align_series_full_broadcast @@ -26,6 +26,8 @@ from narwhals._pandas_like.utils import select_columns_by_name from narwhals.dependencies import is_numpy_array_1d from narwhals.exceptions import InvalidOperationError +from narwhals.typing import CompliantDataFrame +from narwhals.typing import CompliantLazyFrame from narwhals.utils import Implementation from narwhals.utils import check_column_exists from narwhals.utils import generate_temporary_column_name @@ -54,8 +56,6 @@ from narwhals.typing import _2DArray from narwhals.utils import Version -from narwhals.typing import CompliantDataFrame -from narwhals.typing import CompliantLazyFrame CLASSICAL_NUMPY_DTYPES: frozenset[np.dtype[Any]] = frozenset( [ @@ -83,7 +83,7 @@ ) -class PandasLikeDataFrame(CompliantDataFrame["PandasLikeSeries"], CompliantLazyFrame): +class PandasLikeDataFrame(EagerDataFrame["PandasLikeSeries"], CompliantLazyFrame): # --- not in the spec --- def __init__( self: Self, @@ -404,7 +404,7 @@ def aggregate( return self.select(*exprs) def select(self: PandasLikeDataFrame, *exprs: PandasLikeExpr) -> PandasLikeDataFrame: - new_series = evaluate_into_exprs(self, *exprs) + new_series = self._evaluate_into_exprs(*exprs) if not new_series: # return empty dataframe, like Polars does return self._from_native_frame( @@ -458,7 +458,7 @@ def filter( mask_native: pd.Series[Any] | list[bool] = predicate else: # `[0]` is safe as the predicate's expression only returns a single column - mask = evaluate_into_exprs(self, predicate)[0] + mask = self._evaluate_into_exprs(predicate)[0] mask_native = extract_dataframe_comparand(self._native_frame.index, mask) return self._from_native_frame( @@ -469,7 +469,7 @@ def with_columns( self: PandasLikeDataFrame, *exprs: PandasLikeExpr ) -> PandasLikeDataFrame: index = self._native_frame.index - new_columns = evaluate_into_exprs(self, *exprs) + new_columns = self._evaluate_into_exprs(*exprs) if not new_columns and len(self) == 0: return self diff --git a/narwhals/_pandas_like/expr.py b/narwhals/_pandas_like/expr.py index 586a129b59..bc18626778 100644 --- a/narwhals/_pandas_like/expr.py +++ b/narwhals/_pandas_like/expr.py @@ -5,25 +5,17 @@ from typing import Any from typing import Callable from typing import Literal -from typing import Mapping from typing import Sequence +from narwhals._compliant import EagerExpr from narwhals._expression_parsing import ExprKind from narwhals._expression_parsing import evaluate_output_names_and_aliases from narwhals._expression_parsing import is_elementary_expression -from narwhals._expression_parsing import reuse_series_implementation -from narwhals._pandas_like.expr_cat import PandasLikeExprCatNamespace -from narwhals._pandas_like.expr_dt import PandasLikeExprDateTimeNamespace -from narwhals._pandas_like.expr_list import PandasLikeExprListNamespace -from narwhals._pandas_like.expr_name import PandasLikeExprNameNamespace -from narwhals._pandas_like.expr_str import PandasLikeExprStringNamespace -from narwhals._pandas_like.expr_struct import PandasLikeExprStructNamespace from narwhals._pandas_like.group_by import AGGREGATIONS_TO_PANDAS_EQUIVALENT from narwhals._pandas_like.series import PandasLikeSeries from narwhals.dependencies import get_numpy from narwhals.dependencies import is_numpy_array from narwhals.exceptions import ColumnNotFoundError -from narwhals.typing import CompliantExpr from narwhals.utils import generate_temporary_column_name if TYPE_CHECKING: @@ -34,6 +26,7 @@ from narwhals.dtypes import DType from narwhals.utils import Implementation from narwhals.utils import Version + from narwhals.utils import _FullContext WINDOW_FUNCTIONS_TO_PANDAS_EQUIVALENT = { "cum_sum": "cumsum", @@ -77,7 +70,7 @@ def window_kwargs_to_pandas_equivalent( return pandas_kwargs -class PandasLikeExpr(CompliantExpr["PandasLikeDataFrame", PandasLikeSeries]): +class PandasLikeExpr(EagerExpr["PandasLikeDataFrame", PandasLikeSeries]): def __init__( self: Self, call: Callable[[PandasLikeDataFrame], Sequence[PandasLikeSeries]], @@ -101,14 +94,6 @@ def __init__( self._version = version self._call_kwargs = call_kwargs or {} - def __call__(self: Self, df: PandasLikeDataFrame) -> Sequence[PandasLikeSeries]: - return self._call(df) - - def __repr__(self) -> str: # pragma: no cover - return ( - f"PandasLikeExpr(depth={self._depth}, function_name={self._function_name}, )" - ) - def __narwhals_namespace__(self: Self) -> PandasLikeNamespace: from narwhals._pandas_like.namespace import PandasLikeNamespace @@ -118,29 +103,6 @@ def __narwhals_namespace__(self: Self) -> PandasLikeNamespace: def __narwhals_expr__(self) -> None: ... - def broadcast(self, kind: Literal[ExprKind.AGGREGATION, ExprKind.LITERAL]) -> Self: - # Make the resulting PandasLikeSeries with `_broadcast=True`. Then, - # when extracting native objects, `align_and_extract_native` will - # know what to do. - def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]: - results = [] - for result in self(df): - result._broadcast = True - results.append(result) - return results - - return self.__class__( - func, - depth=self._depth, - function_name=self._function_name, - evaluate_output_names=self._evaluate_output_names, - alias_output_names=self._alias_output_names, - backend_version=self._backend_version, - version=self._version, - implementation=self._implementation, - call_kwargs=self._call_kwargs, - ) - @classmethod def from_column_names( cls: type[Self], @@ -148,9 +110,7 @@ def from_column_names( /, *, function_name: str, - implementation: Implementation, - backend_version: tuple[int, ...], - version: Version, + context: _FullContext, ) -> Self: def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]: try: @@ -178,18 +138,14 @@ def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]: function_name=function_name, evaluate_output_names=evaluate_column_names, alias_output_names=None, - implementation=implementation, - backend_version=backend_version, - version=version, + implementation=context._implementation, + backend_version=context._backend_version, + version=context._version, ) @classmethod def from_column_indices( - cls: type[Self], - *column_indices: int, - implementation: Implementation, - backend_version: tuple[int, ...], - version: Version, + cls: type[Self], *column_indices: int, context: _FullContext ) -> Self: def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]: return [ @@ -208,161 +164,11 @@ def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]: function_name="nth", evaluate_output_names=lambda df: [df.columns[i] for i in column_indices], alias_output_names=None, - implementation=implementation, - backend_version=backend_version, - version=version, - ) - - def cast(self: Self, dtype: DType | type[DType]) -> Self: - return reuse_series_implementation(self, "cast", dtype=dtype) - - def __eq__(self: Self, other: PandasLikeExpr | Any) -> Self: # type: ignore[override] - return reuse_series_implementation(self, "__eq__", other=other) - - def __ne__(self: Self, other: PandasLikeExpr | Any) -> Self: # type: ignore[override] - return reuse_series_implementation(self, "__ne__", other=other) - - def __ge__(self: Self, other: PandasLikeExpr | Any) -> Self: - return reuse_series_implementation(self, "__ge__", other=other) - - def __gt__(self: Self, other: PandasLikeExpr | Any) -> Self: - return reuse_series_implementation(self, "__gt__", other=other) - - def __le__(self: Self, other: PandasLikeExpr | Any) -> Self: - return reuse_series_implementation(self, "__le__", other=other) - - def __lt__(self: Self, other: PandasLikeExpr | Any) -> Self: - return reuse_series_implementation(self, "__lt__", other=other) - - def __and__(self: Self, other: PandasLikeExpr | bool | Any) -> Self: - return reuse_series_implementation(self, "__and__", other=other) - - def __or__(self: Self, other: PandasLikeExpr | bool | Any) -> Self: - return reuse_series_implementation(self, "__or__", other=other) - - def __add__(self: Self, other: PandasLikeExpr | Any) -> Self: - return reuse_series_implementation(self, "__add__", other=other) - - def __sub__(self: Self, other: PandasLikeExpr | Any) -> Self: - return reuse_series_implementation(self, "__sub__", other=other) - - def __rsub__(self: Self, other: PandasLikeExpr | Any) -> Self: - return reuse_series_implementation(self.alias("literal"), "__rsub__", other=other) - - def __mul__(self: Self, other: PandasLikeExpr | Any) -> Self: - return reuse_series_implementation(self, "__mul__", other=other) - - def __truediv__(self: Self, other: PandasLikeExpr | Any) -> Self: - return reuse_series_implementation(self, "__truediv__", other=other) - - def __rtruediv__(self: Self, other: PandasLikeExpr | Any) -> Self: - return reuse_series_implementation( - self.alias("literal"), "__rtruediv__", other=other + implementation=context._implementation, + backend_version=context._backend_version, + version=context._version, ) - def __floordiv__(self: Self, other: PandasLikeExpr | Any) -> Self: - return reuse_series_implementation(self, "__floordiv__", other=other) - - def __rfloordiv__(self: Self, other: PandasLikeExpr | Any) -> Self: - return reuse_series_implementation( - self.alias("literal"), "__rfloordiv__", other=other - ) - - def __pow__(self: Self, other: PandasLikeExpr | Any) -> Self: - return reuse_series_implementation(self, "__pow__", other=other) - - def __rpow__(self: Self, other: PandasLikeExpr | Any) -> Self: - return reuse_series_implementation(self.alias("literal"), "__rpow__", other=other) - - def __mod__(self: Self, other: PandasLikeExpr | Any) -> Self: - return reuse_series_implementation(self, "__mod__", other=other) - - def __rmod__(self: Self, other: PandasLikeExpr | Any) -> Self: - return reuse_series_implementation(self.alias("literal"), "__rmod__", other=other) - - # Unary - def __invert__(self: Self) -> Self: - return reuse_series_implementation(self, "__invert__") - - # Reductions - def null_count(self: Self) -> Self: - return reuse_series_implementation(self, "null_count", returns_scalar=True) - - def n_unique(self: Self) -> Self: - return reuse_series_implementation(self, "n_unique", returns_scalar=True) - - def sum(self: Self) -> Self: - return reuse_series_implementation(self, "sum", returns_scalar=True) - - def count(self: Self) -> Self: - return reuse_series_implementation(self, "count", returns_scalar=True) - - def mean(self: Self) -> Self: - return reuse_series_implementation(self, "mean", returns_scalar=True) - - def median(self: Self) -> Self: - return reuse_series_implementation(self, "median", returns_scalar=True) - - def std(self: Self, *, ddof: int) -> Self: - return reuse_series_implementation( - self, "std", returns_scalar=True, call_kwargs={"ddof": ddof} - ) - - def var(self: Self, *, ddof: int) -> Self: - return reuse_series_implementation( - self, "var", returns_scalar=True, call_kwargs={"ddof": ddof} - ) - - def skew(self: Self) -> Self: - return reuse_series_implementation(self, "skew", returns_scalar=True) - - def any(self: Self) -> Self: - return reuse_series_implementation(self, "any", returns_scalar=True) - - def all(self: Self) -> Self: - return reuse_series_implementation(self, "all", returns_scalar=True) - - def max(self: Self) -> Self: - return reuse_series_implementation(self, "max", returns_scalar=True) - - def min(self: Self) -> Self: - return reuse_series_implementation(self, "min", returns_scalar=True) - - def arg_min(self: Self) -> Self: - return reuse_series_implementation(self, "arg_min", returns_scalar=True) - - def arg_max(self: Self) -> Self: - return reuse_series_implementation(self, "arg_max", returns_scalar=True) - - # Other - - def clip(self: Self, lower_bound: Any, upper_bound: Any) -> Self: - return reuse_series_implementation( - self, "clip", lower_bound=lower_bound, upper_bound=upper_bound - ) - - def is_null(self: Self) -> Self: - return reuse_series_implementation(self, "is_null") - - def is_nan(self: Self) -> Self: - return reuse_series_implementation(self, "is_nan") - - def fill_null( - self: Self, - value: Self | Any | None, - strategy: Literal["forward", "backward"] | None, - limit: int | None, - ) -> Self: - return reuse_series_implementation( - self, "fill_null", value=value, strategy=strategy, limit=limit - ) - - def is_in(self: Self, other: Any) -> Self: - return reuse_series_implementation(self, "is_in", other=other) - - def arg_true(self: Self) -> Self: - return reuse_series_implementation(self, "arg_true") - def ewm_mean( self: Self, *, @@ -374,8 +180,7 @@ def ewm_mean( min_samples: int, ignore_nulls: bool, ) -> Self: - return reuse_series_implementation( - self, + return self._reuse_series( "ewm_mean", com=com, span=span, @@ -386,84 +191,11 @@ def ewm_mean( ignore_nulls=ignore_nulls, ) - def filter(self: Self, *predicates: PandasLikeExpr) -> Self: - plx = self.__narwhals_namespace__() - predicate = plx.all_horizontal(*predicates) - return reuse_series_implementation(self, "filter", predicate=predicate) - - def drop_nulls(self: Self) -> Self: - return reuse_series_implementation(self, "drop_nulls") - - def replace_strict( - self: Self, - old: Sequence[Any] | Mapping[Any, Any], - new: Sequence[Any], - *, - return_dtype: DType | type[DType] | None, - ) -> Self: - return reuse_series_implementation( - self, "replace_strict", old=old, new=new, return_dtype=return_dtype - ) - - def sort(self: Self, *, descending: bool, nulls_last: bool) -> Self: - return reuse_series_implementation( - self, "sort", descending=descending, nulls_last=nulls_last - ) - - def abs(self: Self) -> Self: - return reuse_series_implementation(self, "abs") - def cum_sum(self: Self, *, reverse: bool) -> Self: - return reuse_series_implementation( - self, "cum_sum", call_kwargs={"reverse": reverse} - ) - - def unique(self: Self) -> Self: - return reuse_series_implementation(self, "unique", maintain_order=False) - - def diff(self: Self) -> Self: - return reuse_series_implementation(self, "diff") + return self._reuse_series("cum_sum", call_kwargs={"reverse": reverse}) def shift(self: Self, n: int) -> Self: - return reuse_series_implementation(self, "shift", call_kwargs={"n": n}) - - def sample( - self: Self, - n: int | None, - *, - fraction: float | None, - with_replacement: bool, - seed: int | None, - ) -> Self: - return reuse_series_implementation( - self, - "sample", - n=n, - fraction=fraction, - with_replacement=with_replacement, - seed=seed, - ) - - def alias(self: Self, name: str) -> Self: - def alias_output_names(names: Sequence[str]) -> Sequence[str]: - if len(names) != 1: - msg = f"Expected function with single output, found output names: {names}" - raise ValueError(msg) - return [name] - - # Define this one manually, so that we can - # override `output_names` and not increase depth - return self.__class__( - lambda df: [series.alias(name) for series in self._call(df)], - depth=self._depth, - function_name=self._function_name, - evaluate_output_names=self._evaluate_output_names, - alias_output_names=alias_output_names, - implementation=self._implementation, - backend_version=self._backend_version, - version=self._version, - call_kwargs=self._call_kwargs, - ) + return self._reuse_series("shift", call_kwargs={"n": n}) def over( self: Self, @@ -567,46 +299,6 @@ def func(df: PandasLikeDataFrame) -> Sequence[PandasLikeSeries]: version=self._version, ) - def is_unique(self: Self) -> Self: - return reuse_series_implementation(self, "is_unique") - - def is_first_distinct(self: Self) -> Self: - return reuse_series_implementation(self, "is_first_distinct") - - def is_last_distinct(self: Self) -> Self: - return reuse_series_implementation(self, "is_last_distinct") - - def quantile( - self: Self, - quantile: float, - interpolation: Literal["nearest", "higher", "lower", "midpoint", "linear"], - ) -> Self: - return reuse_series_implementation( - self, - "quantile", - quantile=quantile, - interpolation=interpolation, - returns_scalar=True, - ) - - def head(self: Self, n: int) -> Self: - return reuse_series_implementation(self, "head", n=n) - - def tail(self: Self, n: int) -> Self: - return reuse_series_implementation(self, "tail", n=n) - - def round(self: Self, decimals: int) -> Self: - return reuse_series_implementation(self, "round", decimals=decimals) - - def len(self: Self) -> Self: - return reuse_series_implementation(self, "len", returns_scalar=True) - - def gather_every(self: Self, n: int, offset: int) -> Self: - return reuse_series_implementation(self, "gather_every", n=n, offset=offset) - - def mode(self: Self) -> Self: - return reuse_series_implementation(self, "mode") - def map_batches( self: Self, function: Callable[[Any], Any], @@ -640,34 +332,22 @@ def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]: version=self._version, ) - def is_finite(self: Self) -> Self: - return reuse_series_implementation(self, "is_finite") - def cum_count(self: Self, *, reverse: bool) -> Self: - return reuse_series_implementation( - self, "cum_count", call_kwargs={"reverse": reverse} - ) + return self._reuse_series("cum_count", call_kwargs={"reverse": reverse}) def cum_min(self: Self, *, reverse: bool) -> Self: - return reuse_series_implementation( - self, "cum_min", call_kwargs={"reverse": reverse} - ) + return self._reuse_series("cum_min", call_kwargs={"reverse": reverse}) def cum_max(self: Self, *, reverse: bool) -> Self: - return reuse_series_implementation( - self, "cum_max", call_kwargs={"reverse": reverse} - ) + return self._reuse_series("cum_max", call_kwargs={"reverse": reverse}) def cum_prod(self: Self, *, reverse: bool) -> Self: - return reuse_series_implementation( - self, "cum_prod", call_kwargs={"reverse": reverse} - ) + return self._reuse_series("cum_prod", call_kwargs={"reverse": reverse}) def rolling_sum( self: Self, window_size: int, *, min_samples: int, center: bool ) -> Self: - return reuse_series_implementation( - self, + return self._reuse_series( "rolling_sum", call_kwargs={ "window_size": window_size, @@ -676,85 +356,12 @@ def rolling_sum( }, ) - def rolling_mean( - self: Self, - window_size: int, - *, - min_samples: int, - center: bool, - ) -> Self: - return reuse_series_implementation( - self, - "rolling_mean", - window_size=window_size, - min_samples=min_samples, - center=center, - ) - - def rolling_var( - self: Self, - window_size: int, - *, - min_samples: int, - center: bool, - ddof: int, - ) -> Self: - return reuse_series_implementation( - self, - "rolling_var", - window_size=window_size, - min_samples=min_samples, - center=center, - ddof=ddof, - ) - - def rolling_std( - self: Self, - window_size: int, - *, - min_samples: int, - center: bool, - ddof: int, - ) -> Self: - return reuse_series_implementation( - self, - "rolling_std", - window_size=window_size, - min_samples=min_samples, - center=center, - ddof=ddof, - ) - def rank( self: Self, method: Literal["average", "min", "max", "dense", "ordinal"], *, descending: bool, ) -> Self: - return reuse_series_implementation( - self, "rank", call_kwargs={"method": method, "descending": descending} + return self._reuse_series( + "rank", call_kwargs={"method": method, "descending": descending} ) - - @property - def str(self: Self) -> PandasLikeExprStringNamespace: - return PandasLikeExprStringNamespace(self) - - @property - def dt(self: Self) -> PandasLikeExprDateTimeNamespace: - return PandasLikeExprDateTimeNamespace(self) - - @property - def cat(self: Self) -> PandasLikeExprCatNamespace: - return PandasLikeExprCatNamespace(self) - - @property - def name(self: Self) -> PandasLikeExprNameNamespace: - return PandasLikeExprNameNamespace(self) - - @property - def list(self: Self) -> PandasLikeExprListNamespace: - return PandasLikeExprListNamespace(self) - - @property - def struct(self: Self) -> PandasLikeExprStructNamespace: - return PandasLikeExprStructNamespace(self) diff --git a/narwhals/_pandas_like/expr_cat.py b/narwhals/_pandas_like/expr_cat.py deleted file mode 100644 index 985ae52935..0000000000 --- a/narwhals/_pandas_like/expr_cat.py +++ /dev/null @@ -1,22 +0,0 @@ -from __future__ import annotations - -from typing import TYPE_CHECKING - -from narwhals._expression_parsing import reuse_series_namespace_implementation - -if TYPE_CHECKING: - from typing_extensions import Self - - from narwhals._pandas_like.expr import PandasLikeExpr - - -class PandasLikeExprCatNamespace: - def __init__(self: Self, expr: PandasLikeExpr) -> None: - self._compliant_expr = expr - - def get_categories(self: Self) -> PandasLikeExpr: - return reuse_series_namespace_implementation( - self._compliant_expr, - "cat", - "get_categories", - ) diff --git a/narwhals/_pandas_like/expr_dt.py b/narwhals/_pandas_like/expr_dt.py deleted file mode 100644 index ed5ffe8f12..0000000000 --- a/narwhals/_pandas_like/expr_dt.py +++ /dev/null @@ -1,107 +0,0 @@ -from __future__ import annotations - -from typing import TYPE_CHECKING - -from narwhals._expression_parsing import reuse_series_namespace_implementation - -if TYPE_CHECKING: - from typing_extensions import Self - - from narwhals._pandas_like.expr import PandasLikeExpr - from narwhals.typing import TimeUnit - - -class PandasLikeExprDateTimeNamespace: - def __init__(self: Self, expr: PandasLikeExpr) -> None: - self._compliant_expr = expr - - def date(self: Self) -> PandasLikeExpr: - return reuse_series_namespace_implementation(self._compliant_expr, "dt", "date") - - def year(self: Self) -> PandasLikeExpr: - return reuse_series_namespace_implementation(self._compliant_expr, "dt", "year") - - def month(self: Self) -> PandasLikeExpr: - return reuse_series_namespace_implementation(self._compliant_expr, "dt", "month") - - def day(self: Self) -> PandasLikeExpr: - return reuse_series_namespace_implementation(self._compliant_expr, "dt", "day") - - def hour(self: Self) -> PandasLikeExpr: - return reuse_series_namespace_implementation(self._compliant_expr, "dt", "hour") - - def minute(self: Self) -> PandasLikeExpr: - return reuse_series_namespace_implementation(self._compliant_expr, "dt", "minute") - - def second(self: Self) -> PandasLikeExpr: - return reuse_series_namespace_implementation(self._compliant_expr, "dt", "second") - - def millisecond(self: Self) -> PandasLikeExpr: - return reuse_series_namespace_implementation( - self._compliant_expr, "dt", "millisecond" - ) - - def microsecond(self: Self) -> PandasLikeExpr: - return reuse_series_namespace_implementation( - self._compliant_expr, "dt", "microsecond" - ) - - def nanosecond(self: Self) -> PandasLikeExpr: - return reuse_series_namespace_implementation( - self._compliant_expr, "dt", "nanosecond" - ) - - def ordinal_day(self: Self) -> PandasLikeExpr: - return reuse_series_namespace_implementation( - self._compliant_expr, "dt", "ordinal_day" - ) - - def weekday(self: Self) -> PandasLikeExpr: - return reuse_series_namespace_implementation( - self._compliant_expr, "dt", "weekday" - ) - - def total_minutes(self: Self) -> PandasLikeExpr: - return reuse_series_namespace_implementation( - self._compliant_expr, "dt", "total_minutes" - ) - - def total_seconds(self: Self) -> PandasLikeExpr: - return reuse_series_namespace_implementation( - self._compliant_expr, "dt", "total_seconds" - ) - - def total_milliseconds(self: Self) -> PandasLikeExpr: - return reuse_series_namespace_implementation( - self._compliant_expr, "dt", "total_milliseconds" - ) - - def total_microseconds(self: Self) -> PandasLikeExpr: - return reuse_series_namespace_implementation( - self._compliant_expr, "dt", "total_microseconds" - ) - - def total_nanoseconds(self: Self) -> PandasLikeExpr: - return reuse_series_namespace_implementation( - self._compliant_expr, "dt", "total_nanoseconds" - ) - - def to_string(self: Self, format: str) -> PandasLikeExpr: # noqa: A002 - return reuse_series_namespace_implementation( - self._compliant_expr, "dt", "to_string", format=format - ) - - def replace_time_zone(self: Self, time_zone: str | None) -> PandasLikeExpr: - return reuse_series_namespace_implementation( - self._compliant_expr, "dt", "replace_time_zone", time_zone=time_zone - ) - - def convert_time_zone(self: Self, time_zone: str) -> PandasLikeExpr: - return reuse_series_namespace_implementation( - self._compliant_expr, "dt", "convert_time_zone", time_zone=time_zone - ) - - def timestamp(self: Self, time_unit: TimeUnit) -> PandasLikeExpr: - return reuse_series_namespace_implementation( - self._compliant_expr, "dt", "timestamp", time_unit=time_unit - ) diff --git a/narwhals/_pandas_like/expr_list.py b/narwhals/_pandas_like/expr_list.py deleted file mode 100644 index 865f73a8ee..0000000000 --- a/narwhals/_pandas_like/expr_list.py +++ /dev/null @@ -1,22 +0,0 @@ -from __future__ import annotations - -from typing import TYPE_CHECKING - -from narwhals._expression_parsing import reuse_series_namespace_implementation - -if TYPE_CHECKING: - from typing_extensions import Self - - from narwhals._pandas_like.expr import PandasLikeExpr - - -class PandasLikeExprListNamespace: - def __init__(self: Self, expr: PandasLikeExpr) -> None: - self._expr = expr - - def len(self: Self) -> PandasLikeExpr: - return reuse_series_namespace_implementation( - self._expr, - "list", - "len", - ) diff --git a/narwhals/_pandas_like/expr_name.py b/narwhals/_pandas_like/expr_name.py deleted file mode 100644 index 2128b730a4..0000000000 --- a/narwhals/_pandas_like/expr_name.py +++ /dev/null @@ -1,84 +0,0 @@ -from __future__ import annotations - -from typing import TYPE_CHECKING -from typing import Callable -from typing import Sequence - -if TYPE_CHECKING: - from typing_extensions import Self - - from narwhals._pandas_like.expr import PandasLikeExpr - - -class PandasLikeExprNameNamespace: - def __init__(self: Self, expr: PandasLikeExpr) -> None: - self._compliant_expr = expr - - def keep(self: Self) -> PandasLikeExpr: - return self._from_colname_func_and_alias_output_names( - name_mapping_func=lambda name: name, - alias_output_names=None, - ) - - def map(self: Self, function: Callable[[str], str]) -> PandasLikeExpr: - return self._from_colname_func_and_alias_output_names( - name_mapping_func=lambda name: function(str(name)), - alias_output_names=lambda output_names: [ - function(str(name)) for name in output_names - ], - ) - - def prefix(self: Self, prefix: str) -> PandasLikeExpr: - return self._from_colname_func_and_alias_output_names( - name_mapping_func=lambda name: f"{prefix}{name}", - alias_output_names=lambda output_names: [ - f"{prefix}{output_name}" for output_name in output_names - ], - ) - - def suffix(self: Self, suffix: str) -> PandasLikeExpr: - return self._from_colname_func_and_alias_output_names( - name_mapping_func=lambda name: f"{name}{suffix}", - alias_output_names=lambda output_names: [ - f"{output_name}{suffix}" for output_name in output_names - ], - ) - - def to_lowercase(self: Self) -> PandasLikeExpr: - return self._from_colname_func_and_alias_output_names( - name_mapping_func=lambda name: str(name).lower(), - alias_output_names=lambda output_names: [ - str(name).lower() for name in output_names - ], - ) - - def to_uppercase(self: Self) -> PandasLikeExpr: - return self._from_colname_func_and_alias_output_names( - name_mapping_func=lambda name: str(name).upper(), - alias_output_names=lambda output_names: [ - str(name).upper() for name in output_names - ], - ) - - def _from_colname_func_and_alias_output_names( - self: Self, - name_mapping_func: Callable[[str], str], - alias_output_names: Callable[[Sequence[str]], Sequence[str]] | None, - ) -> PandasLikeExpr: - return self._compliant_expr.__class__( - call=lambda df: [ - series.alias(name_mapping_func(name)) - for series, name in zip( - self._compliant_expr._call(df), - self._compliant_expr._evaluate_output_names(df), - ) - ], - depth=self._compliant_expr._depth, - function_name=self._compliant_expr._function_name, - evaluate_output_names=self._compliant_expr._evaluate_output_names, - alias_output_names=alias_output_names, - backend_version=self._compliant_expr._backend_version, - implementation=self._compliant_expr._implementation, - version=self._compliant_expr._version, - call_kwargs=self._compliant_expr._call_kwargs, - ) diff --git a/narwhals/_pandas_like/expr_str.py b/narwhals/_pandas_like/expr_str.py deleted file mode 100644 index 8f241515de..0000000000 --- a/narwhals/_pandas_like/expr_str.py +++ /dev/null @@ -1,121 +0,0 @@ -from __future__ import annotations - -from typing import TYPE_CHECKING - -from narwhals._expression_parsing import reuse_series_namespace_implementation - -if TYPE_CHECKING: - from typing_extensions import Self - - from narwhals._pandas_like.expr import PandasLikeExpr - - -class PandasLikeExprStringNamespace: - def __init__(self: Self, expr: PandasLikeExpr) -> None: - self._compliant_expr = expr - - def len_chars( - self: Self, - ) -> PandasLikeExpr: - return reuse_series_namespace_implementation( - self._compliant_expr, "str", "len_chars" - ) - - def replace( - self: Self, - pattern: str, - value: str, - *, - literal: bool, - n: int, - ) -> PandasLikeExpr: - return reuse_series_namespace_implementation( - self._compliant_expr, - "str", - "replace", - pattern=pattern, - value=value, - literal=literal, - n=n, - ) - - def replace_all( - self: Self, - pattern: str, - value: str, - *, - literal: bool, - ) -> PandasLikeExpr: - return reuse_series_namespace_implementation( - self._compliant_expr, - "str", - "replace_all", - pattern=pattern, - value=value, - literal=literal, - ) - - def strip_chars(self: Self, characters: str | None) -> PandasLikeExpr: - return reuse_series_namespace_implementation( - self._compliant_expr, - "str", - "strip_chars", - characters=characters, - ) - - def starts_with(self: Self, prefix: str) -> PandasLikeExpr: - return reuse_series_namespace_implementation( - self._compliant_expr, - "str", - "starts_with", - prefix=prefix, - ) - - def ends_with(self: Self, suffix: str) -> PandasLikeExpr: - return reuse_series_namespace_implementation( - self._compliant_expr, - "str", - "ends_with", - suffix=suffix, - ) - - def contains(self: Self, pattern: str, *, literal: bool) -> PandasLikeExpr: - return reuse_series_namespace_implementation( - self._compliant_expr, - "str", - "contains", - pattern=pattern, - literal=literal, - ) - - def slice(self: Self, offset: int, length: int | None) -> PandasLikeExpr: - return reuse_series_namespace_implementation( - self._compliant_expr, "str", "slice", offset=offset, length=length - ) - - def split(self: Self, by: str) -> PandasLikeExpr: - return reuse_series_namespace_implementation( - self._compliant_expr, "str", "split", by=by - ) - - def to_datetime(self: Self, format: str | None) -> PandasLikeExpr: # noqa: A002 - return reuse_series_namespace_implementation( - self._compliant_expr, - "str", - "to_datetime", - format=format, - ) - - def to_uppercase(self: Self) -> PandasLikeExpr: - return reuse_series_namespace_implementation( - self._compliant_expr, - "str", - "to_uppercase", - ) - - def to_lowercase(self: Self) -> PandasLikeExpr: - return reuse_series_namespace_implementation( - self._compliant_expr, - "str", - "to_lowercase", - ) diff --git a/narwhals/_pandas_like/expr_struct.py b/narwhals/_pandas_like/expr_struct.py deleted file mode 100644 index 997ce1dab7..0000000000 --- a/narwhals/_pandas_like/expr_struct.py +++ /dev/null @@ -1,23 +0,0 @@ -from __future__ import annotations - -from typing import TYPE_CHECKING - -from narwhals._expression_parsing import reuse_series_namespace_implementation - -if TYPE_CHECKING: - from typing_extensions import Self - - from narwhals._pandas_like.expr import PandasLikeExpr - - -class PandasLikeExprStructNamespace: - def __init__(self: Self, expr: PandasLikeExpr) -> None: - self._compliant_expr = expr - - def field(self, name: str) -> PandasLikeExpr: - return reuse_series_namespace_implementation( - self._compliant_expr, - "struct", - "field", - name=name, - ).alias(name) diff --git a/narwhals/_pandas_like/namespace.py b/narwhals/_pandas_like/namespace.py index 4b4994c6d0..e9902e6f8b 100644 --- a/narwhals/_pandas_like/namespace.py +++ b/narwhals/_pandas_like/namespace.py @@ -11,6 +11,7 @@ from typing import Literal from typing import Sequence +from narwhals._compliant import EagerNamespace from narwhals._expression_parsing import combine_alias_output_names from narwhals._expression_parsing import combine_evaluate_output_names from narwhals._pandas_like.dataframe import PandasLikeDataFrame @@ -23,7 +24,6 @@ from narwhals._pandas_like.utils import extract_dataframe_comparand from narwhals._pandas_like.utils import horizontal_concat from narwhals._pandas_like.utils import vertical_concat -from narwhals.typing import CompliantNamespace from narwhals.utils import exclude_column_names from narwhals.utils import get_column_names from narwhals.utils import import_dtypes_module @@ -40,7 +40,17 @@ _Scalar: TypeAlias = Any -class PandasLikeNamespace(CompliantNamespace[PandasLikeDataFrame, PandasLikeSeries]): +class PandasLikeNamespace( + EagerNamespace[PandasLikeDataFrame, PandasLikeSeries, PandasLikeExpr] +): + @property + def _expr(self) -> type[PandasLikeExpr]: + return PandasLikeExpr + + @property + def _series(self) -> type[PandasLikeSeries]: + return PandasLikeSeries + @property def selectors(self: Self) -> PandasSelectorNamespace: return PandasSelectorNamespace(self) @@ -56,52 +66,6 @@ def __init__( self._backend_version = backend_version self._version = version - def _create_expr_from_callable( - self: Self, - func: Callable[[PandasLikeDataFrame], Sequence[PandasLikeSeries]], - *, - depth: int, - function_name: str, - evaluate_output_names: Callable[[PandasLikeDataFrame], Sequence[str]], - alias_output_names: Callable[[Sequence[str]], Sequence[str]] | None, - call_kwargs: dict[str, Any] | None = None, - ) -> PandasLikeExpr: - return PandasLikeExpr( - func, - depth=depth, - function_name=function_name, - evaluate_output_names=evaluate_output_names, - alias_output_names=alias_output_names, - implementation=self._implementation, - backend_version=self._backend_version, - version=self._version, - call_kwargs=call_kwargs, - ) - - def _create_series_from_scalar( - self: Self, value: Any, *, reference_series: PandasLikeSeries - ) -> PandasLikeSeries: - return PandasLikeSeries._from_iterable( - [value], - name=reference_series._native_series.name, - index=reference_series._native_series.index[0:1], - implementation=self._implementation, - backend_version=self._backend_version, - version=self._version, - ) - - def _create_expr_from_series(self: Self, series: PandasLikeSeries) -> PandasLikeExpr: - return PandasLikeExpr( - lambda _df: [series], - depth=0, - function_name="series", - evaluate_output_names=lambda _df: [series.name], - alias_output_names=None, - implementation=self._implementation, - backend_version=self._backend_version, - version=self._version, - ) - def _create_compliant_series(self: Self, value: Any) -> PandasLikeSeries: return create_compliant_series( value, @@ -112,49 +76,32 @@ def _create_compliant_series(self: Self, value: Any) -> PandasLikeSeries: # --- selection --- def col(self: Self, *column_names: str) -> PandasLikeExpr: - return PandasLikeExpr.from_column_names( - passthrough_column_names(column_names), - function_name="col", - implementation=self._implementation, - backend_version=self._backend_version, - version=self._version, + return self._expr.from_column_names( + passthrough_column_names(column_names), function_name="col", context=self ) def exclude(self: Self, excluded_names: Container[str]) -> PandasLikeExpr: - return PandasLikeExpr.from_column_names( + return self._expr.from_column_names( partial(exclude_column_names, names=excluded_names), function_name="exclude", - implementation=self._implementation, - backend_version=self._backend_version, - version=self._version, + context=self, ) def nth(self: Self, *column_indices: int) -> PandasLikeExpr: - return PandasLikeExpr.from_column_indices( - *column_indices, - implementation=self._implementation, - backend_version=self._backend_version, - version=self._version, - ) + return self._expr.from_column_indices(*column_indices, context=self) def all(self: Self) -> PandasLikeExpr: - return PandasLikeExpr.from_column_names( - get_column_names, - function_name="all", - implementation=self._implementation, - backend_version=self._backend_version, - version=self._version, + return self._expr.from_column_names( + get_column_names, function_name="all", context=self ) def lit(self: Self, value: Any, dtype: DType | None) -> PandasLikeExpr: def _lit_pandas_series(df: PandasLikeDataFrame) -> PandasLikeSeries: - pandas_series = PandasLikeSeries._from_iterable( + pandas_series = self._series._from_iterable( data=[value], name="literal", index=df._native_frame.index[0:1], - implementation=self._implementation, - backend_version=self._backend_version, - version=self._version, + context=self, ) if dtype: return pandas_series.cast(dtype) @@ -174,13 +121,8 @@ def _lit_pandas_series(df: PandasLikeDataFrame) -> PandasLikeSeries: def len(self: Self) -> PandasLikeExpr: return PandasLikeExpr( lambda df: [ - PandasLikeSeries._from_iterable( - [len(df._native_frame)], - name="len", - index=[0], - implementation=self._implementation, - backend_version=self._backend_version, - version=self._version, + self._series._from_iterable( + [len(df._native_frame)], name="len", index=[0], context=self ) ], depth=0, @@ -200,12 +142,13 @@ def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]: native_series = (s.fill_null(0, None, None) for s in series) return [reduce(operator.add, native_series)] - return self._create_expr_from_callable( + return self._expr._from_callable( func=func, depth=max(x._depth for x in exprs) + 1, function_name="sum_horizontal", evaluate_output_names=combine_evaluate_output_names(*exprs), alias_output_names=combine_alias_output_names(*exprs), + context=self, ) def all_horizontal(self: Self, *exprs: PandasLikeExpr) -> PandasLikeExpr: @@ -215,12 +158,13 @@ def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]: ) return [reduce(operator.and_, series)] - return self._create_expr_from_callable( + return self._expr._from_callable( func=func, depth=max(x._depth for x in exprs) + 1, function_name="all_horizontal", evaluate_output_names=combine_evaluate_output_names(*exprs), alias_output_names=combine_alias_output_names(*exprs), + context=self, ) def any_horizontal(self: Self, *exprs: PandasLikeExpr) -> PandasLikeExpr: @@ -230,12 +174,13 @@ def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]: ) return [reduce(operator.or_, series)] - return self._create_expr_from_callable( + return self._expr._from_callable( func=func, depth=max(x._depth for x in exprs) + 1, function_name="any_horizontal", evaluate_output_names=combine_evaluate_output_names(*exprs), alias_output_names=combine_alias_output_names(*exprs), + context=self, ) def mean_horizontal(self: Self, *exprs: PandasLikeExpr) -> PandasLikeExpr: @@ -247,12 +192,13 @@ def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]: non_na = align_series_full_broadcast(*(1 - s.is_null() for s in expr_results)) return [reduce(operator.add, series) / reduce(operator.add, non_na)] - return self._create_expr_from_callable( + return self._expr._from_callable( func=func, depth=max(x._depth for x in exprs) + 1, function_name="mean_horizontal", evaluate_output_names=combine_evaluate_output_names(*exprs), alias_output_names=combine_alias_output_names(*exprs), + context=self, ) def min_horizontal(self: Self, *exprs: PandasLikeExpr) -> PandasLikeExpr: @@ -271,12 +217,13 @@ def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]: ).alias(series[0].name) ] - return self._create_expr_from_callable( + return self._expr._from_callable( func=func, depth=max(x._depth for x in exprs) + 1, function_name="min_horizontal", evaluate_output_names=combine_evaluate_output_names(*exprs), alias_output_names=combine_alias_output_names(*exprs), + context=self, ) def max_horizontal(self: Self, *exprs: PandasLikeExpr) -> PandasLikeExpr: @@ -295,12 +242,13 @@ def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]: ).alias(series[0].name) ] - return self._create_expr_from_callable( + return self._expr._from_callable( func=func, depth=max(x._depth for x in exprs) + 1, function_name="max_horizontal", evaluate_output_names=combine_evaluate_output_names(*exprs), alias_output_names=combine_alias_output_names(*exprs), + context=self, ) def concat( @@ -379,13 +327,11 @@ def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]: s.zip_with(~nm, "") for s, nm in zip(series, null_mask) ] - sep_array = init_value.__class__._from_iterable( + sep_array = init_value._from_iterable( data=[separator] * len(init_value), name="sep", index=init_value._native_series.index, - implementation=self._implementation, - backend_version=self._backend_version, - version=self._version, + context=self, ) separators = (sep_array.zip_with(~nm, "") for nm in null_mask[:-1]) result = reduce( @@ -396,12 +342,13 @@ def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]: return [result] - return self._create_expr_from_callable( + return self._expr._from_callable( func=func, depth=max(x._depth for x in exprs) + 1, function_name="concat_str", evaluate_output_names=combine_evaluate_output_names(*exprs), alias_output_names=combine_alias_output_names(*exprs), + context=self, ) @@ -424,16 +371,13 @@ def __init__( self._version = version def __call__(self: Self, df: PandasLikeDataFrame) -> Sequence[PandasLikeSeries]: - plx = df.__narwhals_namespace__() condition = self._condition(df)[0] condition_native = condition._native_series if isinstance(self._then_value, PandasLikeExpr): value_series = self._then_value(df)[0] else: - value_series = plx._create_series_from_scalar( - self._then_value, reference_series=condition.alias("literal") - ) + value_series = condition.alias("literal")._from_scalar(self._then_value) value_series._broadcast = True value_series_native = extract_dataframe_comparand( df._native_frame.index, value_series diff --git a/narwhals/_pandas_like/selectors.py b/narwhals/_pandas_like/selectors.py index bdf5cf33cd..69109f0cec 100644 --- a/narwhals/_pandas_like/selectors.py +++ b/narwhals/_pandas_like/selectors.py @@ -2,19 +2,19 @@ from typing import TYPE_CHECKING +from narwhals._compliant import CompliantSelector +from narwhals._compliant import EagerSelectorNamespace from narwhals._pandas_like.dataframe import PandasLikeDataFrame from narwhals._pandas_like.expr import PandasLikeExpr from narwhals._pandas_like.series import PandasLikeSeries -from narwhals._selectors import CompliantSelector -from narwhals._selectors import EagerSelectorNamespace if TYPE_CHECKING: from typing_extensions import Self + from narwhals._compliant import EvalNames + from narwhals._compliant import EvalSeries from narwhals._pandas_like.dataframe import PandasLikeDataFrame from narwhals._pandas_like.series import PandasLikeSeries - from narwhals._selectors import EvalNames - from narwhals._selectors import EvalSeries from narwhals.utils import _FullContext diff --git a/narwhals/_pandas_like/series.py b/narwhals/_pandas_like/series.py index 23873bb221..41c6198398 100644 --- a/narwhals/_pandas_like/series.py +++ b/narwhals/_pandas_like/series.py @@ -11,6 +11,7 @@ import numpy as np +from narwhals._compliant import EagerSeries from narwhals._pandas_like.series_cat import PandasLikeSeriesCatNamespace from narwhals._pandas_like.series_dt import PandasLikeSeriesDateTimeNamespace from narwhals._pandas_like.series_list import PandasLikeSeriesListNamespace @@ -27,7 +28,6 @@ from narwhals._pandas_like.utils import set_index from narwhals.dependencies import is_numpy_scalar from narwhals.exceptions import InvalidOperationError -from narwhals.typing import CompliantSeries from narwhals.utils import Implementation from narwhals.utils import import_dtypes_module from narwhals.utils import parse_version @@ -43,10 +43,12 @@ from narwhals._arrow.typing import ArrowArray from narwhals._pandas_like.dataframe import PandasLikeDataFrame + from narwhals._pandas_like.namespace import PandasLikeNamespace from narwhals.dtypes import DType from narwhals.typing import _1DArray from narwhals.typing import _AnyDArray from narwhals.utils import Version + from narwhals.utils import _FullContext PANDAS_TO_NUMPY_DTYPE_NO_MISSING = { "Int64": "int64", @@ -94,7 +96,7 @@ } -class PandasLikeSeries(CompliantSeries): +class PandasLikeSeries(EagerSeries[Any]): def __init__( self: Self, native_series: Any, @@ -116,6 +118,10 @@ def __init__( # the length of the whole dataframe, we just extract the scalar. self._broadcast = False + @property + def native(self) -> Any: + return self._native_series + def __native_namespace__(self: Self) -> ModuleType: if self._implementation in { Implementation.PANDAS, @@ -130,6 +136,13 @@ def __native_namespace__(self: Self) -> ModuleType: def __narwhals_series__(self: Self) -> Self: return self + def __narwhals_namespace__(self) -> PandasLikeNamespace: + from narwhals._pandas_like.namespace import PandasLikeNamespace + + return PandasLikeNamespace( + self._implementation, self._backend_version, self._version + ) + @overload def __getitem__(self: Self, idx: int) -> Any: ... @@ -143,7 +156,7 @@ def __getitem__(self: Self, idx: int | slice | Sequence[int]) -> Any | Self: def _change_version(self: Self, version: Version) -> Self: return self.__class__( - self._native_series, + self.native, implementation=self._implementation, backend_version=self._backend_version, version=version, @@ -162,22 +175,20 @@ def _from_iterable( cls: type[Self], data: Iterable[Any], name: str, - index: Any, *, - implementation: Implementation, - backend_version: tuple[int, ...], - version: Version, + context: _FullContext, + index: Any = None, # NOTE: Originally a liskov substitution principle violation ) -> Self: return cls( native_series_from_iterable( data, name=name, - index=index, - implementation=implementation, + index=[] if index is None else index, + implementation=context._implementation, ), - implementation=implementation, - backend_version=backend_version, - version=version, + implementation=context._implementation, + backend_version=context._backend_version, + version=context._version, ) def __len__(self: Self) -> int: @@ -198,10 +209,6 @@ def dtype(self: Self) -> DType: ) ) - @property - def native(self) -> Any: - return self._native_series - def ewm_mean( self: Self, *, diff --git a/narwhals/_polars/expr.py b/narwhals/_polars/expr.py index 3bcd31bc83..8d57e31920 100644 --- a/narwhals/_polars/expr.py +++ b/narwhals/_polars/expr.py @@ -38,6 +38,14 @@ def _from_native_expr(self: Self, expr: pl.Expr) -> Self: expr, version=self._version, backend_version=self._backend_version ) + @classmethod + def _from_series(cls, series: Any) -> Self: + return cls( + series._native_series, + version=series._version, + backend_version=series._backend_version, + ) + def broadcast(self, kind: Literal[ExprKind.AGGREGATION, ExprKind.LITERAL]) -> Self: # Let Polars do its thing. return self diff --git a/narwhals/_polars/namespace.py b/narwhals/_polars/namespace.py index 6f1dc95daa..2bcf62f949 100644 --- a/narwhals/_polars/namespace.py +++ b/narwhals/_polars/namespace.py @@ -37,11 +37,9 @@ def __init__( self._version = version def __getattr__(self: Self, attr: str) -> Any: - from narwhals._polars.expr import PolarsExpr - def func(*args: Any, **kwargs: Any) -> Any: args, kwargs = extract_args_kwargs(args, kwargs) # type: ignore[assignment] - return PolarsExpr( + return self._expr( getattr(pl, attr)(*args, **kwargs), version=self._version, backend_version=self._backend_version, @@ -49,39 +47,35 @@ def func(*args: Any, **kwargs: Any) -> Any: return func + @property + def _expr(self) -> type[PolarsExpr]: + return PolarsExpr + + @property + def _series(self) -> type[PolarsSeries]: + return PolarsSeries + def _create_compliant_series(self, value: Any) -> PolarsSeries: - return PolarsSeries( + return self._series( pl.Series(value), backend_version=self._backend_version, version=self._version ) - def _create_expr_from_series(self, value: Any) -> PolarsExpr: - # Let Polars do its own thing. - return PolarsExpr( - value._native_series, - version=self._version, - backend_version=self._backend_version, - ) - def nth(self: Self, *indices: int) -> PolarsExpr: - from narwhals._polars.expr import PolarsExpr - if self._backend_version < (1, 0, 0): msg = "`nth` is only supported for Polars>=1.0.0. Please use `col` for columns selection instead." raise AttributeError(msg) - return PolarsExpr( + return self._expr( pl.nth(*indices), version=self._version, backend_version=self._backend_version ) def len(self: Self) -> PolarsExpr: - from narwhals._polars.expr import PolarsExpr - if self._backend_version < (0, 20, 5): - return PolarsExpr( + return self._expr( pl.count().alias("len"), version=self._version, backend_version=self._backend_version, ) - return PolarsExpr( + return self._expr( pl.len(), version=self._version, backend_version=self._backend_version ) @@ -123,10 +117,8 @@ def concat( ) def lit(self: Self, value: Any, dtype: DType | None) -> PolarsExpr: - from narwhals._polars.expr import PolarsExpr - if dtype is not None: - return PolarsExpr( + return self._expr( pl.lit( value, dtype=narwhals_to_native_dtype( @@ -136,22 +128,20 @@ def lit(self: Self, value: Any, dtype: DType | None) -> PolarsExpr: version=self._version, backend_version=self._backend_version, ) - return PolarsExpr( + return self._expr( pl.lit(value), version=self._version, backend_version=self._backend_version ) def mean_horizontal(self: Self, *exprs: PolarsExpr) -> PolarsExpr: - from narwhals._polars.expr import PolarsExpr - if self._backend_version < (0, 20, 8): - return PolarsExpr( + return self._expr( pl.sum_horizontal(e._native_expr for e in exprs) / pl.sum_horizontal(1 - e.is_null()._native_expr for e in exprs), version=self._version, backend_version=self._backend_version, ) - return PolarsExpr( + return self._expr( pl.mean_horizontal(e._native_expr for e in exprs), version=self._version, backend_version=self._backend_version, @@ -163,8 +153,6 @@ def concat_str( separator: str, ignore_nulls: bool, ) -> PolarsExpr: - from narwhals._polars.expr import PolarsExpr - pl_exprs: list[pl.Expr] = [expr._native_expr for expr in exprs] if self._backend_version < (0, 20, 6): @@ -193,11 +181,11 @@ def concat_str( exprs=[s + v for s, v in zip(separators, values)], ) - return PolarsExpr( + return self._expr( result, version=self._version, backend_version=self._backend_version ) - return PolarsExpr( + return self._expr( pl.concat_str( pl_exprs, separator=separator, @@ -218,8 +206,6 @@ def __init__(self: Self, version: Version, backend_version: tuple[int, ...]) -> self._backend_version = backend_version def by_dtype(self: Self, dtypes: Iterable[DType]) -> PolarsExpr: - from narwhals._polars.expr import PolarsExpr - native_dtypes = [ narwhals_to_native_dtype( dtype, self._version, self._backend_version @@ -235,10 +221,6 @@ def by_dtype(self: Self, dtypes: Iterable[DType]) -> PolarsExpr: ) def matches(self: Self, pattern: str) -> PolarsExpr: - import polars as pl - - from narwhals._polars.expr import PolarsExpr - return PolarsExpr( pl.selectors.matches(pattern=pattern), version=self._version, @@ -246,8 +228,6 @@ def matches(self: Self, pattern: str) -> PolarsExpr: ) def numeric(self: Self) -> PolarsExpr: - from narwhals._polars.expr import PolarsExpr - return PolarsExpr( pl.selectors.numeric(), version=self._version, @@ -255,8 +235,6 @@ def numeric(self: Self) -> PolarsExpr: ) def boolean(self: Self) -> PolarsExpr: - from narwhals._polars.expr import PolarsExpr - return PolarsExpr( pl.selectors.boolean(), version=self._version, @@ -264,8 +242,6 @@ def boolean(self: Self) -> PolarsExpr: ) def string(self: Self) -> PolarsExpr: - from narwhals._polars.expr import PolarsExpr - return PolarsExpr( pl.selectors.string(), version=self._version, @@ -273,8 +249,6 @@ def string(self: Self) -> PolarsExpr: ) def categorical(self: Self) -> PolarsExpr: - from narwhals._polars.expr import PolarsExpr - return PolarsExpr( pl.selectors.categorical(), version=self._version, @@ -282,8 +256,6 @@ def categorical(self: Self) -> PolarsExpr: ) def all(self: Self) -> PolarsExpr: - from narwhals._polars.expr import PolarsExpr - return PolarsExpr( pl.selectors.all(), version=self._version, @@ -295,10 +267,6 @@ def datetime( time_unit: TimeUnit | Iterable[TimeUnit] | None, time_zone: str | timezone | Iterable[str | timezone | None] | None, ) -> PolarsExpr: - import polars as pl - - from narwhals._polars.expr import PolarsExpr - return PolarsExpr( pl.selectors.datetime(time_unit=time_unit, time_zone=time_zone), # type: ignore[arg-type] version=self._version, diff --git a/narwhals/_polars/series.py b/narwhals/_polars/series.py index fc1f1e27a8..830f562f8a 100644 --- a/narwhals/_polars/series.py +++ b/narwhals/_polars/series.py @@ -23,6 +23,8 @@ from typing_extensions import Self from narwhals._polars.dataframe import PolarsDataFrame + from narwhals._polars.expr import PolarsExpr + from narwhals._polars.namespace import PolarsNamespace from narwhals.dtypes import DType from narwhals.typing import _1DArray from narwhals.utils import Version @@ -47,6 +49,13 @@ def __init__( def __repr__(self: Self) -> str: # pragma: no cover return "PolarsSeries" + def __narwhals_namespace__(self) -> PolarsNamespace: + from narwhals._polars.namespace import PolarsNamespace + + return PolarsNamespace( + backend_version=self._backend_version, version=self._version + ) + def __narwhals_series__(self: Self) -> Self: return self @@ -90,6 +99,9 @@ def _from_native_object( # scalar return series + def _to_expr(self) -> PolarsExpr: + return self.__narwhals_namespace__()._expr._from_series(self) + def __getattr__(self: Self, attr: str) -> Any: if attr == "as_py": # pragma: no cover raise AttributeError diff --git a/narwhals/_spark_like/expr.py b/narwhals/_spark_like/expr.py index 45a801273d..c81804a6a5 100644 --- a/narwhals/_spark_like/expr.py +++ b/narwhals/_spark_like/expr.py @@ -9,6 +9,7 @@ from typing import Sequence from typing import cast +from narwhals._compliant import LazyExpr from narwhals._expression_parsing import ExprKind from narwhals._spark_like.expr_dt import SparkLikeExprDateTimeNamespace from narwhals._spark_like.expr_list import SparkLikeExprListNamespace @@ -18,7 +19,6 @@ from narwhals._spark_like.utils import maybe_evaluate_expr from narwhals._spark_like.utils import narwhals_to_native_dtype from narwhals.dependencies import get_pyspark -from narwhals.typing import CompliantExpr from narwhals.utils import Implementation from narwhals.utils import not_implemented from narwhals.utils import parse_version @@ -35,7 +35,7 @@ from narwhals.utils import Version -class SparkLikeExpr(CompliantExpr["SparkLikeLazyFrame", "Column"]): # type: ignore[type-var] # (#2044) +class SparkLikeExpr(LazyExpr["SparkLikeLazyFrame", "Column"]): _depth = 0 # Unused, just for compatibility with CompliantExpr def __init__( @@ -622,21 +622,6 @@ def list(self: Self) -> SparkLikeExprListNamespace: def struct(self: Self) -> SparkLikeExprStructNamespace: return SparkLikeExprStructNamespace(self) - arg_min = not_implemented() - arg_max = not_implemented() - arg_true = not_implemented() - head = not_implemented() - tail = not_implemented() - mode = not_implemented() - sort = not_implemented() - rank = not_implemented() - sample = not_implemented() - map_batches = not_implemented() - ewm_mean = not_implemented() - rolling_mean = not_implemented() - rolling_var = not_implemented() - rolling_std = not_implemented() - gather_every = not_implemented() drop_nulls = not_implemented() diff = not_implemented() unique = not_implemented() @@ -647,8 +632,5 @@ def struct(self: Self) -> SparkLikeExprStructNamespace: cum_min = not_implemented() cum_max = not_implemented() cum_prod = not_implemented() - replace_strict = not_implemented() fill_null = not_implemented() quantile = not_implemented() - - cat = not_implemented() # pyright: ignore[reportAssignmentType] diff --git a/narwhals/_spark_like/namespace.py b/narwhals/_spark_like/namespace.py index 91cad2bbb6..21d9746712 100644 --- a/narwhals/_spark_like/namespace.py +++ b/narwhals/_spark_like/namespace.py @@ -12,6 +12,7 @@ from typing import Sequence from typing import cast +from narwhals._compliant import CompliantNamespace from narwhals._expression_parsing import combine_alias_output_names from narwhals._expression_parsing import combine_evaluate_output_names from narwhals._spark_like.dataframe import SparkLikeLazyFrame @@ -19,7 +20,6 @@ from narwhals._spark_like.selectors import SparkLikeSelectorNamespace from narwhals._spark_like.utils import maybe_evaluate_expr from narwhals._spark_like.utils import narwhals_to_native_dtype -from narwhals.typing import CompliantNamespace from narwhals.utils import exclude_column_names from narwhals.utils import get_column_names from narwhals.utils import passthrough_column_names @@ -34,7 +34,7 @@ from narwhals.utils import Version -class SparkLikeNamespace(CompliantNamespace["SparkLikeLazyFrame", "Column"]): # type: ignore[type-var] # (#2044) +class SparkLikeNamespace(CompliantNamespace["SparkLikeLazyFrame", "Column"]): def __init__( self: Self, *, diff --git a/narwhals/_spark_like/selectors.py b/narwhals/_spark_like/selectors.py index eb7ab72fae..66654304da 100644 --- a/narwhals/_spark_like/selectors.py +++ b/narwhals/_spark_like/selectors.py @@ -2,25 +2,24 @@ from typing import TYPE_CHECKING -from narwhals._selectors import CompliantSelector -from narwhals._selectors import LazySelectorNamespace +from narwhals._compliant import CompliantSelector +from narwhals._compliant import LazySelectorNamespace from narwhals._spark_like.expr import SparkLikeExpr if TYPE_CHECKING: from pyspark.sql import Column from typing_extensions import Self - from narwhals._selectors import EvalNames - from narwhals._selectors import EvalSeries + from narwhals._compliant import EvalNames + from narwhals._compliant import EvalSeries from narwhals._spark_like.dataframe import SparkLikeLazyFrame from narwhals.utils import _FullContext -# NOTE: See issue regarding ignores (#2044) -class SparkLikeSelectorNamespace(LazySelectorNamespace["SparkLikeLazyFrame", "Column"]): # type: ignore[type-var] +class SparkLikeSelectorNamespace(LazySelectorNamespace["SparkLikeLazyFrame", "Column"]): def _selector( self, - call: EvalSeries[SparkLikeLazyFrame, Column], # type: ignore[type-var] + call: EvalSeries[SparkLikeLazyFrame, Column], evaluate_output_names: EvalNames[SparkLikeLazyFrame], /, ) -> SparkLikeSelector: @@ -40,7 +39,7 @@ def __init__(self: Self, context: _FullContext, /) -> None: self._implementation = context._implementation -class SparkLikeSelector(CompliantSelector["SparkLikeLazyFrame", "Column"], SparkLikeExpr): # type: ignore[type-var, misc] +class SparkLikeSelector(CompliantSelector["SparkLikeLazyFrame", "Column"], SparkLikeExpr): # type: ignore[misc] def _to_expr(self: Self) -> SparkLikeExpr: return SparkLikeExpr( self._call, diff --git a/narwhals/dataframe.py b/narwhals/dataframe.py index d89a9dbedb..2bbeb3918a 100644 --- a/narwhals/dataframe.py +++ b/narwhals/dataframe.py @@ -49,10 +49,10 @@ from typing_extensions import ParamSpec from typing_extensions import Self + from narwhals._compliant import IntoCompliantExpr from narwhals.group_by import GroupBy from narwhals.group_by import LazyGroupBy from narwhals.series import Series - from narwhals.typing import IntoCompliantExpr from narwhals.typing import IntoDataFrame from narwhals.typing import IntoExpr from narwhals.typing import IntoFrame @@ -429,7 +429,7 @@ def _extract_compliant(self: Self, arg: Any) -> Any: if isinstance(arg, BaseFrame): return arg._compliant_frame if isinstance(arg, Series): - return plx._create_expr_from_series(arg._compliant_series) + return arg._compliant_series._to_expr() if isinstance(arg, Expr): return arg._to_compliant_expr(self.__narwhals_namespace__()) if isinstance(arg, str): @@ -443,7 +443,7 @@ def _extract_compliant(self: Self, arg: Any) -> Any: ) raise TypeError(msg) if is_numpy_array(arg): - return plx._create_expr_from_series(plx._create_compliant_series(arg)) + return plx._create_compliant_series(arg)._to_expr() raise InvalidIntoExprError.from_invalid_type(type(arg)) @property diff --git a/narwhals/expr.py b/narwhals/expr.py index 0925b03a8b..e1099ca9fe 100644 --- a/narwhals/expr.py +++ b/narwhals/expr.py @@ -34,9 +34,9 @@ from typing_extensions import Self from typing_extensions import TypeAlias + from narwhals._compliant import CompliantExpr + from narwhals._compliant import CompliantNamespace from narwhals.dtypes import DType - from narwhals.typing import CompliantExpr - from narwhals.typing import CompliantNamespace from narwhals.typing import IntoExpr PS = ParamSpec("PS") diff --git a/narwhals/functions.py b/narwhals/functions.py index a1fe424d3b..db29692b98 100644 --- a/narwhals/functions.py +++ b/narwhals/functions.py @@ -42,12 +42,12 @@ import pyarrow as pa from typing_extensions import Self + from narwhals._compliant import CompliantExpr + from narwhals._compliant import CompliantNamespace from narwhals.dataframe import DataFrame from narwhals.dataframe import LazyFrame from narwhals.dtypes import DType from narwhals.series import Series - from narwhals.typing import CompliantExpr - from narwhals.typing import CompliantNamespace from narwhals.typing import DTypeBackend from narwhals.typing import IntoDataFrameT from narwhals.typing import IntoExpr diff --git a/narwhals/series.py b/narwhals/series.py index 3879ab4494..5004e9be44 100644 --- a/narwhals/series.py +++ b/narwhals/series.py @@ -77,6 +77,8 @@ def __init__( ) -> None: self._level: Literal["full", "lazy", "interchange"] = level if hasattr(series, "__narwhals_series__"): + # TODO @dangotbanned: Repeat (#2119) for `CompliantSeries` to support typing + # morally: `CompliantSeries` self._compliant_series = series.__narwhals_series__() else: # pragma: no cover msg = f"Expected Polars Series or an object which implements `__narwhals_series__`, got: {type(series)}." diff --git a/narwhals/typing.py b/narwhals/typing.py index f6ff238f25..49ad668335 100644 --- a/narwhals/typing.py +++ b/narwhals/typing.py @@ -1,50 +1,29 @@ from __future__ import annotations -import sys from typing import TYPE_CHECKING from typing import Any -from typing import Callable -from typing import Iterator from typing import Literal -from typing import Mapping from typing import Protocol -from typing import Sequence from typing import TypeVar from typing import Union -from narwhals.utils import deprecated -from narwhals.utils import unstable - -if not TYPE_CHECKING: - if sys.version_info >= (3, 9): - from typing import Protocol as Protocol38 - else: - from typing import Generic as Protocol38 -else: - # TODO @dangotbanned: Remove after dropping `3.8` (#2084) - # - https://github.com/narwhals-dev/narwhals/pull/2064#discussion_r1965921386 - from typing import Protocol as Protocol38 +from narwhals._compliant import CompliantDataFrame +from narwhals._compliant import CompliantLazyFrame +from narwhals._compliant import CompliantSeries if TYPE_CHECKING: from types import ModuleType from typing import Iterable - from typing import Mapping from typing import Sized import numpy as np - from typing_extensions import Self from typing_extensions import TypeAlias from narwhals import dtypes - from narwhals._expression_parsing import ExprKind - from narwhals._selectors import CompliantSelectorNamespace from narwhals.dataframe import DataFrame from narwhals.dataframe import LazyFrame - from narwhals.dtypes import DType from narwhals.expr import Expr from narwhals.series import Series - from narwhals.utils import Implementation - from narwhals.utils import Version # All dataframes supported by Narwhals have a # `columns` property. Their similarities don't extend @@ -62,263 +41,10 @@ class DataFrameLike(Protocol): def __dataframe__(self, *args: Any, **kwargs: Any) -> Any: ... -class CompliantSeries(Protocol): - @property - def dtype(self) -> DType: ... - @property - def name(self) -> str: ... - @property - def native(self) -> Any: ... - def __narwhals_series__(self) -> CompliantSeries: ... - def alias(self, name: str) -> Self: ... - def _from_native_series(self, series: Any) -> Self: ... - - -CompliantSeriesT_co = TypeVar( - "CompliantSeriesT_co", bound=CompliantSeries, covariant=True -) - - -class CompliantDataFrame(Protocol[CompliantSeriesT_co]): - def __narwhals_dataframe__(self) -> Self: ... - def __narwhals_namespace__(self) -> Any: ... - def simple_select( - self, *column_names: str - ) -> Self: ... # `select` where all args are column names. - def aggregate(self, *exprs: Any) -> Self: - ... # `select` where all args are aggregations or literals - # (so, no broadcasting is necessary). - - @property - def columns(self) -> Sequence[str]: ... - @property - def schema(self) -> Mapping[str, DType]: ... - def get_column(self, name: str) -> CompliantSeriesT_co: ... - def iter_columns(self) -> Iterator[CompliantSeriesT_co]: ... - - -class CompliantLazyFrame(Protocol): - def __narwhals_lazyframe__(self) -> Self: ... - def __narwhals_namespace__(self) -> Any: ... - def simple_select( - self, *column_names: str - ) -> Self: ... # `select` where all args are column names. - def aggregate(self, *exprs: Any) -> Self: - ... # `select` where all args are aggregations or literals - # (so, no broadcasting is necessary). - - @property - def columns(self) -> Sequence[str]: ... - @property - def schema(self) -> Mapping[str, DType]: ... - def _iter_columns(self) -> Iterator[Any]: ... - - -CompliantFrameT = TypeVar( - "CompliantFrameT", bound="CompliantDataFrame[Any] | CompliantLazyFrame" -) - - -class CompliantExpr(Protocol38[CompliantFrameT, CompliantSeriesT_co]): - _implementation: Implementation - _backend_version: tuple[int, ...] - _version: Version - _evaluate_output_names: Callable[[CompliantFrameT], Sequence[str]] - _alias_output_names: Callable[[Sequence[str]], Sequence[str]] | None - _depth: int - _function_name: str - - def __call__(self, df: CompliantFrameT) -> Sequence[CompliantSeriesT_co]: ... - def __narwhals_expr__(self) -> None: ... - def __narwhals_namespace__( - self, - ) -> CompliantNamespace[CompliantFrameT, CompliantSeriesT_co]: ... - def is_null(self) -> Self: ... - def abs(self) -> Self: ... - def all(self) -> Self: ... - def any(self) -> Self: ... - def alias(self, name: str) -> Self: ... - def cast(self, dtype: DType | type[DType]) -> Self: ... - def count(self) -> Self: ... - def min(self) -> Self: ... - def max(self) -> Self: ... - def arg_min(self) -> Self: ... - def arg_max(self) -> Self: ... - def arg_true(self) -> Self: ... - def mean(self) -> Self: ... - def sum(self) -> Self: ... - def median(self) -> Self: ... - def skew(self) -> Self: ... - def std(self, *, ddof: int) -> Self: ... - def var(self, *, ddof: int) -> Self: ... - def n_unique(self) -> Self: ... - def null_count(self) -> Self: ... - def drop_nulls(self) -> Self: ... - def fill_null( - self, - value: Any | None, - strategy: Literal["forward", "backward"] | None, - limit: int | None, - ) -> Self: ... - def diff(self) -> Self: ... - def unique(self) -> Self: ... - def len(self) -> Self: ... - def round(self, decimals: int) -> Self: ... - def mode(self) -> Self: ... - def head(self, n: int) -> Self: ... - def tail(self, n: int) -> Self: ... - def shift(self, n: int) -> Self: ... - def is_finite(self) -> Self: ... - def is_nan(self) -> Self: ... - def is_unique(self) -> Self: ... - def is_first_distinct(self) -> Self: ... - def is_last_distinct(self) -> Self: ... - def cum_sum(self, *, reverse: bool) -> Self: ... - def cum_count(self, *, reverse: bool) -> Self: ... - def cum_min(self, *, reverse: bool) -> Self: ... - def cum_max(self, *, reverse: bool) -> Self: ... - def cum_prod(self, *, reverse: bool) -> Self: ... - def is_in(self, other: Any) -> Self: ... - def sort(self, *, descending: bool, nulls_last: bool) -> Self: ... - def rank( - self, - method: Literal["average", "min", "max", "dense", "ordinal"], - *, - descending: bool, - ) -> Self: ... - def replace_strict( - self, - old: Sequence[Any] | Mapping[Any, Any], - new: Sequence[Any], - *, - return_dtype: DType | type[DType] | None, - ) -> Self: ... - def over( - self: Self, keys: Sequence[str], kind: ExprKind, order_by: Sequence[str] | None - ) -> Self: ... - def sample( - self, - n: int | None, - *, - fraction: float | None, - with_replacement: bool, - seed: int | None, - ) -> Self: ... - def quantile( - self, - quantile: float, - interpolation: Literal["nearest", "higher", "lower", "midpoint", "linear"], - ) -> Self: ... - def map_batches( - self, - function: Callable[[CompliantSeries], CompliantExpr[Any, Any]], - return_dtype: DType | type[DType] | None, - ) -> Self: ... - - @property - def str(self) -> Any: ... - @property - def name(self) -> Any: ... - @property - def dt(self) -> Any: ... - @property - def cat(self) -> Any: ... - @property - def list(self) -> Any: ... - @property - def struct(self) -> Any: ... - - @unstable - def ewm_mean( - self, - *, - com: float | None, - span: float | None, - half_life: float | None, - alpha: float | None, - adjust: bool, - min_samples: int, - ignore_nulls: bool, - ) -> Self: ... - - @unstable - def rolling_sum( - self, - window_size: int, - *, - min_samples: int, - center: bool, - ) -> Self: ... - - @unstable - def rolling_mean( - self, - window_size: int, - *, - min_samples: int, - center: bool, - ) -> Self: ... - - @unstable - def rolling_var( - self, - window_size: int, - *, - min_samples: int, - center: bool, - ddof: int, - ) -> Self: ... - - @unstable - def rolling_std( - self, - window_size: int, - *, - min_samples: int, - center: bool, - ddof: int, - ) -> Self: ... - - @deprecated("Since `1.22.0`") - def gather_every(self, n: int, offset: int) -> Self: ... - def __and__(self, other: Any) -> Self: ... - def __or__(self, other: Any) -> Self: ... - def __add__(self, other: Any) -> Self: ... - def __sub__(self, other: Any) -> Self: ... - def __mul__(self, other: Any) -> Self: ... - def __floordiv__(self, other: Any) -> Self: ... - def __truediv__(self, other: Any) -> Self: ... - def __mod__(self, other: Any) -> Self: ... - def __pow__(self, other: Any) -> Self: ... - def __gt__(self, other: Any) -> Self: ... - def __ge__(self, other: Any) -> Self: ... - def __lt__(self, other: Any) -> Self: ... - def __le__(self, other: Any) -> Self: ... - def __invert__(self) -> Self: ... - def broadcast( - self, kind: Literal[ExprKind.AGGREGATION, ExprKind.LITERAL] - ) -> Self: ... - - -class CompliantNamespace(Protocol[CompliantFrameT, CompliantSeriesT_co]): - def col( - self, *column_names: str - ) -> CompliantExpr[CompliantFrameT, CompliantSeriesT_co]: ... - def lit( - self, value: Any, dtype: DType | None - ) -> CompliantExpr[CompliantFrameT, CompliantSeriesT_co]: ... - @property - def selectors(self) -> CompliantSelectorNamespace[Any, Any]: ... - - class SupportsNativeNamespace(Protocol): def __native_namespace__(self) -> ModuleType: ... -IntoCompliantExpr: TypeAlias = ( - "CompliantExpr[CompliantFrameT, CompliantSeriesT_co] | CompliantSeriesT_co" -) - IntoExpr: TypeAlias = Union["Expr", str, "Series[Any]"] """Anything which can be converted to an expression. diff --git a/narwhals/utils.py b/narwhals/utils.py index 8512c79afe..2d775cfe9f 100644 --- a/narwhals/utils.py +++ b/narwhals/utils.py @@ -52,13 +52,14 @@ from typing_extensions import TypeAlias from typing_extensions import TypeIs + from narwhals._compliant import CompliantExpr + from narwhals._compliant import CompliantFrameT + from narwhals._compliant import CompliantSeriesOrNativeExprT_co from narwhals.dataframe import DataFrame from narwhals.dataframe import LazyFrame from narwhals.dtypes import DType from narwhals.series import Series from narwhals.typing import CompliantDataFrame - from narwhals.typing import CompliantExpr - from narwhals.typing import CompliantFrameT from narwhals.typing import CompliantLazyFrame from narwhals.typing import CompliantSeries from narwhals.typing import DataFrameLike @@ -1461,8 +1462,8 @@ def is_compliant_series(obj: Any) -> TypeIs[CompliantSeries]: def is_compliant_expr( - obj: CompliantExpr[CompliantFrameT, CompliantSeriesT_co] | Any, -) -> TypeIs[CompliantExpr[CompliantFrameT, CompliantSeriesT_co]]: + obj: CompliantExpr[CompliantFrameT, CompliantSeriesOrNativeExprT_co] | Any, +) -> TypeIs[CompliantExpr[CompliantFrameT, CompliantSeriesOrNativeExprT_co]]: return hasattr(obj, "__narwhals_expr__") diff --git a/pyproject.toml b/pyproject.toml index d95912faf3..cb6f1a5781 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -230,6 +230,8 @@ omit = [ 'narwhals/_spark_like/*', # we don't run these in every environment 'tests/ibis_test.py', + # Remove after finishing eager sub-protocols + 'narwhals/_compliant/*', ] exclude_also = [ "if sys.version_info() <",