diff --git a/narwhals/_arrow/series.py b/narwhals/_arrow/series.py index 7236ef0ce6..a60b2f06bb 100644 --- a/narwhals/_arrow/series.py +++ b/narwhals/_arrow/series.py @@ -745,7 +745,7 @@ def is_sorted(self, *, descending: bool) -> bool: result = pc.all(pc.less_equal(self.native[:-1], self.native[1:])) return maybe_extract_py_scalar(result, return_py_scalar=True) - def unique(self, *, maintain_order: bool) -> Self: + def unique(self, *, maintain_order: bool = True) -> Self: # TODO(marco): `pc.unique` seems to always maintain order, is that guaranteed? return self._with_native(self.native.unique()) diff --git a/narwhals/_compliant/column.py b/narwhals/_compliant/column.py new file mode 100644 index 0000000000..880bdf605d --- /dev/null +++ b/narwhals/_compliant/column.py @@ -0,0 +1,211 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING, Any, Protocol + +if TYPE_CHECKING: + from collections.abc import Mapping, Sequence + + from typing_extensions import Self + + from narwhals._compliant.any_namespace import ( + CatNamespace, + DateTimeNamespace, + ListNamespace, + StringNamespace, + StructNamespace, + ) + from narwhals._compliant.namespace import CompliantNamespace + from narwhals._utils import Version + from narwhals.typing import ( + ClosedInterval, + FillNullStrategy, + IntoDType, + NonNestedLiteral, + NumericLiteral, + RankMethod, + TemporalLiteral, + ) + +__all__ = ["CompliantColumn"] + + +class CompliantColumn(Protocol): + """Common parts of `Expr`, `Series`.""" + + _version: Version + + def __add__(self, other: Any) -> Self: ... + def __and__(self, other: Any) -> Self: ... + def __eq__(self, other: object) -> Self: ... # type: ignore[override] + def __floordiv__(self, other: Any) -> Self: ... + def __ge__(self, other: Any) -> Self: ... + def __gt__(self, other: Any) -> Self: ... + def __invert__(self) -> Self: ... + def __le__(self, other: Any) -> Self: ... + def __lt__(self, other: Any) -> Self: ... + def __mod__(self, other: Any) -> Self: ... + def __mul__(self, other: Any) -> Self: ... + def __ne__(self, other: object) -> Self: ... # type: ignore[override] + def __or__(self, other: Any) -> Self: ... + def __pow__(self, other: Any) -> Self: ... + def __rfloordiv__(self, other: Any) -> Self: ... + def __rmod__(self, other: Any) -> Self: ... + def __rpow__(self, other: Any) -> Self: ... + def __rsub__(self, other: Any) -> Self: ... + def __rtruediv__(self, other: Any) -> Self: ... + def __sub__(self, other: Any) -> Self: ... + def __truediv__(self, other: Any) -> Self: ... + + def __narwhals_namespace__(self) -> CompliantNamespace[Any, Any]: ... + + def abs(self) -> Self: ... + def alias(self, name: str) -> Self: ... + def cast(self, dtype: IntoDType) -> Self: ... + def clip( + self, + lower_bound: Self | NumericLiteral | TemporalLiteral | None, + upper_bound: Self | NumericLiteral | TemporalLiteral | None, + ) -> Self: ... + def cum_count(self, *, reverse: bool) -> Self: ... + def cum_max(self, *, reverse: bool) -> Self: ... + def cum_min(self, *, reverse: bool) -> Self: ... + def cum_prod(self, *, reverse: bool) -> Self: ... + def cum_sum(self, *, reverse: bool) -> Self: ... + def diff(self) -> Self: ... + def drop_nulls(self) -> Self: ... + def ewm_mean( + self, + *, + com: float | None, + span: float | None, + half_life: float | None, + alpha: float | None, + adjust: bool, + min_samples: int, + ignore_nulls: bool, + ) -> Self: ... + def exp(self) -> Self: ... + def sqrt(self) -> Self: ... + def fill_null( + self, + value: Self | NonNestedLiteral, + strategy: FillNullStrategy | None, + limit: int | None, + ) -> Self: ... + def is_between( + self, lower_bound: Self, upper_bound: Self, closed: ClosedInterval + ) -> Self: + if closed == "left": + return (self >= lower_bound) & (self < upper_bound) + if closed == "right": + return (self > lower_bound) & (self <= upper_bound) + if closed == "none": + return (self > lower_bound) & (self < upper_bound) + return (self >= lower_bound) & (self <= upper_bound) + + def is_close( + self, + other: Self | NumericLiteral, + *, + abs_tol: float, + rel_tol: float, + nans_equal: bool, + ) -> Self: + from decimal import Decimal + + other_abs: Self | NumericLiteral + other_is_nan: Self | bool + other_is_inf: Self | bool + other_is_not_inf: Self | bool + + if isinstance(other, (float, int, Decimal)): + from math import isinf, isnan + + # NOTE: See https://discuss.python.org/t/inferred-type-of-function-that-calls-dunder-abs-abs/101447 + other_abs = other.__abs__() + other_is_nan = isnan(other) + other_is_inf = isinf(other) + + # Define the other_is_not_inf variable to prevent triggering the following warning: + # > DeprecationWarning: Bitwise inversion '~' on bool is deprecated and will be + # > removed in Python 3.16. + other_is_not_inf = not other_is_inf + + else: + other_abs, other_is_nan = other.abs(), other.is_nan() + other_is_not_inf = other.is_finite() | other_is_nan + other_is_inf = ~other_is_not_inf + + rel_threshold = self.abs().clip(lower_bound=other_abs, upper_bound=None) * rel_tol + tolerance = rel_threshold.clip(lower_bound=abs_tol, upper_bound=None) + + self_is_nan = self.is_nan() + self_is_not_inf = self.is_finite() | self_is_nan + + # Values are close if abs_diff <= tolerance, and both finite + is_close = ( + ((self - other).abs() <= tolerance) & self_is_not_inf & other_is_not_inf + ) + + # Handle infinity cases: infinities are close/equal if they have the same sign + self_sign, other_sign = self > 0, other > 0 + is_same_inf = (~self_is_not_inf) & other_is_inf & (self_sign == other_sign) + + # Handle nan cases: + # * If any value is NaN, then False (via `& ~either_nan`) + # * However, if `nans_equals = True` and if _both_ values are NaN, then True + either_nan = self_is_nan | other_is_nan + result = (is_close | is_same_inf) & ~either_nan + + if nans_equal: + both_nan = self_is_nan & other_is_nan + result = result | both_nan + + return result + + def is_duplicated(self) -> Self: + return ~self.is_unique() + + def is_finite(self) -> Self: ... + def is_first_distinct(self) -> Self: ... + def is_in(self, other: Any) -> Self: ... + def is_last_distinct(self) -> Self: ... + def is_nan(self) -> Self: ... + def is_null(self) -> Self: ... + def is_unique(self) -> Self: ... + def log(self, base: float) -> Self: ... + def mode(self) -> Self: ... + def rank(self, method: RankMethod, *, descending: bool) -> Self: ... + def replace_strict( + self, + old: Sequence[Any] | Mapping[Any, Any], + new: Sequence[Any], + *, + return_dtype: IntoDType | None, + ) -> Self: ... + def rolling_mean( + self, window_size: int, *, min_samples: int, center: bool + ) -> Self: ... + def rolling_std( + self, window_size: int, *, min_samples: int, center: bool, ddof: int + ) -> Self: ... + def rolling_sum( + self, window_size: int, *, min_samples: int, center: bool + ) -> Self: ... + def rolling_var( + self, window_size: int, *, min_samples: int, center: bool, ddof: int + ) -> Self: ... + def round(self, decimals: int) -> Self: ... + def shift(self, n: int) -> Self: ... + def unique(self) -> Self: ... + + @property + def str(self) -> StringNamespace[Self]: ... + @property + def dt(self) -> DateTimeNamespace[Self]: ... + @property + def cat(self) -> CatNamespace[Self]: ... + @property + def list(self) -> ListNamespace[Self]: ... + @property + def struct(self) -> StructNamespace[Self]: ... diff --git a/narwhals/_compliant/expr.py b/narwhals/_compliant/expr.py index 57a790eb9e..86ad88ea98 100644 --- a/narwhals/_compliant/expr.py +++ b/narwhals/_compliant/expr.py @@ -13,6 +13,7 @@ StringNamespace, StructNamespace, ) +from narwhals._compliant.column import CompliantColumn from narwhals._compliant.namespace import CompliantNamespace from narwhals._compliant.typing import ( AliasName, @@ -27,7 +28,6 @@ LazyExprT, NativeExprT, ) -from narwhals._compliant.utils import IsClose from narwhals._utils import _StoresCompliant from narwhals.dependencies import get_numpy, is_numpy_array @@ -42,6 +42,7 @@ from narwhals._expression_parsing import ExprKind, ExprMetadata from narwhals._utils import Implementation, Version, _LimitedContext from narwhals.typing import ( + ClosedInterval, FillNullStrategy, IntoDType, NonNestedLiteral, @@ -76,9 +77,10 @@ def __eq__(self, value: Any, /) -> Self: ... # type: ignore[override] def __ne__(self, value: Any, /) -> Self: ... # type: ignore[override] -class CompliantExpr(IsClose, Protocol[CompliantFrameT, CompliantSeriesOrNativeExprT_co]): +class CompliantExpr( + CompliantColumn, Protocol[CompliantFrameT, CompliantSeriesOrNativeExprT_co] +): _implementation: Implementation - _version: Version _evaluate_output_names: EvalNames[CompliantFrameT] _alias_output_names: AliasNames | None _metadata: ExprMetadata | None @@ -108,12 +110,8 @@ def fn(df: CompliantFrameT) -> Sequence[str]: return fn - def is_null(self) -> Self: ... - def abs(self) -> Self: ... def all(self) -> Self: ... def any(self) -> Self: ... - def alias(self, name: str) -> Self: ... - def cast(self, dtype: IntoDType) -> Self: ... def count(self) -> Self: ... def min(self) -> Self: ... def max(self) -> Self: ... @@ -126,41 +124,7 @@ def std(self, *, ddof: int) -> Self: ... def var(self, *, ddof: int) -> Self: ... def n_unique(self) -> Self: ... def null_count(self) -> Self: ... - def drop_nulls(self) -> Self: ... - def fill_null( - self, - value: Self | NonNestedLiteral, - strategy: FillNullStrategy | None, - limit: int | None, - ) -> Self: ... - def diff(self) -> Self: ... - def exp(self) -> Self: ... - def sqrt(self) -> Self: ... - def unique(self) -> Self: ... def len(self) -> Self: ... - def log(self, base: float) -> Self: ... - def round(self, decimals: int) -> Self: ... - def mode(self) -> Self: ... - def shift(self, n: int) -> Self: ... - def is_finite(self) -> Self: ... - def is_nan(self) -> Self: ... - def is_unique(self) -> Self: ... - def is_first_distinct(self) -> Self: ... - def is_last_distinct(self) -> Self: ... - def cum_sum(self, *, reverse: bool) -> Self: ... - def cum_count(self, *, reverse: bool) -> Self: ... - def cum_min(self, *, reverse: bool) -> Self: ... - def cum_max(self, *, reverse: bool) -> Self: ... - def cum_prod(self, *, reverse: bool) -> Self: ... - def is_in(self, other: Any) -> Self: ... - def rank(self, method: RankMethod, *, descending: bool) -> Self: ... - def replace_strict( - self, - old: Sequence[Any] | Mapping[Any, Any], - new: Sequence[Any], - *, - return_dtype: IntoDType | None, - ) -> Self: ... def over(self, partition_by: Sequence[str], order_by: Sequence[str]) -> Self: ... def quantile( self, quantile: float, interpolation: RollingInterpolationMethod @@ -170,55 +134,6 @@ def map_batches( function: Callable[[CompliantSeries[Any]], CompliantExpr[Any, Any]], return_dtype: IntoDType | None, ) -> Self: ... - - def clip( - self, - lower_bound: Self | NumericLiteral | TemporalLiteral | None, - upper_bound: Self | NumericLiteral | TemporalLiteral | None, - ) -> Self: ... - - def ewm_mean( - self, - *, - com: float | None, - span: float | None, - half_life: float | None, - alpha: float | None, - adjust: bool, - min_samples: int, - ignore_nulls: bool, - ) -> Self: ... - - def rolling_sum( - self, window_size: int, *, min_samples: int, center: bool - ) -> Self: ... - - def rolling_mean( - self, window_size: int, *, min_samples: int, center: bool - ) -> Self: ... - - def rolling_var( - self, window_size: int, *, min_samples: int, center: bool, ddof: int - ) -> Self: ... - - def rolling_std( - self, window_size: int, *, min_samples: int, center: bool, ddof: int - ) -> Self: ... - - def __and__(self, other: Any) -> Self: ... - def __or__(self, other: Any) -> Self: ... - def __add__(self, other: Any) -> Self: ... - def __sub__(self, other: Any) -> Self: ... - def __mul__(self, other: Any) -> Self: ... - def __floordiv__(self, other: Any) -> Self: ... - def __truediv__(self, other: Any) -> Self: ... - def __mod__(self, other: Any) -> Self: ... - def __pow__(self, other: Any) -> Self: ... - def __gt__(self, other: Any) -> Self: ... - def __ge__(self, other: Any) -> Self: ... - def __lt__(self, other: Any) -> Self: ... - def __le__(self, other: Any) -> Self: ... - def __invert__(self) -> Self: ... def broadcast( self, kind: Literal[ExprKind.AGGREGATION, ExprKind.LITERAL] ) -> Self: ... @@ -242,18 +157,8 @@ def _evaluate_aliases( names = self._evaluate_output_names(frame) return alias(names) if (alias := self._alias_output_names) else names - @property - def str(self) -> StringNamespace[Self]: ... @property def name(self) -> NameNamespace[Self]: ... - @property - def dt(self) -> DateTimeNamespace[Self]: ... - @property - def cat(self) -> CatNamespace[Self]: ... - @property - def list(self) -> ListNamespace[Self]: ... - @property - def struct(self) -> StructNamespace[Self]: ... class DepthTrackingExpr( @@ -543,65 +448,71 @@ def func(df: EagerDataFrameT) -> list[EagerSeriesT]: def cast(self, dtype: IntoDType) -> Self: return self._reuse_series("cast", dtype=dtype) + def _with_binary(self, operator: str, other: Self | Any, /) -> Self: + return self._reuse_series(operator, other=other) + + def _with_binary_right(self, operator: str, other: Self | Any, /) -> Self: + return self.alias("literal")._reuse_series(operator, other=other) + def __eq__(self, other: Self | Any) -> Self: # type: ignore[override] - return self._reuse_series("__eq__", other=other) + return self._with_binary("__eq__", other) def __ne__(self, other: Self | Any) -> Self: # type: ignore[override] - return self._reuse_series("__ne__", other=other) + return self._with_binary("__ne__", other) def __ge__(self, other: Self | Any) -> Self: - return self._reuse_series("__ge__", other=other) + return self._with_binary("__ge__", other) def __gt__(self, other: Self | Any) -> Self: - return self._reuse_series("__gt__", other=other) + return self._with_binary("__gt__", other) def __le__(self, other: Self | Any) -> Self: - return self._reuse_series("__le__", other=other) + return self._with_binary("__le__", other) def __lt__(self, other: Self | Any) -> Self: - return self._reuse_series("__lt__", other=other) + return self._with_binary("__lt__", other) def __and__(self, other: Self | bool | Any) -> Self: - return self._reuse_series("__and__", other=other) + return self._with_binary("__and__", other) def __or__(self, other: Self | bool | Any) -> Self: - return self._reuse_series("__or__", other=other) + return self._with_binary("__or__", other) def __add__(self, other: Self | Any) -> Self: - return self._reuse_series("__add__", other=other) + return self._with_binary("__add__", other) def __sub__(self, other: Self | Any) -> Self: - return self._reuse_series("__sub__", other=other) + return self._with_binary("__sub__", other) def __rsub__(self, other: Self | Any) -> Self: - return self.alias("literal")._reuse_series("__rsub__", other=other) + return self._with_binary_right("__rsub__", other) def __mul__(self, other: Self | Any) -> Self: - return self._reuse_series("__mul__", other=other) + return self._with_binary("__mul__", other) def __truediv__(self, other: Self | Any) -> Self: - return self._reuse_series("__truediv__", other=other) + return self._with_binary("__truediv__", other) def __rtruediv__(self, other: Self | Any) -> Self: - return self.alias("literal")._reuse_series("__rtruediv__", other=other) + return self._with_binary_right("__rtruediv__", other) def __floordiv__(self, other: Self | Any) -> Self: - return self._reuse_series("__floordiv__", other=other) + return self._with_binary("__floordiv__", other) def __rfloordiv__(self, other: Self | Any) -> Self: - return self.alias("literal")._reuse_series("__rfloordiv__", other=other) + return self._with_binary_right("__rfloordiv__", other) def __pow__(self, other: Self | Any) -> Self: - return self._reuse_series("__pow__", other=other) + return self._with_binary("__pow__", other) def __rpow__(self, other: Self | Any) -> Self: - return self.alias("literal")._reuse_series("__rpow__", other=other) + return self._with_binary_right("__rpow__", other) def __mod__(self, other: Self | Any) -> Self: - return self._reuse_series("__mod__", other=other) + return self._with_binary("__mod__", other) def __rmod__(self, other: Self | Any) -> Self: - return self.alias("literal")._reuse_series("__rmod__", other=other) + return self._with_binary_right("__rmod__", other) # Unary def __invert__(self) -> Self: @@ -903,6 +814,13 @@ def exp(self) -> Self: def sqrt(self) -> Self: return self._reuse_series("sqrt") + def is_between( + self, lower_bound: Any, upper_bound: Any, closed: ClosedInterval + ) -> Self: + return self._reuse_series( + "is_between", lower_bound=lower_bound, upper_bound=upper_bound, closed=closed + ) + def is_close( self, other: Self | NumericLiteral, diff --git a/narwhals/_compliant/series.py b/narwhals/_compliant/series.py index e8a88590d3..b49157e1f7 100644 --- a/narwhals/_compliant/series.py +++ b/narwhals/_compliant/series.py @@ -9,6 +9,7 @@ StringNamespace, StructNamespace, ) +from narwhals._compliant.column import CompliantColumn from narwhals._compliant.typing import ( CompliantSeriesT_co, EagerDataFrameAny, @@ -16,7 +17,6 @@ NativeSeriesT, NativeSeriesT_co, ) -from narwhals._compliant.utils import IsClose from narwhals._translate import FromIterable, FromNative, NumpyConvertible, ToNarwhals from narwhals._typing_compat import TypeVar, assert_never from narwhals._utils import ( @@ -28,7 +28,7 @@ ) if TYPE_CHECKING: - from collections.abc import Iterable, Iterator, Mapping, Sequence + from collections.abc import Iterable, Iterator, Sequence from types import ModuleType import pandas as pd @@ -38,22 +38,16 @@ from narwhals._compliant.dataframe import CompliantDataFrame from narwhals._compliant.expr import CompliantExpr, EagerExpr - from narwhals._compliant.namespace import CompliantNamespace, EagerNamespace + from narwhals._compliant.namespace import EagerNamespace from narwhals._utils import Implementation, Version, _LimitedContext from narwhals.dtypes import DType from narwhals.series import Series from narwhals.typing import ( - ClosedInterval, - FillNullStrategy, Into1DArray, IntoDType, MultiIndexSelector, - NonNestedLiteral, - NumericLiteral, - RankMethod, RollingInterpolationMethod, SizedMultiIndexSelector, - TemporalLiteral, _1DArray, _SliceIndex, ) @@ -79,15 +73,14 @@ class HistData(TypedDict, Generic[NativeSeriesT, "_CountsT_co"]): class CompliantSeries( - IsClose, NumpyConvertible["_1DArray", "Into1DArray"], FromIterable, FromNative[NativeSeriesT], ToNarwhals["Series[NativeSeriesT]"], + CompliantColumn, Protocol[NativeSeriesT], ): _implementation: Implementation - _version: Version @property def dtype(self) -> DType: ... @@ -98,7 +91,6 @@ def native(self) -> NativeSeriesT: ... def __narwhals_series__(self) -> Self: return self - def __narwhals_namespace__(self) -> CompliantNamespace[Any, Any]: ... def __native_namespace__(self) -> ModuleType: ... def __array__(self, dtype: Any, *, copy: bool | None) -> _1DArray: ... def __contains__(self, other: Any) -> bool: ... @@ -127,122 +119,35 @@ def from_iterable( def to_narwhals(self) -> Series[NativeSeriesT]: return self._version.series(self, level="full") - # Operators - def __add__(self, other: Any) -> Self: ... - def __and__(self, other: Any) -> Self: ... - def __eq__(self, other: object) -> Self: ... # type: ignore[override] - def __floordiv__(self, other: Any) -> Self: ... - def __ge__(self, other: Any) -> Self: ... - def __gt__(self, other: Any) -> Self: ... - def __invert__(self) -> Self: ... - def __le__(self, other: Any) -> Self: ... - def __lt__(self, other: Any) -> Self: ... - def __mod__(self, other: Any) -> Self: ... - def __mul__(self, other: Any) -> Self: ... - def __ne__(self, other: object) -> Self: ... # type: ignore[override] - def __or__(self, other: Any) -> Self: ... - def __pow__(self, other: Any) -> Self: ... def __radd__(self, other: Any) -> Self: ... def __rand__(self, other: Any) -> Self: ... - def __rfloordiv__(self, other: Any) -> Self: ... - def __rmod__(self, other: Any) -> Self: ... def __rmul__(self, other: Any) -> Self: ... def __ror__(self, other: Any) -> Self: ... - def __rpow__(self, other: Any) -> Self: ... - def __rsub__(self, other: Any) -> Self: ... - def __rtruediv__(self, other: Any) -> Self: ... - def __sub__(self, other: Any) -> Self: ... - def __truediv__(self, other: Any) -> Self: ... - - def abs(self) -> Self: ... - def alias(self, name: str) -> Self: ... def all(self) -> bool: ... def any(self) -> bool: ... def arg_max(self) -> int: ... def arg_min(self) -> int: ... def arg_true(self) -> Self: ... - def cast(self, dtype: IntoDType) -> Self: ... - def clip( - self, - lower_bound: Self | NumericLiteral | TemporalLiteral | None, - upper_bound: Self | NumericLiteral | TemporalLiteral | None, - ) -> Self: ... def count(self) -> int: ... - def cum_count(self, *, reverse: bool) -> Self: ... - def cum_max(self, *, reverse: bool) -> Self: ... - def cum_min(self, *, reverse: bool) -> Self: ... - def cum_prod(self, *, reverse: bool) -> Self: ... - def cum_sum(self, *, reverse: bool) -> Self: ... - def diff(self) -> Self: ... - def drop_nulls(self) -> Self: ... - def ewm_mean( - self, - *, - com: float | None, - span: float | None, - half_life: float | None, - alpha: float | None, - adjust: bool, - min_samples: int, - ignore_nulls: bool, - ) -> Self: ... - def exp(self) -> Self: ... - def sqrt(self) -> Self: ... - def fill_null( - self, - value: Self | NonNestedLiteral, - strategy: FillNullStrategy | None, - limit: int | None, - ) -> Self: ... def filter(self, predicate: Any) -> Self: ... def gather_every(self, n: int, offset: int) -> Self: ... def head(self, n: int) -> Self: ... - def is_between( - self, lower_bound: Any, upper_bound: Any, closed: ClosedInterval - ) -> Self: ... - def is_finite(self) -> Self: ... - def is_first_distinct(self) -> Self: ... - def is_in(self, other: Any) -> Self: ... - def is_last_distinct(self) -> Self: ... - def is_nan(self) -> Self: ... - def is_null(self) -> Self: ... + def is_empty(self) -> bool: + return self.len() == 0 + def is_sorted(self, *, descending: bool) -> bool: ... - def is_unique(self) -> Self: ... def item(self, index: int | None) -> Any: ... def kurtosis(self) -> float | None: ... def len(self) -> int: ... - def log(self, base: float) -> Self: ... def max(self) -> Any: ... def mean(self) -> float: ... def median(self) -> float: ... def min(self) -> Any: ... - def mode(self) -> Self: ... def n_unique(self) -> int: ... def null_count(self) -> int: ... def quantile( self, quantile: float, interpolation: RollingInterpolationMethod ) -> float: ... - def rank(self, method: RankMethod, *, descending: bool) -> Self: ... - def replace_strict( - self, - old: Sequence[Any] | Mapping[Any, Any], - new: Sequence[Any], - *, - return_dtype: IntoDType | None, - ) -> Self: ... - def rolling_mean( - self, window_size: int, *, min_samples: int, center: bool - ) -> Self: ... - def rolling_std( - self, window_size: int, *, min_samples: int, center: bool, ddof: int - ) -> Self: ... - def rolling_sum( - self, window_size: int, *, min_samples: int, center: bool - ) -> Self: ... - def rolling_var( - self, window_size: int, *, min_samples: int, center: bool, ddof: int - ) -> Self: ... - def round(self, decimals: int) -> Self: ... def sample( self, n: int | None, @@ -266,7 +171,7 @@ def to_frame(self) -> CompliantDataFrame[Self, Any, Any, Any]: ... def to_list(self) -> list[Any]: ... def to_pandas(self) -> pd.Series[Any]: ... def to_polars(self) -> pl.Series: ... - def unique(self, *, maintain_order: bool) -> Self: ... + def unique(self, *, maintain_order: bool = False) -> Self: ... def value_counts( self, *, sort: bool, parallel: bool, name: str | None, normalize: bool ) -> CompliantDataFrame[Self, Any, Any, Any]: ... @@ -286,17 +191,6 @@ def hist_from_bin_count( """`Series.hist(bins=None, bin_count=...)`.""" ... - @property - def str(self) -> StringNamespace[Self]: ... - @property - def dt(self) -> DateTimeNamespace[Self]: ... - @property - def cat(self) -> CatNamespace[Self]: ... - @property - def list(self) -> ListNamespace[Self]: ... - @property - def struct(self) -> StructNamespace[Self]: ... - class EagerSeries(CompliantSeries[NativeSeriesT], Protocol[NativeSeriesT]): _native_series: Any diff --git a/narwhals/_compliant/utils.py b/narwhals/_compliant/utils.py deleted file mode 100644 index 3a88e39561..0000000000 --- a/narwhals/_compliant/utils.py +++ /dev/null @@ -1,88 +0,0 @@ -from __future__ import annotations - -from typing import TYPE_CHECKING, Any, Protocol - -if TYPE_CHECKING: - from typing_extensions import Self - - from narwhals.typing import NumericLiteral, TemporalLiteral - - -class IsClose(Protocol): - """Every member defined is a dependency of `is_close` method.""" - - def __and__(self, other: Any) -> Self: ... - def __or__(self, other: Any) -> Self: ... - def __invert__(self) -> Self: ... - def __sub__(self, other: Any) -> Self: ... - def __mul__(self, other: Any) -> Self: ... - def __eq__(self, other: Self | Any) -> Self: ... # type: ignore[override] - def __gt__(self, other: Any) -> Self: ... - def __le__(self, other: Any) -> Self: ... - def abs(self) -> Self: ... - def is_nan(self) -> Self: ... - def is_finite(self) -> Self: ... - def clip( - self, - lower_bound: Self | NumericLiteral | TemporalLiteral | None, - upper_bound: Self | NumericLiteral | TemporalLiteral | None, - ) -> Self: ... - def is_close( - self, - other: Self | NumericLiteral, - *, - abs_tol: float, - rel_tol: float, - nans_equal: bool, - ) -> Self: - from decimal import Decimal - - other_abs: Self | NumericLiteral - other_is_nan: Self | bool - other_is_inf: Self | bool - other_is_not_inf: Self | bool - - if isinstance(other, (float, int, Decimal)): - from math import isinf, isnan - - # NOTE: See https://discuss.python.org/t/inferred-type-of-function-that-calls-dunder-abs-abs/101447 - other_abs = other.__abs__() - other_is_nan = isnan(other) - other_is_inf = isinf(other) - - # Define the other_is_not_inf variable to prevent triggering the following warning: - # > DeprecationWarning: Bitwise inversion '~' on bool is deprecated and will be - # > removed in Python 3.16. - other_is_not_inf = not other_is_inf - - else: - other_abs, other_is_nan = other.abs(), other.is_nan() - other_is_not_inf = other.is_finite() | other_is_nan - other_is_inf = ~other_is_not_inf - - rel_threshold = self.abs().clip(lower_bound=other_abs, upper_bound=None) * rel_tol - tolerance = rel_threshold.clip(lower_bound=abs_tol, upper_bound=None) - - self_is_nan = self.is_nan() - self_is_not_inf = self.is_finite() | self_is_nan - - # Values are close if abs_diff <= tolerance, and both finite - is_close = ( - ((self - other).abs() <= tolerance) & self_is_not_inf & other_is_not_inf - ) - - # Handle infinity cases: infinities are close/equal if they have the same sign - self_sign, other_sign = self > 0, other > 0 - is_same_inf = (~self_is_not_inf) & other_is_inf & (self_sign == other_sign) - - # Handle nan cases: - # * If any value is NaN, then False (via `& ~either_nan`) - # * However, if `nans_equals = True` and if _both_ values are NaN, then True - either_nan = self_is_nan | other_is_nan - result = (is_close | is_same_inf) & ~either_nan - - if nans_equal: - both_nan = self_is_nan & other_is_nan - result = result | both_nan - - return result diff --git a/narwhals/_polars/expr.py b/narwhals/_polars/expr.py index 1210ee08e0..80b0c9dee1 100644 --- a/narwhals/_polars/expr.py +++ b/narwhals/_polars/expr.py @@ -343,6 +343,8 @@ def _eval_names_indices(indices: Sequence[int], /) -> EvalNames[PolarsDataFrame] fill_null: Method[Self] gather_every: Method[Self] head: Method[Self] + is_between: Method[Self] + is_duplicated: Method[Self] is_finite: Method[Self] is_first_distinct: Method[Self] is_in: Method[Self] @@ -372,6 +374,11 @@ def _eval_names_indices(indices: Sequence[int], /) -> EvalNames[PolarsDataFrame] tail: Method[Self] unique: Method[Self] var: Method[Self] + __rfloordiv__: Method[Self] + __rsub__: Method[Self] + __rmod__: Method[Self] + __rpow__: Method[Self] + __rtruediv__: Method[Self] class PolarsExprNamespace(PolarsAnyNamespace[PolarsExpr, pl.Expr]): diff --git a/narwhals/_polars/namespace.py b/narwhals/_polars/namespace.py index 6dfa5317b9..047952916b 100644 --- a/narwhals/_polars/namespace.py +++ b/narwhals/_polars/namespace.py @@ -34,7 +34,7 @@ class PolarsNamespace: when: Method[CompliantWhen[PolarsDataFrame, PolarsSeries, PolarsExpr]] - _implementation = Implementation.POLARS + _implementation: Implementation = Implementation.POLARS @property def _backend_version(self) -> tuple[int, ...]: diff --git a/narwhals/_polars/series.py b/narwhals/_polars/series.py index e94f712534..0f219d1db2 100644 --- a/narwhals/_polars/series.py +++ b/narwhals/_polars/series.py @@ -92,6 +92,8 @@ "head", "is_between", "is_close", + "is_duplicated", + "is_empty", "is_finite", "is_first_distinct", "is_in", @@ -677,6 +679,8 @@ def struct(self) -> PolarsSeriesStructNamespace: gather_every: Method[Self] head: Method[Self] is_between: Method[Self] + is_duplicated: Method[Self] + is_empty: Method[bool] is_finite: Method[Self] is_first_distinct: Method[Self] is_in: Method[Self] diff --git a/narwhals/expr.py b/narwhals/expr.py index 935882dac1..6d34f7789c 100644 --- a/narwhals/expr.py +++ b/narwhals/expr.py @@ -1,6 +1,7 @@ from __future__ import annotations import math +import operator as op from collections.abc import Iterable, Mapping, Sequence from typing import TYPE_CHECKING, Any, Callable @@ -179,177 +180,91 @@ def cast(self, dtype: IntoDType) -> Self: ) # --- binary --- - def __eq__(self, other: Self | Any) -> Self: # type: ignore[override] + def _with_binary( + self, + function: Callable[[Any, Any], Any], + other: Self | Any, + *, + str_as_lit: bool = True, + ) -> Self: return self.__class__( lambda plx: apply_n_ary_operation( - plx, lambda x, y: x == y, self, other, str_as_lit=True + plx, function, self, other, str_as_lit=str_as_lit ), ExprMetadata.from_binary_op(self, other), ) + def __eq__(self, other: Self | Any) -> Self: # type: ignore[override] + return self._with_binary(op.eq, other) + def __ne__(self, other: Self | Any) -> Self: # type: ignore[override] - return self.__class__( - lambda plx: apply_n_ary_operation( - plx, lambda x, y: x != y, self, other, str_as_lit=True - ), - ExprMetadata.from_binary_op(self, other), - ) + return self._with_binary(op.ne, other) def __and__(self, other: Any) -> Self: - return self.__class__( - lambda plx: apply_n_ary_operation( - plx, lambda x, y: x & y, self, other, str_as_lit=True - ), - ExprMetadata.from_binary_op(self, other), - ) + return self._with_binary(op.and_, other) def __rand__(self, other: Any) -> Self: return (self & other).alias("literal") # type: ignore[no-any-return] def __or__(self, other: Any) -> Self: - return self.__class__( - lambda plx: apply_n_ary_operation( - plx, lambda x, y: x | y, self, other, str_as_lit=True - ), - ExprMetadata.from_binary_op(self, other), - ) + return self._with_binary(op.or_, other) def __ror__(self, other: Any) -> Self: return (self | other).alias("literal") # type: ignore[no-any-return] def __add__(self, other: Any) -> Self: - return self.__class__( - lambda plx: apply_n_ary_operation( - plx, lambda x, y: x + y, self, other, str_as_lit=True - ), - ExprMetadata.from_binary_op(self, other), - ) + return self._with_binary(op.add, other) def __radd__(self, other: Any) -> Self: return (self + other).alias("literal") # type: ignore[no-any-return] def __sub__(self, other: Any) -> Self: - return self.__class__( - lambda plx: apply_n_ary_operation( - plx, lambda x, y: x - y, self, other, str_as_lit=True - ), - ExprMetadata.from_binary_op(self, other), - ) + return self._with_binary(op.sub, other) def __rsub__(self, other: Any) -> Self: - return self.__class__( - lambda plx: apply_n_ary_operation( - plx, lambda x, y: x.__rsub__(y), self, other, str_as_lit=True - ), - ExprMetadata.from_binary_op(self, other), - ) + return self._with_binary(lambda x, y: x.__rsub__(y), other) def __truediv__(self, other: Any) -> Self: - return self.__class__( - lambda plx: apply_n_ary_operation( - plx, lambda x, y: x / y, self, other, str_as_lit=True - ), - ExprMetadata.from_binary_op(self, other), - ) + return self._with_binary(op.truediv, other) def __rtruediv__(self, other: Any) -> Self: - return self.__class__( - lambda plx: apply_n_ary_operation( - plx, lambda x, y: x.__rtruediv__(y), self, other, str_as_lit=True - ), - ExprMetadata.from_binary_op(self, other), - ) + return self._with_binary(lambda x, y: x.__rtruediv__(y), other) def __mul__(self, other: Any) -> Self: - return self.__class__( - lambda plx: apply_n_ary_operation( - plx, lambda x, y: x * y, self, other, str_as_lit=True - ), - ExprMetadata.from_binary_op(self, other), - ) + return self._with_binary(op.mul, other) def __rmul__(self, other: Any) -> Self: return (self * other).alias("literal") # type: ignore[no-any-return] def __le__(self, other: Any) -> Self: - return self.__class__( - lambda plx: apply_n_ary_operation( - plx, lambda x, y: x <= y, self, other, str_as_lit=True - ), - ExprMetadata.from_binary_op(self, other), - ) + return self._with_binary(op.le, other) def __lt__(self, other: Any) -> Self: - return self.__class__( - lambda plx: apply_n_ary_operation( - plx, lambda x, y: x < y, self, other, str_as_lit=True - ), - ExprMetadata.from_binary_op(self, other), - ) + return self._with_binary(op.lt, other) def __gt__(self, other: Any) -> Self: - return self.__class__( - lambda plx: apply_n_ary_operation( - plx, lambda x, y: x > y, self, other, str_as_lit=True - ), - ExprMetadata.from_binary_op(self, other), - ) + return self._with_binary(op.gt, other) def __ge__(self, other: Any) -> Self: - return self.__class__( - lambda plx: apply_n_ary_operation( - plx, lambda x, y: x >= y, self, other, str_as_lit=True - ), - ExprMetadata.from_binary_op(self, other), - ) + return self._with_binary(op.ge, other) def __pow__(self, other: Any) -> Self: - return self.__class__( - lambda plx: apply_n_ary_operation( - plx, lambda x, y: x**y, self, other, str_as_lit=True - ), - ExprMetadata.from_binary_op(self, other), - ) + return self._with_binary(op.pow, other) def __rpow__(self, other: Any) -> Self: - return self.__class__( - lambda plx: apply_n_ary_operation( - plx, lambda x, y: x.__rpow__(y), self, other, str_as_lit=True - ), - ExprMetadata.from_binary_op(self, other), - ) + return self._with_binary(lambda x, y: x.__rpow__(y), other) def __floordiv__(self, other: Any) -> Self: - return self.__class__( - lambda plx: apply_n_ary_operation( - plx, lambda x, y: x // y, self, other, str_as_lit=True - ), - ExprMetadata.from_binary_op(self, other), - ) + return self._with_binary(op.floordiv, other) def __rfloordiv__(self, other: Any) -> Self: - return self.__class__( - lambda plx: apply_n_ary_operation( - plx, lambda x, y: x.__rfloordiv__(y), self, other, str_as_lit=True - ), - ExprMetadata.from_binary_op(self, other), - ) + return self._with_binary(lambda x, y: x.__rfloordiv__(y), other) def __mod__(self, other: Any) -> Self: - return self.__class__( - lambda plx: apply_n_ary_operation( - plx, lambda x, y: x % y, self, other, str_as_lit=True - ), - ExprMetadata.from_binary_op(self, other), - ) + return self._with_binary(op.mod, other) def __rmod__(self, other: Any) -> Self: - return self.__class__( - lambda plx: apply_n_ary_operation( - plx, lambda x, y: x.__rmod__(y), self, other, str_as_lit=True - ), - ExprMetadata.from_binary_op(self, other), - ) + return self._with_binary(lambda x, y: x.__rmod__(y), other) # --- unary --- def __invert__(self) -> Self: @@ -1003,32 +918,24 @@ def is_between( | 4 5 False | └──────────────────┘ """ - - def func( - compliant_expr: CompliantExpr[Any, Any], - lb: CompliantExpr[Any, Any], - ub: CompliantExpr[Any, Any], - ) -> CompliantExpr[Any, Any]: - if closed == "left": - return (compliant_expr >= lb) & (compliant_expr < ub) - if closed == "right": - return (compliant_expr > lb) & (compliant_expr <= ub) - if closed == "none": - return (compliant_expr > lb) & (compliant_expr < ub) - return (compliant_expr >= lb) & (compliant_expr <= ub) - + metadata = combine_metadata( + self, + lower_bound, + upper_bound, + str_as_lit=False, + allow_multi_output=False, + to_single_output=False, + ) return self.__class__( lambda plx: apply_n_ary_operation( - plx, func, self, lower_bound, upper_bound, str_as_lit=False - ), - combine_metadata( + plx, + lambda slf, lb, ub: slf.is_between(lb, ub, closed=closed), self, lower_bound, upper_bound, str_as_lit=False, - allow_multi_output=False, - to_single_output=False, ), + metadata, ) def is_in(self, other: Any) -> Self: @@ -1387,7 +1294,7 @@ def is_duplicated(self) -> Self: |3 1 c True False| └─────────────────────────────────────────┘ """ - return ~self.is_unique() + return self._with_window(lambda plx: self._to_compliant_expr(plx).is_duplicated()) def is_unique(self) -> Self: r"""Return a boolean mask indicating unique values. diff --git a/narwhals/series.py b/narwhals/series.py index 0add7675ff..af9094de58 100644 --- a/narwhals/series.py +++ b/narwhals/series.py @@ -1588,7 +1588,7 @@ def is_duplicated(self) -> Self: ] ] """ - return ~self.is_unique() + return self._with_compliant(self._compliant_series.is_duplicated()) def is_empty(self) -> bool: r"""Check if the series is empty. @@ -1605,7 +1605,7 @@ def is_empty(self) -> bool: >>> s_nw.filter(s_nw > 10).is_empty() True """ - return self._compliant_series.len() == 0 + return self._compliant_series.is_empty() def is_unique(self) -> Self: r"""Get a mask of all unique rows in the Series.