Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion narwhals/_arrow/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -585,7 +585,7 @@ def tail(self, n: int) -> Self:
return self._with_native(self.native.slice(max(0, num_rows - n)))
return self._with_native(self.native.slice(abs(n)))

def is_in(self, other: Any) -> Self:
def is_in(self, other: Sequence[Any] | ChunkedArrayAny) -> Self:
if self._is_native(other):
value_set: ArrayOrChunkedArray = other
else:
Expand Down
2 changes: 1 addition & 1 deletion narwhals/_compliant/column.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@ def is_duplicated(self) -> Self:

def is_finite(self) -> Self: ...
def is_first_distinct(self) -> Self: ...
def is_in(self, other: Any) -> Self: ...
def is_in(self, other: Sequence[Any]) -> Self: ...
def is_last_distinct(self) -> Self: ...
def is_nan(self) -> Self: ...
def is_null(self) -> Self: ...
Expand Down
6 changes: 4 additions & 2 deletions narwhals/_compliant/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
if TYPE_CHECKING:
from collections.abc import Mapping, Sequence

from typing_extensions import Self, TypeIs
from typing_extensions import Self, TypeAlias, TypeIs

from narwhals._compliant.namespace import CompliantNamespace, EagerNamespace
from narwhals._compliant.series import CompliantSeries
Expand All @@ -61,6 +61,8 @@

__all__ = ["CompliantExpr", "DepthTrackingExpr", "EagerExpr", "LazyExpr", "NativeExpr"]

Incomplete: TypeAlias = "Any"


class NativeExpr(Protocol):
"""An `Expr`-like object from a package with [Lazy-only support](https://narwhals-dev.github.io/narwhals/extending/#levels-of-support).
Expand Down Expand Up @@ -613,7 +615,7 @@ def fill_null(
"fill_null", value=value, scalar_kwargs={"strategy": strategy, "limit": limit}
)

def is_in(self, other: Any) -> Self:
def is_in(self, other: Sequence[Any] | Incomplete) -> Self:
return self._reuse_series("is_in", other=other)

def arg_true(self) -> Self:
Expand Down
1 change: 1 addition & 0 deletions narwhals/_compliant/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,7 @@ def head(self, n: int) -> Self: ...
def is_empty(self) -> bool:
return self.len() == 0

def is_in(self, other: Sequence[Any] | NativeSeriesT) -> Self: ...
def is_sorted(self, *, descending: bool) -> bool: ...
def item(self, index: int | None) -> Any: ...
def kurtosis(self) -> float | None: ...
Expand Down
2 changes: 1 addition & 1 deletion narwhals/_dask/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -611,7 +611,7 @@ def func(expr: dx.Series) -> dx.Series:

return self._with_callable(func, "is_unique")

def is_in(self, other: Any) -> Self:
def is_in(self, other: Sequence[Any]) -> Self:
return self._with_callable(lambda expr: expr.isin(other), "is_in")

def null_count(self) -> Self:
Expand Down
8 changes: 4 additions & 4 deletions narwhals/_pandas_like/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@

PandasHistData: TypeAlias = "HistData[pd.Series[Any], list[float]]"


Incomplete: TypeAlias = "Any"
PANDAS_TO_NUMPY_DTYPE_NO_MISSING = {
"Int64": "int64",
"int64[pyarrow]": "int64",
Expand Down Expand Up @@ -109,7 +109,7 @@
}


class PandasLikeSeries(EagerSeries[Any]):
class PandasLikeSeries(EagerSeries[Incomplete]):
def __init__(
self, native_series: Any, *, implementation: Implementation, version: Version
) -> None:
Expand All @@ -125,7 +125,7 @@ def __init__(
self._broadcast = False

@property
def native(self) -> Any:
def native(self) -> Incomplete:
return self._native_series

def __native_namespace__(self) -> ModuleType:
Expand Down Expand Up @@ -362,7 +362,7 @@ def is_between(
assert_never(closed)
return self._with_native(res).alias(ser.name)

def is_in(self, other: Any) -> Self:
def is_in(self, other: Sequence[Any] | Incomplete) -> Self:
return self._with_native(self.native.isin(other))

def arg_true(self) -> Self:
Expand Down
56 changes: 56 additions & 0 deletions narwhals/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
is_numpy_array_1d_int,
is_pandas_like_dataframe,
is_pandas_like_series,
is_polars_series,
)
from narwhals.exceptions import ColumnNotFoundError, DuplicateError, InvalidOperationError

Expand Down Expand Up @@ -124,6 +125,7 @@
CompliantLazyFrame,
CompliantSeries,
DTypes,
EagerAllowed,
FileSource,
IntoSeriesT,
MultiIndexSelector,
Expand Down Expand Up @@ -2120,3 +2122,57 @@ def extend_bool(
Stolen from https://github.com/pola-rs/polars/blob/b8bfb07a4a37a8d449d6d1841e345817431142df/py-polars/polars/_utils/various.py#L580-L594
"""
return (value,) * n_match if isinstance(value, bool) else tuple(value)


# Structural type for any object exposing a `to_list()` method that returns a list.
# Serves as the `TypeIs` narrowing target of the `can_to_list` guard.
# The unconventional name (hence the N801 suppression) mirrors the spelling of
# the `to_list` method and keeps it distinct from `_CanToList` (`tolist`).
class _CanTo_List(Protocol): # noqa: N801
def to_list(self, *args: Any, **kwds: Any) -> list[Any]: ...


# Structural type for any object exposing a `tolist()` method that returns a list.
# Serves as the `TypeIs` narrowing target of the `can_tolist` guard, which also
# accepts 1-D numpy arrays.
class _CanToList(Protocol):
def tolist(self, *args: Any, **kwds: Any) -> list[Any]: ...
Comment on lines +2127 to +2132
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FWIW I hate this as well 😂

Copy link
Member Author

@dangotbanned dangotbanned Oct 15, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe a better idea would be to ...

Rename _CanTo_List -> ToList, and move to _translate.py alongside:

class ToDict(Protocol[ToDictDT_co]):
def to_dict(self, *args: Any, **kwds: Any) -> ToDictDT_co: ...

Move these as well, but rename them to reflect that their naming originates from numpy and pyarrow (respectively):

narwhals/narwhals/_utils.py

Lines 2131 to 2132 in 0c66432

class _CanToList(Protocol):
def tolist(self, *args: Any, **kwds: Any) -> list[Any]: ...

narwhals/narwhals/_utils.py

Lines 2135 to 2136 in 0c66432

class _CanTo_PyList(Protocol): # noqa: N801
def to_pylist(self, *args: Any, **kwds: Any) -> list[Any]: ...

The names of the guards can still stay the same, since their implementations will (after updating the protocol names) show the link between origin, protocol, and method name:

narwhals/narwhals/_utils.py

Lines 2139 to 2144 in 0c66432

def can_to_list(obj: Any) -> TypeIs[_CanTo_List]:
return (
is_narwhals_series(obj)
or is_polars_series(obj)
or _hasattr_static(obj, "to_list")
)

narwhals/narwhals/_utils.py

Lines 2147 to 2148 in 0c66432

def can_tolist(obj: Any) -> TypeIs[_CanToList]:
return is_numpy_array_1d(obj) or _hasattr_static(obj, "tolist")

narwhals/narwhals/_utils.py

Lines 2151 to 2154 in 0c66432

def can_to_pylist(obj: Any) -> TypeIs[_CanTo_PyList]:
return (
(pa := get_pyarrow()) and isinstance(obj, (pa.Array, pa.ChunkedArray))
) or _hasattr_static(obj, "to_pylist")

Copy link
Member

@FBruzzesi FBruzzesi Oct 18, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I updated my comment (#3207 (comment)) - I would argue that native series are not ok, while numpy 1d arrays are.

My thought process for this is that if someone is doing something along the lines of:

import narwhals as nw
import polars as pl

def agnostic_func(frame: IntoDataFrameT) -> IntoDataFrameT:
    other = pl.Series([1, 2, 3])  # <- notice how this is a native series!!!
    return nw.from_native(frame).filter(nw.col("x").is_in(other)).to_native()

then the function is clearly not agnostic and polars would be required in this case.

A different case would be if a narwhals series with a different backend is provided. This could mean that the function is agnostic but a user is "mixing" backends:

def is_left_in_right(left_series: IntoSeriesT, right_series: IntoSeriesT) -> IntoSeriesT:
    left_nw = nw.from_native(left_series, series_only=True)
    right_nw = nw.from_native(right_series, series_only=True)
    return left_nw.is_in(right_nw).to_native()

# but now it is the user mixing backends, not the library itself

is_left_in_right(pl.Series([1,2,3]), pd.Series([0, 1]))

This is the case I suggested to yell at the user with a warning.


From our side, I think it would greatly simplify (read as, get rid of) most of the protocols here, the type guards, as well as the `iterable_to_sequence` function.



# Structural type for any object exposing a `to_pylist()` method that returns a list.
# Serves as the `TypeIs` narrowing target of the `can_to_pylist` guard, which also
# accepts pyarrow `Array` / `ChunkedArray` instances.
class _CanTo_PyList(Protocol): # noqa: N801
def to_pylist(self, *args: Any, **kwds: Any) -> list[Any]: ...


def can_to_list(obj: Any) -> TypeIs[_CanTo_List]:
    """Return whether `obj` can be converted to a list via `obj.to_list()`.

    Narwhals and polars series are accepted outright; any other object
    qualifies when `_hasattr_static` finds a `to_list` attribute on it.
    """
    if is_narwhals_series(obj) or is_polars_series(obj):
        return True
    return _hasattr_static(obj, "to_list")


def can_tolist(obj: Any) -> TypeIs[_CanToList]:
    """Return whether `obj` can be converted to a list via `obj.tolist()`.

    1-D numpy arrays are accepted outright; any other object qualifies when
    `_hasattr_static` finds a `tolist` attribute on it.
    """
    if is_numpy_array_1d(obj):
        return True
    return _hasattr_static(obj, "tolist")


def can_to_pylist(obj: Any) -> TypeIs[_CanTo_PyList]:
    """Return whether `obj` can be converted to a list via `obj.to_pylist()`.

    When `get_pyarrow()` yields a truthy module, pyarrow `Array` and
    `ChunkedArray` instances are accepted outright; any other object
    qualifies when `_hasattr_static` finds a `to_pylist` attribute on it.
    """
    pa = get_pyarrow()
    # Only consult pyarrow's classes when the module object is available.
    if pa and isinstance(obj, (pa.Array, pa.ChunkedArray)):
        return True
    return _hasattr_static(obj, "to_pylist")


def iterable_to_sequence(
    iterable: Iterable[Any], /, *, backend: EagerAllowed | None = None
) -> Sequence[Any]:
    """Materialize `iterable` as a sequence.

    Arguments:
        iterable: The values to collect.
        backend: When provided, the values are first loaded into a narwhals
            `Series` for that backend and then exported with `to_list()`.

    Returns:
        - With a `backend`: the list produced by `Series.to_list()`.
        - A `tuple` or `list` is returned unchanged (no copy).
        - Any other `Iterator` or `Sequence` is collected into a `tuple`.
        - Objects recognised by `can_to_list`, `can_tolist` or
          `can_to_pylist` (checked in that order) are converted with the
          corresponding method.
        - Anything else is collected into a `tuple`.
    """
    if backend is not None:
        # Imported locally; presumably to avoid a circular import - TODO confirm.
        from narwhals.series import Series

        return Series.from_iterable("", iterable, backend=backend).to_list()
    if isinstance(iterable, (tuple, list)):
        return iterable
    if isinstance(iterable, (Iterator, Sequence)):
        return tuple(iterable)
    # Prefer an object's own bulk conversion over element-wise iteration.
    if can_to_list(iterable):
        return iterable.to_list()
    if can_tolist(iterable):
        return iterable.tolist()
    if can_to_pylist(iterable):
        return iterable.to_pylist()
    return tuple(iterable)
17 changes: 11 additions & 6 deletions narwhals/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,14 @@
ExprMetadata,
apply_n_ary_operation,
combine_metadata,
is_series,
)
from narwhals._utils import (
_validate_rolling_arguments,
ensure_type,
flatten,
iterable_to_sequence,
)
from narwhals._utils import _validate_rolling_arguments, ensure_type, flatten
from narwhals.dtypes import _validate_dtype
from narwhals.exceptions import ComputeError, InvalidOperationError
from narwhals.expr_cat import ExprCatNamespace
Expand All @@ -19,7 +25,6 @@
from narwhals.expr_name import ExprNameNamespace
from narwhals.expr_str import ExprStringNamespace
from narwhals.expr_struct import ExprStructNamespace
from narwhals.translate import to_native

if TYPE_CHECKING:
from typing import NoReturn, TypeVar
Expand Down Expand Up @@ -968,7 +973,7 @@ def is_between(
upper_bound,
)

def is_in(self, other: Any) -> Self:
def is_in(self, other: Iterable[Any]) -> Self:
"""Check if elements of this expression are present in the other iterable.

Arguments:
Expand All @@ -991,10 +996,10 @@ def is_in(self, other: Any) -> Self:
└──────────────────┘
"""
if isinstance(other, Iterable) and not isinstance(other, (str, bytes)):
other = other.to_native() if is_series(other) else iterable_to_sequence(other)
return self._with_elementwise(
lambda plx: self._to_compliant_expr(plx).is_in(
to_native(other, pass_through=True)
)
# TODO @dangotbanned: Fix after getting feedback on https://github.com/narwhals-dev/narwhals/pull/3207#discussion_r2430089632
lambda plx: self._to_compliant_expr(plx).is_in(other) # pyright: ignore[reportArgumentType]
)
msg = "Narwhals `is_in` doesn't accept expressions as an argument, as opposed to Polars. You should provide an iterable instead."
raise NotImplementedError(msg)
Expand Down
11 changes: 7 additions & 4 deletions narwhals/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
is_compliant_series,
is_eager_allowed,
is_index_selector,
iterable_to_sequence,
qualified_type_name,
supports_arrow_c_stream,
)
Expand All @@ -25,7 +26,6 @@
from narwhals.series_list import SeriesListNamespace
from narwhals.series_str import SeriesStringNamespace
from narwhals.series_struct import SeriesStructNamespace
from narwhals.translate import to_native
from narwhals.typing import IntoSeriesT

if TYPE_CHECKING:
Expand Down Expand Up @@ -926,7 +926,7 @@ def last(self) -> PythonLiteral:
"""
return self._compliant_series.last()

def is_in(self, other: Any) -> Self:
def is_in(self, other: Iterable[Any]) -> Self:
"""Check if the elements of this Series are in the other sequence.

Arguments:
Expand All @@ -948,9 +948,12 @@ def is_in(self, other: Any) -> Self:
]
]
"""
return self._with_compliant(
self._compliant_series.is_in(to_native(other, pass_through=True))
other = (
other.to_native()
if isinstance(other, Series)
else iterable_to_sequence(other, backend=self.implementation)
)
return self._with_compliant(self._compliant_series.is_in(other))

def arg_true(self) -> Self:
"""Find elements where boolean Series is True.
Expand Down
Loading
Loading