- 
                Notifications
    You must be signed in to change notification settings 
- Fork 170
          refactor: Add CompliantSeries.from_numpy
          #2196
        
          New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
All other backends already had these
- Less repetition, but also helps document what the 2nd `TypeVar` is for (`from_`) - It has to be in that position to follow the rules of https://typing.python.org/en/latest/spec/generics.html#default-ordering-and-subscription-rules
- We've already got the compat handled there - `polars` handles the rest in https://github.com/pola-rs/polars/blob/889a2a7a57be5da432b6fa854ab698bbaf1b02ff/py-polars/polars/series/series.py#L1357-L1399
| if isinstance(arg, Series): | ||
| return arg._compliant_series._to_expr() | ||
| if isinstance(arg, Expr): | ||
| return arg._to_compliant_expr(self.__narwhals_namespace__()) | ||
| return arg._to_compliant_expr(self.__narwhals_namespace__()) # comment | ||
| if isinstance(arg, str): | ||
| return plx.col(arg) | ||
| if get_polars() is not None and "polars" in str(type(arg)): # pragma: no cover | 
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think that starting from here we should be handing off entirely to a CompliantNamespace method.
In addition to simplifying this part - we could reuse that again instead of:
narwhals/narwhals/_expression_parsing.py
Lines 93 to 107 in 7611bd4
| def extract_compliant( | |
| plx: CompliantNamespace[CompliantFrameT, CompliantSeriesOrNativeExprT_co], | |
| other: Any, | |
| *, | |
| str_as_lit: bool, | |
| ) -> CompliantExpr[CompliantFrameT, CompliantSeriesOrNativeExprT_co] | object: | |
| if is_expr(other): | |
| return other._to_compliant_expr(plx) | |
| if isinstance(other, str) and not str_as_lit: | |
| return plx.col(other) | |
| if is_narwhals_series(other): | |
| return other._compliant_series._to_expr() | |
| if is_numpy_array(other): | |
| return plx._create_compliant_series(other)._to_expr() # type: ignore[attr-defined] | |
| return other | 
Then everywhere we currently do:
from narwhals._expression_parsing import extract_compliant
plx: CompliantNamespace
extract_compliant(plx, ...)Would be something like this:
plx: CompliantNamespace
plx._extract_compliant(...)There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I had some notes on this locally:
from __future__ import annotations
from typing import Any, Protocol
class ParseCompliant(Protocol):
    """Somewhat of an extended [polars._utils.parse.parse_into_expression]
    Covers cases that are similar, but the latter is narrower:
    - `nw.dataframe.BaseFrame._extract_compliant`
    - `nw._expression_parsing.extract_compliant`
    General
    - Most usage requires a ref to `__narwhals_namespace__`
    - Series/BaseFrame can convert internally
    [polars._utils.parse.parse_into_expression]: https://github.com/pola-rs/polars/blob/9092a0e90005aa98077217f01e725ac4f386a335/py-polars/polars/_utils/parse/expr.py#L20-L63
    """  # noqa: D415
    def _parse_compliant(self, arg: Any, /) -> Any: ...| @classmethod | ||
| def from_numpy(cls, data: Into1DArray, /, *, context: _FullContext) -> Self: ... | 
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Linking this back to nw.functions.new_series, we might wanna have this as:
    @classmethod
    def from_numpy(
        cls,
        data: Into1DArray,
        /,
        *,
        context: _FullContext,
        name: str = "",
        dtype: DType | type[DType] | None = None,
    ) -> Self: ...narwhals/narwhals/functions.py
Lines 188 to 194 in 7611bd4
| def new_series( | |
| name: str, | |
| values: Any, | |
| dtype: DType | type[DType] | None = None, | |
| *, | |
| native_namespace: ModuleType, | |
| ) -> Series[Any]: | 
| else: # pragma: no cover | ||
| import sys | ||
| from importlib.util import find_spec | ||
|  | ||
| if sys.version_info >= (3, 13): | ||
| from typing import TypeVar | ||
| elif find_spec("typing_extensions"): | ||
| from typing_extensions import TypeVar | ||
| else: | ||
| from typing import TypeVar as _TypeVar | ||
|  | ||
| def TypeVar( # noqa: ANN202, N802 | ||
| name: str, | ||
| *constraints: Any, | ||
| bound: Any | None = None, | ||
| covariant: bool = False, | ||
| contravariant: bool = False, | ||
| **kwds: Any, # noqa: ARG001 | ||
| ): | ||
| return _TypeVar( | ||
| name, | ||
| *constraints, | ||
| bound=bound, | ||
| covariant=covariant, | ||
| contravariant=contravariant, | ||
| ) | 
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is a trick to get TypeVar defaults - but in a move reusable way than (#2110 (comment))
I've used them heavily when testing out the .(to|from_) protocols across the rest of the API
Peek
# NOTE: `nw.dataframe.DataFrame`
class NarwhalsDataFrame(
    ArrowConvertible[_ArrowTable, IntoArrowTable],
    PandasConvertible[_PandasDataFrame],
    PolarsConvertible[_PolarsDataFrame],
    NumpyConvertible[_2DArray],
    NarwhalsDictCovertible,
    NarwhalsNativeConvertible[NativeDataFrameT],
    CompliantConvertible["CompliantDataFrame_[NativeDataFrameT, CompliantSeries_T]"],
    ParseCompliant,
    Generic[NativeDataFrameT, CompliantSeries_T],
):| class NumpyConvertible( | ||
| ToNumpy[ToNumpyT_co], | ||
| FromNumpy[FromNumpyDT_contra], | ||
| Protocol[ToNumpyT_co, FromNumpyDT_contra], | ||
| ): | ||
| def to_numpy(self, dtype: Any, *, copy: bool | None) -> ToNumpyT_co: ... | 
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In relation to (https://github.com/narwhals-dev/narwhals/pull/2196/files#r1993954699) - we could then do stuff like this which uses the same TypeVar twice:
from narwhals._translate import NumpyConvertible
from narwhals.typing import _2DArray
NumpyConvertible[_2DArray]There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think that's closer to what we'd want on CompliantDataFrame and https://narwhals-dev.github.io/narwhals/api-reference/narwhals/#narwhals.from_numpy
- Only needs to be the extra stuff - `_create_compliant_series` is removed in #2196
| @classmethod | ||
| def from_numpy(cls, data: Into1DArray, /, *, context: _FullContext) -> Self: | ||
| return cls( | ||
| pl.Series(data if is_numpy_array_1d(data) else [data]), | 
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
not sure I follow this condition, where does it come from in the current code?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ah yeah this might look strange in PolarsSeries.
It seems there isn't an equivalent of ._from_scalar(value=...)
So it is mainly to match the logic of the other backends:
narwhals/narwhals/_arrow/series.py
Lines 141 to 165 in 6a5ed1d
| @classmethod | |
| def _from_iterable( | |
| cls: type[Self], | |
| data: Iterable[Any], | |
| name: str, | |
| *, | |
| context: _FullContext, | |
| ) -> Self: | |
| return cls( | |
| chunked_array([data]), | |
| name=name, | |
| backend_version=context._backend_version, | |
| version=context._version, | |
| ) | |
| def _from_scalar(self, value: Any) -> Self: | |
| if self._backend_version < (13,) and hasattr(value, "as_py"): | |
| value = value.as_py() | |
| return super()._from_scalar(value) | |
| @classmethod | |
| def from_numpy(cls, data: Into1DArray, /, *, context: _FullContext) -> Self: | |
| return cls._from_iterable( | |
| data if is_numpy_array_1d(data) else [data], name="", context=context | |
| ) | 
narwhals/narwhals/_compliant/series.py
Lines 57 to 63 in 6a5ed1d
| def _from_scalar(self, value: Any) -> Self: | |
| return self._from_iterable([value], name=self.name, context=self) | |
| @classmethod | |
| def _from_iterable( | |
| cls: type[Self], data: Iterable[Any], name: str, *, context: _FullContext | |
| ) -> Self: ... | 
narwhals/narwhals/_pandas_like/series.py
Lines 175 to 205 in 6a5ed1d
| @classmethod | |
| def _from_iterable( | |
| cls: type[Self], | |
| data: Iterable[Any], | |
| name: str, | |
| *, | |
| context: _FullContext, | |
| index: Any = None, # NOTE: Originally a liskov substitution principle violation | |
| ) -> Self: | |
| return cls( | |
| native_series_from_iterable( | |
| data, | |
| name=name, | |
| index=[] if index is None else index, | |
| implementation=context._implementation, | |
| ), | |
| implementation=context._implementation, | |
| backend_version=context._backend_version, | |
| version=context._version, | |
| ) | |
| @classmethod | |
| def from_numpy(cls, data: Into1DArray, /, *, context: _FullContext) -> Self: | |
| implementation = context._implementation | |
| arr = data if is_numpy_array_1d(data) else [data] | |
| return cls( | |
| implementation.to_native_namespace().Series(arr, name=""), | |
| implementation=implementation, | |
| backend_version=context._backend_version, | |
| version=context._version, | |
| ) | 
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I imagine we'd probably end up having CompliantSeries with @classmethod's like:
CompliantSeries.from_numpy
CompliantSeries.from_iterable
CompliantSeries.from_scalar
# Maybe more that aren't relevant hereWhere they might have overlapping and/or default implementations higher up in the protocol.
E.g. (5d609a7)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ok, i see from the type hint that it's clear actually, we only get here with numpy scalars or numpy 1d arrays
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah exactly!
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| @MarcoGorelli this would be the start of putting it all together. 
 Full diff
 
 diff --git a/narwhals/_compliant/dataframe.py b/narwhals/_compliant/dataframe.py
index ed1d83b9..c31f2c5b 100644
--- a/narwhals/_compliant/dataframe.py
+++ b/narwhals/_compliant/dataframe.py
@@ -12,6 +12,7 @@ from typing import TypeVar
 from narwhals._compliant.typing import CompliantSeriesT_co
 from narwhals._compliant.typing import EagerSeriesT
 from narwhals._expression_parsing import evaluate_output_names_and_aliases
+from narwhals._translate import NumpyConvertible
 
 if TYPE_CHECKING:
     from typing_extensions import Self
@@ -19,13 +20,14 @@ if TYPE_CHECKING:
 
     from narwhals._compliant.expr import EagerExpr
     from narwhals.dtypes import DType
+    from narwhals.typing import _2DArray  # noqa: F401
 
 __all__ = ["CompliantDataFrame", "CompliantLazyFrame", "EagerDataFrame"]
 
 T = TypeVar("T")
 
 
-class CompliantDataFrame(Protocol[CompliantSeriesT_co]):
+class CompliantDataFrame(NumpyConvertible["_2DArray"], Protocol[CompliantSeriesT_co]):
     def __narwhals_dataframe__(self) -> Self: ...
     def __narwhals_namespace__(self) -> Any: ...
     def simple_select(
diff --git a/narwhals/_compliant/namespace.py b/narwhals/_compliant/namespace.py
index f5449ec4..338306b6 100644
--- a/narwhals/_compliant/namespace.py
+++ b/narwhals/_compliant/namespace.py
@@ -6,13 +6,17 @@ from typing import Any
 from typing import Container
 from typing import Iterable
 from typing import Literal
+from typing import Mapping
 from typing import Protocol
+from typing import Sequence
+from typing import overload
 
 from narwhals._compliant.typing import CompliantExprT
 from narwhals._compliant.typing import CompliantFrameT
 from narwhals._compliant.typing import EagerDataFrameT
 from narwhals._compliant.typing import EagerExprT
 from narwhals._compliant.typing import EagerSeriesT_co
+from narwhals.dependencies import is_numpy_array_2d
 from narwhals.utils import exclude_column_names
 from narwhals.utils import get_column_names
 from narwhals.utils import passthrough_column_names
@@ -20,6 +24,9 @@ from narwhals.utils import passthrough_column_names
 if TYPE_CHECKING:
     from narwhals._compliant.selectors import CompliantSelectorNamespace
     from narwhals.dtypes import DType
+    from narwhals.schema import Schema
+    from narwhals.typing import Into1DArray
+    from narwhals.typing import _2DArray
     from narwhals.utils import Implementation
     from narwhals.utils import Version
 
@@ -84,3 +91,29 @@ class EagerNamespace(
 ):
     @property
     def _series(self) -> type[EagerSeriesT_co]: ...
+    @property
+    def _dataframe(self) -> type[EagerDataFrameT]: ...
+
+    @overload
+    def from_numpy(
+        self,
+        data: Into1DArray,
+        /,
+        schema: None = ...,
+    ) -> EagerSeriesT_co: ...
+    @overload
+    def from_numpy(
+        self,
+        data: _2DArray,
+        /,
+        schema: Mapping[str, DType] | Schema | Sequence[str],
+    ) -> EagerDataFrameT: ...
+    def from_numpy(
+        self,
+        data: Into1DArray | _2DArray,
+        /,
+        schema: Mapping[str, DType] | Schema | Sequence[str] | None = None,
+    ) -> EagerSeriesT_co | EagerDataFrameT:
+        if is_numpy_array_2d(data):
+            return self._dataframe.from_numpy(data, schema=schema)
+        return self._series.from_numpy(data, context=self)
Just the fun stuffclass EagerNamespace(
    CompliantNamespace[EagerDataFrameT, EagerExprT],
    Protocol[EagerDataFrameT, EagerSeriesT_co, EagerExprT],
):
    @property
    def _series(self) -> type[EagerSeriesT_co]: ...
    @property
    def _dataframe(self) -> type[EagerDataFrameT]: ...
    def from_numpy( # <--------------- regular method for `Namespace` only
        self,
        data: Into1DArray | _2DArray,
        /,
        schema: Mapping[str, DType] | Schema | Sequence[str] | None = None,
    ) -> EagerSeriesT_co | EagerDataFrameT:
        if is_numpy_array_2d(data):
            return self._dataframe.from_numpy(data, schema=schema)
        return self._series.from_numpy(data, context=self)I really think we can get some mileage out of this pattern  | 
| cool thanks! merge when ready | 

What type of PR is this? (check all applicable)
Related issues
_compliantsub-package #2149 (comment)_compliantsub-package #2149 (comment)Checklist
If you have comments or can explain your changes, please do so below
from_numpynw._translate.pywould be the home for similar protocols