diff --git a/narwhals/_arrow/dataframe.py b/narwhals/_arrow/dataframe.py index acbbdcda77..22650ab25f 100644 --- a/narwhals/_arrow/dataframe.py +++ b/narwhals/_arrow/dataframe.py @@ -53,12 +53,14 @@ from narwhals._arrow.typing import Mask # type: ignore[attr-defined] from narwhals._arrow.typing import Order # type: ignore[attr-defined] from narwhals.dtypes import DType + from narwhals.schema import Schema from narwhals.typing import CompliantDataFrame from narwhals.typing import CompliantLazyFrame from narwhals.typing import SizeUnit from narwhals.typing import _1DArray from narwhals.typing import _2DArray from narwhals.utils import Version + from narwhals.utils import _FullContext JoinType: TypeAlias = Literal[ "left semi", @@ -91,6 +93,29 @@ def __init__( self._version = version validate_backend_version(self._implementation, self._backend_version) + @classmethod + def from_numpy( + cls, + data: _2DArray, + /, + *, + context: _FullContext, + schema: Mapping[str, DType] | Schema | Sequence[str] | None, + ) -> Self: + from narwhals.schema import Schema + + arrays = [pa.array(val) for val in data.T] + if isinstance(schema, (Mapping, Schema)): + native = pa.Table.from_arrays(arrays, schema=Schema(schema).to_arrow()) + else: + native = pa.Table.from_arrays(arrays, cls._numpy_column_names(data, schema)) + return cls( + native, + backend_version=context._backend_version, + version=context._version, + validate_column_names=True, + ) + def __narwhals_namespace__(self: Self) -> ArrowNamespace: from narwhals._arrow.namespace import ArrowNamespace @@ -511,7 +536,7 @@ def to_polars(self: Self) -> pl.DataFrame: return pl.from_arrow(self.native) # type: ignore[return-value] - def to_numpy(self: Self) -> _2DArray: + def to_numpy(self: Self, dtype: Any = None, *, copy: bool | None = None) -> _2DArray: import numpy as np # ignore-banned-import arr: Any = np.column_stack([col.to_numpy() for col in self.native.columns]) diff --git a/narwhals/_arrow/namespace.py 
b/narwhals/_arrow/namespace.py index ae27db0576..acd4d98385 100644 --- a/narwhals/_arrow/namespace.py +++ b/narwhals/_arrow/namespace.py @@ -38,6 +38,10 @@ class ArrowNamespace(EagerNamespace[ArrowDataFrame, ArrowSeries, ArrowExpr]): + @property + def _dataframe(self) -> type[ArrowDataFrame]: + return ArrowDataFrame + @property def _expr(self) -> type[ArrowExpr]: return ArrowExpr diff --git a/narwhals/_compliant/dataframe.py b/narwhals/_compliant/dataframe.py index 39e3571715..76f4fdc333 100644 --- a/narwhals/_compliant/dataframe.py +++ b/narwhals/_compliant/dataframe.py @@ -18,6 +18,7 @@ from narwhals._compliant.typing import EagerSeriesT from narwhals._compliant.typing import NativeFrameT_co from narwhals._expression_parsing import evaluate_output_names_and_aliases +from narwhals._translate import NumpyConvertible from narwhals.utils import Version from narwhals.utils import _StoresNative from narwhals.utils import deprecated @@ -34,9 +35,11 @@ from narwhals._compliant.group_by import CompliantGroupBy from narwhals.dtypes import DType + from narwhals.schema import Schema from narwhals.typing import SizeUnit from narwhals.typing import _2DArray from narwhals.utils import Implementation + from narwhals.utils import _FullContext Incomplete: TypeAlias = Any @@ -46,6 +49,7 @@ class CompliantDataFrame( + NumpyConvertible["_2DArray", "_2DArray"], _StoresNative[NativeFrameT_co], Sized, Protocol[CompliantSeriesT, CompliantExprT_contra, NativeFrameT_co], @@ -57,6 +61,15 @@ class CompliantDataFrame( def __narwhals_dataframe__(self) -> Self: ... def __narwhals_namespace__(self) -> Any: ... + @classmethod + def from_numpy( + cls, + data: _2DArray, + /, + *, + context: _FullContext, + schema: Mapping[str, DType] | Schema | Sequence[str] | None, + ) -> Self: ... def __array__(self, dtype: Any, *, copy: bool | None) -> _2DArray: ... def __getitem__(self, item: Any) -> CompliantSeriesT | Self: ... 
def simple_select(self, *column_names: str) -> Self: @@ -143,7 +156,6 @@ def sort( ) -> Self: ... def tail(self, n: int) -> Self: ... def to_arrow(self) -> pa.Table: ... - def to_numpy(self) -> _2DArray: ... def to_pandas(self) -> pd.DataFrame: ... def to_polars(self) -> pl.DataFrame: ... @overload @@ -286,7 +298,8 @@ def _evaluate_expr(self, expr: EagerExprT_contra, /) -> EagerSeriesT: return result[0] def _evaluate_into_exprs(self, *exprs: EagerExprT_contra) -> Sequence[EagerSeriesT]: - return list(chain.from_iterable(self._evaluate_into_expr(expr) for expr in exprs)) + # NOTE: Ignore is to avoid an intermittent false positive + return list(chain.from_iterable(self._evaluate_into_expr(expr) for expr in exprs)) # pyright: ignore[reportArgumentType] def _evaluate_into_expr(self, expr: EagerExprT_contra, /) -> Sequence[EagerSeriesT]: """Return list of raw columns. @@ -308,3 +321,9 @@ def _evaluate_into_expr(self, expr: EagerExprT_contra, /) -> Sequence[EagerSerie def _extract_comparand(self, other: EagerSeriesT, /) -> Any: """Extract native Series, broadcasting to `len(self)` if necessary.""" ... 
+ + @staticmethod + def _numpy_column_names( + data: _2DArray, columns: Sequence[str] | None, / + ) -> list[str]: + return list(columns or (f"column_{x}" for x in range(data.shape[1]))) diff --git a/narwhals/_compliant/namespace.py b/narwhals/_compliant/namespace.py index d47e7ac688..a32a6b655f 100644 --- a/narwhals/_compliant/namespace.py +++ b/narwhals/_compliant/namespace.py @@ -6,7 +6,10 @@ from typing import Container from typing import Iterable from typing import Literal +from typing import Mapping from typing import Protocol +from typing import Sequence +from typing import overload from narwhals._compliant.typing import CompliantExprT from narwhals._compliant.typing import CompliantFrameT @@ -14,6 +17,7 @@ from narwhals._compliant.typing import EagerDataFrameT from narwhals._compliant.typing import EagerExprT from narwhals._compliant.typing import EagerSeriesT +from narwhals.dependencies import is_numpy_array_2d from narwhals.utils import exclude_column_names from narwhals.utils import get_column_names from narwhals.utils import passthrough_column_names @@ -25,6 +29,9 @@ from narwhals._compliant.when_then import CompliantWhen from narwhals._compliant.when_then import EagerWhen from narwhals.dtypes import DType + from narwhals.schema import Schema + from narwhals.typing import Into1DArray + from narwhals.typing import _2DArray from narwhals.utils import Implementation from narwhals.utils import Version @@ -109,8 +116,36 @@ class EagerNamespace( DepthTrackingNamespace[EagerDataFrameT, EagerExprT], Protocol[EagerDataFrameT, EagerSeriesT, EagerExprT], ): + @property + def _dataframe(self) -> type[EagerDataFrameT]: ... @property def _series(self) -> type[EagerSeriesT]: ... def when( self, predicate: EagerExprT ) -> EagerWhen[EagerDataFrameT, EagerSeriesT, EagerExprT, Incomplete]: ... + + @overload + def from_numpy( + self, + data: Into1DArray, + /, + schema: None = ..., + ) -> EagerSeriesT: ... 
+ + @overload + def from_numpy( + self, + data: _2DArray, + /, + schema: Mapping[str, DType] | Schema | Sequence[str] | None, + ) -> EagerDataFrameT: ... + + def from_numpy( + self, + data: Into1DArray | _2DArray, + /, + schema: Mapping[str, DType] | Schema | Sequence[str] | None = None, + ) -> EagerDataFrameT | EagerSeriesT: + if is_numpy_array_2d(data): + return self._dataframe.from_numpy(data, schema=schema, context=self) + return self._series.from_numpy(data, context=self) diff --git a/narwhals/_pandas_like/dataframe.py b/narwhals/_pandas_like/dataframe.py index ac40e3b256..7fe858f379 100644 --- a/narwhals/_pandas_like/dataframe.py +++ b/narwhals/_pandas_like/dataframe.py @@ -2,6 +2,8 @@ from typing import TYPE_CHECKING from typing import Any +from typing import Callable +from typing import Iterable from typing import Iterator from typing import Literal from typing import Mapping @@ -17,6 +19,7 @@ from narwhals._pandas_like.utils import align_series_full_broadcast from narwhals._pandas_like.utils import check_column_names_are_unique from narwhals._pandas_like.utils import convert_str_slice_to_int_slice +from narwhals._pandas_like.utils import get_dtype_backend from narwhals._pandas_like.utils import horizontal_concat from narwhals._pandas_like.utils import native_to_narwhals_dtype from narwhals._pandas_like.utils import object_native_to_narwhals_dtype @@ -46,17 +49,23 @@ import pandas as pd import polars as pl from typing_extensions import Self + from typing_extensions import TypeAlias from narwhals._pandas_like.expr import PandasLikeExpr from narwhals._pandas_like.group_by import PandasLikeGroupBy from narwhals._pandas_like.namespace import PandasLikeNamespace from narwhals.dtypes import DType + from narwhals.schema import Schema from narwhals.typing import CompliantDataFrame from narwhals.typing import CompliantLazyFrame + from narwhals.typing import DTypeBackend from narwhals.typing import SizeUnit from narwhals.typing import _1DArray from narwhals.typing 
import _2DArray from narwhals.utils import Version + from narwhals.utils import _FullContext + + Constructor: TypeAlias = Callable[..., pd.DataFrame] CLASSICAL_NUMPY_DTYPES: frozenset[np.dtype[Any]] = frozenset( @@ -104,6 +113,37 @@ def __init__( if validate_column_names: check_column_names_are_unique(native_dataframe.columns) + @classmethod + def from_numpy( + cls, + data: _2DArray, + /, + *, + context: _FullContext, + schema: Mapping[str, DType] | Schema | Sequence[str] | None, + ) -> Self: + from narwhals.schema import Schema + + implementation = context._implementation + DataFrame: Constructor = implementation.to_native_namespace().DataFrame # noqa: N806 + if isinstance(schema, (Mapping, Schema)): + it: Iterable[DTypeBackend] = ( + get_dtype_backend(native_type, implementation) + for native_type in schema.values() + ) + native = DataFrame(data, columns=schema.keys()).astype( + Schema(schema).to_pandas(it) + ) + else: + native = DataFrame(data, columns=cls._numpy_column_names(data, schema)) + return cls( + native, + implementation=implementation, + backend_version=context._backend_version, + version=context._version, + validate_column_names=True, + ) + def __narwhals_dataframe__(self: Self) -> Self: return self diff --git a/narwhals/_pandas_like/namespace.py b/narwhals/_pandas_like/namespace.py index 053308b671..27b5ff96b3 100644 --- a/narwhals/_pandas_like/namespace.py +++ b/narwhals/_pandas_like/namespace.py @@ -34,6 +34,10 @@ class PandasLikeNamespace( EagerNamespace[PandasLikeDataFrame, PandasLikeSeries, PandasLikeExpr] ): + @property + def _dataframe(self) -> type[PandasLikeDataFrame]: + return PandasLikeDataFrame + @property def _expr(self) -> type[PandasLikeExpr]: return PandasLikeExpr diff --git a/narwhals/_polars/dataframe.py b/narwhals/_polars/dataframe.py index 7cb38790da..02c148afc0 100644 --- a/narwhals/_polars/dataframe.py +++ b/narwhals/_polars/dataframe.py @@ -36,10 +36,12 @@ from narwhals._polars.group_by import PolarsLazyGroupBy from 
narwhals._polars.series import PolarsSeries from narwhals.dtypes import DType + from narwhals.schema import Schema from narwhals.typing import CompliantDataFrame from narwhals.typing import CompliantLazyFrame from narwhals.typing import _2DArray from narwhals.utils import Version + from narwhals.utils import _FullContext T = TypeVar("T") R = TypeVar("R") @@ -92,6 +94,27 @@ def __init__( self._version = version validate_backend_version(self._implementation, self._backend_version) + @classmethod + def from_numpy( + cls, + data: _2DArray, + /, + *, + context: _FullContext, # NOTE: Maybe only `Implementation`? + schema: Mapping[str, DType] | Schema | Sequence[str] | None, + ) -> Self: + from narwhals.schema import Schema + + pl_schema = ( + Schema(schema).to_polars() + if isinstance(schema, (Mapping, Schema)) + else schema + ) + native = pl.from_numpy(data, pl_schema) + return cls( + native, backend_version=context._backend_version, version=context._version + ) + @property def native(self) -> pl.DataFrame: return self._native_frame diff --git a/narwhals/_polars/namespace.py b/narwhals/_polars/namespace.py index 920d94e9d2..9eb56cba9a 100644 --- a/narwhals/_polars/namespace.py +++ b/narwhals/_polars/namespace.py @@ -5,7 +5,9 @@ from typing import Any from typing import Iterable from typing import Literal +from typing import Mapping from typing import Sequence +from typing import cast from typing import overload import polars as pl @@ -14,6 +16,7 @@ from narwhals._polars.series import PolarsSeries from narwhals._polars.utils import extract_args_kwargs from narwhals._polars.utils import narwhals_to_native_dtype +from narwhals.dependencies import is_numpy_array_2d from narwhals.dtypes import DType from narwhals.utils import Implementation @@ -21,14 +24,36 @@ from datetime import timezone from typing_extensions import Self + from typing_extensions import TypeAlias + from narwhals._compliant import CompliantSelectorNamespace + from narwhals._compliant import CompliantWhen + 
from narwhals._polars.dataframe import Method from narwhals._polars.dataframe import PolarsDataFrame from narwhals._polars.dataframe import PolarsLazyFrame + from narwhals._polars.typing import FrameT + from narwhals.schema import Schema + from narwhals.typing import Into1DArray from narwhals.typing import TimeUnit + from narwhals.typing import _2DArray from narwhals.utils import Version + from narwhals.utils import _FullContext + + Incomplete: TypeAlias = Any class PolarsNamespace: + all: Method[PolarsExpr] + col: Method[PolarsExpr] + exclude: Method[PolarsExpr] + all_horizontal: Method[PolarsExpr] + any_horizontal: Method[PolarsExpr] + sum_horizontal: Method[PolarsExpr] + min_horizontal: Method[PolarsExpr] + max_horizontal: Method[PolarsExpr] + # NOTE: `PolarsSeries`, `PolarsExpr` still have gaps + when: Method[CompliantWhen[PolarsDataFrame, Incomplete, Incomplete]] + def __init__( self: Self, *, backend_version: tuple[int, ...], version: Version ) -> None: @@ -47,6 +72,12 @@ def func(*args: Any, **kwargs: Any) -> Any: return func + @property + def _dataframe(self) -> type[PolarsDataFrame]: + from narwhals._polars.dataframe import PolarsDataFrame + + return PolarsDataFrame + @property def _expr(self) -> type[PolarsExpr]: return PolarsExpr @@ -55,6 +86,32 @@ def _expr(self) -> type[PolarsExpr]: def _series(self) -> type[PolarsSeries]: return PolarsSeries + @overload + def from_numpy( + self, + data: Into1DArray, + /, + schema: None = ..., + ) -> PolarsSeries: ... + + @overload + def from_numpy( + self, + data: _2DArray, + /, + schema: Mapping[str, DType] | Schema | Sequence[str] | None, + ) -> PolarsDataFrame: ... 
+ + def from_numpy( + self, + data: Into1DArray | _2DArray, + /, + schema: Mapping[str, DType] | Schema | Sequence[str] | None = None, + ) -> PolarsDataFrame | PolarsSeries: + if is_numpy_array_2d(data): + return self._dataframe.from_numpy(data, schema=schema, context=self) + return self._series.from_numpy(data, context=self) # pragma: no cover + def nth(self: Self, *indices: int) -> PolarsExpr: if self._backend_version < (1, 0, 0): msg = "`nth` is only supported for Polars>=1.0.0. Please use `col` for columns selection instead." @@ -74,41 +131,21 @@ def len(self: Self) -> PolarsExpr: pl.len(), version=self._version, backend_version=self._backend_version ) - @overload - def concat( - self: Self, - items: Sequence[PolarsDataFrame], - *, - how: Literal["vertical", "horizontal", "diagonal"], - ) -> PolarsDataFrame: ... - - @overload - def concat( - self: Self, - items: Sequence[PolarsLazyFrame], - *, - how: Literal["vertical", "horizontal", "diagonal"], - ) -> PolarsLazyFrame: ... - def concat( self: Self, - items: Sequence[PolarsDataFrame] | Sequence[PolarsLazyFrame], + items: Iterable[FrameT], *, how: Literal["vertical", "horizontal", "diagonal"], ) -> PolarsDataFrame | PolarsLazyFrame: - from narwhals._polars.dataframe import PolarsDataFrame from narwhals._polars.dataframe import PolarsLazyFrame - dfs: list[Any] = [item._native_frame for item in items] - result = pl.concat(dfs, how=how) + result = pl.concat((item.native for item in items), how=how) if isinstance(result, pl.DataFrame): - return PolarsDataFrame( - result, - backend_version=items[0]._backend_version, - version=items[0]._version, + return self._dataframe( + result, backend_version=self._backend_version, version=self._version ) return PolarsLazyFrame( - result, backend_version=items[0]._backend_version, version=items[0]._version + result, backend_version=self._backend_version, version=self._version ) def lit(self: Self, value: Any, dtype: DType | type[DType] | None) -> PolarsExpr: @@ -190,15 +227,21 @@ 
def concat_str( backend_version=self._backend_version, ) + # NOTE: Implementation is too different to annotate correctly (vs other `*SelectorNamespace`) + # 1. Others have lots of private stuff for code reuse + # i. None of that is useful here + # 2. We don't have a `PolarsSelector` abstraction, and just use `PolarsExpr` + # 3. `PolarsExpr` still has its own gaps in the spec @property - def selectors(self: Self) -> PolarsSelectors: - return PolarsSelectors(self._version, backend_version=self._backend_version) + def selectors(self: Self) -> CompliantSelectorNamespace[Any, Any]: + return cast("CompliantSelectorNamespace[Any, Any]", PolarsSelectorNamespace(self)) -class PolarsSelectors: - def __init__(self: Self, version: Version, backend_version: tuple[int, ...]) -> None: - self._version = version - self._backend_version = backend_version +class PolarsSelectorNamespace: + def __init__(self: Self, context: _FullContext, /) -> None: + self._implementation = context._implementation + self._backend_version = context._backend_version + self._version = context._version def by_dtype(self: Self, dtypes: Iterable[DType]) -> PolarsExpr: native_dtypes = [ diff --git a/narwhals/_polars/typing.py b/narwhals/_polars/typing.py index 3323a11482..959d76199c 100644 --- a/narwhals/_polars/typing.py +++ b/narwhals/_polars/typing.py @@ -5,13 +5,17 @@ if TYPE_CHECKING: import sys + from typing import TypeVar if sys.version_info >= (3, 10): from typing import TypeAlias else: from typing_extensions import TypeAlias + from narwhals._polars.dataframe import PolarsDataFrame + from narwhals._polars.dataframe import PolarsLazyFrame from narwhals._polars.expr import PolarsExpr from narwhals._polars.series import PolarsSeries IntoPolarsExpr: TypeAlias = Union[PolarsExpr, PolarsSeries] + FrameT = TypeVar("FrameT", PolarsDataFrame, PolarsLazyFrame) diff --git a/narwhals/functions.py b/narwhals/functions.py index 4ddec1f869..a91d9fd8b6 100644 --- a/narwhals/functions.py +++ b/narwhals/functions.py
@@ -32,9 +32,11 @@ from narwhals.translate import to_native from narwhals.utils import Implementation from narwhals.utils import Version +from narwhals.utils import _into_compliant_namespace from narwhals.utils import deprecate_native_namespace from narwhals.utils import flatten from narwhals.utils import is_compliant_expr +from narwhals.utils import is_eager_allowed from narwhals.utils import is_sequence_but_not_str from narwhals.utils import parse_version from narwhals.utils import validate_laziness @@ -42,9 +44,10 @@ if TYPE_CHECKING: from types import ModuleType - import polars as pl import pyarrow as pa from typing_extensions import Self + from typing_extensions import TypeAlias + from typing_extensions import TypeIs from narwhals._compliant import CompliantExpr from narwhals._compliant import CompliantNamespace @@ -62,6 +65,8 @@ from narwhals.typing import NativeLazyFrame from narwhals.typing import _2DArray + _IntoSchema: TypeAlias = "Mapping[str, DType] | Schema | Sequence[str] | None" + class ArrowStreamExportable(Protocol): def __arrow_c_stream__( self, requested_schema: object | None = None @@ -508,7 +513,7 @@ def from_numpy( └──────────────────┘ """ backend = cast("ModuleType | Implementation | str", backend) - return _from_numpy_impl(data, schema, backend=backend) + return _from_numpy_impl(data, schema, backend=backend, version=Version.MAIN) def _from_numpy_impl( @@ -516,77 +521,25 @@ def _from_numpy_impl( schema: Mapping[str, DType] | Schema | Sequence[str] | None = None, *, backend: ModuleType | Implementation | str, + version: Version, ) -> DataFrame[Any]: - from narwhals.schema import Schema - - implementation = Implementation.from_backend(backend) - native_namespace = implementation.to_native_namespace() - if not is_numpy_array_2d(data): msg = "`from_numpy` only accepts 2D numpy arrays" raise ValueError(msg) - implementation = Implementation.from_native_namespace(native_namespace) - - if implementation is Implementation.POLARS: - if 
isinstance(schema, (Mapping, Schema)): - schema_pl: pl.Schema | Sequence[str] | None = Schema(schema).to_polars() - elif is_sequence_but_not_str(schema) or schema is None: - schema_pl = schema - else: - msg = ( - "`schema` is expected to be one of the following types: " - "Mapping[str, DType] | Schema | Sequence[str]. " - f"Got {type(schema)}." - ) - raise TypeError(msg) - native_frame = native_namespace.from_numpy(data, schema=schema_pl) - - elif implementation.is_pandas_like(): - if isinstance(schema, (Mapping, Schema)): - from narwhals._pandas_like.utils import get_dtype_backend - - it: Iterable[DTypeBackend] = ( - get_dtype_backend(native_type, implementation) - for native_type in schema.values() - ) - native_frame = native_namespace.DataFrame(data, columns=schema.keys()).astype( - Schema(schema).to_pandas(it) - ) - elif is_sequence_but_not_str(schema): - native_frame = native_namespace.DataFrame(data, columns=list(schema)) - elif schema is None: - native_frame = native_namespace.DataFrame( - data, columns=[f"column_{x}" for x in range(data.shape[1])] - ) - else: - msg = ( - "`schema` is expected to be one of the following types: " - "Mapping[str, DType] | Schema | Sequence[str]. " - f"Got {type(schema)}." - ) - raise TypeError(msg) - - elif implementation is Implementation.PYARROW: - pa_arrays = [native_namespace.array(val) for val in data.T] - if isinstance(schema, (Mapping, Schema)): - schema_pa = Schema(schema).to_arrow() - native_frame = native_namespace.Table.from_arrays(pa_arrays, schema=schema_pa) - elif is_sequence_but_not_str(schema): - native_frame = native_namespace.Table.from_arrays( - pa_arrays, names=list(schema) - ) - elif schema is None: - native_frame = native_namespace.Table.from_arrays( - pa_arrays, names=[f"column_{x}" for x in range(data.shape[1])] - ) - else: - msg = ( - "`schema` is expected to be one of the following types: " - "Mapping[str, DType] | Schema | Sequence[str]. " - f"Got {type(schema)}." 
- ) - raise TypeError(msg) + if not _is_into_schema(schema): + msg = ( + "`schema` is expected to be one of the following types: " + "Mapping[str, DType] | Schema | Sequence[str]. " + f"Got {type(schema)}." + ) + raise TypeError(msg) + implementation = Implementation.from_backend(backend) + if is_eager_allowed(implementation): + ns = _into_compliant_namespace(implementation, version) + frame = ns.from_numpy(data, schema) + return from_native(frame, eager_only=True) else: # pragma: no cover + native_namespace = implementation.to_native_namespace() try: # implementation is UNKNOWN, Narwhals extension using this feature should # implement `from_numpy` function in the top-level namespace. @@ -594,7 +547,15 @@ def _from_numpy_impl( except AttributeError as e: msg = "Unknown namespace is expected to implement `from_numpy` function." raise AttributeError(msg) from e - return from_native(native_frame, eager_only=True) + return from_native(native_frame, eager_only=True) + + +def _is_into_schema(obj: Any) -> TypeIs[_IntoSchema]: + from narwhals.schema import Schema + + return ( + obj is None or isinstance(obj, (Mapping, Schema)) or is_sequence_but_not_str(obj) + ) @deprecate_native_namespace(warn_version="1.31.0", required=True) diff --git a/narwhals/stable/v1/__init__.py b/narwhals/stable/v1/__init__.py index debbcaac14..e2a1037774 100644 --- a/narwhals/stable/v1/__init__.py +++ b/narwhals/stable/v1/__init__.py @@ -2338,7 +2338,7 @@ def from_numpy( A new DataFrame. 
""" backend = cast("ModuleType | Implementation | str", backend) - return _stableify(_from_numpy_impl(data, schema, backend=backend)) # type: ignore[no-any-return] + return _stableify(_from_numpy_impl(data, schema, backend=backend, version=Version.V1)) # type: ignore[no-any-return] @deprecate_native_namespace(required=True) diff --git a/narwhals/utils.py b/narwhals/utils.py index 1d1fdd0b15..fe5ad29ccd 100644 --- a/narwhals/utils.py +++ b/narwhals/utils.py @@ -52,13 +52,21 @@ from typing_extensions import LiteralString from typing_extensions import ParamSpec from typing_extensions import Self + from typing_extensions import TypeAlias from typing_extensions import TypeIs + from narwhals._arrow.namespace import ArrowNamespace from narwhals._compliant import CompliantExpr from narwhals._compliant import CompliantFrameT + from narwhals._compliant import CompliantNamespace from narwhals._compliant import CompliantSeriesOrNativeExprT_co from narwhals._compliant import NativeFrameT_co from narwhals._compliant import NativeSeriesT_co + from narwhals._dask.namespace import DaskNamespace + from narwhals._duckdb.namespace import DuckDBNamespace + from narwhals._pandas_like.namespace import PandasLikeNamespace + from narwhals._polars.namespace import PolarsNamespace + from narwhals._spark_like.namespace import SparkLikeNamespace from narwhals.dataframe import DataFrame from narwhals.dataframe import LazyFrame from narwhals.dtypes import DType @@ -85,6 +93,19 @@ P = ParamSpec("P") R = TypeVar("R") + _PandasLike: TypeAlias = ( + "Literal[Implementation.PANDAS, Implementation.CUDF, Implementation.MODIN]" + ) + _Arrow: TypeAlias = "Literal[Implementation.PYARROW]" + _Polars: TypeAlias = "Literal[Implementation.POLARS]" + _SparkLike: TypeAlias = "Literal[Implementation.PYSPARK, Implementation.SQLFRAME]" + _Dask: TypeAlias = "Literal[Implementation.DASK]" + _DuckDB: TypeAlias = "Literal[Implementation.DUCKDB]" + _EagerOnly: TypeAlias = "_PandasLike | _Arrow" + _EagerAllowed: 
TypeAlias = "_Polars | _EagerOnly" + _LazyOnly: TypeAlias = "_SparkLike | _Dask | _DuckDB" + _LazyAllowed: TypeAlias = "_Polars | _LazyOnly" + class _SupportsVersion(Protocol): __version__: str @@ -555,6 +576,65 @@ def is_sqlframe(self: Self) -> bool: } +@overload +def _into_compliant_namespace( + impl: _PandasLike, version: Version, / +) -> PandasLikeNamespace: ... +@overload +def _into_compliant_namespace(impl: _Polars, version: Version, /) -> PolarsNamespace: ... +@overload +def _into_compliant_namespace(impl: _Arrow, version: Version, /) -> ArrowNamespace: ... +@overload +def _into_compliant_namespace( + impl: _SparkLike, version: Version, / +) -> SparkLikeNamespace: ... +@overload +def _into_compliant_namespace(impl: _DuckDB, version: Version, /) -> DuckDBNamespace: ... +@overload +def _into_compliant_namespace(impl: _Dask, version: Version, /) -> DaskNamespace: ... +@overload +def _into_compliant_namespace( + impl: _EagerAllowed, version: Version, / +) -> PandasLikeNamespace | PolarsNamespace | ArrowNamespace: ... 
+def _into_compliant_namespace( + impl: Implementation, version: Version, / +) -> CompliantNamespace[Any, Any]: + native = impl.to_native_namespace() + into_version = native if not impl.is_sqlframe() else native._version + backend_version = parse_version(into_version) + if impl.is_pandas_like(): + from narwhals._pandas_like.namespace import PandasLikeNamespace + + return PandasLikeNamespace( + implementation=impl, backend_version=backend_version, version=version + ) + elif impl.is_polars(): + from narwhals._polars.namespace import PolarsNamespace + + return PolarsNamespace(backend_version=backend_version, version=version) + elif impl.is_pyarrow(): + from narwhals._arrow.namespace import ArrowNamespace + + return ArrowNamespace(backend_version=backend_version, version=version) + elif impl.is_spark_like(): # pragma: no cover + from narwhals._spark_like.namespace import SparkLikeNamespace + + return SparkLikeNamespace( + implementation=impl, backend_version=backend_version, version=version + ) + elif impl.is_duckdb(): # pragma: no cover + from narwhals._duckdb.namespace import DuckDBNamespace + + return DuckDBNamespace(backend_version=backend_version, version=version) + elif impl.is_dask(): # pragma: no cover + from narwhals._dask.namespace import DaskNamespace + + return DaskNamespace(backend_version=backend_version, version=version) + else: + msg = "Not supported Implementation" # pragma: no cover + raise AssertionError(msg) + + def validate_backend_version( implementation: Implementation, backend_version: tuple[int, ...] 
) -> None: @@ -1499,6 +1579,26 @@ def is_compliant_expr( return hasattr(obj, "__narwhals_expr__") +def is_eager_allowed(obj: Implementation) -> TypeIs[_EagerAllowed]: + return obj in { + Implementation.PANDAS, + Implementation.MODIN, + Implementation.CUDF, + Implementation.POLARS, + Implementation.PYARROW, + } + + +def is_lazy_allowed(obj: Implementation) -> TypeIs[_LazyAllowed]: # pragma: no cover + return obj in { + Implementation.POLARS, + Implementation.PYSPARK, + Implementation.SQLFRAME, + Implementation.DASK, + Implementation.DUCKDB, + } + + def has_native_namespace(obj: Any) -> TypeIs[SupportsNativeNamespace]: return hasattr(obj, "__native_namespace__") diff --git a/tests/from_numpy_test.py b/tests/from_numpy_test.py index 3d23cef9d9..de0fe20290 100644 --- a/tests/from_numpy_test.py +++ b/tests/from_numpy_test.py @@ -63,16 +63,10 @@ def test_from_numpy_schema_list( assert result.columns == schema -def test_from_numpy_schema_notvalid( - constructor: Constructor, request: pytest.FixtureRequest -) -> None: - if "dask" in str(constructor) or "pyspark" in str(constructor): - request.applymarker(pytest.mark.xfail) +def test_from_numpy_schema_notvalid(constructor: Constructor) -> None: df = nw.from_native(constructor(data)) backend = nw_v1.get_native_namespace(df) - with pytest.raises( - TypeError, match="`schema` is expected to be one of the following types" - ): + with pytest.raises(TypeError, match=r"`schema.*expected.*types"): nw.from_numpy(arr, schema=5, backend=backend) # type: ignore[arg-type]