narwhals-dev · dangotbanned · Mar 26, 2025 · Mar 23, 2025 · Mar 23, 2025 · Mar 23, 2025
diff --git a/narwhals/_arrow/dataframe.py b/narwhals/_arrow/dataframe.py
@@ -53,12 +53,14 @@
     from narwhals._arrow.typing import Mask  # type: ignore[attr-defined]
     from narwhals._arrow.typing import Order  # type: ignore[attr-defined]
     from narwhals.dtypes import DType
+    from narwhals.schema import Schema
     from narwhals.typing import CompliantDataFrame
     from narwhals.typing import CompliantLazyFrame
     from narwhals.typing import SizeUnit
     from narwhals.typing import _1DArray
     from narwhals.typing import _2DArray
     from narwhals.utils import Version
+    from narwhals.utils import _FullContext
 
     JoinType: TypeAlias = Literal[
         "left semi",
@@ -91,6 +93,35 @@ def __init__(
         self._version = version
         validate_backend_version(self._implementation, self._backend_version)
 
+    @classmethod
+    def from_numpy(
+        cls,
+        data: _2DArray,
+        /,
+        *,
+        context: _FullContext,
+        schema: Mapping[str, DType] | Schema | Sequence[str] | None,
+    ) -> Self:
+        """Build a frame from a 2D numpy array.
+
+        Each row of `data.T` becomes one `pa.array`. A `Mapping`/`Schema` is
+        converted via `Schema.to_arrow()`; otherwise `schema` is passed to
+        `_numpy_column_names` to obtain the column names.
+        """
+        from narwhals.schema import Schema
+
+        cols = [pa.array(col) for col in data.T]
+        if not isinstance(schema, (Mapping, Schema)):
+            native = pa.Table.from_arrays(cols, cls._numpy_column_names(data, schema))
+        else:
+            native = pa.Table.from_arrays(cols, schema=Schema(schema).to_arrow())
+        return cls(
+            native,
+            version=context._version,
+            backend_version=context._backend_version,
+            validate_column_names=True,
+        )
+
     def __narwhals_namespace__(self: Self) -> ArrowNamespace:
         from narwhals._arrow.namespace import ArrowNamespace
 
@@ -511,7 +536,7 @@ def to_polars(self: Self) -> pl.DataFrame:
 
         return pl.from_arrow(self.native)  # type: ignore[return-value]
 
-    def to_numpy(self: Self) -> _2DArray:
+    def to_numpy(self: Self, dtype: Any = None, *, copy: bool | None = None) -> _2DArray:
         import numpy as np  # ignore-banned-import
 
         arr: Any = np.column_stack([col.to_numpy() for col in self.native.columns])

diff --git a/narwhals/_arrow/namespace.py b/narwhals/_arrow/namespace.py
@@ -38,6 +38,10 @@
 
 
 class ArrowNamespace(EagerNamespace[ArrowDataFrame, ArrowSeries, ArrowExpr]):
+    @property
+    def _dataframe(self) -> type[ArrowDataFrame]:
+        return ArrowDataFrame  # frame class used by `from_numpy` for 2D arrays
+
     @property
     def _expr(self) -> type[ArrowExpr]:
         return ArrowExpr

diff --git a/narwhals/_compliant/dataframe.py b/narwhals/_compliant/dataframe.py
@@ -18,6 +18,7 @@
 from narwhals._compliant.typing import EagerSeriesT
 from narwhals._compliant.typing import NativeFrameT_co
 from narwhals._expression_parsing import evaluate_output_names_and_aliases
+from narwhals._translate import NumpyConvertible
 from narwhals.utils import Version
 from narwhals.utils import _StoresNative
 from narwhals.utils import deprecated
@@ -34,9 +35,11 @@
 
     from narwhals._compliant.group_by import CompliantGroupBy
     from narwhals.dtypes import DType
+    from narwhals.schema import Schema
     from narwhals.typing import SizeUnit
     from narwhals.typing import _2DArray
     from narwhals.utils import Implementation
+    from narwhals.utils import _FullContext
 
     Incomplete: TypeAlias = Any
 
@@ -46,6 +49,7 @@
 
 
 class CompliantDataFrame(
+    NumpyConvertible["_2DArray", "_2DArray"],
     _StoresNative[NativeFrameT_co],
     Sized,
     Protocol[CompliantSeriesT, CompliantExprT_contra, NativeFrameT_co],
@@ -57,6 +61,15 @@ class CompliantDataFrame(
 
     def __narwhals_dataframe__(self) -> Self: ...
     def __narwhals_namespace__(self) -> Any: ...
+    @classmethod
+    def from_numpy(  # protocol stub: construct `Self` from a 2D numpy array
+        cls,
+        data: _2DArray,
+        /,
+        *,
+        context: _FullContext,
+        schema: Mapping[str, DType] | Schema | Sequence[str] | None,
+    ) -> Self: ...
     def __array__(self, dtype: Any, *, copy: bool | None) -> _2DArray: ...
     def __getitem__(self, item: Any) -> CompliantSeriesT | Self: ...
     def simple_select(self, *column_names: str) -> Self:
@@ -143,7 +156,6 @@ def sort(
     ) -> Self: ...
     def tail(self, n: int) -> Self: ...
     def to_arrow(self) -> pa.Table: ...
-    def to_numpy(self) -> _2DArray: ...
     def to_pandas(self) -> pd.DataFrame: ...
     def to_polars(self) -> pl.DataFrame: ...
     @overload
@@ -286,7 +298,8 @@ def _evaluate_expr(self, expr: EagerExprT_contra, /) -> EagerSeriesT:
         return result[0]
 
     def _evaluate_into_exprs(self, *exprs: EagerExprT_contra) -> Sequence[EagerSeriesT]:
-        return list(chain.from_iterable(self._evaluate_into_expr(expr) for expr in exprs))
+        # NOTE: The `pyright: ignore` below suppresses an intermittent false positive
+        return list(chain.from_iterable(self._evaluate_into_expr(expr) for expr in exprs))  # pyright: ignore[reportArgumentType]
 
     def _evaluate_into_expr(self, expr: EagerExprT_contra, /) -> Sequence[EagerSeriesT]:
         """Return list of raw columns.
@@ -308,3 +321,15 @@ def _evaluate_into_expr(self, expr: EagerExprT_contra, /) -> Sequence[EagerSerie
     def _extract_comparand(self, other: EagerSeriesT, /) -> Any:
         """Extract native Series, broadcasting to `len(self)` if necessary."""
         ...
+
+    @staticmethod
+    def _numpy_column_names(
+        data: _2DArray, columns: Sequence[str] | None, /
+    ) -> list[str]:
+        """Return `columns` as a list, or `column_<i>` placeholders when it is falsy.
+
+        One placeholder is generated per index in `range(data.shape[1])`.
+        """
+        if columns:
+            return list(columns)
+        return [f"column_{idx}" for idx in range(data.shape[1])]
diff --git a/narwhals/_compliant/namespace.py b/narwhals/_compliant/namespace.py
@@ -6,14 +6,18 @@
 from typing import Container
 from typing import Iterable
 from typing import Literal
+from typing import Mapping
 from typing import Protocol
+from typing import Sequence
+from typing import overload
 
 from narwhals._compliant.typing import CompliantExprT
 from narwhals._compliant.typing import CompliantFrameT
 from narwhals._compliant.typing import DepthTrackingExprT
 from narwhals._compliant.typing import EagerDataFrameT
 from narwhals._compliant.typing import EagerExprT
 from narwhals._compliant.typing import EagerSeriesT
+from narwhals.dependencies import is_numpy_array_2d
 from narwhals.utils import exclude_column_names
 from narwhals.utils import get_column_names
 from narwhals.utils import passthrough_column_names
@@ -25,6 +29,9 @@
     from narwhals._compliant.when_then import CompliantWhen
     from narwhals._compliant.when_then import EagerWhen
     from narwhals.dtypes import DType
+    from narwhals.schema import Schema
+    from narwhals.typing import Into1DArray
+    from narwhals.typing import _2DArray
     from narwhals.utils import Implementation
     from narwhals.utils import Version
 
@@ -109,8 +116,43 @@ class EagerNamespace(
     DepthTrackingNamespace[EagerDataFrameT, EagerExprT],
     Protocol[EagerDataFrameT, EagerSeriesT, EagerExprT],
 ):
+    @property
+    def _dataframe(self) -> type[EagerDataFrameT]: ...
     @property
     def _series(self) -> type[EagerSeriesT]: ...
     def when(
         self, predicate: EagerExprT
     ) -> EagerWhen[EagerDataFrameT, EagerSeriesT, EagerExprT, Incomplete]: ...
+
+    @overload
+    def from_numpy(
+        self,
+        data: Into1DArray,
+        /,
+        schema: None = ...,
+    ) -> EagerSeriesT: ...
+
+    @overload
+    def from_numpy(
+        self,
+        data: _2DArray,
+        /,
+        schema: Mapping[str, DType] | Schema | Sequence[str] | None,
+    ) -> EagerDataFrameT: ...
+
+    def from_numpy(
+        self,
+        data: Into1DArray | _2DArray,
+        /,
+        schema: Mapping[str, DType] | Schema | Sequence[str] | None = None,
+    ) -> EagerDataFrameT | EagerSeriesT:
+        """Convert a numpy array into this namespace's eager frame or series.
+
+        Arrays accepted by `is_numpy_array_2d` go to `_dataframe.from_numpy` along
+        with `schema`; everything else goes to `_series.from_numpy` without it.
+        """
+        if is_numpy_array_2d(data):
+            frame_type = self._dataframe
+            return frame_type.from_numpy(data, context=self, schema=schema)
+        series_type = self._series
+        return series_type.from_numpy(data, context=self)
diff --git a/narwhals/_pandas_like/dataframe.py b/narwhals/_pandas_like/dataframe.py
@@ -2,6 +2,8 @@
 
 from typing import TYPE_CHECKING
 from typing import Any
+from typing import Callable
+from typing import Iterable
 from typing import Iterator
 from typing import Literal
 from typing import Mapping
@@ -17,6 +19,7 @@
 from narwhals._pandas_like.utils import align_series_full_broadcast
 from narwhals._pandas_like.utils import check_column_names_are_unique
 from narwhals._pandas_like.utils import convert_str_slice_to_int_slice
+from narwhals._pandas_like.utils import get_dtype_backend
 from narwhals._pandas_like.utils import horizontal_concat
 from narwhals._pandas_like.utils import native_to_narwhals_dtype
 from narwhals._pandas_like.utils import object_native_to_narwhals_dtype
@@ -46,17 +49,23 @@
     import pandas as pd
     import polars as pl
     from typing_extensions import Self
+    from typing_extensions import TypeAlias
 
     from narwhals._pandas_like.expr import PandasLikeExpr
     from narwhals._pandas_like.group_by import PandasLikeGroupBy
     from narwhals._pandas_like.namespace import PandasLikeNamespace
     from narwhals.dtypes import DType
+    from narwhals.schema import Schema
     from narwhals.typing import CompliantDataFrame
     from narwhals.typing import CompliantLazyFrame
+    from narwhals.typing import DTypeBackend
     from narwhals.typing import SizeUnit
     from narwhals.typing import _1DArray
     from narwhals.typing import _2DArray
     from narwhals.utils import Version
+    from narwhals.utils import _FullContext
+
+    Constructor: TypeAlias = Callable[..., pd.DataFrame]
 
 
 CLASSICAL_NUMPY_DTYPES: frozenset[np.dtype[Any]] = frozenset(
@@ -104,6 +113,41 @@ def __init__(
         if validate_column_names:
             check_column_names_are_unique(native_dataframe.columns)
 
+    @classmethod
+    def from_numpy(
+        cls,
+        data: _2DArray,
+        /,
+        *,
+        context: _FullContext,
+        schema: Mapping[str, DType] | Schema | Sequence[str] | None,
+    ) -> Self:
+        """Build a frame from a 2D numpy array using `context`'s native namespace.
+
+        With a `Mapping`/`Schema`, its keys become the column labels and the frame
+        is cast with `.astype(Schema(schema).to_pandas(...))`. Otherwise the labels
+        come from `_numpy_column_names` and no cast is applied.
+        """
+        from narwhals.schema import Schema
+
+        impl = context._implementation
+        DataFrame: Constructor = impl.to_native_namespace().DataFrame  # noqa: N806
+        if not isinstance(schema, (Mapping, Schema)):
+            native = DataFrame(data, columns=cls._numpy_column_names(data, schema))
+        else:
+            backends: Iterable[DTypeBackend] = (
+                get_dtype_backend(dtype, impl) for dtype in schema.values()
+            )
+            frame = DataFrame(data, columns=schema.keys())
+            native = frame.astype(Schema(schema).to_pandas(backends))
+        return cls(
+            native,
+            implementation=impl,
+            backend_version=context._backend_version,
+            version=context._version,
+            validate_column_names=True,
+        )
+
     def __narwhals_dataframe__(self: Self) -> Self:
         return self
 

diff --git a/narwhals/_pandas_like/namespace.py b/narwhals/_pandas_like/namespace.py
@@ -34,6 +34,10 @@
 class PandasLikeNamespace(
     EagerNamespace[PandasLikeDataFrame, PandasLikeSeries, PandasLikeExpr]
 ):
+    @property
+    def _dataframe(self) -> type[PandasLikeDataFrame]:
+        return PandasLikeDataFrame  # frame class used by `from_numpy` for 2D arrays
+
     @property
     def _expr(self) -> type[PandasLikeExpr]:
         return PandasLikeExpr

diff --git a/narwhals/_polars/dataframe.py b/narwhals/_polars/dataframe.py
@@ -36,10 +36,12 @@
     from narwhals._polars.group_by import PolarsLazyGroupBy
     from narwhals._polars.series import PolarsSeries
     from narwhals.dtypes import DType
+    from narwhals.schema import Schema
     from narwhals.typing import CompliantDataFrame
     from narwhals.typing import CompliantLazyFrame
     from narwhals.typing import _2DArray
     from narwhals.utils import Version
+    from narwhals.utils import _FullContext
 
     T = TypeVar("T")
     R = TypeVar("R")
@@ -92,6 +94,30 @@ def __init__(
         self._version = version
         validate_backend_version(self._implementation, self._backend_version)
 
+    @classmethod
+    def from_numpy(
+        cls,
+        data: _2DArray,
+        /,
+        *,
+        context: _FullContext,  # NOTE: Maybe only `Implementation`?
+        schema: Mapping[str, DType] | Schema | Sequence[str] | None,
+    ) -> Self:
+        """Build a frame from a 2D numpy array via `pl.from_numpy`.
+
+        A `Mapping`/`Schema` is first converted with `Schema.to_polars()`; any
+        other `schema` value is forwarded to polars unchanged.
+        """
+        from narwhals.schema import Schema
+
+        if isinstance(schema, (Mapping, Schema)):
+            native = pl.from_numpy(data, Schema(schema).to_polars())
+        else:
+            native = pl.from_numpy(data, schema)
+        return cls(
+            native, version=context._version, backend_version=context._backend_version
+        )
+
     @property
     def native(self) -> pl.DataFrame:
         return self._native_frame