Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
154dc54
feat: Define `CompliantDataFrame.from_numpy`
dangotbanned Mar 23, 2025
3d533a0
feat: Add `PolarsDataFrame.from_numpy`
dangotbanned Mar 23, 2025
b333351
fix: Align `to_numpy` signatures
dangotbanned Mar 23, 2025
4805551
feat: Add `ArrowDataFrame.from_numpy`
dangotbanned Mar 23, 2025
c689161
refactor: Simplify `polars` impl
dangotbanned Mar 23, 2025
b63fa86
feat: Add `PandasLikeDataFrame.from_numpy`
dangotbanned Mar 24, 2025
d0e36f3
fix: Use `TypeVar` defaults correctly?
dangotbanned Mar 24, 2025
1591c2d
feat: Add `Implementation._to_compliant_namespace`
dangotbanned Mar 24, 2025
c5500a3
chore: Make `PolarsNamespace.concat` compliant
dangotbanned Mar 24, 2025
39382c5
chore(typing): Fill in more `__getattr__` gaps
dangotbanned Mar 24, 2025
7cd9ca7
chore: start aligning `PolarsSelectorNamespace`
dangotbanned Mar 24, 2025
aca796a
chore(typing): Pragmatic solution for `PolarsNamespace.selectors`
dangotbanned Mar 24, 2025
7101b62
feat(typing): Add `EagerNamespace._dataframe`
dangotbanned Mar 24, 2025
91bd274
feat: Add `PolarsNamespace._dataframe`
dangotbanned Mar 24, 2025
7f9fb6b
feat: Implement `EagerNamespace.from_numpy`
dangotbanned Mar 24, 2025
0c6a484
feat: Add `PolarsNamespace.from_numpy`
dangotbanned Mar 24, 2025
4f99e2d
chore: add `version` to `_from_numpy_impl`
dangotbanned Mar 24, 2025
14fe87f
feat(typing): `_to_compliant_namespace` overloads
dangotbanned Mar 24, 2025
7749bcd
feat(typing): Add `is_(eager|lazy)_allowed` guards
dangotbanned Mar 24, 2025
d52b405
refactor: Rewrite `from_numpy` using new protocols
dangotbanned Mar 24, 2025
bbc8a3d
remove duplicate overload
dangotbanned Mar 24, 2025
b3c791f
ignore `mypy` for now
dangotbanned Mar 24, 2025
2d49da4
Merge remote-tracking branch 'upstream/main' into from-numpy-2d-ns
dangotbanned Mar 24, 2025
b076950
fix: Un-confuse `mypy`
dangotbanned Mar 24, 2025
36c6b19
refactor(typing): Make aliases readable
dangotbanned Mar 24, 2025
50a39df
refactor: validate `schema` before passing to `Compliant*`
dangotbanned Mar 24, 2025
0273426
test: remove `xfail` from lazy backends
dangotbanned Mar 24, 2025
4e8591a
refactor: Add `EagerDataFrame._numpy_column_names`
dangotbanned Mar 24, 2025
694960d
refresh ci
dangotbanned Mar 24, 2025
f07cf38
ignore coverage on future stuff
dangotbanned Mar 24, 2025
31a5259
Merge remote-tracking branch 'upstream/main' into from-numpy-2d-ns
dangotbanned Mar 24, 2025
7a137b3
Merge branch 'main' into from-numpy-2d-ns
dangotbanned Mar 25, 2025
dabe5d4
Merge branch 'main' into from-numpy-2d-ns
dangotbanned Mar 25, 2025
8770d37
Merge branch 'main' into from-numpy-2d-ns
dangotbanned Mar 25, 2025
60a5469
Merge branch 'main' into from-numpy-2d-ns
dangotbanned Mar 26, 2025
fc74ba1
Merge branch 'main' into from-numpy-2d-ns
dangotbanned Mar 26, 2025
0e67a1e
fix(typing): Satisfy `pyright` in `PolarsNamespace.concat`
dangotbanned Mar 26, 2025
4f1b172
Merge remote-tracking branch 'upstream/main' into from-numpy-2d-ns
dangotbanned Mar 26, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 26 additions & 1 deletion narwhals/_arrow/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,12 +53,14 @@
from narwhals._arrow.typing import Mask # type: ignore[attr-defined]
from narwhals._arrow.typing import Order # type: ignore[attr-defined]
from narwhals.dtypes import DType
from narwhals.schema import Schema
from narwhals.typing import CompliantDataFrame
from narwhals.typing import CompliantLazyFrame
from narwhals.typing import SizeUnit
from narwhals.typing import _1DArray
from narwhals.typing import _2DArray
from narwhals.utils import Version
from narwhals.utils import _FullContext

JoinType: TypeAlias = Literal[
"left semi",
Expand Down Expand Up @@ -91,6 +93,29 @@ def __init__(
self._version = version
validate_backend_version(self._implementation, self._backend_version)

@classmethod
def from_numpy(
    cls,
    data: _2DArray,
    /,
    *,
    context: _FullContext,
    schema: Mapping[str, DType] | Schema | Sequence[str] | None,
) -> Self:
    """Build a frame from a 2D numpy array.

    Arguments:
        data: Two-dimensional array; each row of `data.T` becomes one
            `pyarrow` array, i.e. one column of the resulting table.
        context: Object supplying `_backend_version` and `_version`, which
            are forwarded to the constructor.
        schema: Either a mapping/`Schema` (converted with
            `Schema(...).to_arrow()` and passed as the table schema), or
            a sequence of column names / `None` (resolved through
            `cls._numpy_column_names`).

    Returns:
        A new instance wrapping the `pyarrow.Table`, constructed with
        `validate_column_names=True`.
    """
    from narwhals.schema import Schema

    columns = [pa.array(column) for column in data.T]
    if not isinstance(schema, (Mapping, Schema)):
        # Only names (or nothing) were given: let pyarrow infer the dtypes.
        names = cls._numpy_column_names(data, schema)
        table = pa.Table.from_arrays(columns, names)
    else:
        arrow_schema = Schema(schema).to_arrow()
        table = pa.Table.from_arrays(columns, schema=arrow_schema)
    return cls(
        table,
        backend_version=context._backend_version,
        version=context._version,
        validate_column_names=True,
    )

def __narwhals_namespace__(self: Self) -> ArrowNamespace:
from narwhals._arrow.namespace import ArrowNamespace

Expand Down Expand Up @@ -511,7 +536,7 @@ def to_polars(self: Self) -> pl.DataFrame:

return pl.from_arrow(self.native) # type: ignore[return-value]

def to_numpy(self: Self) -> _2DArray:
def to_numpy(self: Self, dtype: Any = None, *, copy: bool | None = None) -> _2DArray:
import numpy as np # ignore-banned-import

arr: Any = np.column_stack([col.to_numpy() for col in self.native.columns])
Expand Down
4 changes: 4 additions & 0 deletions narwhals/_arrow/namespace.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,10 @@


class ArrowNamespace(EagerNamespace[ArrowDataFrame, ArrowSeries, ArrowExpr]):
@property
def _dataframe(self) -> type[ArrowDataFrame]:
    """Dataframe class associated with this namespace."""
    return ArrowDataFrame

@property
def _expr(self) -> type[ArrowExpr]:
    """Expression class associated with this namespace."""
    return ArrowExpr
Expand Down
23 changes: 21 additions & 2 deletions narwhals/_compliant/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from narwhals._compliant.typing import EagerSeriesT
from narwhals._compliant.typing import NativeFrameT_co
from narwhals._expression_parsing import evaluate_output_names_and_aliases
from narwhals._translate import NumpyConvertible
from narwhals.utils import Version
from narwhals.utils import _StoresNative
from narwhals.utils import deprecated
Expand All @@ -34,9 +35,11 @@

from narwhals._compliant.group_by import CompliantGroupBy
from narwhals.dtypes import DType
from narwhals.schema import Schema
from narwhals.typing import SizeUnit
from narwhals.typing import _2DArray
from narwhals.utils import Implementation
from narwhals.utils import _FullContext

Incomplete: TypeAlias = Any

Expand All @@ -46,6 +49,7 @@


class CompliantDataFrame(
NumpyConvertible["_2DArray", "_2DArray"],
_StoresNative[NativeFrameT_co],
Sized,
Protocol[CompliantSeriesT, CompliantExprT_contra, NativeFrameT_co],
Expand All @@ -57,6 +61,15 @@ class CompliantDataFrame(

def __narwhals_dataframe__(self) -> Self: ...
def __narwhals_namespace__(self) -> Any: ...
@classmethod
def from_numpy(
    cls,
    data: _2DArray,
    /,
    *,
    context: _FullContext,
    schema: Mapping[str, DType] | Schema | Sequence[str] | None,
) -> Self:
    """Construct an instance from a 2D numpy array.

    Arguments:
        data: Two-dimensional array (positional-only).
        context: Keyword-only `_FullContext` object made available to the
            implementation.
        schema: A name-to-dtype mapping, a `Schema`, a sequence of column
            names, or `None`.
    """
    ...
def __array__(self, dtype: Any, *, copy: bool | None) -> _2DArray: ...
def __getitem__(self, item: Any) -> CompliantSeriesT | Self: ...
def simple_select(self, *column_names: str) -> Self:
Expand Down Expand Up @@ -143,7 +156,6 @@ def sort(
) -> Self: ...
def tail(self, n: int) -> Self: ...
def to_arrow(self) -> pa.Table: ...
def to_numpy(self) -> _2DArray: ...
def to_pandas(self) -> pd.DataFrame: ...
def to_polars(self) -> pl.DataFrame: ...
@overload
Expand Down Expand Up @@ -286,7 +298,8 @@ def _evaluate_expr(self, expr: EagerExprT_contra, /) -> EagerSeriesT:
return result[0]

def _evaluate_into_exprs(self, *exprs: EagerExprT_contra) -> Sequence[EagerSeriesT]:
return list(chain.from_iterable(self._evaluate_into_expr(expr) for expr in exprs))
# NOTE: Ignore is to avoid an intermittent false positive
return list(chain.from_iterable(self._evaluate_into_expr(expr) for expr in exprs)) # pyright: ignore[reportArgumentType]

def _evaluate_into_expr(self, expr: EagerExprT_contra, /) -> Sequence[EagerSeriesT]:
"""Return list of raw columns.
Expand All @@ -308,3 +321,9 @@ def _evaluate_into_expr(self, expr: EagerExprT_contra, /) -> Sequence[EagerSerie
def _extract_comparand(self, other: EagerSeriesT, /) -> Any:
"""Extract native Series, broadcasting to `len(self)` if necessary."""
...

@staticmethod
def _numpy_column_names(
    data: _2DArray, columns: Sequence[str] | None, /
) -> list[str]:
    """Return the column names to use for a 2D array.

    When `columns` is truthy it is copied into a list as-is. Otherwise
    (`None` or empty) one `column_<i>` name is generated per entry along
    the second axis of `data`.
    """
    if columns:
        return list(columns)
    width = data.shape[1]
    return [f"column_{idx}" for idx in range(width)]
35 changes: 35 additions & 0 deletions narwhals/_compliant/namespace.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,18 @@
from typing import Container
from typing import Iterable
from typing import Literal
from typing import Mapping
from typing import Protocol
from typing import Sequence
from typing import overload

from narwhals._compliant.typing import CompliantExprT
from narwhals._compliant.typing import CompliantFrameT
from narwhals._compliant.typing import DepthTrackingExprT
from narwhals._compliant.typing import EagerDataFrameT
from narwhals._compliant.typing import EagerExprT
from narwhals._compliant.typing import EagerSeriesT
from narwhals.dependencies import is_numpy_array_2d
from narwhals.utils import exclude_column_names
from narwhals.utils import get_column_names
from narwhals.utils import passthrough_column_names
Expand All @@ -25,6 +29,9 @@
from narwhals._compliant.when_then import CompliantWhen
from narwhals._compliant.when_then import EagerWhen
from narwhals.dtypes import DType
from narwhals.schema import Schema
from narwhals.typing import Into1DArray
from narwhals.typing import _2DArray
from narwhals.utils import Implementation
from narwhals.utils import Version

Expand Down Expand Up @@ -109,8 +116,36 @@ class EagerNamespace(
DepthTrackingNamespace[EagerDataFrameT, EagerExprT],
Protocol[EagerDataFrameT, EagerSeriesT, EagerExprT],
):
@property
def _dataframe(self) -> type[EagerDataFrameT]:
    """Dataframe class that this namespace constructs frames with."""
    ...
@property
def _series(self) -> type[EagerSeriesT]: ...
def when(
self, predicate: EagerExprT
) -> EagerWhen[EagerDataFrameT, EagerSeriesT, EagerExprT, Incomplete]: ...

@overload
def from_numpy(self, data: Into1DArray, /, schema: None = ...) -> EagerSeriesT: ...

@overload
def from_numpy(
    self,
    data: _2DArray,
    /,
    schema: Mapping[str, DType] | Schema | Sequence[str] | None,
) -> EagerDataFrameT: ...

def from_numpy(
    self,
    data: Into1DArray | _2DArray,
    /,
    schema: Mapping[str, DType] | Schema | Sequence[str] | None = None,
) -> EagerDataFrameT | EagerSeriesT:
    """Create a series or a dataframe from a numpy array.

    Arguments:
        data: Input array. If `is_numpy_array_2d(data)` holds, a dataframe
            is produced; anything else is handed to the series constructor.
        schema: Forwarded to the dataframe constructor only; it is not
            passed along on the series path.

    Returns:
        The result of `self._dataframe.from_numpy` for 2D input, otherwise
        the result of `self._series.from_numpy`. In both cases this
        namespace is supplied as `context`.
    """
    if not is_numpy_array_2d(data):
        return self._series.from_numpy(data, context=self)
    return self._dataframe.from_numpy(data, schema=schema, context=self)
40 changes: 40 additions & 0 deletions narwhals/_pandas_like/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

from typing import TYPE_CHECKING
from typing import Any
from typing import Callable
from typing import Iterable
from typing import Iterator
from typing import Literal
from typing import Mapping
Expand All @@ -17,6 +19,7 @@
from narwhals._pandas_like.utils import align_series_full_broadcast
from narwhals._pandas_like.utils import check_column_names_are_unique
from narwhals._pandas_like.utils import convert_str_slice_to_int_slice
from narwhals._pandas_like.utils import get_dtype_backend
from narwhals._pandas_like.utils import horizontal_concat
from narwhals._pandas_like.utils import native_to_narwhals_dtype
from narwhals._pandas_like.utils import object_native_to_narwhals_dtype
Expand Down Expand Up @@ -46,17 +49,23 @@
import pandas as pd
import polars as pl
from typing_extensions import Self
from typing_extensions import TypeAlias

from narwhals._pandas_like.expr import PandasLikeExpr
from narwhals._pandas_like.group_by import PandasLikeGroupBy
from narwhals._pandas_like.namespace import PandasLikeNamespace
from narwhals.dtypes import DType
from narwhals.schema import Schema
from narwhals.typing import CompliantDataFrame
from narwhals.typing import CompliantLazyFrame
from narwhals.typing import DTypeBackend
from narwhals.typing import SizeUnit
from narwhals.typing import _1DArray
from narwhals.typing import _2DArray
from narwhals.utils import Version
from narwhals.utils import _FullContext

Constructor: TypeAlias = Callable[..., pd.DataFrame]


CLASSICAL_NUMPY_DTYPES: frozenset[np.dtype[Any]] = frozenset(
Expand Down Expand Up @@ -104,6 +113,37 @@ def __init__(
if validate_column_names:
check_column_names_are_unique(native_dataframe.columns)

@classmethod
def from_numpy(
    cls,
    data: _2DArray,
    /,
    *,
    context: _FullContext,
    schema: Mapping[str, DType] | Schema | Sequence[str] | None,
) -> Self:
    """Build a frame from a 2D numpy array using the context's backend.

    Arguments:
        data: Two-dimensional array passed straight to the native
            `DataFrame` constructor.
        context: Supplies `_implementation` (used to locate the native
            `DataFrame` class), `_backend_version` and `_version`.
        schema: Either a mapping/`Schema`, whose keys become the column
            names and whose dtypes are applied with `astype`, or a
            sequence of column names / `None` (resolved through
            `cls._numpy_column_names`).

    Returns:
        A new instance wrapping the native frame, constructed with
        `validate_column_names=True`.
    """
    from narwhals.schema import Schema

    impl = context._implementation
    make_frame: Constructor = impl.to_native_namespace().DataFrame
    if isinstance(schema, (Mapping, Schema)):
        # Lazily computed: one dtype backend per schema entry, consumed by
        # `Schema.to_pandas` below.
        backends: Iterable[DTypeBackend] = (
            get_dtype_backend(native_type, impl) for native_type in schema.values()
        )
        untyped = make_frame(data, columns=schema.keys())
        native = untyped.astype(Schema(schema).to_pandas(backends))
    else:
        names = cls._numpy_column_names(data, schema)
        native = make_frame(data, columns=names)
    return cls(
        native,
        implementation=impl,
        backend_version=context._backend_version,
        version=context._version,
        validate_column_names=True,
    )

def __narwhals_dataframe__(self: Self) -> Self:
return self

Expand Down
4 changes: 4 additions & 0 deletions narwhals/_pandas_like/namespace.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,10 @@
class PandasLikeNamespace(
EagerNamespace[PandasLikeDataFrame, PandasLikeSeries, PandasLikeExpr]
):
@property
def _dataframe(self) -> type[PandasLikeDataFrame]:
    """Dataframe class associated with this namespace."""
    return PandasLikeDataFrame

@property
def _expr(self) -> type[PandasLikeExpr]:
    """Expression class associated with this namespace."""
    return PandasLikeExpr
Expand Down
23 changes: 23 additions & 0 deletions narwhals/_polars/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,12 @@
from narwhals._polars.group_by import PolarsLazyGroupBy
from narwhals._polars.series import PolarsSeries
from narwhals.dtypes import DType
from narwhals.schema import Schema
from narwhals.typing import CompliantDataFrame
from narwhals.typing import CompliantLazyFrame
from narwhals.typing import _2DArray
from narwhals.utils import Version
from narwhals.utils import _FullContext

T = TypeVar("T")
R = TypeVar("R")
Expand Down Expand Up @@ -92,6 +94,27 @@ def __init__(
self._version = version
validate_backend_version(self._implementation, self._backend_version)

@classmethod
def from_numpy(
    cls,
    data: _2DArray,
    /,
    *,
    context: _FullContext,  # NOTE: Maybe only `Implementation`?
    schema: Mapping[str, DType] | Schema | Sequence[str] | None,
) -> Self:
    """Build a frame from a 2D numpy array via `pl.from_numpy`.

    Arguments:
        data: Two-dimensional array handed to `pl.from_numpy`.
        context: Supplies `_backend_version` and `_version` for the
            constructor.
        schema: A mapping/`Schema` is converted with
            `Schema(...).to_polars()`; a sequence of names or `None` is
            passed to `pl.from_numpy` unchanged.

    Returns:
        A new instance wrapping the resulting `pl.DataFrame`.
    """
    from narwhals.schema import Schema

    if isinstance(schema, (Mapping, Schema)):
        native_schema = Schema(schema).to_polars()
    else:
        native_schema = schema
    frame = pl.from_numpy(data, native_schema)
    return cls(
        frame,
        backend_version=context._backend_version,
        version=context._version,
    )

@property
def native(self) -> pl.DataFrame:
return self._native_frame
Expand Down
Loading
Loading