Skip to content

Commit 42850e3

Browse files
committed
refactor(typing): Add _native.py w/ protocols, aliases
Follow-up to (#3016 (comment))
1 parent 2869349 commit 42850e3

File tree

1 file changed

+141
-0
lines changed

1 file changed

+141
-0
lines changed

narwhals/_native.py

Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,141 @@
1+
from __future__ import annotations
2+
3+
from collections.abc import Collection, Iterable, Sized
4+
from typing import TYPE_CHECKING, Any, Protocol
5+
6+
if TYPE_CHECKING:
    # Everything in this guard is seen by static type checkers only; none of
    # these packages is imported when the module is loaded at runtime.
    import duckdb
    import pandas as pd
    import polars as pl
    import pyarrow as pa
    from sqlframe.base.dataframe import BaseDataFrame as _BaseDataFrame
    from typing_extensions import Self, TypeAlias

    # Kept inside the guard because `_BaseDataFrame` is only bound here.
    SQLFrameDataFrame = _BaseDataFrame[Any, Any, Any, Any, Any]
15+
16+
# Public names exported by this module.
__all__ = [
    "NativeAny",
    "NativeArrow",
    "NativeCuDF",
    "NativeDask",
    "NativeDataFrame",
    "NativeDuckDB",
    "NativeFrame",
    "NativeIbis",
    "NativeKnown",
    "NativeLazyFrame",
    "NativeModin",
    "NativePandas",
    "NativePandasLike",
    "NativePandasLikeDataFrame",
    "NativePandasLikeSeries",
    "NativePolars",
    "NativePySpark",
    "NativePySparkConnect",
    "NativeSQLFrame",
    "NativeSeries",
    "NativeSparkLike",
    "NativeUnknown",
]
40+
41+
42+
# Placeholder alias for `Any`, marking annotations that are deliberately left
# imprecise (used below in `Self | Incomplete` return types).
Incomplete: TypeAlias = Any
43+
44+
45+
# All dataframes supported by Narwhals have a
# `columns` property. Their similarities don't extend
# _that_ much further unfortunately...
class NativeFrame(Protocol):
    """Structural type for any object with a `columns` property and a `join` method.

    Both members are typed loosely (`Any`) so the protocol only constrains
    the *presence* of the members, not their signatures.
    """

    @property
    def columns(self) -> Any: ...
    def join(self, *args: Any, **kwargs: Any) -> Any: ...
52+
53+
54+
class NativeDataFrame(Sized, NativeFrame, Protocol):
    """A `NativeFrame` that is additionally `Sized`, i.e. supports `len()`."""
55+
56+
57+
class NativeLazyFrame(NativeFrame, Protocol):
    """A `NativeFrame` that additionally exposes an `explain` method."""

    def explain(self, *args: Any, **kwargs: Any) -> Any: ...
59+
60+
61+
class NativeSeries(Sized, Iterable[Any], Protocol):
    """Structural type for a sized, iterable object that has a `filter` method."""

    def filter(self, *args: Any, **kwargs: Any) -> Any: ...
63+
64+
65+
class _BasePandasLike(Sized, Protocol):
    """Members shared by the pandas-like frame and series protocols in this module.

    Signatures use `...` defaults and loose `Any` parameters: only the shape
    of each member is described, not a concrete implementation.
    """

    index: Any
    """`mypy` doesn't like the asymmetric `property` setter in `pandas`."""

    def __getitem__(self, key: Any, /) -> Any: ...
    def __mul__(self, other: float | Collection[float] | Self, /) -> Self: ...
    def __floordiv__(self, other: float | Collection[float] | Self, /) -> Self: ...
    @property
    def loc(self) -> Any: ...
    @property
    def shape(self) -> tuple[int, ...]: ...
    def set_axis(self, labels: Any, *, axis: Any = ..., copy: bool = ...) -> Self: ...
    def copy(self, deep: bool = ...) -> Self: ...  # noqa: FBT001
    def rename(self, *args: Any, **kwds: Any) -> Self | Incomplete:
        """`mypy` & `pyright` disagree on overloads.

        `Incomplete` used to fix [more important issue](https://github.com/narwhals-dev/narwhals/pull/3016#discussion_r2296139744).
        """
83+
84+
85+
class _BasePandasLikeFrame(NativeDataFrame, _BasePandasLike, Protocol):
    """Combination of `NativeDataFrame` and `_BasePandasLike`; adds no members."""
86+
87+
88+
class _BasePandasLikeSeries(NativeSeries, _BasePandasLike, Protocol):
    """Combination of `NativeSeries` and `_BasePandasLike`, plus a `where` method."""

    def where(self, cond: Any, other: Any = ..., /) -> Self | Incomplete: ...
90+
91+
92+
class NativeDask(NativeLazyFrame, Protocol):
    """A `NativeLazyFrame` that also declares a `_partition_type` attribute.

    NOTE(review): going by the name, the extra attribute is what sets Dask
    frames apart from other lazy frames — confirm against the Dask API.
    """

    _partition_type: type[pd.DataFrame]
94+
95+
96+
class _CuDFDataFrame(_BasePandasLikeFrame, Protocol):
    """A pandas-like frame protocol that additionally requires `to_pylibcudf`."""

    def to_pylibcudf(self, *args: Any, **kwds: Any) -> Any: ...
98+
99+
100+
class _CuDFSeries(_BasePandasLikeSeries, Protocol):
    """A pandas-like series protocol that additionally requires `to_pylibcudf`."""

    def to_pylibcudf(self, *args: Any, **kwds: Any) -> Any: ...
102+
103+
104+
class NativeIbis(Protocol):
    """Structural type requiring `sql` and the three `__*_result__` hooks.

    All four methods accept arbitrary arguments and return `Any`, so only
    their presence is checked.
    """

    def sql(self, *args: Any, **kwds: Any) -> Any: ...
    def __pyarrow_result__(self, *args: Any, **kwds: Any) -> Any: ...
    def __pandas_result__(self, *args: Any, **kwds: Any) -> Any: ...
    def __polars_result__(self, *args: Any, **kwds: Any) -> Any: ...
109+
110+
111+
class _ModinDataFrame(_BasePandasLikeFrame, Protocol):
    """A pandas-like frame protocol that also declares `_pandas_class`."""

    _pandas_class: type[pd.DataFrame]
113+
114+
115+
class _ModinSeries(_BasePandasLikeSeries, Protocol):
    """A pandas-like series protocol that also declares `_pandas_class`."""

    _pandas_class: type[pd.Series[Any]]
117+
118+
119+
# NOTE: Using `pyspark.sql.DataFrame` creates false positives in overloads when not installed
class _PySparkDataFrame(NativeLazyFrame, Protocol):
    """A `NativeLazyFrame` distinguished by one PySpark-specific method."""

    # Arbitrary method that `sqlframe` doesn't have and unlikely to appear anywhere else
    # https://github.com/apache/spark/blob/8530444e25b83971da4314c608aa7d763adeceb3/python/pyspark/sql/dataframe.py#L4875
    def dropDuplicatesWithinWatermark(self, *args: Any, **kwargs: Any) -> Any: ...  # noqa: N802
124+
125+
126+
# Per-backend aliases. String values are forward references: the third-party
# modules they mention are only imported under `TYPE_CHECKING`.
NativePolars: TypeAlias = "pl.DataFrame | pl.LazyFrame | pl.Series"
NativeArrow: TypeAlias = "pa.Table | pa.ChunkedArray[Any]"
NativeDuckDB: TypeAlias = "duckdb.DuckDBPyRelation"
NativePandas: TypeAlias = "pd.DataFrame | pd.Series[Any]"
NativeModin: TypeAlias = "_ModinDataFrame | _ModinSeries"
NativeCuDF: TypeAlias = "_CuDFDataFrame | _CuDFSeries"

# pandas-like groupings, split by series / dataframe and then combined.
NativePandasLikeSeries: TypeAlias = "pd.Series[Any] | _CuDFSeries | _ModinSeries"
NativePandasLikeDataFrame: TypeAlias = "pd.DataFrame | _CuDFDataFrame | _ModinDataFrame"
NativePandasLike: TypeAlias = "NativePandasLikeDataFrame | NativePandasLikeSeries"

# Spark-like backends. `NativePySpark` and `NativePySparkConnect` both alias
# the same `_PySparkDataFrame` protocol.
NativeSQLFrame: TypeAlias = "_BaseDataFrame[Any, Any, Any, Any, Any]"
NativePySpark: TypeAlias = _PySparkDataFrame
NativePySparkConnect: TypeAlias = _PySparkDataFrame
NativeSparkLike: TypeAlias = "NativeSQLFrame | NativePySpark | NativePySparkConnect"

# Top-level unions: every named backend, the generic structural fallbacks,
# and the union of the two.
NativeKnown: TypeAlias = "NativePolars | NativeArrow | NativePandasLike | NativeSparkLike | NativeDuckDB | NativeDask | NativeIbis"
NativeUnknown: TypeAlias = "NativeDataFrame | NativeSeries | NativeLazyFrame"
NativeAny: TypeAlias = "NativeKnown | NativeUnknown"

0 commit comments

Comments
 (0)