Skip to content

Commit 54bf3d3

Browse files
authored
refactor: Cache, reuse Implementation._backend_version (#2764)
* docs: Remove outdated doc I should've caught that in review (#2620 (comment)) * perf: Cache `Implementation._backend_version` Mentioned in - #2620 (comment) - #1657 (reply in thread) Now that validation happens **within** that call, we can remove it from every `Series`, `DataFrame`, `LazyFrame` constructor and stop passing it around * perf: Remove `validate_backend_version` * test: Update `test_to_native_namespace_min_version` * refactor: Port over `EagerDataFrame` `lazy()` and `collect()` between backends is really the only place we might need to preserve the current checks * refactor: Port over `EagerSeries` * refactor: Port `Eager(Expr|Then)`, `(Arrow|Pandas)Selector` * refactor: Port `EagerNamespace` * refactor: Port `EagerSeriesNamespace` * refactor: No more `backend_version` eager utils! * docs: Fix `how_it_works.md` We don't need the version anymore https://github.com/narwhals-dev/narwhals/actions/runs/15982913249/job/45081217959?pr=2764 * revert: Remove unused `EagerSeries._validate_backend_version` https://github.com/narwhals-dev/narwhals/actions/runs/15982913252/job/45081168507 * ci: fix pandas cov https://github.com/narwhals-dev/narwhals/actions/runs/15982913252/job/45081168507 * fix cov pattern https://github.com/narwhals-dev/narwhals/actions/runs/15983209178/job/45082089100?pr=2764 * refactor: Port `LazyExpr` + friends * refactor: Port `PolarsExpr`, `CompliantExpr` * refactor: Port `PolarsSeries`, `_polars.utils` * refactor: Port `_dask.utils` Now `mypy` understands the same error as `pyright` 😏 * refactor: Port `CompliantNamespace` + friends * refactor: Port `Compliant(Data|Lazy)Frame` + friends * refactor: Move `_validate_backend_version` into a protocol * chore: `polars` cov * feat: Support `ibis` in `.lazy()`? 
- Seemed the easiest way to get coverage - Will split into another PR - https://ibis-project.org/reference/expression-tables#ibis.memtable * test: `from_arrow` coverage * ignore spark cov * chore: `get_dask` coverage * fix imports and `import_check` Was broken on windows > UnicodeDecodeError: 'charmap' codec can't decode byte 0x90 in position 21406: character maps to <undefined> * revert: just ignore `dask` cov - https://github.com/narwhals-dev/narwhals/actions/runs/16004869123/job/45148957046 - 2de5910 * refactor: Cleaning up * refactor: Replace more `parse_version`
1 parent 3db4ab4 commit 54bf3d3

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

57 files changed

+536
-1015
lines changed

docs/how_it_works.md

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,6 @@ from narwhals._utils import parse_version, Version
7474

7575
pn = PandasLikeNamespace(
7676
implementation=Implementation.PANDAS,
77-
backend_version=parse_version(pd.__version__),
7877
version=Version.MAIN,
7978
)
8079
print(nw.col("a")._to_compliant_expr(pn))
@@ -101,15 +100,13 @@ import pandas as pd
101100

102101
pn = PandasLikeNamespace(
103102
implementation=Implementation.PANDAS,
104-
backend_version=parse_version(pd.__version__),
105103
version=Version.MAIN,
106104
)
107105

108106
df_pd = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
109107
df = PandasLikeDataFrame(
110108
df_pd,
111109
implementation=Implementation.PANDAS,
112-
backend_version=parse_version(pd.__version__),
113110
version=Version.MAIN,
114111
validate_column_names=True,
115112
)
@@ -199,7 +196,6 @@ import pandas as pd
199196

200197
pn = PandasLikeNamespace(
201198
implementation=Implementation.PANDAS,
202-
backend_version=parse_version(pd.__version__),
203199
version=Version.MAIN,
204200
)
205201

@@ -214,7 +210,6 @@ backend, and it does so by passing a Narwhals-compliant namespace to `nw.Expr._t
214210
```python exec="1" result="python" session="pandas_api_mapping" source="above"
215211
pn = PandasLikeNamespace(
216212
implementation=Implementation.PANDAS,
217-
backend_version=parse_version(pd.__version__),
218213
version=Version.MAIN,
219214
)
220215
expr = (nw.col("a") + 1)._to_compliant_expr(pn)

narwhals/_arrow/dataframe.py

Lines changed: 23 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,8 @@
1919
generate_temporary_column_name,
2020
not_implemented,
2121
parse_columns_to_drop,
22-
parse_version,
2322
scale_bytes,
2423
supports_arrow_c_stream,
25-
validate_backend_version,
2624
)
2725
from narwhals.dependencies import is_numpy_array_1d
2826
from narwhals.exceptions import ShapeError
@@ -46,7 +44,7 @@
4644
)
4745
from narwhals._compliant.typing import CompliantDataFrameAny, CompliantLazyFrameAny
4846
from narwhals._translate import IntoArrowTable
49-
from narwhals._utils import Version, _FullContext
47+
from narwhals._utils import Version, _LimitedContext
5048
from narwhals.dtypes import DType
5149
from narwhals.schema import Schema
5250
from narwhals.typing import (
@@ -77,25 +75,26 @@
7775
class ArrowDataFrame(
7876
EagerDataFrame["ArrowSeries", "ArrowExpr", "pa.Table", "ChunkedArrayAny"]
7977
):
78+
_implementation = Implementation.PYARROW
79+
8080
def __init__(
8181
self,
8282
native_dataframe: pa.Table,
8383
*,
84-
backend_version: tuple[int, ...],
8584
version: Version,
8685
validate_column_names: bool,
86+
validate_backend_version: bool = False,
8787
) -> None:
8888
if validate_column_names:
8989
check_column_names_are_unique(native_dataframe.column_names)
90+
if validate_backend_version:
91+
self._validate_backend_version()
9092
self._native_frame = native_dataframe
91-
self._implementation = Implementation.PYARROW
92-
self._backend_version = backend_version
9393
self._version = version
94-
validate_backend_version(self._implementation, self._backend_version)
9594

9695
@classmethod
97-
def from_arrow(cls, data: IntoArrowTable, /, *, context: _FullContext) -> Self:
98-
backend_version = context._backend_version
96+
def from_arrow(cls, data: IntoArrowTable, /, *, context: _LimitedContext) -> Self:
97+
backend_version = context._implementation._backend_version()
9998
if cls._is_native(data):
10099
native = data
101100
elif backend_version >= (14,) or isinstance(data, Collection):
@@ -114,7 +113,7 @@ def from_dict(
114113
data: Mapping[str, Any],
115114
/,
116115
*,
117-
context: _FullContext,
116+
context: _LimitedContext,
118117
schema: Mapping[str, DType] | Schema | None,
119118
) -> Self:
120119
from narwhals.schema import Schema
@@ -128,21 +127,16 @@ def _is_native(obj: pa.Table | Any) -> TypeIs[pa.Table]:
128127
return isinstance(obj, pa.Table)
129128

130129
@classmethod
131-
def from_native(cls, data: pa.Table, /, *, context: _FullContext) -> Self:
132-
return cls(
133-
data,
134-
backend_version=context._backend_version,
135-
version=context._version,
136-
validate_column_names=True,
137-
)
130+
def from_native(cls, data: pa.Table, /, *, context: _LimitedContext) -> Self:
131+
return cls(data, version=context._version, validate_column_names=True)
138132

139133
@classmethod
140134
def from_numpy(
141135
cls,
142136
data: _2DArray,
143137
/,
144138
*,
145-
context: _FullContext,
139+
context: _LimitedContext,
146140
schema: Mapping[str, DType] | Schema | Sequence[str] | None,
147141
) -> Self:
148142
from narwhals.schema import Schema
@@ -157,9 +151,7 @@ def from_numpy(
157151
def __narwhals_namespace__(self) -> ArrowNamespace:
158152
from narwhals._arrow.namespace import ArrowNamespace
159153

160-
return ArrowNamespace(
161-
backend_version=self._backend_version, version=self._version
162-
)
154+
return ArrowNamespace(version=self._version)
163155

164156
def __native_namespace__(self) -> ModuleType:
165157
if self._implementation is Implementation.PYARROW:
@@ -175,19 +167,11 @@ def __narwhals_lazyframe__(self) -> Self:
175167
return self
176168

177169
def _with_version(self, version: Version) -> Self:
178-
return self.__class__(
179-
self.native,
180-
backend_version=self._backend_version,
181-
version=version,
182-
validate_column_names=False,
183-
)
170+
return self.__class__(self.native, version=version, validate_column_names=False)
184171

185172
def _with_native(self, df: pa.Table, *, validate_column_names: bool = True) -> Self:
186173
return self.__class__(
187-
df,
188-
backend_version=self._backend_version,
189-
version=self._version,
190-
validate_column_names=validate_column_names,
174+
df, version=self._version, validate_column_names=validate_column_names
191175
)
192176

193177
@property
@@ -536,9 +520,7 @@ def lazy(self, *, backend: Implementation | None = None) -> CompliantLazyFrameAn
536520

537521
df = self.native # noqa: F841
538522
return DuckDBLazyFrame(
539-
duckdb.table("df"),
540-
backend_version=parse_version(duckdb),
541-
version=self._version,
523+
duckdb.table("df"), validate_backend_version=True, version=self._version
542524
)
543525
elif backend is Implementation.POLARS:
544526
import polars as pl # ignore-banned-import
@@ -547,18 +529,17 @@ def lazy(self, *, backend: Implementation | None = None) -> CompliantLazyFrameAn
547529

548530
return PolarsLazyFrame(
549531
cast("pl.DataFrame", pl.from_arrow(self.native)).lazy(),
550-
backend_version=parse_version(pl),
532+
validate_backend_version=True,
551533
version=self._version,
552534
)
553535
elif backend is Implementation.DASK:
554-
import dask # ignore-banned-import
555536
import dask.dataframe as dd # ignore-banned-import
556537

557538
from narwhals._dask.dataframe import DaskLazyFrame
558539

559540
return DaskLazyFrame(
560541
dd.from_pandas(self.native.to_pandas()),
561-
backend_version=parse_version(dask),
542+
validate_backend_version=True,
562543
version=self._version,
563544
)
564545
elif backend.is_ibis():
@@ -568,9 +549,10 @@ def lazy(self, *, backend: Implementation | None = None) -> CompliantLazyFrameAn
568549

569550
return IbisLazyFrame(
570551
ibis.memtable(self.native, columns=self.columns),
571-
backend_version=parse_version(ibis),
552+
validate_backend_version=True,
572553
version=self._version,
573554
)
555+
574556
raise AssertionError # pragma: no cover
575557

576558
def collect(
@@ -580,21 +562,16 @@ def collect(
580562
from narwhals._arrow.dataframe import ArrowDataFrame
581563

582564
return ArrowDataFrame(
583-
self.native,
584-
backend_version=self._backend_version,
585-
version=self._version,
586-
validate_column_names=False,
565+
self.native, version=self._version, validate_column_names=False
587566
)
588567

589568
if backend is Implementation.PANDAS:
590-
import pandas as pd # ignore-banned-import
591-
592569
from narwhals._pandas_like.dataframe import PandasLikeDataFrame
593570

594571
return PandasLikeDataFrame(
595572
self.native.to_pandas(),
596573
implementation=Implementation.PANDAS,
597-
backend_version=parse_version(pd),
574+
validate_backend_version=True,
598575
version=self._version,
599576
validate_column_names=False,
600577
)
@@ -606,7 +583,7 @@ def collect(
606583

607584
return PolarsDataFrame(
608585
cast("pl.DataFrame", pl.from_arrow(self.native)),
609-
backend_version=parse_version(pl),
586+
validate_backend_version=True,
610587
version=self._version,
611588
)
612589

narwhals/_arrow/expr.py

Lines changed: 5 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
from narwhals._arrow.namespace import ArrowNamespace
2323
from narwhals._compliant.typing import AliasNames, EvalNames, EvalSeries, ScalarKwargs
2424
from narwhals._expression_parsing import ExprMetadata
25-
from narwhals._utils import Version, _FullContext
25+
from narwhals._utils import Version, _LimitedContext
2626

2727

2828
class ArrowExpr(EagerExpr["ArrowDataFrame", ArrowSeries]):
@@ -36,7 +36,6 @@ def __init__(
3636
function_name: str,
3737
evaluate_output_names: EvalNames[ArrowDataFrame],
3838
alias_output_names: AliasNames | None,
39-
backend_version: tuple[int, ...],
4039
version: Version,
4140
scalar_kwargs: ScalarKwargs | None = None,
4241
implementation: Implementation | None = None,
@@ -47,7 +46,6 @@ def __init__(
4746
self._depth = depth
4847
self._evaluate_output_names = evaluate_output_names
4948
self._alias_output_names = alias_output_names
50-
self._backend_version = backend_version
5149
self._version = version
5250
self._scalar_kwargs = scalar_kwargs or {}
5351
self._metadata: ExprMetadata | None = None
@@ -58,17 +56,14 @@ def from_column_names(
5856
evaluate_column_names: EvalNames[ArrowDataFrame],
5957
/,
6058
*,
61-
context: _FullContext,
59+
context: _LimitedContext,
6260
function_name: str = "",
6361
) -> Self:
6462
def func(df: ArrowDataFrame) -> list[ArrowSeries]:
6563
try:
6664
return [
6765
ArrowSeries(
68-
df.native[column_name],
69-
name=column_name,
70-
backend_version=df._backend_version,
71-
version=df._version,
66+
df.native[column_name], name=column_name, version=df._version
7267
)
7368
for column_name in evaluate_column_names(df)
7469
]
@@ -83,12 +78,11 @@ def func(df: ArrowDataFrame) -> list[ArrowSeries]:
8378
function_name=function_name,
8479
evaluate_output_names=evaluate_column_names,
8580
alias_output_names=None,
86-
backend_version=context._backend_version,
8781
version=context._version,
8882
)
8983

9084
@classmethod
91-
def from_column_indices(cls, *column_indices: int, context: _FullContext) -> Self:
85+
def from_column_indices(cls, *column_indices: int, context: _LimitedContext) -> Self:
9286
def func(df: ArrowDataFrame) -> list[ArrowSeries]:
9387
tbl = df.native
9488
cols = df.columns
@@ -103,16 +97,13 @@ def func(df: ArrowDataFrame) -> list[ArrowSeries]:
10397
function_name="nth",
10498
evaluate_output_names=cls._eval_names_indices(column_indices),
10599
alias_output_names=None,
106-
backend_version=context._backend_version,
107100
version=context._version,
108101
)
109102

110103
def __narwhals_namespace__(self) -> ArrowNamespace:
111104
from narwhals._arrow.namespace import ArrowNamespace
112105

113-
return ArrowNamespace(
114-
backend_version=self._backend_version, version=self._version
115-
)
106+
return ArrowNamespace(version=self._version)
116107

117108
def __narwhals_expr__(self) -> None: ...
118109

@@ -175,7 +166,6 @@ def func(df: ArrowDataFrame) -> Sequence[ArrowSeries]:
175166
function_name=self._function_name + "->over",
176167
evaluate_output_names=self._evaluate_output_names,
177168
alias_output_names=self._alias_output_names,
178-
backend_version=self._backend_version,
179169
version=self._version,
180170
)
181171

narwhals/_arrow/namespace.py

Lines changed: 5 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,8 @@
3131
class ArrowNamespace(
3232
EagerNamespace[ArrowDataFrame, ArrowSeries, ArrowExpr, pa.Table, "ChunkedArrayAny"]
3333
):
34+
_implementation = Implementation.PYARROW
35+
3436
@property
3537
def _dataframe(self) -> type[ArrowDataFrame]:
3638
return ArrowDataFrame
@@ -43,10 +45,7 @@ def _expr(self) -> type[ArrowExpr]:
4345
def _series(self) -> type[ArrowSeries]:
4446
return ArrowSeries
4547

46-
# --- not in spec ---
47-
def __init__(self, *, backend_version: tuple[int, ...], version: Version) -> None:
48-
self._backend_version = backend_version
49-
self._implementation = Implementation.PYARROW
48+
def __init__(self, *, version: Version) -> None:
5049
self._version = version
5150

5251
def len(self) -> ArrowExpr:
@@ -59,7 +58,6 @@ def len(self) -> ArrowExpr:
5958
function_name="len",
6059
evaluate_output_names=lambda _df: ["len"],
6160
alias_output_names=None,
62-
backend_version=self._backend_version,
6361
version=self._version,
6462
)
6563

@@ -78,7 +76,6 @@ def _lit_arrow_series(_: ArrowDataFrame) -> ArrowSeries:
7876
function_name="lit",
7977
evaluate_output_names=lambda _df: ["literal"],
8078
alias_output_names=None,
81-
backend_version=self._backend_version,
8279
version=self._version,
8380
)
8481

@@ -168,12 +165,7 @@ def func(df: ArrowDataFrame) -> list[ArrowSeries]:
168165
pc.min_element_wise, [s.native for s in series], init_series.native
169166
)
170167
return [
171-
ArrowSeries(
172-
native_series,
173-
name=init_series.name,
174-
backend_version=self._backend_version,
175-
version=self._version,
176-
)
168+
ArrowSeries(native_series, name=init_series.name, version=self._version)
177169
]
178170

179171
return self._expr._from_callable(
@@ -194,12 +186,7 @@ def func(df: ArrowDataFrame) -> list[ArrowSeries]:
194186
pc.max_element_wise, [s.native for s in series], init_series.native
195187
)
196188
return [
197-
ArrowSeries(
198-
native_series,
199-
name=init_series.name,
200-
backend_version=self._backend_version,
201-
version=self._version,
202-
)
189+
ArrowSeries(native_series, name=init_series.name, version=self._version)
203190
]
204191

205192
return self._expr._from_callable(
@@ -262,7 +249,6 @@ def func(df: ArrowDataFrame) -> list[ArrowSeries]:
262249
compliant = self._series(
263250
concat_str(*it, separator_scalar, null_handling=null_handling),
264251
name=name,
265-
backend_version=self._backend_version,
266252
version=self._version,
267253
)
268254
return [compliant]

0 commit comments

Comments
 (0)