From 020dad295941ceb80e5f53f53da7e71c97548149 Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Wed, 21 May 2025 11:54:28 +0100 Subject: [PATCH 1/3] chore: enable pyright reportPossiblyUnboundVariable --- narwhals/_duckdb/dataframe.py | 6 ++---- narwhals/_duckdb/expr.py | 24 ++++++++++++++++++++---- narwhals/_pandas_like/expr.py | 3 ++- pyproject.toml | 2 ++ 4 files changed, 26 insertions(+), 9 deletions(-) diff --git a/narwhals/_duckdb/dataframe.py b/narwhals/_duckdb/dataframe.py index 8ee43ecbb4..2ca84e3f3a 100644 --- a/narwhals/_duckdb/dataframe.py +++ b/narwhals/_duckdb/dataframe.py @@ -1,6 +1,5 @@ from __future__ import annotations -import contextlib from functools import reduce from operator import and_ from typing import TYPE_CHECKING @@ -52,9 +51,6 @@ from narwhals.typing import LazyUniqueKeepStrategy from narwhals.utils import _FullContext -with contextlib.suppress(ImportError): # requires duckdb>=1.3.0 - from duckdb import SQLExpression # type: ignore[attr-defined, unused-ignore] - class DuckDBLazyFrame( CompliantLazyFrame[ @@ -387,6 +383,8 @@ def unique( "with `subset` specified." ) raise NotImplementedError(msg) + from duckdb import SQLExpression # type: ignore[attr-defined, unused-ignore] + # Sanitise input if any(x not in self.columns for x in subset_): msg = f"Columns {set(subset_).difference(self.columns)} not found in {self.columns}." diff --git a/narwhals/_duckdb/expr.py b/narwhals/_duckdb/expr.py index 340b7d4d3e..8cf9947691 100644 --- a/narwhals/_duckdb/expr.py +++ b/narwhals/_duckdb/expr.py @@ -1,6 +1,5 @@ from __future__ import annotations -import contextlib import operator from typing import TYPE_CHECKING from typing import Any @@ -55,9 +54,6 @@ from narwhals.utils import Version from narwhals.utils import _FullContext -with contextlib.suppress(ImportError): # requires duckdb>=1.3.0 - from duckdb import SQLExpression # type: ignore[attr-defined, unused-ignore] - class DuckDBExpr(LazyExpr["DuckDBLazyFrame", "Expression"]): _implementation = Implementation.DUCKDB @@ -104,6 +100,8 @@ def _cum_window_func( reverse: bool, func_name: Literal["sum", "max", "min", "count", "product"], ) -> WindowFunction: + from duckdb import SQLExpression # type: ignore[attr-defined, unused-ignore] + def func(window_inputs: WindowInputs) -> Expression: order_by_sql = generate_order_by_sql( *window_inputs.order_by, ascending=not reverse @@ -126,6 +124,8 @@ def _rolling_window_func( min_samples: int, ddof: int | None = None, ) -> WindowFunction: + from duckdb import SQLExpression # type: ignore[attr-defined, unused-ignore] + ensure_type(window_size, int, type(None)) ensure_type(min_samples, int) supported_funcs = ["sum", "mean", "std", "var"] @@ -171,6 +171,7 @@ def broadcast(self, kind: Literal[ExprKind.AGGREGATION, ExprKind.LITERAL]) -> Se if self._backend_version < (1, 3): msg = "At least version 1.3 of DuckDB is required for binary operations between aggregates and columns." raise NotImplementedError(msg) + from duckdb import SQLExpression # type: ignore[attr-defined, unused-ignore] template = "{expr} over ()" @@ -501,6 +502,8 @@ def null_count(self) -> Self: @requires.backend_version((1, 3)) def over(self, partition_by: Sequence[str], order_by: Sequence[str] | None) -> Self: + from duckdb import SQLExpression # type: ignore[attr-defined, unused-ignore] + if (window_function := self._window_function) is not None: assert order_by is not None # noqa: S101 @@ -560,6 +563,8 @@ def round(self, decimals: int) -> Self: @requires.backend_version((1, 3)) def shift(self, n: int) -> Self: + from duckdb import SQLExpression # type: ignore[attr-defined, unused-ignore] + ensure_type(n, int) def func(window_inputs: WindowInputs) -> Expression: @@ -574,6 +579,8 @@ def func(window_inputs: WindowInputs) -> Expression: @requires.backend_version((1, 3)) def is_first_distinct(self) -> Self: + from duckdb import SQLExpression # type: ignore[attr-defined, unused-ignore] + def func(window_inputs: WindowInputs) -> Expression: order_by_sql = generate_order_by_sql(*window_inputs.order_by, ascending=True) if window_inputs.partition_by: @@ -590,6 +597,8 @@ def func(window_inputs: WindowInputs) -> Expression: @requires.backend_version((1, 3)) def is_last_distinct(self) -> Self: + from duckdb import SQLExpression # type: ignore[attr-defined, unused-ignore] + def func(window_inputs: WindowInputs) -> Expression: order_by_sql = generate_order_by_sql(*window_inputs.order_by, ascending=False) if window_inputs.partition_by: @@ -606,6 +615,8 @@ def func(window_inputs: WindowInputs) -> Expression: @requires.backend_version((1, 3)) def diff(self) -> Self: + from duckdb import SQLExpression # type: ignore[attr-defined, unused-ignore] + def func(window_inputs: WindowInputs) -> Expression: order_by_sql = generate_order_by_sql(*window_inputs.order_by, ascending=True) partition_by_sql = generate_partition_by_sql(*window_inputs.partition_by) @@ -704,6 +715,7 @@ def fill_null( if self._backend_version < (1, 3): # pragma: no cover msg = f"`fill_null` with `strategy={strategy}` is only available in 'duckdb>=1.3.0'." raise NotImplementedError(msg) + from duckdb import SQLExpression # type: ignore[attr-defined, unused-ignore] def _fill_with_strategy(window_inputs: WindowInputs) -> Expression: order_by_sql = generate_order_by_sql( @@ -740,6 +752,8 @@ def func(expr: Expression) -> Expression: @requires.backend_version((1, 3)) def is_unique(self) -> Self: + from duckdb import SQLExpression # type: ignore[attr-defined, unused-ignore] + def func(expr: Expression) -> Expression: sql = f"count(*) over (partition by {expr})" return SQLExpression(sql) == lit(1) # type: ignore[no-any-return, unused-ignore] @@ -748,6 +762,8 @@ def func(expr: Expression) -> Expression: @requires.backend_version((1, 3)) def rank(self, method: RankMethod, *, descending: bool) -> Self: + from duckdb import SQLExpression # type: ignore[attr-defined, unused-ignore] + if method in {"min", "max", "average"}: func = FunctionExpression("rank") elif method == "dense": diff --git a/narwhals/_pandas_like/expr.py b/narwhals/_pandas_like/expr.py index 42cb4a4563..59c05044c1 100644 --- a/narwhals/_pandas_like/expr.py +++ b/narwhals/_pandas_like/expr.py @@ -270,7 +270,6 @@ def func(df: PandasLikeDataFrame) -> Sequence[PandasLikeSeries]: # noqa: C901, .with_row_index(token) .sort(*order_by, descending=reverse, nulls_last=reverse) ) - sorting_indices = df.get_column(token) elif reverse: columns = list(set(partition_by).union(output_names)) df = df.simple_select(*columns)._gather_slice(slice(None, None, -1)) @@ -312,6 +311,8 @@ def func(df: PandasLikeDataFrame) -> Sequence[PandasLikeSeries]: # noqa: C901, ) results = [result_frame.get_column(name) for name in aliases] if order_by: + # `token` was set in `if order_by` block above + sorting_indices = df.get_column(token) # pyright: ignore[reportPossiblyUnboundVariable] for s in results: s._scatter_in_place(sorting_indices, s) return results diff --git a/pyproject.toml b/pyproject.toml index 04f6d3cdcc..db9dcaf91d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -324,8 +324,10 @@ warn_return_any = false pythonPlatform = "All" # NOTE (stubs do unsafe `TypeAlias` and `TypeVar` imports) # pythonVersion = "3.8" +reportFunctionMemberAccess = "error" reportMissingImports = "none" reportMissingModuleSource = "none" +reportPossiblyUnboundVariable = "error" reportPrivateImportUsage = "none" reportUnusedExpression = "none" # handled by (https://docs.astral.sh/ruff/rules/unused-variable/) typeCheckingMode = "basic" From 96d6867997543a8e0d2fda218ff588df4a393aea Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Thu, 22 May 2025 12:08:10 +0100 Subject: [PATCH 2/3] update --- narwhals/_duckdb/dataframe.py | 2 +- narwhals/_duckdb/expr.py | 22 +++++++++++----------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/narwhals/_duckdb/dataframe.py b/narwhals/_duckdb/dataframe.py index caec9f8532..6270fb339a 100644 --- a/narwhals/_duckdb/dataframe.py +++ b/narwhals/_duckdb/dataframe.py @@ -378,7 +378,7 @@ def unique( "with `subset` specified." ) raise NotImplementedError(msg) - from duckdb import SQLExpression # type: ignore[attr-defined, unused-ignore] + from duckdb import SQLExpression # Sanitise input if any(x not in self.columns for x in subset_): diff --git a/narwhals/_duckdb/expr.py b/narwhals/_duckdb/expr.py index 139e11fbf5..d0ef49bbd0 100644 --- a/narwhals/_duckdb/expr.py +++ b/narwhals/_duckdb/expr.py @@ -91,7 +91,7 @@ def _cum_window_func( reverse: bool, func_name: Literal["sum", "max", "min", "count", "product"], ) -> WindowFunction: - from duckdb import SQLExpression # type: ignore[attr-defined, unused-ignore] + from duckdb import SQLExpression def func(window_inputs: WindowInputs) -> Expression: order_by_sql = generate_order_by_sql( @@ -115,7 +115,7 @@ def _rolling_window_func( min_samples: int, ddof: int | None = None, ) -> WindowFunction: - from duckdb import SQLExpression # type: ignore[attr-defined, unused-ignore] + from duckdb import SQLExpression ensure_type(window_size, int, type(None)) ensure_type(min_samples, int) @@ -162,7 +162,7 @@ def broadcast(self, kind: Literal[ExprKind.AGGREGATION, ExprKind.LITERAL]) -> Se if self._backend_version < (1, 3): msg = "At least version 1.3 of DuckDB is required for binary operations between aggregates and columns." raise NotImplementedError(msg) - from duckdb import SQLExpression # type: ignore[attr-defined, unused-ignore] + from duckdb import SQLExpression template = "{expr} over ()" @@ -493,7 +493,7 @@ def null_count(self) -> Self: @requires.backend_version((1, 3)) def over(self, partition_by: Sequence[str], order_by: Sequence[str] | None) -> Self: - from duckdb import SQLExpression # type: ignore[attr-defined, unused-ignore] + from duckdb import SQLExpression if (window_function := self._window_function) is not None: assert order_by is not None # noqa: S101 @@ -554,7 +554,7 @@ def round(self, decimals: int) -> Self: @requires.backend_version((1, 3)) def shift(self, n: int) -> Self: - from duckdb import SQLExpression # type: ignore[attr-defined, unused-ignore] + from duckdb import SQLExpression ensure_type(n, int) @@ -570,7 +570,7 @@ def func(window_inputs: WindowInputs) -> Expression: @requires.backend_version((1, 3)) def is_first_distinct(self) -> Self: - from duckdb import SQLExpression # type: ignore[attr-defined, unused-ignore] + from duckdb import SQLExpression def func(window_inputs: WindowInputs) -> Expression: order_by_sql = generate_order_by_sql(*window_inputs.order_by, ascending=True) @@ -588,7 +588,7 @@ def func(window_inputs: WindowInputs) -> Expression: @requires.backend_version((1, 3)) def is_last_distinct(self) -> Self: - from duckdb import SQLExpression # type: ignore[attr-defined, unused-ignore] + from duckdb import SQLExpression def func(window_inputs: WindowInputs) -> Expression: order_by_sql = generate_order_by_sql(*window_inputs.order_by, ascending=False) @@ -606,7 +606,7 @@ def func(window_inputs: WindowInputs) -> Expression: @requires.backend_version((1, 3)) def diff(self) -> Self: - from duckdb import SQLExpression # type: ignore[attr-defined, unused-ignore] + from duckdb import SQLExpression def func(window_inputs: WindowInputs) -> Expression: order_by_sql = generate_order_by_sql(*window_inputs.order_by, ascending=True) @@ -706,7 +706,7 @@ def fill_null( if self._backend_version < (1, 3): # pragma: no cover msg = f"`fill_null` with `strategy={strategy}` is only available in 'duckdb>=1.3.0'." raise NotImplementedError(msg) - from duckdb import SQLExpression # type: ignore[attr-defined, unused-ignore] + from duckdb import SQLExpression def _fill_with_strategy(window_inputs: WindowInputs) -> Expression: order_by_sql = generate_order_by_sql( @@ -743,7 +743,7 @@ def func(expr: Expression) -> Expression: @requires.backend_version((1, 3)) def is_unique(self) -> Self: - from duckdb import SQLExpression # type: ignore[attr-defined, unused-ignore] + from duckdb import SQLExpression def func(expr: Expression) -> Expression: sql = f"count(*) over (partition by {expr})" @@ -753,7 +753,7 @@ def func(expr: Expression) -> Expression: @requires.backend_version((1, 3)) def rank(self, method: RankMethod, *, descending: bool) -> Self: - from duckdb import SQLExpression # type: ignore[attr-defined, unused-ignore] + from duckdb import SQLExpression if method in {"min", "max", "average"}: func = FunctionExpression("rank") From f009997e9289b0ae0051219c652cb3f30c11fe56 Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Thu, 22 May 2025 12:09:47 +0100 Subject: [PATCH 3/3] update --- narwhals/_pandas_like/expr.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/narwhals/_pandas_like/expr.py b/narwhals/_pandas_like/expr.py index 2e154cc66f..6bacb68e5a 100644 --- a/narwhals/_pandas_like/expr.py +++ b/narwhals/_pandas_like/expr.py @@ -266,6 +266,7 @@ def func(df: PandasLikeDataFrame) -> Sequence[PandasLikeSeries]: # noqa: C901, .with_row_index(token) .sort(*order_by, descending=reverse, nulls_last=reverse) ) + sorting_indices = df.get_column(token) elif reverse: columns = list(set(partition_by).union(output_names)) df = df.simple_select(*columns)._gather_slice(slice(None, None, -1)) @@ -307,10 +308,9 @@ def func(df: PandasLikeDataFrame) -> Sequence[PandasLikeSeries]: # noqa: C901, ) results = [result_frame.get_column(name) for name in aliases] if order_by: - # `token` was set in `if order_by` block above - sorting_indices = df.get_column(token) # pyright: ignore[reportPossiblyUnboundVariable] for s in results: - s._scatter_in_place(sorting_indices, s) + # `sorting_indices` was initialised in `if order_by` block above. + s._scatter_in_place(sorting_indices, s) # pyright: ignore[reportPossiblyUnboundVariable] return results if reverse: return [s._gather_slice(slice(None, None, -1)) for s in results]