chore(typing): Resolve _dask errors (#2087)

dangotbanned · web-flow · commit 45038b0113a0 · 2025-02-25T15:00:41.000Z
diff --git a/narwhals/_dask/dataframe.py b/narwhals/_dask/dataframe.py
@@ -43,7 +43,7 @@ def __init__(
         version: Version,
         validate_column_names: bool,
     ) -> None:
-        self._native_frame = native_dataframe
+        self._native_frame: dd.DataFrame = native_dataframe
         self._backend_version = backend_version
         self._implementation = Implementation.DASK
         self._version = version
@@ -138,7 +138,7 @@ def collect(
 
     @property
     def columns(self: Self) -> list[str]:
-        return self._native_frame.columns.tolist()  # type: ignore[no-any-return]
+        return self._native_frame.columns.tolist()
 
     def filter(self: Self, predicate: DaskExpr) -> Self:
         # `[0]` is safe as the predicate's expression only returns a single column
@@ -426,7 +426,7 @@ def gather_every(self: Self, n: int, offset: int) -> Self:
         return (
             self.with_row_index(row_index_token)
             .filter(
-                (plx.col(row_index_token) >= offset)  # type: ignore[operator]
+                (plx.col(row_index_token) >= offset)
                 & ((plx.col(row_index_token) - offset) % n == 0)
             )
             .drop([row_index_token], strict=False)
diff --git a/narwhals/_dask/expr.py b/narwhals/_dask/expr.py
@@ -35,7 +35,7 @@
     from narwhals.utils import Version
 
 
-class DaskExpr(CompliantExpr["DaskLazyFrame", "dx.Series"]):
+class DaskExpr(CompliantExpr["DaskLazyFrame", "dx.Series"]):  # pyright: ignore[reportInvalidTypeArguments] (#2044)
     _implementation: Implementation = Implementation.DASK
 
     def __init__(
@@ -255,7 +255,7 @@ def __ne__(self: Self, other: DaskExpr) -> Self:  # type: ignore[override]
             lambda _input, other: _input.__ne__(other), "__ne__", other=other
         )
 
-    def __ge__(self: Self, other: DaskExpr) -> Self:
+    def __ge__(self: Self, other: DaskExpr | Any) -> Self:
         return self._from_call(
             lambda _input, other: _input.__ge__(other), "__ge__", other=other
         )
@@ -275,7 +275,7 @@ def __lt__(self: Self, other: DaskExpr) -> Self:
             lambda _input, other: _input.__lt__(other), "__lt__", other=other
         )
 
-    def __and__(self: Self, other: DaskExpr) -> Self:
+    def __and__(self: Self, other: DaskExpr | Any) -> Self:
         return self._from_call(
             lambda _input, other: _input.__and__(other), "__and__", other=other
         )
@@ -454,7 +454,7 @@ def func(_input: dx.Series) -> dx.Series:
                 _input.dtype, self._version, self._implementation
             )
             if dtype.is_numeric():
-                return _input != _input  # noqa: PLR0124
+                return _input != _input  # pyright: ignore[reportReturnType] # noqa: PLR0124
             msg = f"`.is_nan` only supported for numeric dtypes and not {dtype}, did you mean `.is_null`?"
             raise InvalidOperationError(msg)
 
@@ -487,31 +487,23 @@ def is_first_distinct(self: Self) -> Self:
         def func(_input: dx.Series) -> dx.Series:
             _name = _input.name
             col_token = generate_temporary_column_name(n_bytes=8, columns=[_name])
-            _input = add_row_index(
-                _input.to_frame(),
-                col_token,
-                backend_version=self._backend_version,
-                implementation=self._implementation,
+            frame = add_row_index(
+                _input.to_frame(), col_token, self._backend_version, self._implementation
             )
-            first_distinct_index = _input.groupby(_name).agg({col_token: "min"})[
-                col_token
-            ]
-            return _input[col_token].isin(first_distinct_index)
+            first_distinct_index = frame.groupby(_name).agg({col_token: "min"})[col_token]
+            return frame[col_token].isin(first_distinct_index)
 
         return self._from_call(func, "is_first_distinct")
 
     def is_last_distinct(self: Self) -> Self:
         def func(_input: dx.Series) -> dx.Series:
             _name = _input.name
             col_token = generate_temporary_column_name(n_bytes=8, columns=[_name])
-            _input = add_row_index(
-                _input.to_frame(),
-                col_token,
-                backend_version=self._backend_version,
-                implementation=self._implementation,
+            frame = add_row_index(
+                _input.to_frame(), col_token, self._backend_version, self._implementation
             )
-            last_distinct_index = _input.groupby(_name).agg({col_token: "max"})[col_token]
-            return _input[col_token].isin(last_distinct_index)
+            last_distinct_index = frame.groupby(_name).agg({col_token: "max"})[col_token]
+            return frame[col_token].isin(last_distinct_index)
 
         return self._from_call(func, "is_last_distinct")
 
diff --git a/narwhals/_dask/expr_dt.py b/narwhals/_dask/expr_dt.py
@@ -96,9 +96,9 @@ def func(s: dx.Series, time_zone: str) -> dx.Series:
                 s.dtype, self._compliant_expr._version, Implementation.DASK
             )
             if dtype.time_zone is None:  # type: ignore[attr-defined]
-                return s.dt.tz_localize("UTC").dt.tz_convert(time_zone)
+                return s.dt.tz_localize("UTC").dt.tz_convert(time_zone)  # pyright: ignore[reportAttributeAccessIssue]
             else:
-                return s.dt.tz_convert(time_zone)
+                return s.dt.tz_convert(time_zone)  # pyright: ignore[reportAttributeAccessIssue]
 
         return self._compliant_expr._from_call(func, "tz_convert", time_zone=time_zone)
 
@@ -125,7 +125,7 @@ def func(s: dx.Series, time_unit: TimeUnit) -> dx.Series:
             else:
                 msg = "Input should be either of Date or Datetime type"
                 raise TypeError(msg)
-            return result.where(~mask_na)
+            return result.where(~mask_na)  # pyright: ignore[reportReturnType]
 
         return self._compliant_expr._from_call(func, "datetime", time_unit=time_unit)
 
diff --git a/narwhals/_dask/group_by.py b/narwhals/_dask/group_by.py
@@ -26,7 +26,6 @@
 
     from narwhals._dask.dataframe import DaskLazyFrame
     from narwhals._dask.expr import DaskExpr
-    from narwhals.typing import CompliantExpr
 
     PandasSeriesGroupBy: TypeAlias = _PandasSeriesGroupBy[Any, Any]
     _AggFn: TypeAlias = Callable[..., Any]
@@ -73,7 +72,7 @@ class DaskLazyGroupBy:
     def __init__(
         self: Self, df: DaskLazyFrame, keys: list[str], *, drop_null_keys: bool
     ) -> None:
-        self._df = df
+        self._df: DaskLazyFrame = df
         self._keys = keys
         self._grouped = self._df._native_frame.groupby(
             list(self._keys),
@@ -93,11 +92,11 @@ def agg(
             self._from_native_frame,
         )
 
-    def _from_native_frame(self: Self, df: DaskLazyFrame) -> DaskLazyFrame:
+    def _from_native_frame(self: Self, df: dd.DataFrame) -> DaskLazyFrame:
         from narwhals._dask.dataframe import DaskLazyFrame
 
         return DaskLazyFrame(
-            df,  # pyright: ignore[reportArgumentType]
+            df,
             backend_version=self._df._backend_version,
             version=self._df._version,
             validate_column_names=True,
@@ -107,7 +106,7 @@ def _from_native_frame(self: Self, df: DaskLazyFrame) -> DaskLazyFrame:
 def agg_dask(
     df: DaskLazyFrame,
     grouped: Any,
-    exprs: Sequence[CompliantExpr[DaskLazyFrame, dx.Series]],
+    exprs: Sequence[DaskExpr],
     keys: list[str],
     from_dataframe: Callable[[Any], DaskLazyFrame],
 ) -> DaskLazyFrame:
@@ -148,7 +147,7 @@ def agg_dask(
             agg_function = POLARS_TO_DASK_AGGREGATIONS.get(function_name, function_name)
             # deal with n_unique case in a "lazy" mode to not depend on dask globally
             agg_function = (
-                agg_function(**expr._call_kwargs)  # type: ignore[attr-defined]
+                agg_function(**expr._call_kwargs)
                 if callable(agg_function)
                 else agg_function
             )
diff --git a/narwhals/_dask/namespace.py b/narwhals/_dask/namespace.py
@@ -8,7 +8,6 @@
 from typing import Iterable
 from typing import Literal
 from typing import Sequence
-from typing import cast
 
 import dask.dataframe as dd
 import pandas as pd
@@ -24,7 +23,6 @@
 from narwhals._expression_parsing import combine_alias_output_names
 from narwhals._expression_parsing import combine_evaluate_output_names
 from narwhals.typing import CompliantNamespace
-from narwhals.utils import is_compliant_expr
 
 if TYPE_CHECKING:
     from typing_extensions import Self
@@ -38,7 +36,7 @@
         import dask_expr as dx
 
 
-class DaskNamespace(CompliantNamespace[DaskLazyFrame, "dx.Series"]):
+class DaskNamespace(CompliantNamespace[DaskLazyFrame, "dx.Series"]):  # pyright: ignore[reportInvalidTypeArguments] (#2044)
     @property
     def selectors(self: Self) -> DaskSelectorNamespace:
         return DaskSelectorNamespace(self)
@@ -347,17 +345,16 @@ def __init__(
         version: Version,
     ) -> None:
         self._backend_version = backend_version
-        self._condition = condition
-        self._then_value = then_value
-        self._otherwise_value = otherwise_value
+        self._condition: DaskExpr = condition
+        self._then_value: DaskExpr | Any = then_value
+        self._otherwise_value: DaskExpr | Any = otherwise_value
         self._version = version
 
     def __call__(self: Self, df: DaskLazyFrame) -> Sequence[dx.Series]:
         condition = self._condition(df)[0]
-        condition = cast("dx.Series", condition)
 
-        if is_compliant_expr(self._then_value):
-            then_value: dx.Series | object = self._then_value(df)[0]
+        if isinstance(self._then_value, DaskExpr):
+            then_value = self._then_value(df)[0]
         else:
             then_value = self._then_value
         (then_series,) = align_series_full_broadcast(df, then_value)
@@ -366,13 +363,13 @@ def __call__(self: Self, df: DaskLazyFrame) -> Sequence[dx.Series]:
         if self._otherwise_value is None:
             return [then_series.where(condition)]
 
-        if is_compliant_expr(self._otherwise_value):
-            otherwise_value: dx.Series | object = self._otherwise_value(df)[0]
+        if isinstance(self._otherwise_value, DaskExpr):
+            otherwise_value = self._otherwise_value(df)[0]
         else:
             otherwise_value = self._otherwise_value
         (otherwise_series,) = align_series_full_broadcast(df, otherwise_value)
         validate_comparand(condition, otherwise_series)
-        return [then_series.where(condition, otherwise_series)]
+        return [then_series.where(condition, otherwise_series)]  # pyright: ignore[reportArgumentType]
 
     def then(self: Self, value: DaskExpr | Any) -> DaskThen:
         self._then_value = value
@@ -405,17 +402,14 @@ def __init__(
     ) -> None:
         self._backend_version = backend_version
         self._version = version
-        self._call = call
+        self._call: DaskWhen = call
         self._depth = depth
         self._function_name = function_name
         self._evaluate_output_names = evaluate_output_names
         self._alias_output_names = alias_output_names
         self._call_kwargs = call_kwargs or {}
 
     def otherwise(self: Self, value: DaskExpr | Any) -> DaskExpr:
-        # type ignore because we are setting the `_call` attribute to a
-        # callable object of type `DaskWhen`, base class has the attribute as
-        # only a `Callable`
-        self._call._otherwise_value = value  # type: ignore[attr-defined]
+        self._call._otherwise_value = value
         self._function_name = "whenotherwise"
         return self
diff --git a/narwhals/_dask/utils.py b/narwhals/_dask/utils.py
@@ -62,7 +62,7 @@ def align_series_full_broadcast(
     return [
         s if isinstance(s, dx.Series) else df._native_frame.assign(_tmp=s)["_tmp"]
         for s in series
-    ]
+    ]  # pyright: ignore[reportReturnType]
 
 
 def add_row_index(
@@ -155,8 +155,8 @@ def narwhals_to_native_dtype(dtype: DType | type[DType], version: Version) -> An
 
 
 def name_preserving_sum(s1: dx.Series, s2: dx.Series) -> dx.Series:
-    return (s1 + s2).rename(s1.name)
+    return (s1 + s2).rename(s1.name)  # pyright: ignore[reportOperatorIssue]
 
 
 def name_preserving_div(s1: dx.Series, s2: dx.Series) -> dx.Series:
-    return (s1 / s2).rename(s1.name)
+    return (s1 / s2).rename(s1.name)  # pyright: ignore[reportOperatorIssue]
diff --git a/pyproject.toml b/pyproject.toml
@@ -269,6 +269,7 @@ module = [
   "*._pandas_like.*",
   "*._ibis.*",
   "*._arrow.*",
+  "*._dask.*",
 ]
 warn_return_any = false
 

Original file line number	Diff line number	Diff line change
`@@ -269,6 +269,7 @@ module = [`
`269`	`269`	`"*._pandas_like.*",`
`270`	`270`	`"*._ibis.*",`
`271`	`271`	`"*._arrow.*",`
	`272`	`+ "*._dask.*",`
`272`	`273`	`]`
`273`	`274`	`warn_return_any = false`
`274`	`275`