Some typing updates (#208)

dcherian · pre-commit-ci[bot] · web-flow · commit ed51c199d76a · 2023-01-26T04:22:07.000Z
* Some typing updates
* Little more typing
* Introduce TypedDict for Aggregation.dtype
* Cleanup
* Upgrade types.
* Try with typing_extensions
* [pre-commit.ci] auto fixes from pre-commit.com hooks; for more information, see https://pre-commit.ci
* Revert "Try with typing_extensions". This reverts commit 21983a5.
* Guard with TYPE_CHECKING

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -46,3 +46,9 @@ repos:
       hooks:
         - id: nbstripout
           args: [--extra-keys=metadata.kernelspec metadata.language_info.version]
+    - repo: https://github.com/asottile/pyupgrade
+      rev: v3.3.1
+      hooks:
+        - id: pyupgrade
+          args:
+            - "--py38-plus"
diff --git a/docs/source/conf.py b/docs/source/conf.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 #
 # complexity documentation build configuration file, created by
 # sphinx-quickstart on Tue Jul  9 22:26:36 2013.
diff --git a/flox/aggregations.py b/flox/aggregations.py
@@ -3,12 +3,17 @@
 import copy
 import warnings
 from functools import partial
+from typing import TYPE_CHECKING, Any, Callable, TypedDict
 
 import numpy as np
 import numpy_groupies as npg
+from numpy.typing import DTypeLike
 
 from . import aggregate_flox, aggregate_npg, xrdtypes as dtypes, xrutils
 
+if TYPE_CHECKING:
+    FuncTuple = tuple[Callable | str, ...]
+
 
 def _is_arg_reduction(func: str | Aggregation) -> bool:
     if isinstance(func, str) and func in ["argmin", "argmax", "nanargmax", "nanargmin"]:
@@ -18,6 +23,17 @@ def _is_arg_reduction(func: str | Aggregation) -> bool:
     return False
 
 
+class AggDtypeInit(TypedDict):
+    final: DTypeLike | None
+    intermediate: tuple[DTypeLike, ...]
+
+
+class AggDtype(TypedDict):
+    final: np.dtype
+    numpy: tuple[np.dtype | type[np.intp], ...]
+    intermediate: tuple[np.dtype | type[np.intp], ...]
+
+
 def generic_aggregate(
     group_idx,
     array,
@@ -57,7 +73,7 @@ def generic_aggregate(
     return result
 
 
-def _normalize_dtype(dtype, array_dtype, fill_value=None):
+def _normalize_dtype(dtype: DTypeLike, array_dtype: np.dtype, fill_value=None) -> np.dtype:
     if dtype is None:
         dtype = array_dtype
     if dtype is np.floating:
@@ -103,16 +119,16 @@ def __init__(
         self,
         name,
         *,
-        numpy=None,
-        chunk,
-        combine,
-        preprocess=None,
-        aggregate=None,
-        finalize=None,
+        numpy: str | FuncTuple | None = None,
+        chunk: str | FuncTuple | None,
+        combine: str | FuncTuple | None,
+        preprocess: Callable | None = None,
+        aggregate: Callable | None = None,
+        finalize: Callable | None = None,
         fill_value=None,
         final_fill_value=dtypes.NA,
         dtypes=None,
-        final_dtype=None,
+        final_dtype: DTypeLike | None = None,
         reduction_type="reduce",
     ):
         """
@@ -162,15 +178,15 @@ def __init__(
         self.preprocess = preprocess
         # Use "chunk_reduce" or "chunk_argreduce"
         self.reduction_type = reduction_type
-        self.numpy = (numpy,) if numpy else (self.name,)
+        self.numpy: FuncTuple = (numpy,) if numpy else (self.name,)
         # initialize blockwise reduction
-        self.chunk = _atleast_1d(chunk)
+        self.chunk: FuncTuple = _atleast_1d(chunk)
         # how to aggregate results after first round of reduction
-        self.combine = _atleast_1d(combine)
+        self.combine: FuncTuple = _atleast_1d(combine)
         # final aggregation
-        self.aggregate = aggregate if aggregate else self.combine[0]
+        self.aggregate: Callable | str = aggregate if aggregate else self.combine[0]
         # finalize results (see mean)
-        self.finalize = finalize if finalize else lambda x: x
+        self.finalize: Callable | None = finalize
 
         self.fill_value = {}
         # This is used for the final reindexing
@@ -180,13 +196,15 @@ def __init__(
         # They should make sense when aggregated together with results from other blocks
         self.fill_value["intermediate"] = self._normalize_dtype_fill_value(fill_value, "fill_value")
 
-        self.dtype = {}
-        self.dtype[name] = final_dtype
-        self.dtype["intermediate"] = self._normalize_dtype_fill_value(dtypes, "dtype")
+        self.dtype_init: AggDtypeInit = {
+            "final": final_dtype,
+            "intermediate": self._normalize_dtype_fill_value(dtypes, "dtype"),
+        }
+        self.dtype: AggDtype = None  # type: ignore
 
         # The following are set by _initialize_aggregation
-        self.finalize_kwargs = {}
-        self.min_count = None
+        self.finalize_kwargs: dict[Any, Any] = {}
+        self.min_count: int | None = None
 
     def _normalize_dtype_fill_value(self, value, name):
         value = _atleast_1d(value)
@@ -211,15 +229,15 @@ def __dask_tokenize__(self):
             self.dtype,
         )
 
-    def __repr__(self):
+    def __repr__(self) -> str:
         return "\n".join(
             (
-                f"{self.name}, fill: {np.unique(self.fill_value.values())}, dtype: {self.dtype}",
-                f"chunk: {self.chunk}",
-                f"combine: {self.combine}",
-                f"aggregate: {self.aggregate}",
-                f"finalize: {self.finalize}",
-                f"min_count: {self.min_count}",
+                f"{self.name!r}, fill: {self.fill_value.values()!r}, dtype: {self.dtype}",
+                f"chunk: {self.chunk!r}",
+                f"combine: {self.combine!r}",
+                f"aggregate: {self.aggregate!r}",
+                f"finalize: {self.finalize!r}",
+                f"min_count: {self.min_count!r}",
             )
         )
 
@@ -484,7 +502,7 @@ def _initialize_aggregation(
     array_dtype,
     fill_value,
     min_count: int | None,
-    finalize_kwargs,
+    finalize_kwargs: dict[Any, Any] | None,
 ) -> Aggregation:
     if not isinstance(func, Aggregation):
         try:
@@ -502,24 +520,30 @@ def _initialize_aggregation(
 
     # np.dtype(None) == np.dtype("float64")!!!
     # so check for not None
-    if dtype is not None and not isinstance(dtype, np.dtype):
-        dtype = np.dtype(dtype)
+    dtype_: np.dtype | None = (
+        np.dtype(dtype) if dtype is not None and not isinstance(dtype, np.dtype) else dtype
+    )
 
-    agg.dtype[func] = _normalize_dtype(dtype or agg.dtype[func], array_dtype, fill_value)
-    agg.dtype["numpy"] = (agg.dtype[func],)
-    agg.dtype["intermediate"] = [
-        _normalize_dtype(int_dtype, np.result_type(array_dtype, agg.dtype[func]), int_fv)
-        if int_dtype is None
-        else int_dtype
-        for int_dtype, int_fv in zip(agg.dtype["intermediate"], agg.fill_value["intermediate"])
-    ]
+    final_dtype = _normalize_dtype(dtype_ or agg.dtype_init["final"], array_dtype, fill_value)
+    agg.dtype = {
+        "final": final_dtype,
+        "numpy": (final_dtype,),
+        "intermediate": tuple(
+            _normalize_dtype(int_dtype, np.result_type(array_dtype, final_dtype), int_fv)
+            if int_dtype is None
+            else np.dtype(int_dtype)
+            for int_dtype, int_fv in zip(
+                agg.dtype_init["intermediate"], agg.fill_value["intermediate"]
+            )
+        ),
+    }
 
     # Replace sentinel fill values according to dtype
     agg.fill_value["intermediate"] = tuple(
         _get_fill_value(dt, fv)
         for dt, fv in zip(agg.dtype["intermediate"], agg.fill_value["intermediate"])
     )
-    agg.fill_value[func] = _get_fill_value(agg.dtype[func], agg.fill_value[func])
+    agg.fill_value[func] = _get_fill_value(agg.dtype["final"], agg.fill_value[func])
 
     fv = fill_value if fill_value is not None else agg.fill_value[agg.name]
     if _is_arg_reduction(agg):
diff --git a/flox/core.py b/flox/core.py
@@ -807,7 +807,7 @@ def _finalize_results(
     else:
         finalized["groups"] = squeezed["groups"]
 
-    finalized[agg.name] = finalized[agg.name].astype(agg.dtype[agg.name], copy=False)
+    finalized[agg.name] = finalized[agg.name].astype(agg.dtype["final"], copy=False)
     return finalized
 
 
@@ -884,6 +884,7 @@ def _simple_combine(
         assert array.ndim >= 2
         with warnings.catch_warnings():
             warnings.filterwarnings("ignore", r"All-NaN (slice|axis) encountered")
+            assert isinstance(combine, str)
             result = getattr(np, combine)(array, axis=axis_, keepdims=True)
         if is_aggregate:
             # squeeze out DUMMY_AXIS if this is the last step i.e. called from _aggregate
@@ -1015,7 +1016,7 @@ def _grouped_combine(
             if array.shape[-1] == 0:
                 # all empty when combined
                 results["intermediates"].append(
-                    np.empty(shape=(1,) * (len(axis) - 1) + (0,), dtype=agg.dtype)
+                    np.empty(shape=(1,) * (len(axis) - 1) + (0,), dtype=dtype)
                 )
                 results["groups"] = np.empty(
                     shape=(1,) * (len(neg_axis) - 1) + (0,), dtype=groups.dtype
@@ -1059,10 +1060,11 @@ def _reduce_blockwise(
     agg.finalize = None
 
     assert agg.finalize_kwargs is not None
-    finalize_kwargs = agg.finalize_kwargs
-    if isinstance(finalize_kwargs, Mapping):
-        finalize_kwargs = (finalize_kwargs,)
-    finalize_kwargs = finalize_kwargs + ({},) + ({},)
+    if isinstance(agg.finalize_kwargs, Mapping):
+        finalize_kwargs_: tuple[dict[Any, Any], ...] = (agg.finalize_kwargs,)
+    else:
+        finalize_kwargs_ = agg.finalize_kwargs
+    finalize_kwargs_ += ({},) + ({},)
 
     results = chunk_reduce(
         array,
@@ -1075,7 +1077,7 @@ def _reduce_blockwise(
         # (see below)
         fill_value=agg.fill_value["numpy"],
         dtype=agg.dtype["numpy"],
-        kwargs=finalize_kwargs,
+        kwargs=finalize_kwargs_,
         engine=engine,
         sort=sort,
         reindex=reindex,
@@ -1102,7 +1104,7 @@ def _normalize_indexes(array: DaskArray, flatblocks, blkshape) -> tuple:
     """
     unraveled = np.unravel_index(flatblocks, blkshape)
 
-    normalized: list[Union[int, slice, list[int]]] = []
+    normalized: list[int | slice | list[int]] = []
     for ax, idx in enumerate(unraveled):
         i = _unique(idx).squeeze()
         if i.ndim == 0:
@@ -1303,7 +1305,7 @@ def dask_groupby_agg(
         name=f"{name}-chunk-{token}",
     )
 
-    group_chunks: tuple[tuple[Union[int, float], ...]]
+    group_chunks: tuple[tuple[int | float, ...]]
 
     if method in ["map-reduce", "cohorts"]:
         combine: Callable[..., IntermediateDict]
@@ -1402,7 +1404,7 @@ def dask_groupby_agg(
         reduced,
         inds,
         adjust_chunks=dict(zip(out_inds, output_chunks)),
-        dtype=agg.dtype[agg.name],
+        dtype=agg.dtype["final"],
         key=agg.name,
         name=f"{name}-{token}",
         concatenate=False,
@@ -1600,7 +1602,7 @@ def groupby_reduce(
     method: T_Method = "map-reduce",
     engine: T_Engine = "numpy",
     reindex: bool | None = None,
-    finalize_kwargs: Mapping | None = None,
+    finalize_kwargs: dict[Any, Any] | None = None,
 ) -> tuple[DaskArray, np.ndarray | DaskArray]:
     """
     GroupBy reductions using tree reductions for dask.array
diff --git a/flox/xarray.py b/flox/xarray.py
@@ -223,7 +223,7 @@ def xarray_reduce(
         raise NotImplementedError("sort must be True for xarray_reduce")
 
     # eventually drop the variables we are grouping by
-    maybe_drop = set(b for b in by if isinstance(b, Hashable))
+    maybe_drop = {b for b in by if isinstance(b, Hashable)}
     unindexed_dims = tuple(
         b
         for b, isbin_ in zip(by, isbins)

Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,3 @@`
`1`		`-# -*- coding: utf-8 -*-`
`2`	`1`	`#`
`3`	`2`	`# complexity documentation build configuration file, created by`
`4`	`3`	`# sphinx-quickstart on Tue Jul 9 22:26:36 2013.`
Original file line number	Diff line number	Diff line change
`@@ -223,7 +223,7 @@ def xarray_reduce(`
`223`	`223`	`raise NotImplementedError("sort must be True for xarray_reduce")`
`224`	`224`
`225`	`225`	`# eventually drop the variables we are grouping by`
`226`		`- maybe_drop = set(b for b in by if isinstance(b, Hashable))`
	`226`	`+ maybe_drop = {b for b in by if isinstance(b, Hashable)}`
`227`	`227`	`unindexed_dims = tuple(`
`228`	`228`	`b`
`229`	`229`	`for b, isbin_ in zip(by, isbins)`