fix some type annotations

alexfikl · alexfikl · commit 683460d1be64 · 2025-02-24T15:54:43.000+02:00
diff --git a/sumpy/__init__.py b/sumpy/__init__.py
@@ -24,7 +24,9 @@
 """
 
 import os
+from collections.abc import Hashable
 
+import loopy as lp
 from pytools.persistent_dict import WriteOncePersistentDict
 
 from sumpy.e2e import (
@@ -59,7 +61,7 @@
 ]
 
 
-code_cache = (
+code_cache: WriteOncePersistentDict[Hashable, lp.TranslationUnit] = (
     WriteOncePersistentDict(f"sumpy-code-cache-v7-{VERSION_TEXT}", safe_sync=False))
 
 
diff --git a/sumpy/fmm.py b/sumpy/fmm.py
@@ -29,6 +29,8 @@
 .. autoclass:: SumpyExpansionWrangler
 """
 
+from typing import TYPE_CHECKING
+
 import numpy as np
 
 from arraycontext import Array
@@ -58,6 +60,10 @@
 )
 
 
+if TYPE_CHECKING:
+    import pyopencl
+
+
 # {{{ tree-independent data for wrangler
 
 class SumpyTreeIndependentDataForWrangler(TreeIndependentDataForWrangler):
@@ -731,7 +737,7 @@ def multipole_to_local(self,
             local_exps_view_func = self.local_expansions_view
 
         for lev in range(self.tree.nlevels):
-            wait_for = []
+            wait_for: list[pyopencl.Event] = []
 
             start, stop = level_start_target_box_nrs[lev:lev+2]
             if start == stop:
diff --git a/sumpy/p2p.py b/sumpy/p2p.py
@@ -183,7 +183,10 @@ def get_default_src_tgt_arguments(self):
                     if self.exclude_self else [])
                 + gather_loopy_source_arguments(self.source_kernels))
 
-    def get_optimized_kernel(self, targets_is_obj_array, sources_is_obj_array):
+    def get_optimized_kernel(self, *,
+                             targets_is_obj_array: bool = False,
+                             sources_is_obj_array: bool = False,
+                             **kwargs: Any) -> lp.TranslationUnit:
         # FIXME
         knl = self.get_kernel()
 
@@ -194,10 +197,8 @@ def get_optimized_kernel(self, targets_is_obj_array, sources_is_obj_array):
 
         knl = lp.split_iname(knl, "itgt", 1024, outer_tag="g.0")
         knl = self._allow_redundant_execution_of_knl_scaling(knl)
-        knl = lp.set_options(knl,
-                enforce_variable_access_ordered="no_check")
+        knl = lp.set_options(knl, enforce_variable_access_ordered="no_check")
 
-        knl = register_optimization_preambles(knl, self.device)
         return knl
 
 
@@ -475,9 +476,11 @@ class P2PFromCSR(P2PBase):
     def default_name(self):
         return "p2p_from_csr"
 
-    def get_kernel(self,
-            max_nsources_in_one_box: int, max_ntargets_in_one_box: int, *,
-            work_items_per_group: int = 32, is_gpu: bool = False):
+    def get_kernel(self, *,
+            max_nsources_in_one_box: int = 32,
+            max_ntargets_in_one_box: int = 32,
+            work_items_per_group: int = 32,
+            is_gpu: bool = False, **kwargs: Any) -> lp.TranslationUnit:
         loopy_insns, _result_names = self.get_loopy_insns_and_result_names()
         arguments = [
                 *self.get_default_src_tgt_arguments(),
@@ -674,8 +677,10 @@ def get_kernel(self,
                 "noutputs": len(self.target_kernels)},
             )
 
-        loopy_knl = lp.add_dtypes(
-                loopy_knl, {"nsources": np.int32, "ntargets": np.int32})
+        loopy_knl = lp.add_dtypes(loopy_knl, {
+            "nsources": np.dtype(np.int32),
+            "ntargets": np.dtype(np.int32),
+            })
 
         loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr")
         loopy_knl = lp.tag_inames(loopy_knl, "istrength*:unr")
@@ -687,19 +692,24 @@ def get_kernel(self,
 
         return loopy_knl
 
-    def get_optimized_kernel(self,
-            max_nsources_in_one_box: int,
-            max_ntargets_in_one_box: int,
-            strength_dtype: np.dtype[Any],
-            source_dtype: np.dtype[Any],
-            local_mem_size: int,
-            is_gpu: bool):
+    def get_optimized_kernel(self, *,
+            max_nsources_in_one_box: int = 32,
+            max_ntargets_in_one_box: int = 32,
+            strength_dtype: np.dtype[Any] | None = None,
+            source_dtype: np.dtype[Any] | None = None,
+            local_mem_size: int = 32,
+            is_gpu: bool = False, **kwargs) -> lp.TranslationUnit:
         if not is_gpu:
-            knl = self.get_kernel(max_nsources_in_one_box,
-                    max_ntargets_in_one_box, is_gpu=is_gpu)
+            knl = self.get_kernel(
+                    max_nsources_in_one_box=max_nsources_in_one_box,
+                    max_ntargets_in_one_box=max_ntargets_in_one_box,
+                    is_gpu=is_gpu)
             knl = lp.split_iname(knl, "itgt_box", 4, outer_tag="g.0")
             knl = self._allow_redundant_execution_of_knl_scaling(knl)
         else:
+            assert strength_dtype is not None
+            assert source_dtype is not None
+
             dtype_size = np.dtype(strength_dtype).alignment
             work_items_per_group = min(256, max_ntargets_in_one_box)
             total_local_mem = max_nsources_in_one_box * \
@@ -708,8 +718,9 @@ def get_optimized_kernel(self,
             # can be scheduled at the same time for latency hiding
             nprefetch = (2 * total_local_mem - 1) // local_mem_size + 1
 
-            knl = self.get_kernel(max_nsources_in_one_box,
-                    max_ntargets_in_one_box,
+            knl = self.get_kernel(
+                    max_nsources_in_one_box=max_nsources_in_one_box,
+                    max_ntargets_in_one_box=max_ntargets_in_one_box,
                     work_items_per_group=work_items_per_group,
                     is_gpu=is_gpu)
             knl = lp.tag_inames(knl, {"itgt_box": "g.0", "inner": "l.0"})
@@ -771,12 +782,8 @@ def get_optimized_kernel(self,
             knl = lp.add_inames_to_insn(knl,
                     "inner", "id:init_* or id:*_scaling or id:src_box_insn_*")
             knl = lp.add_inames_to_insn(knl, "itgt_box", "id:*_scaling")
-            # knl = lp.set_options(knl, write_code=True)
-
-        knl = lp.set_options(knl,
-                enforce_variable_access_ordered="no_check")
 
-        knl = register_optimization_preambles(knl, self.device)
+        knl = lp.set_options(knl, enforce_variable_access_ordered="no_check")
         return knl
 
     def __call__(self, actx: PyOpenCLArrayContext, **kwargs):
@@ -786,8 +793,8 @@ def __call__(self, actx: PyOpenCLArrayContext, **kwargs):
 
         is_gpu = not is_cl_cpu(actx)
         if is_gpu:
-            source_dtype = kwargs.get("sources")[0].dtype
-            strength_dtype = kwargs.get("strength").dtype
+            source_dtype = kwargs["sources"][0].dtype
+            strength_dtype = kwargs["strength"].dtype
         else:
             # these are unused for not GPU and defeats the caching
             # set them to None to keep the caching across dtypes
diff --git a/sumpy/qbx.py b/sumpy/qbx.py
@@ -27,6 +27,7 @@
 """
 
 import logging
+from typing import Any
 
 import numpy as np
 
@@ -195,14 +196,14 @@ def get_kernel(self):
         raise NotImplementedError
 
     def get_optimized_kernel(self, *,
-            is_cpu: bool,
-            targets_is_obj_array: bool,
-            sources_is_obj_array: bool,
-            centers_is_obj_array: bool,
+            is_cpu: bool = True,
+            targets_is_obj_array: bool = False,
+            sources_is_obj_array: bool = False,
+            centers_is_obj_array: bool = False,
             # Used by pytential to override the name of the loop to be
             # parallelized. In the case of QBX, that's the loop over QBX
             # targets (not global targets).
-            itgt_name: str = "itgt"):
+            itgt_name: str = "itgt", **kwargs: Any) -> lp.TranslationUnit:
         # FIXME specialize/tune for GPU/CPU
         loopy_knl = self.get_kernel()
 
diff --git a/sumpy/tools.py b/sumpy/tools.py
@@ -429,11 +429,11 @@ def get_cache_key(self) -> tuple[Hashable, ...]:
         ...
 
     @abstractmethod
-    def get_kernel(self, **kwargs: Any) -> lp.TranslationUnit:
+    def get_kernel(self, **kwargs) -> lp.TranslationUnit:
         ...
 
     @abstractmethod
-    def get_optimized_kernel(self, **kwargs: Any) -> lp.TranslationUnit:
+    def get_optimized_kernel(self, **kwargs) -> lp.TranslationUnit:
         ...
 
     @memoize_method
diff --git a/sumpy/toys.py b/sumpy/toys.py
@@ -509,8 +509,7 @@ def eval(self, actx: PyOpenCLArrayContext, targets: np.ndarray) -> np.ndarray:
     def __neg__(self) -> PotentialSource:
         return -1*self
 
-    def __add__(self, other: Number_ish | PotentialSource
-                ) -> PotentialSource:
+    def __add__(self, other: Number_ish | PotentialSource) -> PotentialSource:
         if isinstance(other, Number | np.number):
             other = ConstantPotential(self.toy_ctx, other)
         elif not isinstance(other, PotentialSource):
@@ -520,18 +519,14 @@ def __add__(self, other: Number_ish | PotentialSource
 
     __radd__ = __add__
 
-    def __sub__(self,
-                other: Number_ish | PotentialSource) -> PotentialSource:
+    def __sub__(self, other: Number_ish | PotentialSource) -> PotentialSource:
         return self.__add__(-other)
 
-    def __rsub__(
-            self,
-            other: Number | np.number | PotentialSource
-            ) -> PotentialSource:
+    def __rsub__(self,  # type: ignore[misc]
+                 other: Number_ish | PotentialSource) -> PotentialSource:
         return (-self).__add__(other)
 
-    def __mul__(self,
-                other: Number_ish | PotentialSource) -> PotentialSource:
+    def __mul__(self, other: Number_ish | PotentialSource) -> PotentialSource:
         if isinstance(other, Number | np.number):
             other = ConstantPotential(self.toy_ctx, other)
         elif not isinstance(other, PotentialSource):
@@ -722,9 +717,9 @@ class Sum(PotentialExpressionNode):
     """
 
     def eval(self, actx: PyOpenCLArrayContext, targets: np.ndarray) -> np.ndarray:
-        result = 0
+        result: np.ndarray = np.zeros(targets.shape[1])  # may be promoted below
         for psource in self.psources:
             result = result + psource.eval(actx, targets)
 
         return result
 
@@ -735,9 +730,9 @@ class Product(PotentialExpressionNode):
     """
 
     def eval(self, actx: PyOpenCLArrayContext, targets: np.ndarray) -> np.ndarray:
-        result = 1
+        result: np.ndarray = np.ones(targets.shape[1])  # may be promoted below
         for psource in self.psources:
             result = result * psource.eval(actx, targets)
 
         return result