inducer
diff --git a/‎.basedpyright/baseline.json‎
Lines changed: 5728 additions & 9312 deletions b/‎.basedpyright/baseline.json‎
Lines changed: 5728 additions & 9312 deletions
diff --git a/‎.github/workflows/ci.yml‎
Lines changed: 2 additions & 2 deletions b/‎.github/workflows/ci.yml‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎.test-conda-env-py3.yml‎
Lines changed: 2 additions & 1 deletion b/‎.test-conda-env-py3.yml‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎doc/conf.py‎
Lines changed: 3 additions & 0 deletions b/‎doc/conf.py‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎examples/curve-pot.py‎
Lines changed: 8 additions & 5 deletions b/‎examples/curve-pot.py‎
Lines changed: 8 additions & 5 deletions
diff --git a/‎examples/expansion-toys.py‎
Lines changed: 7 additions & 8 deletions b/‎examples/expansion-toys.py‎
Lines changed: 7 additions & 8 deletions
diff --git a/‎sumpy/__init__.py‎
Lines changed: 2 additions & 4 deletions b/‎sumpy/__init__.py‎
Lines changed: 2 additions & 4 deletions
diff --git a/‎sumpy/array_context.py‎
Lines changed: 75 additions & 9 deletions b/‎sumpy/array_context.py‎
Lines changed: 75 additions & 9 deletions
diff --git a/‎sumpy/codegen.py‎
Lines changed: 1 addition & 23 deletions b/‎sumpy/codegen.py‎
Lines changed: 1 addition & 23 deletions
@@ -121,8 +121,8 @@ jobs:
             run: |
                 curl -L -O https://tiker.net/ci-support-v0
                 . ./ci-support-v0
-                if [[ "$DOWNSTREAM_PROJECT" == "pytential" && "$GITHUB_HEAD_REF" == "e2p" ]]; then
-                    DOWNSTREAM_PROJECT=https://github.com/isuruf/pytential.git@e2p
+                if [[ "$DOWNSTREAM_PROJECT" == "pytential" && "$GITHUB_HEAD_REF" == "towards-array-context-merge" ]]; then
+                   DOWNSTREAM_PROJECT=https://github.com/alexfikl/pytential.git@towards-array-context
                 fi
                 test_downstream "$DOWNSTREAM_PROJECT"
 
 
@@ -8,7 +8,8 @@ dependencies:
 - numpy
 - scipy
 - sympy
-- pocl
+# https://github.com/pocl/pocl/issues/2069
+- pocl<7
 - pocl-cuda
 - islpy
 - pyopencl
 
@@ -26,6 +26,7 @@
 
 nitpick_ignore_regex = [
     ["py:class", r"symengine\.(.+)"],  # :cry:
+    ["py:class", r"ToTagSetConvertible"],  # :cry:
 ]
 
 sphinxconfig_missing_reference_aliases = {
@@ -36,6 +37,7 @@
     "np.complexfloating": "class:numpy.complexfloating",
     "np.inexact": "class:numpy.inexact",
     "np.dtype": "class:numpy.dtype",
+    "np.number": "class:numpy.number",
     # pytools
     "obj_array.ObjectArray1D": "obj:pytools.obj_array.ObjectArray1D",
     # sympy
@@ -53,6 +55,7 @@
     "CallInstruction": "class:loopy.kernel.instruction.CallInstruction",
     # arraycontext
     "Array": "obj:arraycontext.Array",
+    "ArrayContext": "class:arraycontext.ArrayContext",
     # boxtree
     "FMMTraversalInfo": "class:boxtree.traversal.FMMTraversalInfo",
     # sumpy
 
@@ -91,7 +91,7 @@ def draw_pot_figure(aspect_ratio,
         knl_kwargs = {}
 
     vol_source_knl, vol_target_knl = process_kernel(knl, what_operator)
-    p2p = P2P(actx.context,
+    p2p = P2P(
             source_kernels=(vol_source_knl,),
             target_kernels=(vol_target_knl,),
             exclude_self=False,
@@ -100,7 +100,7 @@ def draw_pot_figure(aspect_ratio,
     lpot_source_knl, lpot_target_knl = process_kernel(knl, what_operator_lpot)
 
     from sumpy.qbx import LayerPotential
-    lpot = LayerPotential(actx.context,
+    lpot = LayerPotential(
             expansion=expn_class(knl, order=order),
             source_kernels=(lpot_source_knl,),
             target_kernels=(lpot_target_knl,),
@@ -184,7 +184,8 @@ def map_to_curve(t: NDArray[np.floating]):
 
         def apply_lpot(x: NDArray[np.inexact]) -> NDArray[np.inexact]:
             xovsmp = fim @ x
-            _evt, (y,) = lpot(actx.queue,
+            y, = lpot(
+                    actx,
                     sources,
                     ovsmp_sources,
                     actx.from_numpy(centers),
@@ -209,7 +210,8 @@ def apply_lpot(x: NDArray[np.inexact]) -> NDArray[np.inexact]:
     density = np.cos(mode_nr*2*np.pi*native_t).astype(np.complex128)
     strength = actx.from_numpy(native_curve.speed * native_weights * density)
 
-    _evt, (vol_pot,) = p2p(actx.queue,
+    vol_pot, = p2p(
+            actx,
             targets,
             sources,
             [strength], **volpot_kwargs)
@@ -219,7 +221,8 @@ def apply_lpot(x: NDArray[np.inexact]) -> NDArray[np.inexact]:
     ovsmp_strength = actx.from_numpy(
         ovsmp_curve.speed * ovsmp_weights * ovsmp_density)
 
-    _evt, (curve_pot,) = lpot(actx.queue,
+    curve_pot, = lpot(
+            actx,
             sources,
             ovsmp_sources,
             actx.from_numpy(centers),
 
@@ -21,7 +21,6 @@ def main():
     actx = PyOpenCLArrayContext(queue)
 
     tctx = t.ToyContext(
-            actx.context,
             # LaplaceKernel(2),
             YukawaKernel(2), extra_kernel_kwargs={"lam": 5},
             # HelmholtzKernel(2), extra_kernel_kwargs={"k": 0.3},
@@ -36,22 +35,22 @@ def main():
     fp = FieldPlotter([3, 0], extent=8)
 
     if USE_MATPLOTLIB:
-        t.logplot(fp, pt_src, cmap="jet")
+        t.logplot(actx, fp, pt_src, cmap="jet")
         plt.colorbar()
         plt.show()
 
-    mexp = t.multipole_expand(pt_src, [0, 0], 5)
-    mexp2 = t.multipole_expand(mexp, [0, 0.25])  # noqa: F841
-    lexp = t.local_expand(mexp, [3, 0])
-    lexp2 = t.local_expand(lexp, [3, 1], 3)
+    mexp = t.multipole_expand(actx, pt_src, [0, 0], order=5)
+    mexp2 = t.multipole_expand(actx, mexp, [0, 0.25])  # noqa: F841
+    lexp = t.local_expand(actx, mexp, [3, 0])
+    lexp2 = t.local_expand(actx, lexp, [3, 1], order=3)
 
     # diff = mexp - pt_src
     # diff = mexp2 - pt_src
     diff = lexp2 - pt_src
 
-    print(t.l_inf(diff, 1.2, center=lexp2.center))
+    print(t.l_inf(actx, diff, 1.2, center=lexp2.center))
     if USE_MATPLOTLIB:
-        t.logplot(fp, diff, cmap="jet", vmin=-3, vmax=0)
+        t.logplot(actx, fp, diff, cmap="jet", vmin=-3, vmax=0)
         plt.colorbar()
         plt.show()
 
 
@@ -66,8 +66,8 @@
 ]
 
 
-code_cache: WriteOncePersistentDict[Hashable, lp.TranslationUnit] = \
-        WriteOncePersistentDict("sumpy-code-cache-v6-"+VERSION_TEXT, safe_sync=False)
+# Write-once persistent dictionary holding loopy translation units. The
+# identifier embeds VERSION_TEXT.
+# NOTE(review): the identifier goes from "v6" to "v8" here -- presumably to
+# invalidate entries written by older code; confirm "v7" was not skipped
+# by accident.
+code_cache: WriteOncePersistentDict[Hashable, lp.TranslationUnit] = (
+    WriteOncePersistentDict(f"sumpy-code-cache-v8-{VERSION_TEXT}", safe_sync=False))
 
 
 # {{{ optimization control
@@ -87,8 +87,6 @@ def set_optimization_enabled(flag):
 
 # {{{ cache control
 
-CACHING_ENABLED = True
-
+# Caching stays enabled unless SUMPY_NO_CACHE or CG_NO_CACHE is present in
+# the environment (only presence is tested, not the variable's value).
 CACHING_ENABLED = (
     "SUMPY_NO_CACHE" not in os.environ
     and "CG_NO_CACHE" not in os.environ)
 
@@ -23,36 +23,102 @@
 THE SOFTWARE.
 """
 
+from typing import TYPE_CHECKING, Any
+
 from boxtree.array_context import PyOpenCLArrayContext as PyOpenCLArrayContextBase
+from typing_extensions import override
 
+import loopy as lp
 from arraycontext.pytest import (
     _PytestPyOpenCLArrayContextFactoryWithClass,
     register_pytest_array_context_factory,
 )
 
 
+if TYPE_CHECKING:
+    from collections.abc import Iterator
+
+    from numpy.typing import DTypeLike
+
+    from arraycontext import ArrayContext
+    from loopy import TranslationUnit
+    from loopy.codegen import PreambleInfo
+    from pytools.tag import ToTagSetConvertible
+
+
 __doc__ = """
 Array Context
 -------------
 
+.. autofunction:: make_loopy_program
 .. autoclass:: PyOpenCLArrayContext
 """
 
 
 # {{{ PyOpenCLArrayContext
 
+def make_loopy_program(
+        domains, statements,
+        kernel_data: list[Any] | None = None, *,
+        name: str = "sumpy_loopy_kernel",
+        silenced_warnings: list[str] | str | None = None,
+        assumptions: str = "",
+        fixed_parameters: dict[str, Any] | None = None,
+        index_dtype: DTypeLike | None = None,
+        tags: ToTagSetConvertible = None):
+    """Build a :mod:`loopy` kernel by forwarding to :func:`loopy.make_kernel`.
+
+    The result is meant for use with
+    :meth:`arraycontext.ArrayContext.call_loopy`.
+
+    *domains*, *statements*, *name*, *assumptions*, *fixed_parameters*,
+    *index_dtype* and *tags* are passed through unchanged.
+
+    :arg kernel_data: replaced by ``[...]`` (a list holding only
+        :data:`Ellipsis`) when *None*.
+    :arg silenced_warnings: replaced by an empty list when *None*.
+    :returns: whatever :func:`loopy.make_kernel` returns (presumably a
+        :class:`loopy.TranslationUnit` with current :mod:`loopy` rather
+        than a :class:`loopy.LoopKernel` -- TODO confirm).
+
+    The kernel is always created with arraycontext's default loopy options,
+    ``default_offset=lp.auto`` and
+    ``lang_version=lp.MOST_RECENT_LANGUAGE_VERSION``.
+    """
+    if kernel_data is None:
+        kernel_data = [...]
+
+    if silenced_warnings is None:
+        silenced_warnings = []
+
+    # NOTE(review): this module already imports ``loopy as lp`` at the top
+    # level, so the local import below looks redundant.
+    import loopy as lp
+    # NOTE(review): _DEFAULT_LOOPY_OPTIONS is a private (underscore-prefixed)
+    # name of arraycontext.loopy and may change without notice.
+    from arraycontext.loopy import _DEFAULT_LOOPY_OPTIONS
+
+    return lp.make_kernel(
+            domains,
+            statements,
+            kernel_data=kernel_data,
+            options=_DEFAULT_LOOPY_OPTIONS,
+            default_offset=lp.auto,
+            name=name,
+            lang_version=lp.MOST_RECENT_LANGUAGE_VERSION,
+            assumptions=assumptions,
+            fixed_parameters=fixed_parameters,
+            silenced_warnings=silenced_warnings,
+            index_dtype=index_dtype,
+            tags=tags)
+
+
+def _fp_contract_fast_preamble(
+        preamble_info: PreambleInfo
+    ) -> Iterator[tuple[str, str]]:
+    """Yield a single preamble entry with ``#pragma clang fp contract(fast)``.
+
+    *preamble_info* is accepted but not used. The first element of the
+    yielded pair is presumably the identifier loopy uses for the
+    preamble, the second its source text -- TODO confirm.
+    """
+    yield ("fp_contract_fast_pocl", "#pragma clang fp contract(fast)")
+
+
 class PyOpenCLArrayContext(PyOpenCLArrayContextBase):
-    def transform_loopy_program(self, t_unit):
-        default_ep = t_unit.default_entrypoint
-        options = default_ep.options
+    @override
+    def transform_loopy_program(self, t_unit: TranslationUnit):
+        """Register the ``fp contract(fast)`` preamble for PoCL GPU devices.
+
+        :arg t_unit: the loopy translation unit to transform.
+        :returns: *t_unit* with :func:`_fp_contract_fast_preamble` registered
+            as a preamble generator when the queue's device reports the
+            platform name ``"Portable Computing Language"`` and has the GPU
+            bit set in its device type; otherwise *t_unit* unchanged.
+        """
+        # NOTE(review): unlike the implementation this replaces, the new
+        # method neither validates the kernel options nor calls
+        # ``super().transform_loopy_program`` -- confirm this is intended.
+        import pyopencl as cl
+        device = self.queue.device
+        if (device.platform.name == "Portable Computing Language"
+                and (device.type & cl.device_type.GPU)):
+            t_unit = lp.register_preamble_generators(
+                t_unit,
+                [_fp_contract_fast_preamble])
+
+        return t_unit
 
-        if not (options.return_dict and options.no_numpy):
-            raise ValueError("Loopy kernel passed to call_loopy must "
-                    "have return_dict and no_numpy options set. "
-                    "Did you use arraycontext.make_loopy_program "
-                    "to create this kernel?")
 
-        return super().transform_loopy_program(t_unit)
+def is_cl_cpu(actx: ArrayContext) -> bool:
+    """Return *True* if *actx* is a :class:`PyOpenCLArrayContext` and every
+    device in its context has the CPU bit set in its device type.
+
+    Any array context that is not a :class:`PyOpenCLArrayContext` yields
+    *False*.
+    """
+    if not isinstance(actx, PyOpenCLArrayContext):
+        return False
+
+    import pyopencl as cl
+    return all(dev.type & cl.device_type.CPU for dev in actx.context.devices)
 
 # }}}
 
 
@@ -47,8 +47,6 @@
 
     from numpy.typing import DTypeLike
 
-    import pyopencl as cl
-    from loopy.codegen import PreambleInfo
     from loopy.target import TargetBase
     from loopy.translation_unit import CallablesInferenceContext
     from loopy.types import LoopyType
@@ -249,26 +247,6 @@ def register_bessel_callables(loopy_knl: lp.TranslationUnit) -> lp.TranslationUn
 
     return loopy_knl
 
-
-def _fp_contract_fast_preamble(
-        preamble_info: PreambleInfo
-    ) -> Iterator[tuple[str, str]]:
-    yield ("fp_contract_fast_pocl", "#pragma clang fp contract(fast)")
-
-
-def register_optimization_preambles(
-        loopy_knl: lp.TranslationUnit, device: cl.Device
-    ) -> lp.TranslationUnit:
-    if isinstance(loopy_knl.target, lp.PyOpenCLTarget):
-        import pyopencl as cl
-        if (device.platform.name == "Portable Computing Language"
-                and (device.type & cl.device_type.GPU)):
-            loopy_knl = lp.register_preamble_generators(
-                loopy_knl,
-                [_fp_contract_fast_preamble])
-
-    return loopy_knl
-
 # }}}
 
 
@@ -753,7 +731,7 @@ def to_loopy_insns(
         assignments: Iterable[tuple[str, sym.Expr]],
         vector_names: Set[str] | None = None,
         pymbolic_expr_maps: Sequence[Callable[[Expression], Expression]] = (),
-        complex_dtype: DTypeLike = None,
+        complex_dtype: DTypeLike | None = None,
         retain_names: Set[str] | None = None,
     ) -> Sequence[Assignment | CallInstruction]:
     if vector_names is None: