Skip to content

Commit 5216e4a

Browse files
inducer and alexfikl authored
Towards array context (2025 merge) (#256)
* Initial work towards using array contexts
* update tests to pass actx
* sumpy.array_context additions
* port p2p to arraycontext
* port p2e to arraycontext
* port e2p to arraycontext
* port e2e to arraycontext
* port tools and toys to arraycontext
* port test_tools to arraycontext
* port test_misc to arraycontext
* bump requirements
* start porting test_kernels to arraycontext
* more porting in test_kernels
* add arraycontext to docs
* add some annotations to make_loopy_program
* finish porting in test_kernels
* port qbx to arraycontext
* port curve-pot to arraycontext
* add pytools to intersphinx
* add assumptions at kernel creation
* port expansion-toys to arraycontext
* move get_kernel calls to separate line for debugging
* add fixed_parameters to make_loopy_program
* port test_qbx to arraycontext
* port test_matrixgen to arraycontext
* continue porting fmm to arraycontext
* update drive_fmm from boxtree
* more work towards getting the fmm working
* fix up fmm tests
* port distributed to arraycontext
* add missing actx
* fix kernel return values
* actually loop over all results
* back up some more dictionary kernel accesses
* fix matrix generation
* rip out timing collection
* remove unused imports (flake8)
* fix return value for form_locals
* remove ctx arg in KernelComputation
* back out some unneeded changes
* point ci to updated pytential
* fix kwargs name
* fix merge
* docs: add pytools to intersphinx
* fix device handling in p2p
* fix bad merge
* fix bad merge
* fix some type annotations
* fix some pylint errors
* Drop register_optimization_preambles, handle FP contract in actx
* Fix some make_obj_array stragglers
* get_cached_kernel_executor -> get_cached_kernel
* Fix toys
* Fix jump and target derivative tests
* Fix test_unified_single_and_double
* Address pocl 7.x pyvkfft miscompilation pocl/pocl#2069
* Fix docs
* Use ArrayContext+assert instead of PyOpenCLActx in annotations
* Fix some type annotations in P2P
* Deal with DTypeLike no longer allowing None
* Revert boxtree req back to main
* Update baseline
* Port to actx.np.zeros
* Update downstream repo on CI

---------

Co-authored-by: Alexandru Fikl <[email protected]>
1 parent b6d0310 commit 5216e4a

26 files changed

+6953
-10526
lines changed

.basedpyright/baseline.json

Lines changed: 5728 additions & 9312 deletions
Large diffs are not rendered by default.

.github/workflows/ci.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -121,8 +121,8 @@ jobs:
121121
run: |
122122
curl -L -O https://tiker.net/ci-support-v0
123123
. ./ci-support-v0
124-
if [[ "$DOWNSTREAM_PROJECT" == "pytential" && "$GITHUB_HEAD_REF" == "e2p" ]]; then
125-
DOWNSTREAM_PROJECT=https://github.com/isuruf/pytential.git@e2p
124+
if [[ "$DOWNSTREAM_PROJECT" == "pytential" && "$GITHUB_HEAD_REF" == "towards-array-context-merge" ]]; then
125+
DOWNSTREAM_PROJECT=https://github.com/alexfikl/pytential.git@towards-array-context
126126
fi
127127
test_downstream "$DOWNSTREAM_PROJECT"
128128

.test-conda-env-py3.yml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,8 @@ dependencies:
88
- numpy
99
- scipy
1010
- sympy
11-
- pocl
11+
# https://github.com/pocl/pocl/issues/2069
12+
- pocl<7
1213
- pocl-cuda
1314
- islpy
1415
- pyopencl

doc/conf.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626

2727
nitpick_ignore_regex = [
2828
["py:class", r"symengine\.(.+)"], # :cry:
29+
["py:class", r"ToTagSetConvertible"], # :cry:
2930
]
3031

3132
sphinxconfig_missing_reference_aliases = {
@@ -36,6 +37,7 @@
3637
"np.complexfloating": "class:numpy.complexfloating",
3738
"np.inexact": "class:numpy.inexact",
3839
"np.dtype": "class:numpy.dtype",
40+
"np.number": "class:numpy.number",
3941
# pytools
4042
"obj_array.ObjectArray1D": "obj:pytools.obj_array.ObjectArray1D",
4143
# sympy
@@ -53,6 +55,7 @@
5355
"CallInstruction": "class:loopy.kernel.instruction.CallInstruction",
5456
# arraycontext
5557
"Array": "obj:arraycontext.Array",
58+
"ArrayContext": "class:arraycontext.ArrayContext",
5659
# boxtree
5760
"FMMTraversalInfo": "class:boxtree.traversal.FMMTraversalInfo",
5861
# sumpy

examples/curve-pot.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@ def draw_pot_figure(aspect_ratio,
9191
knl_kwargs = {}
9292

9393
vol_source_knl, vol_target_knl = process_kernel(knl, what_operator)
94-
p2p = P2P(actx.context,
94+
p2p = P2P(
9595
source_kernels=(vol_source_knl,),
9696
target_kernels=(vol_target_knl,),
9797
exclude_self=False,
@@ -100,7 +100,7 @@ def draw_pot_figure(aspect_ratio,
100100
lpot_source_knl, lpot_target_knl = process_kernel(knl, what_operator_lpot)
101101

102102
from sumpy.qbx import LayerPotential
103-
lpot = LayerPotential(actx.context,
103+
lpot = LayerPotential(
104104
expansion=expn_class(knl, order=order),
105105
source_kernels=(lpot_source_knl,),
106106
target_kernels=(lpot_target_knl,),
@@ -184,7 +184,8 @@ def map_to_curve(t: NDArray[np.floating]):
184184

185185
def apply_lpot(x: NDArray[np.inexact]) -> NDArray[np.inexact]:
186186
xovsmp = fim @ x
187-
_evt, (y,) = lpot(actx.queue,
187+
y, = lpot(
188+
actx,
188189
sources,
189190
ovsmp_sources,
190191
actx.from_numpy(centers),
@@ -209,7 +210,8 @@ def apply_lpot(x: NDArray[np.inexact]) -> NDArray[np.inexact]:
209210
density = np.cos(mode_nr*2*np.pi*native_t).astype(np.complex128)
210211
strength = actx.from_numpy(native_curve.speed * native_weights * density)
211212

212-
_evt, (vol_pot,) = p2p(actx.queue,
213+
vol_pot, = p2p(
214+
actx,
213215
targets,
214216
sources,
215217
[strength], **volpot_kwargs)
@@ -219,7 +221,8 @@ def apply_lpot(x: NDArray[np.inexact]) -> NDArray[np.inexact]:
219221
ovsmp_strength = actx.from_numpy(
220222
ovsmp_curve.speed * ovsmp_weights * ovsmp_density)
221223

222-
_evt, (curve_pot,) = lpot(actx.queue,
224+
curve_pot, = lpot(
225+
actx,
223226
sources,
224227
ovsmp_sources,
225228
actx.from_numpy(centers),

examples/expansion-toys.py

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@ def main():
2121
actx = PyOpenCLArrayContext(queue)
2222

2323
tctx = t.ToyContext(
24-
actx.context,
2524
# LaplaceKernel(2),
2625
YukawaKernel(2), extra_kernel_kwargs={"lam": 5},
2726
# HelmholtzKernel(2), extra_kernel_kwargs={"k": 0.3},
@@ -36,22 +35,22 @@ def main():
3635
fp = FieldPlotter([3, 0], extent=8)
3736

3837
if USE_MATPLOTLIB:
39-
t.logplot(fp, pt_src, cmap="jet")
38+
t.logplot(actx, fp, pt_src, cmap="jet")
4039
plt.colorbar()
4140
plt.show()
4241

43-
mexp = t.multipole_expand(pt_src, [0, 0], 5)
44-
mexp2 = t.multipole_expand(mexp, [0, 0.25]) # noqa: F841
45-
lexp = t.local_expand(mexp, [3, 0])
46-
lexp2 = t.local_expand(lexp, [3, 1], 3)
42+
mexp = t.multipole_expand(actx, pt_src, [0, 0], order=5)
43+
mexp2 = t.multipole_expand(actx, mexp, [0, 0.25]) # noqa: F841
44+
lexp = t.local_expand(actx, mexp, [3, 0])
45+
lexp2 = t.local_expand(actx, lexp, [3, 1], order=3)
4746

4847
# diff = mexp - pt_src
4948
# diff = mexp2 - pt_src
5049
diff = lexp2 - pt_src
5150

52-
print(t.l_inf(diff, 1.2, center=lexp2.center))
51+
print(t.l_inf(actx, diff, 1.2, center=lexp2.center))
5352
if USE_MATPLOTLIB:
54-
t.logplot(fp, diff, cmap="jet", vmin=-3, vmax=0)
53+
t.logplot(actx, fp, diff, cmap="jet", vmin=-3, vmax=0)
5554
plt.colorbar()
5655
plt.show()
5756

sumpy/__init__.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -66,8 +66,8 @@
6666
]
6767

6868

69-
code_cache: WriteOncePersistentDict[Hashable, lp.TranslationUnit] = \
70-
WriteOncePersistentDict("sumpy-code-cache-v6-"+VERSION_TEXT, safe_sync=False)
69+
code_cache: WriteOncePersistentDict[Hashable, lp.TranslationUnit] = (
70+
WriteOncePersistentDict(f"sumpy-code-cache-v8-{VERSION_TEXT}", safe_sync=False))
7171

7272

7373
# {{{ optimization control
@@ -87,8 +87,6 @@ def set_optimization_enabled(flag):
8787

8888
# {{{ cache control
8989

90-
CACHING_ENABLED = True
91-
9290
CACHING_ENABLED = (
9391
"SUMPY_NO_CACHE" not in os.environ
9492
and "CG_NO_CACHE" not in os.environ)

sumpy/array_context.py

Lines changed: 75 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -23,36 +23,102 @@
2323
THE SOFTWARE.
2424
"""
2525

26+
from typing import TYPE_CHECKING, Any
27+
2628
from boxtree.array_context import PyOpenCLArrayContext as PyOpenCLArrayContextBase
29+
from typing_extensions import override
2730

31+
import loopy as lp
2832
from arraycontext.pytest import (
2933
_PytestPyOpenCLArrayContextFactoryWithClass,
3034
register_pytest_array_context_factory,
3135
)
3236

3337

38+
if TYPE_CHECKING:
39+
from collections.abc import Iterator
40+
41+
from numpy.typing import DTypeLike
42+
43+
from arraycontext import ArrayContext
44+
from loopy import TranslationUnit
45+
from loopy.codegen import PreambleInfo
46+
from pytools.tag import ToTagSetConvertible
47+
48+
3449
__doc__ = """
3550
Array Context
3651
-------------
3752
53+
.. autofunction:: make_loopy_program
3854
.. autoclass:: PyOpenCLArrayContext
3955
"""
4056

4157

4258
# {{{ PyOpenCLArrayContext
4359

60+
def make_loopy_program(
61+
domains, statements,
62+
kernel_data: list[Any] | None = None, *,
63+
name: str = "sumpy_loopy_kernel",
64+
silenced_warnings: list[str] | str | None = None,
65+
assumptions: str = "",
66+
fixed_parameters: dict[str, Any] | None = None,
67+
index_dtype: DTypeLike | None = None,
68+
tags: ToTagSetConvertible = None):
69+
"""Return a :class:`loopy.LoopKernel` suitable for use with
70+
:meth:`arraycontext.ArrayContext.call_loopy`.
71+
"""
72+
if kernel_data is None:
73+
kernel_data = [...]
74+
75+
if silenced_warnings is None:
76+
silenced_warnings = []
77+
78+
import loopy as lp
79+
from arraycontext.loopy import _DEFAULT_LOOPY_OPTIONS
80+
81+
return lp.make_kernel(
82+
domains,
83+
statements,
84+
kernel_data=kernel_data,
85+
options=_DEFAULT_LOOPY_OPTIONS,
86+
default_offset=lp.auto,
87+
name=name,
88+
lang_version=lp.MOST_RECENT_LANGUAGE_VERSION,
89+
assumptions=assumptions,
90+
fixed_parameters=fixed_parameters,
91+
silenced_warnings=silenced_warnings,
92+
index_dtype=index_dtype,
93+
tags=tags)
94+
95+
96+
def _fp_contract_fast_preamble(
97+
preamble_info: PreambleInfo
98+
) -> Iterator[tuple[str, str]]:
99+
yield ("fp_contract_fast_pocl", "#pragma clang fp contract(fast)")
100+
101+
44102
class PyOpenCLArrayContext(PyOpenCLArrayContextBase):
45-
def transform_loopy_program(self, t_unit):
46-
default_ep = t_unit.default_entrypoint
47-
options = default_ep.options
103+
@override
104+
def transform_loopy_program(self, t_unit: TranslationUnit):
105+
import pyopencl as cl
106+
device = self.queue.device
107+
if (device.platform.name == "Portable Computing Language"
108+
and (device.type & cl.device_type.GPU)):
109+
t_unit = lp.register_preamble_generators(
110+
t_unit,
111+
[_fp_contract_fast_preamble])
112+
113+
return t_unit
48114

49-
if not (options.return_dict and options.no_numpy):
50-
raise ValueError("Loopy kernel passed to call_loopy must "
51-
"have return_dict and no_numpy options set. "
52-
"Did you use arraycontext.make_loopy_program "
53-
"to create this kernel?")
54115

55-
return super().transform_loopy_program(t_unit)
116+
def is_cl_cpu(actx: ArrayContext) -> bool:
117+
if not isinstance(actx, PyOpenCLArrayContext):
118+
return False
119+
120+
import pyopencl as cl
121+
return all(dev.type & cl.device_type.CPU for dev in actx.context.devices)
56122

57123
# }}}
58124

sumpy/codegen.py

Lines changed: 1 addition & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -47,8 +47,6 @@
4747

4848
from numpy.typing import DTypeLike
4949

50-
import pyopencl as cl
51-
from loopy.codegen import PreambleInfo
5250
from loopy.target import TargetBase
5351
from loopy.translation_unit import CallablesInferenceContext
5452
from loopy.types import LoopyType
@@ -249,26 +247,6 @@ def register_bessel_callables(loopy_knl: lp.TranslationUnit) -> lp.TranslationUn
249247

250248
return loopy_knl
251249

252-
253-
def _fp_contract_fast_preamble(
254-
preamble_info: PreambleInfo
255-
) -> Iterator[tuple[str, str]]:
256-
yield ("fp_contract_fast_pocl", "#pragma clang fp contract(fast)")
257-
258-
259-
def register_optimization_preambles(
260-
loopy_knl: lp.TranslationUnit, device: cl.Device
261-
) -> lp.TranslationUnit:
262-
if isinstance(loopy_knl.target, lp.PyOpenCLTarget):
263-
import pyopencl as cl
264-
if (device.platform.name == "Portable Computing Language"
265-
and (device.type & cl.device_type.GPU)):
266-
loopy_knl = lp.register_preamble_generators(
267-
loopy_knl,
268-
[_fp_contract_fast_preamble])
269-
270-
return loopy_knl
271-
272250
# }}}
273251

274252

@@ -753,7 +731,7 @@ def to_loopy_insns(
753731
assignments: Iterable[tuple[str, sym.Expr]],
754732
vector_names: Set[str] | None = None,
755733
pymbolic_expr_maps: Sequence[Callable[[Expression], Expression]] = (),
756-
complex_dtype: DTypeLike = None,
734+
complex_dtype: DTypeLike | None = None,
757735
retain_names: Set[str] | None = None,
758736
) -> Sequence[Assignment | CallInstruction]:
759737
if vector_names is None:

0 commit comments

Comments
 (0)