WIP: Grudge array context #28
Draft
nchristensen wants to merge 313 commits into inducer:main from nchristensen:grudge-array-context
Commits (313)
3126548
Use transformations with 1D and 2D diff kernels
nchristensen f83a2c7
fix tests
nchristensen 5f6588d
Merge remote-tracking branch 'upstream/main'
nchristensen 7024356
Push current to update multiple dispatch branch
nchristensen c1d9e23
Move tags to separate file and rename
nchristensen cff6e74
add initial ParameterValue tag
nchristensen dd8e365
uncomment ParameterValue tags
nchristensen 2991339
fix variable name
nchristensen 0e46617
Use diff kernel from generator function for now
nchristensen e2dbb4b
Make all diff kernels use the same generator function
nchristensen 829267b
optimize face_mass kernel, fix strides on prefetches
nchristensen 5219df7
remove ptx file
nchristensen bae19e9
re-implement accidentally blown away autotuner changes
nchristensen 9eddf34
re-add elwise_linear autotuner support
nchristensen e656a74
move face_mass kernel generator to separate function
nchristensen d40e385
add face_mass support in autotuner + update exhaustive search
nchristensen 0e3010d
move option settings to grudge_array_context
nchristensen 98d08d0
testing order 4
nchristensen c280dee
Merge remote-tracking branch 'upstream/main'
nchristensen 1754f00
Add basic on-the-fly autotuning support
nchristensen cabca66
Merge up to c7e79e5
nchristensen 89bda2a
fixes to make work with newer loopy
nchristensen d381b43
push wave-op with temporary delation
nchristensen 3ee450c
Add generic test function for mxm kernels
nchristensen 5a4414b
More autotuning support
nchristensen 0044fa5
Add autotuner support for nodes
nchristensen 3d04db9
set memory layout in flatten
46ea990
push current in preparation for merge
nchristensen 4a9c19f
Merge changes from main branch
nchristensen e51bbcf
Auto tuner improvements
nchristensen bc8b49d
Enable resample by mat autotuning
nchristensen fb73133
distinguish between time steps
nchristensen c6c870d
allow elwise_linear kernel to accept nonsquare operations
nchristensen 68a9368
fix output labels
nchristensen ee33013
Add support for testing runs restarted at arbitrary point
nchristensen 3d27e23
turn on face_mass optimization in random search
nchristensen e138314
remove old comments
nchristensen 6ab57e0
use spaces instead of tabs
nchristensen 1262b68
workaround for Lassen weirdness
nchristensen 4139ddc
change starting parameters
nchristensen 4a3f142
Merge branch 'grudge-array-context-update' of github.com:nchristensen…
nchristensen 1433f11
Allow semi-constant total number of points
nchristensen 6832a1e
merge changes
nchristensen dde17bf
merge changes
nchristensen 9b836e8
Delete HEAD marker
nchristensen b9911ce
fix bandwidth calculation
fc83ff4
variable for search method
nchristensen 307b325
Merge branch 'grudge-array-context-update' of github.com:nchristensen…
nchristensen f681840
preparing for merge
nchristensen b67da86
pull down upstream
nchristensen 0b28da7
add comment to run_tests.py
nchristensen eed156d
remove binary file
nchristensen c7cf575
push hjson files
nchristensen 1074080
merge upstream
nchristensen ae0509d
wrap tags in list
nchristensen e1075ef
Merge commit 'b197e6198689fcac2c0dfd503286e28806e9502f' into grudge-a…
nchristensen 59cb35a
Use default_entrypoint.name
nchristensen 958d5d4
use default_entrypoint
nchristensen 821b14d
Merge commit '9e88251de1c1b8754447c9c8cfdf416f1f6112bc' into grudge-a…
nchristensen 64314c9
add stack implementation
nchristensen 12ed9cc
Add KernelData tags to rest of einsum calls for wave-op-mpi example
nchristensen a77ec74
Fix area_elements merge conflict
nchristensen 72ae5ca
Put opencl event in dictionary
nchristensen 7946267
Stub for parameter list creation
nchristensen 6af48d4
Create autotune parameter list
nchristensen 55a1688
add spock changes to run_tests.py
nchristensen aaee886
fix merge conflict
nchristensen a90a51a
start parallel autotuning with charm++
nchristensen fd737d4
Merge branch 'grudge-array-context-update' of github.com:nchristensen…
nchristensen f1b9632
fix indentation
nchristensen e008ff0
Don't specify output parameter in dt_non_geometric_factors
nchristensen 35de234
Use array instead of map
nchristensen 3241e5b
Functional pool autotuner
nchristensen 9940c58
Working parallel autotuning
nchristensen 592c5ee
push parallel_autotuning_v2.py
nchristensen dbf1b72
Overwrite Pool __init__ function
nchristensen 7d8ce71
cleanup parallel autotuning code
nchristensen be8b914
Merge remote-tracking branch 'upstream/main' into grudge-array-contex…
nchristensen 70aee06
Update parallel autotuning code
nchristensen c894ae0
More parallel autotuning code
nchristensen 7d607c6
Separate parameter space creation and transformation application
nchristensen 127c6cd
More wave-op autotuning support
nchristensen 0aacb25
Merge upstream changes
nchristensen 61ab347
Transformation generator improvements, move KernelDataTag
nchristensen ca0ef57
Fix file writing, refactor array contexts a bit
nchristensen caa3a2c
Change wave op defaults
nchristensen ffe5a19
Merge upstream changes to wave-op-mpi
nchristensen 666e0af
Array context refactoring
nchristensen d681328
Comment possibly unneeded thaw implementation
nchristensen 5f93db9
Shrink search space a bit
nchristensen ff25c72
merge upstream changes
nchristensen 378813f
Merge branch 'grudge-array-context-update' of github.com:nchristensen…
nchristensen 67a9118
Fix tag imports
nchristensen 82b7fcb
Get events directly from pyopencl array object
nchristensen 6dc7aef
memoize einsum
nchristensen c7b7189
Move appication of einsum tags to separate method
nchristensen e418174
Fix tag imports
nchristensen 2ddd97f
fix import
nchristensen cb0ff6e
Fix imports
nchristensen ae754e6
Fix merge conflict
nchristensen 0454155
Handle EinsumArgsTags
nchristensen 256414b
Implement thaw
nchristensen fe5608b
Merge branch 'grudge-array-context-update' of github.com:nchristensen…
nchristensen 0b6c6c2
Update property name
nchristensen 120f108
update actx_special calling code
nchristensen 0cec662
Hack to fix unflatten for Fortran ordering
nchristensen 4416224
Transform ctof kernels
nchristensen edb799e
resample by picking transformations
nchristensen c42c1b8
Remove test_results.txt
nchristensen 4cc83d8
remove ci-support.sh
nchristensen d18888e
Remove empty file
nchristensen ee13843
Re-add blank line
nchristensen 17ffcf6
Remove wave-min.py
nchristensen 4301392
Remove unneded file
nchristensen e12aa6b
remove nvprof file
nchristensen 4e8b2bc
remove unneeded nvprof and hjson files
nchristensen 33ab088
remove more unneeded files
nchristensen daf4872
update requirements.txt
nchristensen 38f0420
Cleanup compiler.py
nchristensen 577650d
Remove blank lines
nchristensen b0072cd
flake8 fixes
nchristensen 3cea7e2
Add note
nchristensen f0b4164
flake8 fixes
nchristensen f954f07
Merge remote-tracking branch 'upstream/main'
nchristensen d618e8f
grudge_array_context.py changes
nchristensen 4c4e75e
Remove ptx files
nchristensen 6001534
Merge branch 'upstream/main' into grudge-array-context
nchristensen f961465
remove transformations directory
nchristensen d1b9f3c
Single return tin dagrt-fusion.py
nchristensen 63410ba
Allow pickling kernels
nchristensen 14aac4a
merge changes
nchristensen 0ff50a8
Update parallel autotuning code
nchristensen b0ac435
Update parallel autotuning code
nchristensen b631d3d
Parallel autotuning updates
nchristensen 6011daf
fix merge conflicts
nchristensen 6dcbd3d
Merge branch 'grudge-array-context' of github.com:nchristensen/grudge…
nchristensen 0832a81
fix setup.py
nchristensen 8b1e596
Inherit from MPIPyOpenCLArrayContext
nchristensen 3558b25
Use strings instead of raw numbers for axis sizes
nchristensen 40bc9ab
Actually remove quad_tag_to_group_factory
inducer 0c44361
Remove get_distributed_boundary_swap_connection, DGDiscretizationWith…
inducer 76725d6
Fix sectioning, remove DColl.order
inducer f42f141
Rip _get_dist_boundary_connections_single_vol out of DColl.__init__
inducer ef39455
Drop removed stuff from pylintrc
inducer 85d9812
Support multiple volumes in a discretization collection
inducer dc600ec
Parallel autotuning changes
nchristensen 3aa8f6e
Merge branch 'main' into multi-volume
majosm b3de25e
use partition-ID-enabled meshmode
majosm ea570ee
temporarily change meshmode branch
majosm 7cc98f6
fix docs
majosm 3aa2dcb
parallel autotuning fixes
nchristensen 577b9ec
parallel autotuning fixes
nchristensen ccd01a1
Comment print statements, support for new resample kernel
nchristensen 9a825a9
Transformations for small matrices
nchristensen 053e484
Fix transformations for small einsum kernels
nchristensen f37e97d
Workaround for CUDA timing bug
nchristensen 08740d4
Merge branch 'grudge-array-context' of github.com:nchristensen/grudge…
nchristensen 4358331
Change unique program id method
nchristensen 4d31441
Guard against zero length args in resampling kernel
nchristensen 43bb708
KernelSavingAutotuningArrayContext
nchristensen 18b11ff
resample by picking transformation fixes
nchristensen 6d88ed5
Fix merge conflict
nchristensen e8ffd47
better bandwidth count
nchristensen a44b2ff
Shared memory fix for autotuner
nchristensen b7a3fb8
limit use of local memory
nchristensen ff6c73e
Merge remote-tracking branch 'origin/main' into multi-volume
majosm 7ea9d73
Merge remote-tracking branch 'origin/multi-volume' into multi-volume
majosm ee41a3f
exit if transformation file not found
nchristensen e66d68f
Restructure array contexts
nchristensen 14f2762
Merge branch 'grudge-array-context' of github.com:nchristensen/grudge…
nchristensen 2a04c59
point requirements.txt to loopy branch
nchristensen 49f8589
merge upstream
nchristensen d0b93cd
Merge branch 'main' into grudge-array-context
nchristensen 31b59bf
Move bs4 import
nchristensen 88bcd26
Fix file creation
nchristensen c72b81a
Use absolute path to file creation
nchristensen fdc4f57
fix file creation
nchristensen 5534936
Set to fortran layout before pickling
nchristensen 6027392
Memory layout fixes
nchristensen e8fd9b2
Fix merge conflicts
nchristensen 4737b1a
Merge branch 'grudge-array-context' of github.com:nchristensen/grudge…
nchristensen 5ee00b4
Merge remote-tracking branch 'origin/main' into multi-volume
majosm e705304
Merge remote-tracking branch 'origin/multi-volume' into multi-volume
majosm 7d2c974
Merge remote-tracking branch 'origin/main' into multi-volume
majosm c1b68c7
Merge remote-tracking branch 'origin/multi-volume' into multi-volume
majosm ea9d929
Merge remote-tracking branch 'upstream/main' into grudge-array-context
nchristensen 2264117
Update autotuning script
nchristensen 53e0b6b
Parallel autotuning fixes, support for element-contiguous data layout
nchristensen 9762dbc
Test different local memory data layouts
nchristensen 993ca1f
fix stride calculation
nchristensen 98d4119
Allow testing different local memory data layouts
nchristensen 656fc4b
Add alternative pool implementations
nchristensen f8efd52
Merge remote-tracking branch 'origin/main' into multi-volume
majosm d072923
Merge remote-tracking branch 'origin/multi-volume' into multi-volume
majosm b1372c3
Create queue outside of task
nchristensen 69ded2e
Update charm4py script
nchristensen 318e0c8
update charm4py script
nchristensen 5620c0b
Merge remote-tracking branch 'origin/main' into multi-volume
majosm fe54464
Merge remote-tracking branch 'origin/multi-volume' into multi-volume
majosm 09bac09
promote to part ID instead of using part ID helper
majosm c46a6b5
clarify part vs. partition terminology
majosm 06ff8f6
account for explicit part_id attribute in InterPartAdjacencyGroup
majosm cac1b24
Merge remote-tracking branch 'origin/main' into multi-volume
majosm d7cd9db
Merge remote-tracking branch 'origin/multi-volume' into multi-volume
majosm fc4c3cb
Merge remote-tracking branch 'origin/main' into multi-volume
majosm 4743538
Merge remote-tracking branch 'origin/multi-volume' into multi-volume
majosm 49d627c
use dataclass instead of tuple for PartID
majosm b2219d8
move PartID conversion setup stuff into _normalize_mesh_part_ids
majosm a05c626
reset requirements.txt
majosm fb8a34b
accept general integral types as MPI ranks
majosm 6397cdf
redesign inter-volume trace pair functions
majosm 91b60e7
reapply zero addition to local_bdry_data
majosm 65649df
handle all-Number cases
majosm 429ce7b
cosmetic change
majosm 917c212
fix bug
majosm 337b994
tweak as_dofdesc normalization
majosm 3c4e912
fix bugs
majosm ac98e49
don't try to create trace pair if there's no adjacency
majosm 04f7330
forget about heterogeneous inter-volume trace pairs for now
majosm 2d369e4
add FIXME
majosm 2b13e5a
fix bug
majosm fd644cf
ignore flake8-bugbear error
majosm 27b43f5
add temporary workaround for make_discretization_collection + EagerDG…
majosm e936a20
add volume_dd to make_visualizer
majosm 06fcacc
implement multi-volume support in op
majosm f1f4cfc
fix doc in characteristic_lengthscales
majosm 2a5fe53
fix memoization in a couple of spots
majosm e004677
get contextual volume tags for BoundayDomainTags as well
majosm bcfde03
tag some axes
majosm dd2ca36
Account for fp_bytes
nchristensen ffab94c
Fix merge conflict
nchristensen 3894883
fix ArrayOrContainerT import
nchristensen 08133d1
fix merge conflict
nchristensen e2980fe
Merge upstream
nchristensen e074d29
Fix merge conflicts
nchristensen 9d035d7
Move queue creation back outside of task again
nchristensen 71680b6
fix variable name
nchristensen 1cb73b7
single indirection autotuning
nchristensen 0248b32
Merge branch 'grudge-array-context' of github.com:nchristensen/grudge…
nchristensen 1bb842c
add axis tags back
nchristensen c0e20dc
Fix some dimensions
nchristensen 8f39651
Follow same pattern as upstream
nchristensen 7b88934
Guard against DOF arrays with zero elements
nchristensen 0a49f4d
guard against 0 elements
nchristensen 9b7bf17
Guard against 0 element in 2to2 einsum
nchristensen e31106d
Guard against 0 elements
nchristensen d132fe3
Guard against 0 elements
nchristensen e007b5f
single indirection transformation generator
nchristensen d7a3829
Merge branch 'grudge-array-context' of github.com:nchristensen/grudge…
nchristensen 1504227
Allow randomized index entries
nchristensen
New file added in this diff (+129 lines):

@@ -0,0 +1,129 @@
from meshmode.array_context import PyOpenCLArrayContext, make_loopy_program
from meshmode.dof_array import DOFTag
from grudge.execution import VecDOFTag, FaceDOFTag
import loopy as lp
import pyopencl
import pyopencl.array as cla
import loopy_dg_kernels as dgk
import numpy as np


ctof_knl = lp.make_copy_kernel("f,f", old_dim_tags="c,c")
ftoc_knl = lp.make_copy_kernel("c,c", old_dim_tags="f,f")


# Really this is more of an Nvidia array context probably
# Maybe not if loading from file?
class GrudgeArrayContext(PyOpenCLArrayContext):

    def __init__(self, queue, allocator=None):
        super().__init__(queue, allocator=allocator)

    def empty(self, shape, dtype):
        return cla.empty(self.queue, shape=shape, dtype=dtype,
                         allocator=self.allocator, order='F')

    def zeros(self, shape, dtype):
        return cla.zeros(self.queue, shape=shape, dtype=dtype,
                         allocator=self.allocator, order='F')

    # Probably can delete this
    '''
    def call_loopy(self, program, **kwargs):

        #print("Program: " + program.name)
        if program.name == "opt_diff":
            #diff_mat = kwargs["diff_mat"]
            #result = kwargs["result"]
            #vec = kwargs["vec"]
            print(kwargs)

            # Create input array
            #cq = vec.queue
            #dtp = vec.dtype

            # This should not be done here.
            #_,(inArg,) = ctof_knl(cq, input=vec)
            #inArg = vec.copy()

            # Treat as c array, can do this to use c-format diff function
            # np.array(A, format="F").flatten() == np.array(A.T, format="C").flatten()
            #inArg.shape = (inArg.shape[1], inArg.shape[0])
            #inArg.strides = cla._make_strides(vec.dtype.itemsize, inArg.shape, "c")
            #outShape = inArg.shape

            # Really should be passed in rather than re-allocated each time
            # ... it comes in via kwargs["result"]
            #argDict = { "result1": cla.Array(cq, vec.shape, dtp, order="f"),
            #            "result2": cla.Array(cq, vec.shape, dtp, order="f"),
            #            "result3": cla.Array(cq, vec.shape, dtp, order="f"),
            #            "vec": vec,
            #            "mat1": diff_mat[0],
            #            "mat2": diff_mat[1],
            #            "mat3": diff_mat[2] }

            #super().call_loopy(program, **argDict)

            #result = [argDict["result1"], argDict["result2"], argDict["result3"]]
            #print(result)
            #result = argDict["result1"] #kwargs["result"]
            #print("HERE")
            #print(result)
            #exit()
            # Treat as fortran style array again
            #for i, entry in enumerate(["result1", "result2", "result3"]):
            #    argDict[entry].shape = (argDict[entry].shape[1], argDict[entry].shape[0])
            #    argDict[entry].strides = cla._make_strides(argDict[entry].dtype.itemsize, argDict[entry].shape, "f")
            #    # This should be unnecessary
            #    # It is needed for the moment because of the "ctof" here.
            #    #ftoc_knl(cq, input=argDict[entry], output=result[i])
            result = super().call_loopy(program, **kwargs)
        #else:
            result = super().call_loopy(program,**kwargs)

        return result
    '''

    #@memoize_method
    def _get_scalar_func_loopy_program(self, name, nargs, naxes):
        prog = super()._get_scalar_func_loopy_program(name, nargs, naxes)
        for arg in prog.args:
            if type(arg) == lp.ArrayArg:
                arg.tags = DOFTag()
        return prog

    # Side note: the meaning of thawed and frozen seem counterintuitive to me.
    def thaw(self, array):
        thawed = super().thaw(array)
        if type(getattr(array, "tags", None)) == DOFTag:
            cq = thawed.queue
            _, (out,) = ctof_knl(cq, input=thawed)
            thawed = out
            # May or may not be needed
            #thawed.tags = "dof_array"
        return thawed

    #@memoize_method
    def transform_loopy_program(self, program):

        #print(program.name)
        for arg in program.args:
            if type(arg.tags) == DOFTag:
                program = lp.tag_array_axes(program, arg.name, "f,f")
            elif type(arg.tags) == VecDOFTag:
                program = lp.tag_array_axes(program, arg.name, "sep,f,f")
            elif type(arg.tags) == FaceDOFTag:
                program = lp.tag_array_axes(program, arg.name, "N1,N0,N2")

        if program.name == "opt_diff":
            # TODO: Dynamically determine device id, don't hardcode path to transform.hjson.
            # Also get pn from program
            filename = "/home/njchris2/Workspace/nick/loopy_dg_kernels/transform.hjson"
            deviceID = "NVIDIA Titan V"
            pn = 4

            transformations = dgk.loadTransformationsFromFile(filename, deviceID, pn)
            program = dgk.applyTransformationList(program, transformations)
        else:
            program = super().transform_loopy_program(program)

        return program
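For context, here is a minimal usage sketch of the array context defined above. It is not part of the diff: the OpenCL setup, the array shape, and the import path are assumptions made for illustration only.

```python
# Minimal usage sketch (assumed setup; not part of the diff above).
# It shows how the GrudgeArrayContext from this branch might be created and
# used to allocate a Fortran-ordered array, matching the "f,f" axis tags
# applied in transform_loopy_program.
import numpy as np
import pyopencl as cl

# Hypothetical import path for the class defined in the new file:
# from grudge.grudge_array_context import GrudgeArrayContext

ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)

actx = GrudgeArrayContext(queue)

# zeros()/empty() on this context allocate in column-major ("F") order.
u = actx.zeros((1000, 35), dtype=np.float64)
print(u.strides)  # strides reflect Fortran ordering
```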