Skip to content

Commit 54d1ff4

Browse files
tj sunsv2518
authored and committed
add openmp flag and bypass workaround flag
1 parent 31fadc0 commit 54d1ff4

File tree

3 files changed

+17
-5
lines changed

3 files changed

+17
-5
lines changed

pyop2/codegen/rep2loopy.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -453,6 +453,15 @@ def generate(builder, wrapper_name=None):
453453
name=wrapper_name,
454454
# TODO, should these really be silenced?
455455
silenced_warnings=["write_race*"])
456+
from pyop2.configuration import configuration
457+
if configuration["time"]:
458+
batch_size = configuration["simd_width"]
459+
if builder.extruded:
460+
start, end = parameters.layer_start, parameters.layer_end
461+
else:
462+
start, end = "start", "end"
463+
wrapper = loopy.assume(wrapper, "{0} mod {1} = 0".format(end, batch_size))
464+
wrapper = loopy.assume(wrapper, "exists zz: zz > 0 and {0} = {1}*zz + {2}".format(end, configuration["simd_width"], start))
456465

457466
# prioritize loops
458467
for indices in context.index_ordering:

pyop2/compilation.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -216,7 +216,7 @@ def workaround_cflags(self):
216216
if version.StrictVersion("7.3") <= ver <= version.StrictVersion("7.5"):
217217
# GCC bug https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90055
218218
# See also https://github.com/firedrakeproject/firedrake/issues/1442
219-
# And https://github.com/firedrakeproject/firedrake/issues/1717
219+
return # enable vectorization for paper
220220
# Bug also on skylake with the vectoriser in this
221221
# combination (disappears without
222222
# -fno-tree-loop-vectorize!)
@@ -394,7 +394,7 @@ class LinuxCompiler(Compiler):
394394
:kwarg comm: Optional communicator to compile the code on (only
395395
rank 0 compiles code) (defaults to COMM_WORLD)."""
396396
def __init__(self, cppargs=[], ldargs=[], cpp=False, comm=None):
397-
opt_flags = ['-march=native', '-O3', '-ffast-math']
397+
opt_flags = ['-O3', '-ffast-math', '-fopenmp']
398398
if configuration['debug']:
399399
opt_flags = ['-O0', '-g']
400400
cc = "mpicc"

pyop2/sequential.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -83,15 +83,18 @@ def vectorise(wrapper, iname, batch_size):
8383
kernel = kernel.copy(temporary_variables=tmps)
8484

8585
# split iname and vectorize the inner loop
86+
slabs = (1, 1)
87+
if configuration["time"]:
88+
slabs = (0, 0)
8689
inner_iname = iname + "_batch"
8790

8891
if configuration["vectorization_strategy"] == "ve":
8992
# vectorize using vector extenstions
90-
kernel = loopy.split_iname(kernel, iname, batch_size, slabs=(0, 1), inner_tag="c_vec", inner_iname=inner_iname)
93+
kernel = loopy.split_iname(kernel, iname, batch_size, slabs=slabs, inner_tag="c_vec", inner_iname=inner_iname)
9194
else:
9295
# vectoriza using omp pragma simd
9396
assert configuration["vectorization_strategy"] == "omp"
94-
kernel = loopy.split_iname(kernel, iname, batch_size, slabs=(0, 1), inner_tag="omp_simd", inner_iname=inner_iname)
97+
kernel = loopy.split_iname(kernel, iname, batch_size, slabs=slabs, inner_tag="omp_simd", inner_iname=inner_iname)
9598

9699
alignment = configuration["alignment"]
97100
tmps = dict((name, tv.copy(alignment=alignment)) for name, tv in kernel.temporary_variables.items())
@@ -241,7 +244,7 @@ def set_nbytes(self, args):
241244
seen = set()
242245
for arg in args:
243246
if arg.access is INC:
244-
nbytes += arg.data.nbytes
247+
nbytes += arg.data.nbytes * 2
245248
else:
246249
nbytes += arg.data.nbytes
247250
for map_ in arg.map_tuple:

0 commit comments

Comments
 (0)