Skip to content

Commit 0c608fe

Browse files
committed
Add optimisation scripts to ngarch miniapp and ESNZ site
1 parent cb577a3 commit 0c608fe

File tree

9 files changed

+651
-0
lines changed

9 files changed

+651
-0
lines changed
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
##############################################################################
2+
# (c) Crown copyright 2025 Met Office. All rights reserved.
3+
# The file LICENCE, distributed with this code, contains details of the terms
4+
# under which the code may be used.
5+
##############################################################################
6+
"""
7+
Optimisation script that replaces existing OpenMP parallelisation with
8+
PSyclone-generated directives to target loops over index i instead of
9+
index j. Trip count of j loops is 1 in LFRic, which prevents parallel
10+
execution.
11+
"""
12+
13+
import logging
14+
from psyclone.transformations import TransformationError
15+
from psyclone.psyir.nodes import Loop
16+
from transmute_psytrans.transmute_functions import (
17+
get_outer_loops,
18+
get_compiler,
19+
first_priv_red_init,
20+
OMP_PARALLEL_LOOP_DO_TRANS_DYNAMIC,
21+
OMP_PARALLEL_LOOP_DO_TRANS_STATIC
22+
)
23+
24+
25+
def trans(psyir):
    """
    Apply OpenMP Directives

    Every outermost loop nest returned by ``get_outer_loops`` gets an
    OpenMP parallel-do directive on the second loop found when walking
    the nest (presumably the i loop described in the module docstring).
    The first nest uses the dynamic-schedule transformation, all later
    nests use the static-schedule one. A ``TransformationError`` or
    ``IndexError`` ends the processing of the remaining nests and is
    reported as a warning.

    :param psyir: tree that is passed on to ``get_outer_loops``.
    """

    # Keep only the loops that are not enclosed by another loop
    top_level_nests = []
    for candidate in get_outer_loops(psyir):
        if candidate.ancestor(Loop):
            continue
        top_level_nests.append(candidate)

    try:
        for position, nest in enumerate(top_level_nests):
            # Workaround for the firstprivate variable issue with CCE
            if get_compiler() == 'cce':
                first_priv_red_init(
                    nest,
                    ["cf_base", "cf_forced", "dcfl",
                     "dqcl", "qcl_forced", "qcl_tol"]
                )

            # Replicate the schedules of the original implementation:
            # dynamic for the first nest, static for the others
            if position == 0:
                omp_trans = OMP_PARALLEL_LOOP_DO_TRANS_DYNAMIC
            else:
                omp_trans = OMP_PARALLEL_LOOP_DO_TRANS_STATIC

            # Element 0 of the walk is the nest itself, so element 1
            # is the first loop inside it
            omp_trans.apply(nest.walk(Loop)[1])
    except (TransformationError, IndexError) as err:
        logging.warning("OMPParallelLoopTrans failed: %s", err)
Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,133 @@
1+
##############################################################################
2+
# (c) Crown copyright 2025 Met Office. All rights reserved.
3+
# The file LICENCE, distributed with this code, contains details of the terms
4+
# under which the code may be used.
5+
##############################################################################
6+
"""
7+
Optimisation script that replaces existing OpenMP parallelisation with
8+
PSyclone-generated directives to target loops over index i instead of
9+
index j. Trip count of j loops is 1 in LFRic, which prevents parallel
10+
execution. Private variables need to be declared explicitly as PSyclone
11+
analysis currently misses a scalar variable that a subroutine modifies in
12+
a parallel region. PSyclone thread safety checks need to be overridden;
13+
the subroutines can be safely parallelised. Compiler directives used in
14+
the original code are re-inserted for performance and consistency of output.
15+
"""
16+
17+
import logging
18+
from psyclone.transformations import TransformationError
19+
from psyclone.psyir.nodes import (Loop, CodeBlock)
20+
from transmute_psytrans.transmute_functions import (
21+
set_pure_subroutines,
22+
get_outer_loops,
23+
mark_explicit_privates,
24+
get_compiler,
25+
first_priv_red_init,
26+
match_lhs_assignments,
27+
OMP_PARALLEL_REGION_TRANS,
28+
OMP_DO_LOOP_TRANS_STATIC
29+
)
30+
31+
# Names of the variables that are declared private explicitly in the
# second parallel region (passed to mark_explicit_privates)
private_variables = [
    "alphal",
    "alx",
    "i",
    "j",
    "k",
    "km1",
    "kp1",
    "mux",
    "tmp",
    "frac_init",
    "kk",
    "kkm1",
    "kkp1",
    "qc",
    "qc_points",
    "qsl",
    "tlx",
    "qsi",
    "idx",
    "deltacl_c",
    "deltacf_c",
    "deltaql_c",
    "cf_c",
    "cfl_c",
    "cff_c",
]

# Names of the subroutines that are declared as "pure"
# (passed to set_pure_subroutines)
pure_subroutines = [
    "qsat",
    "qsat_mix",
    "qsat_wat",
    "qsat_wat_mix",
]

# Left-hand-side variables of assignments whose PSyclone dependency
# errors can be ignored
false_dep_vars = ["qc_points", "idx"]
48+
49+
50+
class CompilerDirective:
    """
    Stand-in for fparser.two.Fortran2003.Directive.

    Used to avoid an issue with the fparser class that will be
    resolved in an upcoming fparser release. Instances only store the
    directive text and render it through ``tofortran``.
    """

    def __init__(self, directive):
        # Directive text without the "!DIR$ " prefix
        self.directive = directive

    def tofortran(self):
        """
        Return the stored directive text preceded by "!DIR$ "
        """
        prefix = "!DIR$ "
        return prefix + self.directive
64+
65+
66+
def trans(psyir):
    """
    Apply OpenMP and Compiler Directives

    The routine is processed in two steps:

    * 1st region: the first two outermost loop nests are enclosed in one
      OpenMP parallel region. The first nest itself and the second loop
      found when walking the second nest get static OpenMP do directives.
    * 2nd region: the third outermost loop nest gets explicit private
      variables, its own parallel region, "!DIR$" compiler directives
      (CCE only) and static OpenMP do directives on selected inner loops.

    A ``TransformationError`` or ``IndexError`` in either step is logged
    as a warning and does not propagate. If the routine has fewer than
    three outermost loop nests, the 2nd region is skipped with a warning.

    :param psyir: tree that is passed on to ``set_pure_subroutines`` and
                  ``get_outer_loops``.
    """

    # Declare subroutines as pure to enable parallelisation
    # of the encompassing loops
    set_pure_subroutines(psyir, pure_subroutines)

    # Identify outer loops for setting up parallel regions
    outer_loops = [loop for loop in get_outer_loops(psyir)
                   if not loop.ancestor(Loop)]

    # Check if first OpenMP region can be parallelised and
    # apply directives
    try:
        OMP_PARALLEL_REGION_TRANS.validate(outer_loops[0:2])
        OMP_PARALLEL_REGION_TRANS.apply(outer_loops[0:2])
        OMP_DO_LOOP_TRANS_STATIC.apply(outer_loops[0])
        OMP_DO_LOOP_TRANS_STATIC.apply(outer_loops[1].walk(Loop)[1])
    except (TransformationError, IndexError) as err:
        logging.warning("Parallelisation of the 1st region failed: %s", err)

    # The 2nd region is built around the third outer loop nest. Look it
    # up under the same error handling as the transformations so that a
    # routine with fewer loop nests yields a warning, not a crash.
    try:
        last_nest = outer_loops[2]
    except IndexError as err:
        logging.warning("Parallelisation of the 2nd region failed: %s", err)
        return

    # Declare private symbols for the last loop nest explicitly,
    # PSyclone misses one
    mark_explicit_privates(last_nest, private_variables)

    # Parallelise the second region and insert compiler directives
    try:
        # The compiler does not change while the script runs, query once
        using_cce = get_compiler() == 'cce'

        # Add redundant variable initialisation to work around a known
        # PSyclone issue when using CCE
        if using_cce:
            first_priv_red_init(last_nest, ["i", "j", "k"])

        OMP_PARALLEL_REGION_TRANS.validate([last_nest])
        OMP_PARALLEL_REGION_TRANS.apply(last_nest)

        # Insert before OpenMP directives to avoid PSyclone errors
        if using_cce:
            # Directive text and the window of loops (in walk order)
            # that it has to precede
            for text, window in (("NOFISSION", slice(3, 5)),
                                 ("IVDEP", slice(13, 16))):
                for loop in last_nest.walk(Loop)[window]:
                    cblock = CodeBlock([CompilerDirective(text)],
                                       CodeBlock.Structure.STATEMENT)
                    # Place the directive immediately before the loop
                    insert_at = loop.parent.children.index(loop)
                    loop.parent.children.insert(insert_at, cblock)

        for loop in last_nest.walk(Loop)[2:7]:
            OMP_DO_LOOP_TRANS_STATIC.apply(loop)

        for loop in last_nest.walk(Loop)[8:13:2]:
            # Check if any eligible variables appear on the LHS of
            # assignment expressions; these lead to false dependency
            # errors in the parallel loop transformation that can be
            # ignored
            ignore_deps_vars = match_lhs_assignments(loop, false_dep_vars)
            options = {}
            if ignore_deps_vars:
                options["ignore_dependencies_for"] = ignore_deps_vars

            OMP_DO_LOOP_TRANS_STATIC.apply(loop, options)

    except (TransformationError, IndexError) as err:
        logging.warning("Parallelisation of the 2nd region failed: %s", err)
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
##############################################################################
2+
# (c) Crown copyright 2025 Met Office. All rights reserved.
3+
# The file LICENCE, distributed with this code, contains details of the terms
4+
# under which the code may be used.
5+
##############################################################################
6+
"""
7+
Optimisation script that replaces existing OpenMP parallelisation with
8+
PSyclone-generated directives to parallelise additional loops.
9+
"""
10+
11+
import logging
12+
from psyclone.transformations import TransformationError
13+
from psyclone.psyir.nodes import Loop
14+
from transmute_psytrans.transmute_functions import (
15+
get_outer_loops,
16+
OMP_PARALLEL_LOOP_DO_TRANS_STATIC,
17+
)
18+
19+
20+
def trans(psyir):
    """
    Apply OpenMP Directives

    Each outermost loop nest returned by ``get_outer_loops`` is
    handled according to the name of its loop variable: a 'k' loop is
    parallelised directly, for a 'j' loop the second loop found when
    walking the nest is parallelised instead, and any other loop is
    left untouched. A ``TransformationError`` or ``IndexError`` ends
    the processing of the remaining nests and is reported as a warning.

    :param psyir: tree that is passed on to ``get_outer_loops``.
    """

    # Keep only the loops of the subroutine that are not enclosed by
    # another loop
    top_level_nests = []
    for candidate in get_outer_loops(psyir):
        if candidate.ancestor(Loop):
            continue
        top_level_nests.append(candidate)

    try:
        for nest in top_level_nests:
            index_name = nest.variable.name
            if index_name == 'k':
                target = nest
            elif index_name == 'j':
                # j-loops have a trip count of 1, so parallelise the
                # first loop inside the nest instead
                target = nest.walk(Loop)[1]
            else:
                continue
            OMP_PARALLEL_LOOP_DO_TRANS_STATIC.apply(target)
    except (TransformationError, IndexError) as err:
        logging.warning("OMPParallelLoopTrans failed: %s", err)
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
##############################################################################
2+
# (c) Crown copyright 2025 Met Office. All rights reserved.
3+
# The file LICENCE, distributed with this code, contains details of the terms
4+
# under which the code may be used.
5+
##############################################################################
6+
"""
7+
Optimisation script that replaces existing OpenMP parallelisation with
8+
PSyclone-generated directives to target loops over index i instead of
9+
index j. Trip count of j loops is 1 in LFRic, which prevents parallel
10+
execution.
11+
"""
12+
13+
import logging
14+
from psyclone.transformations import TransformationError
15+
from psyclone.psyir.nodes import Loop
16+
from transmute_psytrans.transmute_functions import (
17+
get_outer_loops,
18+
get_compiler,
19+
first_priv_red_init,
20+
OMP_PARALLEL_LOOP_DO_TRANS_DYNAMIC,
21+
OMP_PARALLEL_LOOP_DO_TRANS_STATIC
22+
)
23+
24+
25+
def trans(psyir):
    """
    Apply OpenMP Directives

    For every outermost loop nest returned by ``get_outer_loops``, the
    second loop found when walking the nest (presumably the i loop
    described in the module docstring) receives an OpenMP parallel-do
    directive. The dynamic-schedule transformation is used for the
    first nest and the static-schedule one for all others. A
    ``TransformationError`` or ``IndexError`` ends the processing of
    the remaining nests and is reported as a warning.

    :param psyir: tree that is passed on to ``get_outer_loops``.
    """

    # Loops without a loop ancestor are the outermost loop nests
    top_level_nests = list(
        filter(lambda node: not node.ancestor(Loop), get_outer_loops(psyir))
    )

    try:
        for position, nest in enumerate(top_level_nests):
            # Workaround for the firstprivate variable issue with CCE
            if get_compiler() == 'cce':
                first_priv_red_init(
                    nest,
                    ["cf_base", "cf_forced", "dcfl",
                     "dqcl", "qcl_forced", "qcl_tol"]
                )

            # Same schedules as the original implementation: dynamic
            # for the first nest, static for every other nest
            omp_trans = (OMP_PARALLEL_LOOP_DO_TRANS_DYNAMIC
                         if position == 0
                         else OMP_PARALLEL_LOOP_DO_TRANS_STATIC)

            # Element 0 of the walk is the nest itself, so element 1
            # is the first loop inside it
            omp_trans.apply(nest.walk(Loop)[1])
    except (TransformationError, IndexError) as err:
        logging.warning("OMPParallelLoopTrans failed: %s", err)

0 commit comments

Comments
 (0)