Skip to content

Commit 0669da2

Browse files
tinyendian and EdHone authored
Additional PC2 optimisations for NG-ARCH (#53)
Co-authored-by: EdHone <edward.hone@metoffice.gov.uk>
1 parent a7e6d68 commit 0669da2

File tree

13 files changed

+708
-0
lines changed

13 files changed

+708
-0
lines changed

CONTRIBUTORS.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,3 +32,4 @@
3232
| ukmo-juan-castillo | Juan M. Castillo | Met Office | 2026-01-23 |
3333
| Adrian-Lock | Adrian Lock | Met Office | 2026-01-09 |
3434
| thomasmelvin | Thomas Melvin | Met Office | 2026-01-15 |
35+
| tinyendian | Wolfgang Hayek | Earth Sciences New Zealand | 2026-02-02 |

applications/lfric_atm/build/psyclone_transmute_file_list.mk

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,9 @@ export PSYCLONE_PHYSICS_FILES = \
3232
kmkhz_9c_wtrac \
3333
lw_kernel_mod \
3434
mphys_kernel_mod \
35+
pc2_bl_forced_cu \
36+
pc2_bm_initiate \
37+
pc2_initiation_ctl \
3538
pc2_initiation_kernel_mod \
3639
pc2_conv_coupling_kernel_mod \
3740
sw_kernel_mod \
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
##############################################################################
2+
# (c) Crown copyright Met Office. All rights reserved.
3+
# The file LICENCE, distributed with this code, contains details of the terms
4+
# under which the code may be used.
5+
##############################################################################
6+
"""
7+
Optimisation script that replaces existing OpenMP parallelisation with
8+
PSyclone-generated directives to target loops over index i instead of
9+
index j. Trip count of j loops is 1 in LFRic, which prevents parallel
10+
execution.
11+
"""
12+
13+
import logging
14+
from psyclone.transformations import TransformationError
15+
from psyclone.psyir.nodes import Loop
16+
from transmute_psytrans.transmute_functions import (
17+
get_outer_loops,
18+
get_compiler,
19+
first_priv_red_init,
20+
OMP_PARALLEL_LOOP_DO_TRANS_DYNAMIC,
21+
OMP_PARALLEL_LOOP_DO_TRANS_STATIC
22+
)
23+
24+
25+
def trans(psyir):
    """
    Add OpenMP parallel-do directives inside every outermost loop nest.

    For each loop returned by get_outer_loops that has no Loop ancestor,
    the loop at index 1 of ``nest.walk(Loop)`` is parallelised
    (presumably the i loop directly inside the outer j loop - confirm
    against the Fortran source). The first nest receives the dynamic
    schedule transformation, every later nest the static one. When the
    compiler is 'cce', first_priv_red_init is called on the nest first.

    A TransformationError or IndexError stops processing of the
    remaining nests and is reported as a warning; it is not re-raised.

    :param psyir: PSyIR tree that is passed on to get_outer_loops.
    """

    # Keep only the loops that are not nested inside another loop
    top_level_nests = [nest for nest in get_outer_loops(psyir)
                       if not nest.ancestor(Loop)]

    try:
        for position, nest in enumerate(top_level_nests):
            # Workaround for the firstprivate variable issue
            if get_compiler() == 'cce':
                first_priv_red_init(
                    nest,
                    ["cf_base", "cf_forced", "dcfl",
                     "dqcl", "qcl_forced", "qcl_tol"],
                )

            # Replicate the schedules of the original implementation:
            # dynamic for the first nest, static for all others
            schedule_trans = (OMP_PARALLEL_LOOP_DO_TRANS_DYNAMIC
                              if position == 0
                              else OMP_PARALLEL_LOOP_DO_TRANS_STATIC)
            schedule_trans.apply(nest.walk(Loop)[1])
    except (TransformationError, IndexError) as failure:
        logging.warning("OMPParallelLoopTrans failed: %s", failure)
Lines changed: 145 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,145 @@
1+
##############################################################################
2+
# (c) Crown copyright Met Office. All rights reserved.
3+
# The file LICENCE, distributed with this code, contains details of the terms
4+
# under which the code may be used.
5+
##############################################################################
6+
"""
7+
Optimisation script that replaces existing OpenMP parallelisation with
8+
PSyclone-generated directives to target loops over index i instead of
9+
index j. Trip count of j loops is 1 in LFRic, which prevents parallel
10+
execution. Private variables need to be declared explicitly as PSyclone
11+
analysis currently misses a scalar variable that a subroutine modifies in
12+
a parallel region. PSyclone thread safety checks need to be overridden;
13+
the subroutines can be safely parallelised. Compiler directives used in
14+
the original code are re-inserted for performance and consistency of output.
15+
"""
16+
17+
import logging
18+
from psyclone.transformations import TransformationError
19+
from psyclone.psyir.nodes import (Loop, CodeBlock)
20+
from transmute_psytrans.transmute_functions import (
21+
set_pure_subroutines,
22+
get_outer_loops,
23+
mark_explicit_privates,
24+
get_compiler,
25+
first_priv_red_init,
26+
match_lhs_assignments,
27+
match_call_args,
28+
OMP_PARALLEL_REGION_TRANS,
29+
OMP_DO_LOOP_TRANS_STATIC
30+
)
31+
32+
# Names that are declared private explicitly for the last parallel region
private_variables = [
    "alphal",
    "alx",
    "i",
    "j",
    "k",
    "km1",
    "kp1",
    "mux",
    "tmp",
    "frac_init",
    "kk",
    "kkm1",
    "kkp1",
    "qc",
    "qc_points",
    "qsl",
    "tlx",
    "qsi",
    "idx",
    "deltacl_c",
    "deltacf_c",
    "deltaql_c",
    "cf_c",
    "cfl_c",
    "cff_c",
]

# Subroutines that are marked as "pure" before parallelisation
pure_subroutines = [
    "qsat",
    "qsat_mix",
    "qsat_wat",
    "qsat_wat_mix",
]

# Names for which PSyclone dependency errors can be ignored when they
# appear on the left-hand side of an assignment or as a call argument
false_dep_vars = ["qc_points", "idx", "tl_in",
                  "p_theta_levels", "qsi_lay", "qsl_lay"]
54+
55+
56+
class CompilerDirective:
    """
    Minimal stand-in for a compiler directive node.

    Used instead of fparser.two.Fortran2003.Directive to avoid an
    issue that will be resolved in an upcoming fparser release.
    """

    # Sentinel that precedes the directive text in the Fortran output
    _PREFIX = "!DIR$ "

    def __init__(self, directive):
        # Directive text without the prefix, e.g. "IVDEP"
        self.directive = directive

    def tofortran(self):
        """
        Return the directive text preceded by the ``!DIR$`` prefix
        """
        return self._PREFIX + self.directive
70+
71+
72+
def _insert_directive_before(loop, directive):
    """
    Insert a compiler directive as a sibling directly in front of a loop.

    :param loop: PSyIR loop node that the directive should precede.
    :param str directive: directive text without the ``!DIR$`` prefix.
    """
    cblock = CodeBlock([CompilerDirective(directive)],
                       CodeBlock.Structure.STATEMENT)
    insert_at = loop.parent.children.index(loop)
    loop.parent.children.insert(insert_at, cblock)


def _apply_static_omp_do(loop, find_false_deps):
    """
    Apply the static OpenMP do transformation to a loop.

    Dependency errors are ignored for those names from false_dep_vars
    that the matcher reports for the loop.

    :param loop: PSyIR loop node to be parallelised.
    :param find_false_deps: callable taking the loop and the list of \
        candidate names and returning the names that match (either \
        match_call_args or match_lhs_assignments).
    """
    ignore_deps_vars = find_false_deps(loop, false_dep_vars)
    options = {}
    if ignore_deps_vars:
        options["ignore_dependencies_for"] = ignore_deps_vars
    OMP_DO_LOOP_TRANS_STATIC.apply(loop, options)


def trans(psyir):
    """
    Apply OpenMP and Compiler Directives

    The first two outermost loops are enclosed in one parallel region,
    the third outermost loop in a second one. Failures in either region
    are logged as warnings and never raised. If fewer than three
    outermost loops are found, the second region is skipped with a
    warning instead of failing with an IndexError.

    :param psyir: PSyIR tree that is passed on to set_pure_subroutines \
        and get_outer_loops.
    """

    # Declare subroutines as pure to enable parallelisation
    # of the encompassing loops
    set_pure_subroutines(psyir, pure_subroutines)

    # Identify outer loops for setting up parallel regions
    outer_loops = [loop for loop in get_outer_loops(psyir)
                   if not loop.ancestor(Loop)]

    # Check if first OpenMP region can be parallelised and
    # apply directives
    try:
        OMP_PARALLEL_REGION_TRANS.validate(outer_loops[0:2])
        OMP_PARALLEL_REGION_TRANS.apply(outer_loops[0:2])
        OMP_DO_LOOP_TRANS_STATIC.apply(outer_loops[0])
        OMP_DO_LOOP_TRANS_STATIC.apply(outer_loops[1].walk(Loop)[1])
    except (TransformationError, IndexError) as err:
        logging.warning("Parallelisation of the 1st region failed: %s", err)

    # The second region is built around the third outer loop; without
    # it there is nothing to do, so report this like any other failure
    if len(outer_loops) < 3:
        logging.warning("Parallelisation of the 2nd region failed: %s",
                        "expected at least 3 outer loops, found "
                        + str(len(outer_loops)))
        return

    last_nest = outer_loops[2]

    # Declare private symbols for the last loop nest explicitly,
    # PSyclone misses one
    mark_explicit_privates(last_nest, private_variables)

    # Parallelise the second region and insert compiler directives
    # Add redundant variable initialisation to work around a known
    # PSyclone issue when using CCE
    try:
        use_cce = get_compiler() == 'cce'

        if use_cce:
            first_priv_red_init(last_nest, ["i", "j", "k"])

        OMP_PARALLEL_REGION_TRANS.validate([last_nest])
        OMP_PARALLEL_REGION_TRANS.apply(last_nest)

        # Insert before OpenMP directives to avoid PSyclone errors
        if use_cce:
            for loop in last_nest.walk(Loop)[3:5]:
                _insert_directive_before(loop, "NOFISSION")

            for loop in last_nest.walk(Loop)[13:16]:
                _insert_directive_before(loop, "IVDEP")

        # Eligible variables that appear in subroutine call arguments
        # lead to false dependency errors in the parallel loop
        # transformation that can be ignored
        for loop in last_nest.walk(Loop)[2:7]:
            _apply_static_omp_do(loop, match_call_args)

        # Eligible variables that appear on the LHS of assignment
        # expressions lead to false dependency errors as well
        for loop in last_nest.walk(Loop)[8:13:2]:
            _apply_static_omp_do(loop, match_lhs_assignments)

    except (TransformationError, IndexError) as err:
        logging.warning("Parallelisation of the 2nd region failed: %s", err)
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
##############################################################################
2+
# (c) Crown copyright Met Office. All rights reserved.
3+
# The file LICENCE, distributed with this code, contains details of the terms
4+
# under which the code may be used.
5+
##############################################################################
6+
"""
7+
Optimisation script that replaces existing OpenMP parallelisation with
8+
PSyclone-generated directives to parallelise additional loops.
9+
"""
10+
11+
import logging
12+
from psyclone.transformations import TransformationError
13+
from psyclone.psyir.nodes import Loop
14+
from transmute_psytrans.transmute_functions import (
15+
get_outer_loops,
16+
OMP_PARALLEL_LOOP_DO_TRANS_STATIC,
17+
)
18+
19+
20+
def trans(psyir):
    """
    Add static-schedule OpenMP parallel-do directives to outer loop nests.

    Outermost loops over ``k`` are parallelised directly. For outermost
    loops over ``j`` the loop at index 1 of ``nest.walk(Loop)`` is
    parallelised instead, because j loops have a trip count of 1. Loops
    over any other variable are left untouched.

    A TransformationError or IndexError stops processing of the
    remaining nests and is reported as a warning; it is not re-raised.

    :param psyir: PSyIR tree that is passed on to get_outer_loops.
    """

    # Keep only the loops that are not nested inside another loop
    top_level_nests = [nest for nest in get_outer_loops(psyir)
                       if not nest.ancestor(Loop)]

    try:
        for nest in top_level_nests:
            index_name = nest.variable.name
            if index_name == 'k':
                target = nest
            elif index_name == 'j':
                # Step past the j loop to the next loop in the nest
                target = nest.walk(Loop)[1]
            else:
                continue
            OMP_PARALLEL_LOOP_DO_TRANS_STATIC.apply(target)
    except (TransformationError, IndexError) as failure:
        logging.warning("OMPParallelLoopTrans failed: %s", failure)
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
##############################################################################
2+
# (c) Crown copyright Met Office. All rights reserved.
3+
# The file LICENCE, distributed with this code, contains details of the terms
4+
# under which the code may be used.
5+
##############################################################################
6+
"""
7+
Optimisation script that replaces existing OpenMP parallelisation with
8+
PSyclone-generated directives to target loops over index i instead of
9+
index j. Trip count of j loops is 1 in LFRic, which prevents parallel
10+
execution.
11+
"""
12+
13+
import logging
14+
from psyclone.transformations import TransformationError
15+
from psyclone.psyir.nodes import Loop
16+
from transmute_psytrans.transmute_functions import (
17+
get_outer_loops,
18+
get_compiler,
19+
first_priv_red_init,
20+
OMP_PARALLEL_LOOP_DO_TRANS_DYNAMIC,
21+
OMP_PARALLEL_LOOP_DO_TRANS_STATIC
22+
)
23+
24+
25+
def trans(psyir):
    """
    Add OpenMP parallel-do directives inside every outermost loop nest.

    For each loop returned by get_outer_loops that has no Loop ancestor,
    the loop at index 1 of ``nest.walk(Loop)`` is parallelised
    (presumably the i loop directly inside the outer j loop - confirm
    against the Fortran source). The first nest receives the dynamic
    schedule transformation, every later nest the static one. When the
    compiler is 'cce', first_priv_red_init is called on the nest first.

    A TransformationError or IndexError stops processing of the
    remaining nests and is reported as a warning; it is not re-raised.

    :param psyir: PSyIR tree that is passed on to get_outer_loops.
    """

    # Keep only the loops that are not nested inside another loop
    top_level_nests = [nest for nest in get_outer_loops(psyir)
                       if not nest.ancestor(Loop)]

    try:
        for position, nest in enumerate(top_level_nests):
            # Workaround for the firstprivate variable issue
            if get_compiler() == 'cce':
                first_priv_red_init(
                    nest,
                    ["cf_base", "cf_forced", "dcfl",
                     "dqcl", "qcl_forced", "qcl_tol"],
                )

            # Replicate the schedules of the original implementation:
            # dynamic for the first nest, static for all others
            schedule_trans = (OMP_PARALLEL_LOOP_DO_TRANS_DYNAMIC
                              if position == 0
                              else OMP_PARALLEL_LOOP_DO_TRANS_STATIC)
            schedule_trans.apply(nest.walk(Loop)[1])
    except (TransformationError, IndexError) as failure:
        logging.warning("OMPParallelLoopTrans failed: %s", failure)

0 commit comments

Comments
 (0)