Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CONTRIBUTORS.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,4 @@
| jennyhickson | Jenny Hickson | Met Office | 2025-12-10 |
| mo-marqh | mark Hedley | Met Office | 2025-12-11 |
| yaswant | Yaswant Pradhan | Met Office | 2025-12-16 |
| tinyendian | Wolfgang Hayek | ESNZ | 2025-12-16 |
3 changes: 3 additions & 0 deletions applications/lfric_atm/build/psyclone_transmute_file_list.mk
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@ export PSYCLONE_PHYSICS_FILES = mphys_kernel_mod \
bm_tau_kernel_mod \
gw_ussp_mod \
lw_kernel_mod \
pc2_bl_forced_cu \
pc2_bm_initiate \
pc2_initiation_ctl \
pc2_initiation_kernel_mod \
pc2_conv_coupling_kernel_mod \
sw_kernel_mod \
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
##############################################################################
# (c) Crown copyright 2025 Met Office. All rights reserved.
# The file LICENCE, distributed with this code, contains details of the terms
# under which the code may be used.
##############################################################################
"""
Optimisation script that replaces existing OpenMP parallelisation with
PSyclone-generated directives to target loops over index i instead of
index j. Trip count of j loops is 1 in LFRic, which prevents parallel
execution.
"""

import logging
from psyclone.transformations import TransformationError
from psyclone.psyir.nodes import Loop
from transmute_psytrans.transmute_functions import (
get_outer_loops,
get_compiler,
first_priv_red_init,
OMP_PARALLEL_LOOP_DO_TRANS_DYNAMIC,
OMP_PARALLEL_LOOP_DO_TRANS_STATIC
)


def trans(psyir):
    """
    Apply OpenMP Directives

    Each top-level loop nest reported by ``get_outer_loops`` receives an
    OpenMP parallel-do directive on the second loop returned by
    ``walk(Loop)`` for that nest (presumably the i-loop inside the j-loop,
    as outlined in the module description - confirm against the Fortran
    source). The first nest uses the dynamic-schedule transformation, all
    later nests use the static-schedule one.

    Failures are not fatal: a ``TransformationError`` or ``IndexError``
    stops the processing of the remaining nests and is logged as a warning.

    :param psyir: PSyIR tree that is handed over by PSyclone.
    """

    # Only keep loops that are not nested inside another loop
    top_level_nests = []
    for candidate in get_outer_loops(psyir):
        if not candidate.ancestor(Loop):
            top_level_nests.append(candidate)

    # Variables that need the redundant initialisation workaround for
    # the firstprivate issue when building with CCE
    cce_init_vars = ("cf_base", "cf_forced", "dcfl",
                     "dqcl", "qcl_forced", "qcl_tol")

    try:
        for position, nest in enumerate(top_level_nests):
            if get_compiler() == 'cce':
                # Hand over a fresh list for every nest
                first_priv_red_init(nest, list(cce_init_vars))

            # Replicate the schedules of the original implementation:
            # dynamic for the first nest, static for all others
            omp_trans = (OMP_PARALLEL_LOOP_DO_TRANS_DYNAMIC
                         if position == 0
                         else OMP_PARALLEL_LOOP_DO_TRANS_STATIC)
            omp_trans.apply(nest.walk(Loop)[1])
    except (TransformationError, IndexError) as exc:
        logging.warning("OMPParallelLoopTrans failed: %s", exc)
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
##############################################################################
# (c) Crown copyright 2025 Met Office. All rights reserved.
# The file LICENCE, distributed with this code, contains details of the terms
# under which the code may be used.
##############################################################################
"""
Optimisation script that replaces existing OpenMP parallelisation with
PSyclone-generated directives to target loops over index i instead of
index j. Trip count of j loops is 1 in LFRic, which prevents parallel
execution. Private variables need to be declared explicitly as PSyclone
analysis currently misses a scalar variable that a subroutine modifies in
a parallel region. PSyclone thread safety checks need to be overridden;
the subroutines can be safely parallelised. Compiler directives used in
the original code are re-inserted for performance and consistency of output.
"""

import logging
from psyclone.transformations import TransformationError
from psyclone.psyir.nodes import (Loop, CodeBlock)
from transmute_psytrans.transmute_functions import (
set_pure_subroutines,
get_outer_loops,
mark_explicit_privates,
get_compiler,
first_priv_red_init,
match_lhs_assignments,
OMP_PARALLEL_REGION_TRANS,
OMP_DO_LOOP_TRANS_STATIC
)

# Names that trans() hands to mark_explicit_privates() so that they are
# declared private in the last parallel region
private_variables = [
    "alphal", "alx", "i", "j", "k",
    "km1", "kp1", "mux", "tmp", "frac_init",
    "kk", "kkm1", "kkp1", "qc", "qc_points",
    "qsl", "tlx", "qsi", "idx", "deltacl_c",
    "deltacf_c", "deltaql_c", "cf_c", "cfl_c", "cff_c",
]

# Names of the subroutines that trans() hands to set_pure_subroutines()
# so that they are treated as "pure"
pure_subroutines = [
    "qsat",
    "qsat_mix",
    "qsat_wat",
    "qsat_wat_mix",
]

# Left-hand-side variables of assignments whose PSyclone dependency
# errors may be ignored when a loop is parallelised
false_dep_vars = ["qc_points", "idx"]


class CompilerDirective:
    """
    Minimal stand-in for fparser.two.Fortran2003.Directive.

    It works around an issue with the fparser class that is expected to
    be resolved in an upcoming fparser release. An instance only stores
    the directive text and renders it through ``tofortran()``.
    """

    # Sentinel that is written in front of every directive
    _PREFIX = "!DIR$ "

    def __init__(self, directive):
        # Directive text without the leading sentinel
        self.directive = directive

    def tofortran(self):
        """
        Return the directive text preceded by the "!DIR$ " sentinel
        """
        return self._PREFIX + self.directive


def _insert_directive_before(loop, directive):
    """
    Insert a "!DIR$" compiler directive as a sibling right before a loop

    The directive is wrapped in a statement CodeBlock and added to the
    children of the loop's parent at the position of the loop.

    :param loop: node that the directive has to precede; it needs to be
                 attached to a parent node.
    :param str directive: directive text without the "!DIR$ " prefix.
    """
    cblock = CodeBlock([CompilerDirective(directive)],
                       CodeBlock.Structure.STATEMENT)
    insert_at = loop.parent.children.index(loop)
    loop.parent.children.insert(insert_at, cblock)


def trans(psyir):
    """
    Apply OpenMP and Compiler Directives

    The transformation works in two stages:

    1. The first two top-level loop nests are enclosed in one OpenMP
       parallel region with static-schedule do directives.
    2. The third top-level loop nest gets explicit private variables,
       its own parallel region, optional compiler directives (CCE only)
       and static-schedule do directives on selected loops.

    Failures in either stage are logged as warnings instead of being
    raised. This includes the case where fewer than three top-level loop
    nests are found, which skips the second stage.

    :param psyir: PSyIR tree that is handed over by PSyclone.
    """

    # Declare subroutines as pure to enable parallelisation
    # of the encompassing loops
    set_pure_subroutines(psyir, pure_subroutines)

    # Identify outer loops for setting up parallel regions
    outer_loops = [loop for loop in get_outer_loops(psyir)
                   if not loop.ancestor(Loop)]

    # Check if first OpenMP region can be parallelised and
    # apply directives
    try:
        OMP_PARALLEL_REGION_TRANS.validate(outer_loops[0:2])
        OMP_PARALLEL_REGION_TRANS.apply(outer_loops[0:2])
        OMP_DO_LOOP_TRANS_STATIC.apply(outer_loops[0])
        OMP_DO_LOOP_TRANS_STATIC.apply(outer_loops[1].walk(Loop)[1])
    except (TransformationError, IndexError) as err:
        logging.warning("Parallelisation of the 1st region failed: %s", err)

    # The second region needs a third loop nest. Report its absence in
    # the same way as any other failure rather than letting an uncaught
    # IndexError abort the script.
    if len(outer_loops) < 3:
        logging.warning("Parallelisation of the 2nd region failed: "
                        "expected at least 3 outer loops, found %d",
                        len(outer_loops))
        return

    last_nest = outer_loops[2]

    # Declare private symbols for the last loop nest explicitly,
    # PSyclone misses one
    mark_explicit_privates(last_nest, private_variables)

    # Parallelise the second region and insert compiler directives
    # Add redundant variable initialisation to work around a known
    # PSyclone issue when using CCE
    try:
        using_cce = get_compiler() == 'cce'

        if using_cce:
            first_priv_red_init(last_nest, ["i", "j", "k"])

        OMP_PARALLEL_REGION_TRANS.validate(outer_loops[2:3])
        OMP_PARALLEL_REGION_TRANS.apply(last_nest)

        # Insert before OpenMP directives to avoid PSyclone errors
        # NOTE(review): the reviewed copy of this script had lost its
        # indentation; both directive loops are assumed to be CCE-only -
        # confirm against the original file.
        if using_cce:
            for loop in last_nest.walk(Loop)[3:5]:
                _insert_directive_before(loop, "NOFISSION")

            for loop in last_nest.walk(Loop)[13:16]:
                _insert_directive_before(loop, "IVDEP")

        for loop in last_nest.walk(Loop)[2:7]:
            OMP_DO_LOOP_TRANS_STATIC.apply(loop)

        for loop in last_nest.walk(Loop)[8:13:2]:
            # Check if any eligible variables appear on the LHS of
            # assignment expressions; these lead to false dependency
            # errors in the parallel loop transformation that can be
            # ignored
            ignore_deps_vars = match_lhs_assignments(loop, false_dep_vars)
            options = {}
            if len(ignore_deps_vars) > 0:
                options["ignore_dependencies_for"] = ignore_deps_vars

            OMP_DO_LOOP_TRANS_STATIC.apply(loop, options)

    except (TransformationError, IndexError) as err:
        logging.warning("Parallelisation of the 2nd region failed: %s", err)
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
##############################################################################
# (c) Crown copyright 2025 Met Office. All rights reserved.
# The file LICENCE, distributed with this code, contains details of the terms
# under which the code may be used.
##############################################################################
"""
Optimisation script that replaces existing OpenMP parallelisation with
PSyclone-generated directives to parallelise additional loops.
"""

import logging
from psyclone.transformations import TransformationError
from psyclone.psyir.nodes import Loop
from transmute_psytrans.transmute_functions import (
get_outer_loops,
OMP_PARALLEL_LOOP_DO_TRANS_STATIC,
)


def trans(psyir):
    """
    Apply OpenMP Directives

    Every top-level loop reported by ``get_outer_loops`` is inspected by
    the name of its loop variable:

    * ``k``: the loop itself gets a static-schedule parallel-do directive.
    * ``j``: the second loop returned by ``walk(Loop)`` for that nest gets
      the directive instead (j-loops have a trip count of 1).
    * anything else: the loop is left untouched.

    A ``TransformationError`` or ``IndexError`` stops the processing of
    the remaining loops and is logged as a warning.

    :param psyir: PSyIR tree that is handed over by PSyclone.
    """

    # Only keep loops in the subroutine that are not nested inside
    # another loop
    top_level_nests = []
    for candidate in get_outer_loops(psyir):
        if not candidate.ancestor(Loop):
            top_level_nests.append(candidate)

    try:
        for nest in top_level_nests:
            index_name = nest.variable.name

            # Select the loop that should carry the directive
            if index_name == 'k':
                target = nest
            elif index_name == 'j':
                target = nest.walk(Loop)[1]
            else:
                continue

            OMP_PARALLEL_LOOP_DO_TRANS_STATIC.apply(target)
    except (TransformationError, IndexError) as exc:
        logging.warning("OMPParallelLoopTrans failed: %s", exc)
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
##############################################################################
# (c) Crown copyright 2025 Met Office. All rights reserved.
# The file LICENCE, distributed with this code, contains details of the terms
# under which the code may be used.
##############################################################################
"""
Optimisation script that replaces existing OpenMP parallelisation with
PSyclone-generated directives to target loops over index i instead of
index j. Trip count of j loops is 1 in LFRic, which prevents parallel
execution.
"""

import logging
from psyclone.transformations import TransformationError
from psyclone.psyir.nodes import Loop
from transmute_psytrans.transmute_functions import (
get_outer_loops,
get_compiler,
first_priv_red_init,
OMP_PARALLEL_LOOP_DO_TRANS_DYNAMIC,
OMP_PARALLEL_LOOP_DO_TRANS_STATIC
)


def trans(psyir):
    """
    Apply OpenMP Directives

    Each top-level loop nest reported by ``get_outer_loops`` receives an
    OpenMP parallel-do directive on the second loop returned by
    ``walk(Loop)`` for that nest (presumably the i-loop inside the j-loop,
    as outlined in the module description - confirm against the Fortran
    source). The first nest uses the dynamic-schedule transformation, all
    later nests use the static-schedule one.

    Failures are not fatal: a ``TransformationError`` or ``IndexError``
    stops the processing of the remaining nests and is logged as a warning.

    :param psyir: PSyIR tree that is handed over by PSyclone.
    """

    # Only keep loops that are not nested inside another loop
    top_level_nests = []
    for candidate in get_outer_loops(psyir):
        if not candidate.ancestor(Loop):
            top_level_nests.append(candidate)

    # Variables that need the redundant initialisation workaround for
    # the firstprivate issue when building with CCE
    cce_init_vars = ("cf_base", "cf_forced", "dcfl",
                     "dqcl", "qcl_forced", "qcl_tol")

    try:
        for position, nest in enumerate(top_level_nests):
            if get_compiler() == 'cce':
                # Hand over a fresh list for every nest
                first_priv_red_init(nest, list(cce_init_vars))

            # Replicate the schedules of the original implementation:
            # dynamic for the first nest, static for all others
            omp_trans = (OMP_PARALLEL_LOOP_DO_TRANS_DYNAMIC
                         if position == 0
                         else OMP_PARALLEL_LOOP_DO_TRANS_STATIC)
            omp_trans.apply(nest.walk(Loop)[1])
    except (TransformationError, IndexError) as exc:
        logging.warning("OMPParallelLoopTrans failed: %s", exc)
Loading