Skip to content
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CONTRIBUTORS.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,4 @@
| mo-lottieturner | Lottie Turner | Met Office | 2026-01-27 |
| andrewcoughtrie | Andrew Coughtrie | Met Office | 2026-01-28 |
| tommbendall | Thomas Bendall | Met Office | 2026-01-13 |
| tinyendian | Wolfgang Hayek | Earth Sciences New Zealand | 2026-02-02 |
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@ export PSYCLONE_PHYSICS_FILES = \
gw_ussp_mod \
lw_kernel_mod \
mphys_kernel_mod \
pc2_bl_forced_cu \
pc2_bm_initiate \
pc2_initiation_ctl \
pc2_initiation_kernel_mod \
pc2_conv_coupling_kernel_mod \
sw_kernel_mod \
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
##############################################################################
# (c) Crown copyright Met Office. All rights reserved.
# The file LICENCE, distributed with this code, contains details of the terms
# under which the code may be used.
##############################################################################
"""
Optimisation script that replaces existing OpenMP parallelisation with
PSyclone-generated directives to target loops over index i instead of
index j. Trip count of j loops is 1 in LFRic, which prevents parallel
execution.
"""

import logging
from psyclone.transformations import TransformationError
from psyclone.psyir.nodes import Loop
from transmute_psytrans.transmute_functions import (
get_outer_loops,
get_compiler,
first_priv_red_init,
OMP_PARALLEL_LOOP_DO_TRANS_DYNAMIC,
OMP_PARALLEL_LOOP_DO_TRANS_STATIC
)


def trans(psyir):
    """
    Apply OpenMP parallel-do directives to the top-level loop nests.

    For every outer loop that has no Loop ancestor, the transformation is
    applied to the second Loop node returned by walking that nest
    (presumably the i-loop inside the j-loop, as described in the module
    docstring). The first nest uses the dynamic-schedule transformation,
    every later nest the static one. A TransformationError or IndexError
    stops the processing of the remaining nests and is logged as a warning.

    :param psyir: PSyIR tree passed to the script.
    """
    # Names handed to the firstprivate workaround when building with CCE
    workaround_names = ("cf_base", "cf_forced", "dcfl",
                        "dqcl", "qcl_forced", "qcl_tol")

    # Keep only the loops that are not nested inside another loop
    top_level_nests = [nest for nest in get_outer_loops(psyir)
                       if not nest.ancestor(Loop)]

    try:
        for position, nest in enumerate(top_level_nests):
            # Workaround for the firstprivate variable issue
            if get_compiler() == 'cce':
                first_priv_red_init(nest, list(workaround_names))

            # Replicate the schedules of the original implementation:
            # dynamic for the first nest, static for all others
            transformation = (OMP_PARALLEL_LOOP_DO_TRANS_DYNAMIC
                              if position == 0
                              else OMP_PARALLEL_LOOP_DO_TRANS_STATIC)
            transformation.apply(nest.walk(Loop)[1])
    except (TransformationError, IndexError) as err:
        logging.warning("OMPParallelLoopTrans failed: %s", err)
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
##############################################################################
# (c) Crown copyright Met Office. All rights reserved.
# The file LICENCE, distributed with this code, contains details of the terms
# under which the code may be used.
##############################################################################
"""
Optimisation script that replaces existing OpenMP parallelisation with
PSyclone-generated directives to target loops over index i instead of
index j. Trip count of j loops is 1 in LFRic, which prevents parallel
execution. Private variables need to be declared explicitly as PSyclone
analysis currently misses a scalar variable that a subroutine modifies in
a parallel region. PSyclone thread safety checks need to be overridden;
the subroutines can be safely parallelised. Compiler directives used in
the original code are re-inserted for performance and consistency of output.
"""

import logging
from psyclone.transformations import TransformationError
from psyclone.psyir.nodes import (Loop, CodeBlock)
from transmute_psytrans.transmute_functions import (
set_pure_subroutines,
get_outer_loops,
mark_explicit_privates,
get_compiler,
first_priv_red_init,
match_lhs_assignments,
match_call_args,
OMP_PARALLEL_REGION_TRANS,
OMP_DO_LOOP_TRANS_STATIC
)

# Names that are marked explicitly as private in the last loop nest
private_variables = [
    "alphal", "alx",
    "i", "j", "k", "km1", "kp1",
    "mux", "tmp", "frac_init",
    "kk", "kkm1", "kkp1",
    "qc", "qc_points", "qsl", "tlx", "qsi", "idx",
    "deltacl_c", "deltacf_c", "deltaql_c",
    "cf_c", "cfl_c", "cff_c",
]

# Subroutines that are declared "pure" before any loop is parallelised
pure_subroutines = [
    "qsat",
    "qsat_mix",
    "qsat_wat",
    "qsat_wat_mix",
]

# Variables for which PSyclone dependency errors can be ignored when they
# appear on the left-hand side of an assignment or as a call argument
false_dep_vars = [
    "qc_points", "idx", "tl_in",
    "p_theta_levels", "qsi_lay", "qsl_lay",
]


class CompilerDirective:
    """
    Lightweight replacement for fparser.two.Fortran2003.Directive.

    Used to work around an issue with the fparser class that will be
    resolved in an upcoming fparser release. Only the ``tofortran``
    method is provided.
    """

    # Sentinel that precedes every directive in the generated Fortran
    _PREFIX = "!DIR$ "

    def __init__(self, directive):
        # Directive text without the "!DIR$ " prefix, e.g. "IVDEP"
        self.directive = directive

    def tofortran(self):
        """
        Return the directive text preceded by the "!DIR$ " prefix.
        """
        return self._PREFIX + self.directive


def _insert_directive_before(loop, directive):
    """
    Insert a "!DIR$ <directive>" code block immediately before ``loop``.
    """
    cblock = CodeBlock([CompilerDirective(directive)],
                       CodeBlock.Structure.STATEMENT)
    insert_at = loop.parent.children.index(loop)
    loop.parent.children.insert(insert_at, cblock)


def _apply_static_do(loop, ignore_deps_vars):
    """
    Apply the static OpenMP do transformation to ``loop``, ignoring
    dependencies for ``ignore_deps_vars`` when that list is not empty.
    """
    options = {}
    if len(ignore_deps_vars) > 0:
        options["ignore_dependencies_for"] = ignore_deps_vars
    OMP_DO_LOOP_TRANS_STATIC.apply(loop, options)


def trans(psyir):
    """
    Apply OpenMP and Compiler Directives

    Two parallel regions are created. The first encloses the first two
    outer loop nests, the second encloses the third outer loop nest.
    Failure to transform one region is logged as a warning and does not
    prevent the attempt on the other region.

    :param psyir: PSyIR tree passed to the script.
    """

    # Declare subroutines as pure to enable parallelisation
    # of the encompassing loops
    set_pure_subroutines(psyir, pure_subroutines)

    # Identify outer loops for setting up parallel regions
    outer_loops = [loop for loop in get_outer_loops(psyir)
                   if not loop.ancestor(Loop)]

    # Check if first OpenMP region can be parallelised and
    # apply directives
    try:
        OMP_PARALLEL_REGION_TRANS.validate(outer_loops[0:2])
        OMP_PARALLEL_REGION_TRANS.apply(outer_loops[0:2])
        OMP_DO_LOOP_TRANS_STATIC.apply(outer_loops[0])
        OMP_DO_LOOP_TRANS_STATIC.apply(outer_loops[1].walk(Loop)[1])
    except (TransformationError, IndexError) as err:
        logging.warning("Parallelisation of the 1st region failed: %s", err)

    # Parallelise the second region and insert compiler directives
    try:
        # Looking up the third nest inside the try block means that a
        # routine with fewer than three outer loop nests is reported as
        # a warning instead of aborting the script with an IndexError
        last_nest = outer_loops[2]

        # Declare private symbols for the last loop nest explicitly,
        # PSyclone misses one
        mark_explicit_privates(last_nest, private_variables)

        using_cce = get_compiler() == 'cce'

        # Add redundant variable initialisation to work around a known
        # PSyclone issue when using CCE
        if using_cce:
            first_priv_red_init(last_nest, ["i", "j", "k"])

        OMP_PARALLEL_REGION_TRANS.validate(outer_loops[2:3])
        OMP_PARALLEL_REGION_TRANS.apply(last_nest)

        # Insert before OpenMP directives to avoid PSyclone errors
        if using_cce:
            for loop in last_nest.walk(Loop)[3:5]:
                _insert_directive_before(loop, "NOFISSION")

            for loop in last_nest.walk(Loop)[13:16]:
                _insert_directive_before(loop, "IVDEP")

        for loop in last_nest.walk(Loop)[2:7]:
            # Check if any eligible variables appear in subroutine
            # call arguments; these lead to false dependency errors
            # in the parallel loop transformation that can be
            # ignored
            _apply_static_do(loop, match_call_args(loop, false_dep_vars))

        for loop in last_nest.walk(Loop)[8:13:2]:
            # Check if any eligible variables appear on the LHS of
            # assignment expressions to ignore false dependency errors
            _apply_static_do(loop,
                             match_lhs_assignments(loop, false_dep_vars))

    except (TransformationError, IndexError) as err:
        logging.warning("Parallelisation of the 2nd region failed: %s", err)
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
##############################################################################
# (c) Crown copyright Met Office. All rights reserved.
# The file LICENCE, distributed with this code, contains details of the terms
# under which the code may be used.
##############################################################################
"""
Optimisation script that replaces existing OpenMP parallelisation with
PSyclone-generated directives to parallelise additional loops.
"""

import logging
from psyclone.transformations import TransformationError
from psyclone.psyir.nodes import Loop
from transmute_psytrans.transmute_functions import (
get_outer_loops,
OMP_PARALLEL_LOOP_DO_TRANS_STATIC,
)


def trans(psyir):
    """
    Apply OpenMP parallel-do directives with a static schedule.

    Top-level loops over ``k`` are parallelised directly. For top-level
    loops over ``j`` the second Loop node returned by walking the nest is
    parallelised instead. Loops over any other variable are left
    untouched. A TransformationError or IndexError stops the processing
    of the remaining loops and is logged as a warning.

    :param psyir: PSyIR tree passed to the script.
    """
    # Outer loops of the subroutine that have no enclosing loop
    top_level_loops = [candidate for candidate in get_outer_loops(psyir)
                       if not candidate.ancestor(Loop)]

    try:
        for candidate in top_level_loops:
            index_name = candidate.variable.name
            if index_name == 'k':
                target = candidate
            elif index_name == 'j':
                # j-loops have a trip count of 1, so use the loop inside
                target = candidate.walk(Loop)[1]
            else:
                continue
            OMP_PARALLEL_LOOP_DO_TRANS_STATIC.apply(target)
    except (TransformationError, IndexError) as err:
        logging.warning("OMPParallelLoopTrans failed: %s", err)
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
##############################################################################
# (c) Crown copyright Met Office. All rights reserved.
# The file LICENCE, distributed with this code, contains details of the terms
# under which the code may be used.
##############################################################################
"""
Optimisation script that replaces existing OpenMP parallelisation with
PSyclone-generated directives to target loops over index i instead of
index j. Trip count of j loops is 1 in LFRic, which prevents parallel
execution.
"""

import logging
from psyclone.transformations import TransformationError
from psyclone.psyir.nodes import Loop
from transmute_psytrans.transmute_functions import (
get_outer_loops,
get_compiler,
first_priv_red_init,
OMP_PARALLEL_LOOP_DO_TRANS_DYNAMIC,
OMP_PARALLEL_LOOP_DO_TRANS_STATIC
)


def trans(psyir):
    """
    Apply OpenMP parallel-do directives to the top-level loop nests.

    Each outer loop without a Loop ancestor is handled in turn: the
    second Loop node found by walking the nest receives a parallel-do
    directive, with a dynamic schedule for the first nest and a static
    schedule for the rest. When the compiler is 'cce', the firstprivate
    workaround is applied to the nest beforehand. The first
    TransformationError or IndexError ends the processing and is logged
    as a warning.

    :param psyir: PSyIR tree passed to the script.
    """
    # Loops that are not contained in any other loop
    nests = [item for item in get_outer_loops(psyir)
             if not item.ancestor(Loop)]

    try:
        for number, nest in enumerate(nests):
            if get_compiler() == 'cce':
                # Workaround for the firstprivate variable issue
                first_priv_red_init(
                    nest,
                    ["cf_base", "cf_forced", "dcfl",
                     "dqcl", "qcl_forced", "qcl_tol"],
                )

            # Schedules mirror the original implementation
            if number != 0:
                chosen = OMP_PARALLEL_LOOP_DO_TRANS_STATIC
            else:
                chosen = OMP_PARALLEL_LOOP_DO_TRANS_DYNAMIC
            chosen.apply(nest.walk(Loop)[1])
    except (TransformationError, IndexError) as err:
        logging.warning("OMPParallelLoopTrans failed: %s", err)
Loading