Skip to content

Commit 27f1881

Browse files
authored
Merge pull request #3214 from stfc/3188_tighten_privatisation_validation
(closes #3188) Improve automatic array privatisation validation
2 parents 273a46a + 2f41b6f commit 27f1881

File tree

13 files changed

+216
-59
lines changed

13 files changed

+216
-59
lines changed

.github/workflows/nemo_tests.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,7 @@ jobs:
106106
. .runner_venv/bin/activate
107107
export PSYCLONE_NEMO_DIR=${GITHUB_WORKSPACE}/examples/nemo/scripts
108108
export NEMO_DIR=${PREFIX}/UKMO-NEMOv4
109+
export NEMOV4=1 # Enables specific NEMOV4 exclusions in the PSyclone transformation script
109110
cd $PSYCLONE_NEMO_DIR
110111
module load nvidia-hpcsdk/${NVFORTRAN_VERSION}
111112
module load hdf5/${HDF5_VERSION} netcdf-c/${NETCDF_C_VERSION} netcdf-fortran/${NETCDF_FORTRAN_VERSION}

.github/workflows/nemo_v5_tests.yml

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -120,8 +120,7 @@ jobs:
120120
mpirun -np 4 ./nemo
121121
tail run.stat
122122
# This was produced with gfortran, so we can do an exact diff
123-
# TODO #3112: Fix differences with baseline result
124-
# diff $PSYCLONE_NEMO_DIR/KGOs/run.stat.bench.gfortran.small.10steps run.stat
123+
diff $PSYCLONE_NEMO_DIR/KGOs/run.stat.bench.gfortran.small.10steps run.stat
125124
126125
- name: NEMO 5.0 nvidia passthrough
127126
# Only bother doing passthrough if this is a re-run of a previous test.
@@ -196,6 +195,7 @@ jobs:
196195
export PSYCLONE_HOME=${PWD}/.runner_venv
197196
export NEMO_DIR=/archive/psyclone-tests/latest-run/UKMO-NEMOv5
198197
export TEST_DIR=BENCH_OMP_THREADING_GCC
198+
export REPRODUCIBLE=1
199199
200200
# Set up FCM envvars to use psyclonefc and compile with OpenMP for CPU
201201
cd $NEMO_DIR
@@ -213,9 +213,7 @@ jobs:
213213
cd $NEMO_DIR/tests/${TEST_DIR}/EXP00
214214
cp $PSYCLONE_NEMO_DIR/KGOs/namelist_cfg_bench_small namelist_cfg
215215
OMP_NUM_THREADS=4 mpirun -np 1 ./nemo
216-
tail run.stat
217-
# TODO #3112: Fix differences with baseline result
218-
# diff $PSYCLONE_NEMO_DIR/KGOs/run.stat.bench.gfortran.small.10steps run.stat
216+
diff $PSYCLONE_NEMO_DIR/KGOs/run.stat.bench.gfortran.small.10steps run.stat
219217
export VAR_TIME=$(grep "local proces" timing.output | head -n 1 | awk '{print $4}' | tr -d s)
220218
echo "time=${VAR_TIME}" >> "${GITHUB_OUTPUT}"
221219

changelog

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
17) PR #3214 for #3188. Improves the validation when doing automatic
2+
array privatisation.
3+
14
16) PR #3187 for #3184. Improve logging when chasing imports and test
25
inining with the same imported symbol in the caller and the callee.
36

examples/nemo/scripts/omp_cpu_trans.py

Lines changed: 22 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@
4040
import os
4141
from utils import (
4242
insert_explicit_loop_parallelism, normalise_loops, add_profiling,
43-
enhance_tree_information, PARALLELISATION_ISSUES, PRIVATISATION_ISSUES,
43+
enhance_tree_information, PARALLELISATION_ISSUES,
4444
NEMO_MODULES_TO_IMPORT)
4545
from psyclone.psyir.nodes import Routine
4646
from psyclone.transformations import OMPLoopTrans
@@ -59,10 +59,20 @@
5959
# array privatisation is disabled.
6060
NEMOV4 = os.environ.get('NEMOV4', False)
6161

62-
CPU_REDUCTIONS = os.environ.get('CPU_REDUCTIONS', False)
62+
# By default, allow optimisations that may change the results, e.g. reductions
63+
REPRODUCIBLE = os.environ.get('REPRODUCIBLE', False)
6364

6465
# List of all files that psyclone will skip processing
6566
FILES_TO_SKIP = []
67+
if not NEMOV4:
68+
# TODO #3112: These produce diverging run.stat results in gcc NEMOv5 BENCH
69+
FILES_TO_SKIP = [
70+
"dynhpg.f90",
71+
"dynspg_ts.f90",
72+
"sbcssm.f90",
73+
"tramle.f90",
74+
"trazdf.f90",
75+
]
6676

6777
if PROFILING_ENABLED:
6878
# Fails with profiling enabled. issue #2723
@@ -83,6 +93,14 @@ def trans(psyir):
8393
if only_do_file and psyir.name != only_do_file:
8494
return
8595

96+
# Parallelising this file currently causes a noticeable slowdown
97+
if psyir.name.startswith("icethd"):
98+
return
99+
100+
# This file fails for gcc NEMOv5 BENCH
101+
if not NEMOV4 and psyir.name == "icedyn_rhg_evp.f90":
102+
return
103+
86104
omp_parallel_trans = None
87105
omp_loop_trans = OMPLoopTrans(omp_schedule="static")
88106
omp_loop_trans.omp_directive = "paralleldo"
@@ -112,7 +130,6 @@ def trans(psyir):
112130
region_directive_trans=omp_parallel_trans,
113131
loop_directive_trans=omp_loop_trans,
114132
collapse=False,
115-
privatise_arrays=(not NEMOV4 and
116-
psyir.name not in PRIVATISATION_ISSUES),
117-
enable_reductions=CPU_REDUCTIONS,
133+
privatise_arrays=not NEMOV4,
134+
enable_reductions=not REPRODUCIBLE,
118135
)

examples/nemo/scripts/omp_gpu_trans.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@
4141
from utils import (
4242
add_profiling, inline_calls, insert_explicit_loop_parallelism,
4343
normalise_loops, enhance_tree_information, PARALLELISATION_ISSUES,
44-
NEMO_MODULES_TO_IMPORT, PRIVATISATION_ISSUES)
44+
NEMO_MODULES_TO_IMPORT)
4545
from psyclone.psyir.nodes import Routine
4646
from psyclone.psyir.transformations import (
4747
OMPTargetTrans, OMPDeclareTargetTrans)
@@ -75,8 +75,7 @@
7575

7676
# List of all files that psyclone will skip processing
7777
FILES_TO_SKIP = [
78-
"vremap.f90", # TODO #2772
79-
"icefrm.f90", # Has unsupportet implicit symbol declaration
78+
"icefrm.f90", # Has an unsupported implicit symbol declaration
8079
]
8180

8281
NEMOV5_EXCLUSIONS = []
@@ -236,7 +235,7 @@ def trans(psyir):
236235
region_directive_trans=omp_target_trans,
237236
loop_directive_trans=omp_gpu_loop_trans,
238237
collapse=True,
239-
privatise_arrays=(psyir.name not in PRIVATISATION_ISSUES),
238+
privatise_arrays=True,
240239
asynchronous_parallelism=ASYNC_PARALLEL,
241240
uniform_intrinsics_only=REPRODUCIBLE,
242241
enable_reductions=not REPRODUCIBLE
@@ -252,7 +251,7 @@ def trans(psyir):
252251
insert_explicit_loop_parallelism(
253252
subroutine,
254253
loop_directive_trans=omp_cpu_loop_trans,
255-
privatise_arrays=(psyir.name not in PRIVATISATION_ISSUES),
254+
privatise_arrays=True,
256255
asynchronous_parallelism=ASYNC_PARALLEL
257256
)
258257

examples/nemo/scripts/passthrough.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,22 @@
3737
''' Process Nemo code with PSyclone but don't do any changes. This file is only
3838
needed to provide a FILES_TO_SKIP list. '''
3939

40+
import os
41+
42+
# A environment variable can inform if this is targeting NEMOv4
43+
NEMOV4 = os.environ.get('NEMOV4', False)
44+
4045
# List of all files that psyclone will skip processing
4146
FILES_TO_SKIP = []
47+
if not NEMOV4:
48+
# TODO #3112: These produce diverging run.stat results in gcc NEMOv5 BENCH
49+
FILES_TO_SKIP = [
50+
"dynhpg.f90",
51+
"dynspg_ts.f90",
52+
"sbcssm.f90",
53+
"tramle.f90",
54+
"trazdf.f90",
55+
]
4256

4357

4458
def trans(_):

examples/nemo/scripts/utils.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -180,10 +180,6 @@
180180
"traqsr.f90",
181181
]
182182

183-
PRIVATISATION_ISSUES = [
184-
"ldftra.f90", # Wrong runtime results
185-
]
186-
187183

188184
def _it_should_be(symbol, of_type, instance):
189185
''' Make sure that symbol has the datatype as provided.

src/psyclone/psyir/nodes/array_mixin.py

Lines changed: 28 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@
4040
''' This module contains the implementation of the abstract ArrayMixin. '''
4141

4242
import abc
43-
from typing import Tuple
43+
from typing import Tuple, Optional
4444

4545
from psyclone.core import SymbolicMaths
4646
from psyclone.errors import InternalError
@@ -496,32 +496,39 @@ def is_same_array(self, node) -> bool:
496496
return False
497497
return True
498498

499-
def is_full_range(self, index):
500-
'''Returns True if the specified array index is a Range Node that
501-
specifies all elements in this index. In the PSyIR this is
502-
specified by using LBOUND(name,index) for the lower bound of
503-
the range, UBOUND(name,index) for the upper bound of the range
504-
and "1" for the range step.
499+
def is_full_range(self, index: Optional[int] = None) -> bool:
500+
''' Returns whether the array access iterates over the whole
501+
associated array. Can optionally be provided a single index
502+
to check if it iterates over a whole dimension of the array.
505503
506-
:param int index: the array index to check.
504+
:param index: only check the given array index.
507505
508-
:returns: True if the access to this array index is a range \
509-
that specifies all index elements. Otherwise returns \
506+
:returns: True if the access to this array (or specified array
507+
dimension) iterates over all elements. Otherwise returns
510508
False.
511-
:rtype: bool
512509
513510
'''
514-
self._validate_index(index)
515-
516-
array_dimension = self.indices[index]
517-
if isinstance(array_dimension, Range):
518-
if self.is_lower_bound(index) and self.is_upper_bound(index):
519-
step = array_dimension.children[2]
520-
if (isinstance(step, Literal) and
511+
if index is not None:
512+
self._validate_index(index)
513+
indices_to_check = [index]
514+
else:
515+
indices_to_check = range(len(self.indices))
516+
517+
for idx in indices_to_check:
518+
array_dimension = self.indices[idx]
519+
# Check that it is a range going from the lower to the upper
520+
# bound with step 1
521+
if isinstance(array_dimension, Range):
522+
if self.is_lower_bound(idx) and self.is_upper_bound(idx):
523+
step = array_dimension.children[2]
524+
if (
525+
isinstance(step, Literal) and
521526
step.datatype.intrinsic == ScalarType.Intrinsic.INTEGER
522-
and str(step.value) == "1"):
523-
return True
524-
return False
527+
and str(step.value) == "1"
528+
):
529+
continue
530+
return False
531+
return True
525532

526533
@property
527534
def indices(self) -> Tuple[Node]:

src/psyclone/psyir/nodes/reference.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -300,6 +300,13 @@ def escapes_scope(
300300

301301
# Check if this instance is in the provided scope
302302
if not self.is_descendant_of(scope):
303+
# If the next_access is an array access that does not cover all
304+
# elements of the array, therefore, it has escaped the scope
305+
# because some array elements will still have the scope values.
306+
# pylint: disable=import-outside-toplevel
307+
from psyclone.psyir.nodes.array_mixin import ArrayMixin
308+
if isinstance(self, ArrayMixin):
309+
return not self.is_full_range()
303310
# If following the recursive calls through next_accesses()
304311
# it reaches a point outside the scope, return True (
305312
# it has escaped the scope), unless this is a write-only

src/psyclone/psyir/transformations/parallel_loop_trans.py

Lines changed: 45 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@
4949
from psyclone.domain.common.psylayer import PSyLoop
5050
from psyclone.psyir import nodes
5151
from psyclone.psyir.nodes import (
52-
Call, Loop, Reference, Routine,
52+
Call, Loop, Reference, Routine, Assignment, IfBlock,
5353
BinaryOperation, IntrinsicCall
5454
)
5555
from psyclone.psyir.tools import (
@@ -124,15 +124,50 @@ def _attempt_privatisation(loop, symbol_name, dry_run=False):
124124
if sym in loop.explicitly_private_symbols:
125125
return True
126126

127-
# Check that the symbol is not referenced following this loop (before
128-
# the loop is fine because we can use OpenMP/OpenACC first-private or
129-
# Fortran do concurrent local_init())
130-
refs_in_loop = filter(lambda x: x.symbol is sym, loop.walk(Reference))
131-
last_access = list(refs_in_loop)[-1]
132127
loop.compute_cached_abs_positions()
128+
129+
# Get the last access
130+
refs_in_loop = filter(lambda x: x.symbol is sym, loop.walk(Reference))
131+
refs_in_loop = list(refs_in_loop)
132+
last_access = refs_in_loop[-1]
133+
# If it's an assignment the last access is the one in the lhs
134+
if last_access.ancestor(Assignment):
135+
lhs = last_access.ancestor(Assignment).lhs
136+
if isinstance(lhs, Reference) and lhs.symbol is sym:
137+
last_access = lhs
138+
# If the value of the symbol is used after the loop, it cannot be
139+
# private
133140
if last_access.escapes_scope(loop):
134141
return False
135142

143+
# Also prevent cases when the first access in the loop is a read that
144+
# could come from the previous iteration
145+
while True:
146+
first_access = refs_in_loop[0]
147+
# If it's an assignment the first access is the one in the rhs
148+
if first_access.ancestor(Assignment):
149+
rhs = first_access.ancestor(Assignment).rhs
150+
refs = filter(lambda x: x.symbol is sym, rhs.walk(Reference))
151+
refs = list(refs)
152+
if refs:
153+
first_access = refs[0]
154+
if first_access.is_read:
155+
return False
156+
# If it is inside a conditional, there may be more entry points for
157+
# this symbol, so we look for the next 'first_access'
158+
inside_conditional = first_access.ancestor(IfBlock, limit=loop)
159+
if inside_conditional:
160+
following = inside_conditional.following_node()
161+
if not following:
162+
break
163+
# Skip al references in the condition that we already checked
164+
refs_in_loop = list(filter(
165+
lambda x: x.abs_position > following.abs_position,
166+
refs_in_loop
167+
))
168+
if not inside_conditional or not refs_in_loop:
169+
break
170+
136171
if not dry_run:
137172
loop.explicitly_private_symbols.add(sym)
138173

@@ -281,9 +316,10 @@ def validate(self, node, options=None, **kwargs):
281316
dry_run=True):
282317
errors.append(
283318
f"The write-write dependency in '{var_name}'"
284-
f" cannot be solved by array privatisation "
285-
f"because it is not a plain local array or "
286-
f"it is used after the loop.")
319+
f" cannot be solved by automatic array "
320+
f"privatisation. Use 'loop.explictly_private"
321+
f"_sybmols.add(sybmol)' if *YOU* can guarantee"
322+
f" that it is private.")
287323
continue
288324
# See if the scalar in question allows parallelisation of
289325
# the loop using reduction clauses.

0 commit comments

Comments
 (0)