Skip to content

Commit 26d5f4c

Browse files
author
Vasileios Karakasis
authored
Merge pull request #2083 from jjotero/feat/perf-syntax
[feat] New syntax for performance checking
2 parents 7364e80 + 06813e9 commit 26d5f4c

File tree

19 files changed

+834
-188
lines changed

19 files changed

+834
-188
lines changed

cscs-checks/microbenchmarks/gpu/gpu_burn/gpu_burn_test.py

Lines changed: 31 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,11 @@
33
#
44
# SPDX-License-Identifier: BSD-3-Clause
55

6-
import os
76

87
import reframe as rfm
98
import reframe.utility.sanity as sn
9+
import reframe.utility.osext as osext
10+
from reframe.core.exceptions import SanityError
1011

1112
from hpctestlib.microbenchmarks.gpu.gpu_burn import GpuBurn
1213
import cscstests.microbenchmarks.gpu.hooks as hooks
@@ -24,30 +25,29 @@ class gpu_burn_check(GpuBurn):
2425
num_tasks = 0
2526
reference = {
2627
'dom:gpu': {
27-
'perf': (4115, -0.10, None, 'Gflop/s'),
28+
'min_perf': (4115, -0.10, None, 'Gflop/s'),
2829
},
2930
'daint:gpu': {
30-
'perf': (4115, -0.10, None, 'Gflop/s'),
31+
'min_perf': (4115, -0.10, None, 'Gflop/s'),
3132
},
3233
'arolla:cn': {
33-
'perf': (5861, -0.10, None, 'Gflop/s'),
34+
'min_perf': (5861, -0.10, None, 'Gflop/s'),
3435
},
3536
'tsa:cn': {
36-
'perf': (5861, -0.10, None, 'Gflop/s'),
37+
'min_perf': (5861, -0.10, None, 'Gflop/s'),
3738
},
3839
'ault:amda100': {
39-
'perf': (15000, -0.10, None, 'Gflop/s'),
40+
'min_perf': (15000, -0.10, None, 'Gflop/s'),
4041
},
4142
'ault:amdv100': {
42-
'perf': (5500, -0.10, None, 'Gflop/s'),
43+
'min_perf': (5500, -0.10, None, 'Gflop/s'),
4344
},
4445
'ault:intelv100': {
45-
'perf': (5500, -0.10, None, 'Gflop/s'),
46+
'min_perf': (5500, -0.10, None, 'Gflop/s'),
4647
},
4748
'ault:amdvega': {
48-
'perf': (3450, -0.10, None, 'Gflop/s'),
49+
'min_perf': (3450, -0.10, None, 'Gflop/s'),
4950
},
50-
'*': {'temp': (0, None, None, 'degC')}
5151
}
5252

5353
maintainers = ['AJ', 'TM']
@@ -63,16 +63,25 @@ def set_num_gpus_per_node(self):
6363
hooks.set_num_gpus_per_node(self)
6464

6565
@run_before('performance')
66-
def report_nid_with_smallest_flops(self):
67-
regex = r'\[(\S+)\] GPU\s+\d\(OK\): (\d+) GF/s'
68-
rptf = os.path.join(self.stagedir, sn.evaluate(self.stdout))
69-
self.nids = sn.extractall(regex, rptf, 1)
70-
self.flops = sn.extractall(regex, rptf, 2, float)
66+
def report_slow_nodes(self):
67+
'''Report the base perf metrics and also all the slow nodes.'''
68+
69+
# Only report the nodes that don't meet the perf reference
70+
with osext.change_dir(self.stagedir):
71+
key = f'{self.current_partition.fullname}:min_perf'
72+
if key in self.reference:
73+
regex = r'\[(\S+)\] GPU\s+\d\(OK\): (\d+) GF/s'
74+
nids = set(sn.extractall(regex, self.stdout, 1))
75+
76+
# Get the references
77+
ref, lt, ut, *_ = self.reference[key]
78+
79+
# Flag the slow nodes
80+
for nid in nids:
81+
try:
82+
node_perf = self.min_perf(nid)
83+
val = node_perf.evaluate(cache=True)
84+
sn.assert_reference(val, ref, lt, ut).evaluate()
85+
except SanityError:
86+
self.perf_variables[nid] = node_perf
7187

72-
# Find index of smallest flops and update reference dictionary to
73-
# include our patched units
74-
index = self.flops.evaluate().index(min(self.flops))
75-
unit = f'GF/s ({self.nids[index]})'
76-
for key, ref in self.reference.items():
77-
if not key.endswith(':temp'):
78-
self.reference[key] = (*ref[:3], unit)

cscs-checks/system/jobreport/gpu_report.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -93,8 +93,12 @@ def gpu_usage_sanity(self):
9393
sn.assert_ge(sn.min(time_reported), self.burn_time)
9494
])
9595

96+
@performance_function('nodes')
97+
def total_nodes_reported(self):
98+
return sn.count(self.nodes_reported)
99+
96100
@run_before('performance')
97-
def set_perf_patterns(self):
101+
def set_perf_variables(self):
98102
'''The number of reported nodes can be used as a perf metric.
99103
100104
For now, the low limit can go to zero, but this can be set to a more
@@ -103,9 +107,9 @@ def set_perf_patterns(self):
103107

104108
self.reference = {
105109
'*': {
106-
'nodes_reported': (self.num_tasks, self.perf_floor, 0, 'nodes')
110+
'nodes_reported': (self.num_tasks, self.perf_floor, 0)
107111
},
108112
}
109-
self.perf_patterns = {
110-
'nodes_reported': sn.count(self.nodes_reported)
113+
self.perf_variables = {
114+
'nodes_reported': self.total_nodes_reported()
111115
}

docs/deferrable_functions_reference.rst

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,20 @@ Deferrable Functions Reference
77
*Deferrable functions* are the functions whose execution may be postponed to a later time after they are called.
88
The key characteristic of these functions is that they store their arguments when they are called, and the execution itself does not occur until the function is evaluated either explicitly or implicitly.
99

10+
ReFrame provides an ample set of deferrable utilities and it also allows users to write their own deferrable functions when needed.
11+
Please refer to ":doc:`deferrables`" for a hands-on explanation of how deferrable functions work and how to create custom deferrable functions.
12+
13+
1014
Explicit evaluation of deferrable functions
1115
-------------------------------------------
1216

1317
Deferrable functions may be evaluated at any time by calling :func:`evaluate` on their return value or by passing the deferred function itself to the :func:`~reframe.utility.sanity.evaluate()` free function.
18+
These :func:`evaluate` functions take an optional :class:`bool` argument ``cache``, which can be used to cache the evaluation of the deferrable function.
19+
Hence, if caching is enabled on a given deferrable function, any subsequent calls to :func:`evaluate` will simply return the previously cached results.
20+
21+
.. versionchanged:: 3.8.0
22+
Support for cached evaluation was added.
23+
1424

1525
Implicit evaluation of deferrable functions
1626
-------------------------------------------
@@ -48,9 +58,24 @@ Currently ReFrame provides three broad categories of deferrable functions:
4858
They include, but are not limited to, functions to iterate over regex matches in a file, extracting and converting values from regex matches, computing statistical information on series of data etc.
4959

5060

51-
Users can write their own deferrable functions as well.
52-
The page ":doc:`deferrables`" explains in detail how deferrable functions work and how users can write their own.
61+
.. _deferrable-performance-functions:
62+
63+
64+
--------------------------------
65+
Deferrable performance functions
66+
--------------------------------
67+
68+
.. versionadded:: 3.8.0
69+
70+
Deferrable performance functions are a special type of deferrable function intended for measuring a given quantity.
71+
Therefore, this kind of deferrable function has an associated unit that can be used to interpret the values it returns.
72+
The unit of a deferrable performance function can be accessed through the public member :attr:`unit`.
73+
Regular deferrable functions can be promoted to deferrable performance functions using the :func:`~reframe.utility.sanity.make_performance_function` utility.
74+
This utility can also be used to create performance functions directly from any callable.
75+
5376

77+
List of deferrable functions and utilities
78+
------------------------------------------
5479

5580
.. py:decorator:: reframe.utility.sanity.deferrable(func)
5681

docs/regression_test_api.rst

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -322,6 +322,21 @@ Built-in functions
322322

323323
.. versionadded:: 3.7.0
324324

325+
.. py:decorator:: RegressionMixin.performance_function(unit, *, perf_key=None)
326+
327+
Decorate a member function as a performance function of the test.
328+
329+
This decorator converts the decorated method into a performance deferrable function (see ":ref:`deferrable-performance-functions`" for more details) whose evaluation is deferred to the performance stage of the regression test.
330+
The decorated function must take a single argument without a default value (i.e. ``self``) and any number of arguments with default values.
331+
A test may decorate multiple member functions as performance functions, where each of the decorated functions must be provided with the units of the performance quantities to be extracted from the test.
332+
These performance units must be of type :class:`str`.
333+
Any performance function may be overridden in a derived class and multiple bases may define their own performance functions.
334+
In the event of a name conflict, the derived class will follow Python's `MRO <https://docs.python.org/3/library/stdtypes.html#class.__mro__>`_ to choose the appropriate performance function.
335+
However, defining more than one performance function with the same name in the same class is disallowed.
336+
337+
The full set of performance functions of a regression test is stored under :attr:`~reframe.core.pipeline.RegressionTest.perf_variables` as key-value pairs, where, by default, the key is the name of the decorated member function, and the value is the deferred performance function itself.
338+
Optionally, the key under which a performance function is stored in :attr:`~reframe.core.pipeline.RegressionTest.perf_variables` can be customised by passing the desired key as the ``perf_key`` argument to this decorator.
339+
325340
.. py:decorator:: RegressionMixin.deferrable(func)
326341
327342
Converts the decorated method into a deferrable function.

hpctestlib/microbenchmarks/gpu/gpu_burn/__init__.py

Lines changed: 22 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -51,12 +51,6 @@ class GpuBurn(rfm.RegressionTest, pin_prefix=True):
5151
build_system = 'Make'
5252
executable = './gpu_burn.x'
5353
num_tasks_per_node = 1
54-
reference = {
55-
'*': {
56-
'perf': (0, None, None, 'Gflop/s'),
57-
'temp': (0, None, None, 'degC')
58-
}
59-
}
6054

6155
@run_before('compile')
6256
def set_gpu_build(self):
@@ -83,7 +77,6 @@ def set_gpu_build(self):
8377
raise ValueError('unknown gpu_build option')
8478

8579
@property
86-
@deferrable
8780
def num_tasks_assigned(self):
8881
'''Total number of times the gpu burn will run.
8982
@@ -103,17 +96,27 @@ def count_successful_burns(self):
10396
r'^\s*\[[^\]]*\]\s*GPU\s*\d+\(OK\)', self.stdout)
10497
), self.num_tasks_assigned)
10598

106-
@run_before('performance')
107-
def set_perf_patterns(self):
108-
'''Extract the minimum performance and maximum temperature recorded.
99+
def _extract_perf_metric(self, metric, nid=None):
100+
'''Utility to extract performance metrics.'''
109101

110-
The performance and temperature data are reported in Gflops/s and
111-
deg. Celsius respectively.
112-
'''
102+
if metric not in {'perf', 'temp'}:
103+
raise ValueError(
104+
f"unsupported value in 'metric' argument: {metric!r}"
105+
)
106+
107+
if nid is None:
108+
nid = r'[^\]]*'
109+
110+
patt = (rf'^\s*\[{nid}\]\s*GPU\s+\d+\(\S*\):\s+(?P<perf>\S*)\s+GF\/s'
111+
rf'\s+(?P<temp>\S*)\s+Celsius')
112+
return sn.extractall(patt, self.stdout, metric, float)
113+
114+
@performance_function('Gflop/s')
115+
def min_perf(self, nid=None):
116+
'''Lowest performance recorded.'''
117+
return sn.min(self._extract_perf_metric('perf', nid))
113118

114-
patt = (r'^\s*\[[^\]]*\]\s*GPU\s+\d+\(\S*\):\s+(?P<perf>\S*)\s+GF\/s'
115-
r'\s+(?P<temp>\S*)\s+Celsius')
116-
self.perf_patterns = {
117-
'perf': sn.min(sn.extractall(patt, self.stdout, 'perf', float)),
118-
'temp': sn.max(sn.extractall(patt, self.stdout, 'temp', float)),
119-
}
119+
@performance_function('degC')
120+
def max_temp(self, nid=None):
121+
'''Maximum temperature recorded.'''
122+
return sn.max(self._extract_perf_metric('temp', nid))

reframe/core/deferrable.py

Lines changed: 46 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -44,31 +44,36 @@ def __init__(self, fn, *args, **kwargs):
4444

4545
# We cache the value of the last evaluation inside a tuple.
4646
# We don't cache the value directly, because it can be any.
47-
48-
# NOTE: The cache for the moment is only used by
49-
# `__rfm_json_encode__`. Enabling caching in the evaluation is a
50-
# reasonable optimization, but might break compatibility, so it needs
51-
# to be thought thoroughly and communicated properly in the
52-
# documentation.
5347
self._cached = ()
48+
self._return_cached = False
49+
50+
def evaluate(self, cache=False):
51+
# Return the cached value (if any)
52+
if self._return_cached and not cache:
53+
return self._cached[0]
54+
elif cache:
55+
self._return_cached = cache
5456

55-
def evaluate(self):
5657
fn_args = []
5758
for arg in self._args:
5859
fn_args.append(
59-
arg.evaluate() if isinstance(arg, type(self)) else arg
60+
arg.evaluate() if isinstance(arg, _DeferredExpression) else arg
6061
)
6162

6263
fn_kwargs = {}
6364
for k, v in self._kwargs.items():
6465
fn_kwargs[k] = (
65-
v.evaluate() if isinstance(v, type(self)) else v
66+
v.evaluate() if isinstance(v, _DeferredExpression) else v
6667
)
6768

6869
ret = self._fn(*fn_args, **fn_kwargs)
69-
if isinstance(ret, type(self)):
70+
71+
# Evaluate the return for as long as a deferred expression returns
72+
# another deferred expression.
73+
while isinstance(ret, _DeferredExpression):
7074
ret = ret.evaluate()
7175

76+
# Cache the results for any subsequent evaluate calls.
7277
self._cached = (ret,)
7378
return ret
7479

@@ -355,3 +360,34 @@ def __abs__(a):
355360
@deferrable
356361
def __invert__(a):
357362
return ~a
363+
364+
365+
class _DeferredPerformanceExpression(_DeferredExpression):
366+
'''Represents a performance function whose evaluation has been deferred.
367+
368+
It extends the :class:`_DeferredExpression` class by adding the ``unit``
369+
attribute. This attribute represents the unit of the performance
370+
metric to be extracted by the performance function.
371+
'''
372+
373+
def __init__(self, fn, unit, *args, **kwargs):
374+
super().__init__(fn, *args, **kwargs)
375+
376+
if not isinstance(unit, str):
377+
raise TypeError(
378+
'performance units must be a string'
379+
)
380+
381+
self._unit = unit
382+
383+
@classmethod
384+
def construct_from_deferred_expr(cls, expr, unit):
385+
if not isinstance(expr, _DeferredExpression):
386+
raise TypeError("'expr' argument is not an instance of the "
387+
"_DeferredExpression class")
388+
389+
return cls(expr._fn, unit, *(expr._args), **(expr._kwargs))
390+
391+
@property
392+
def unit(self):
393+
return self._unit

reframe/core/hooks.py

Lines changed: 14 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -128,25 +128,6 @@ def __repr__(self):
128128
class HookRegistry:
129129
'''Global hook registry.'''
130130

131-
@classmethod
132-
def create(cls, namespace):
133-
'''Create a hook registry from a class namespace.
134-
135-
Hook functions have an `_rfm_attach` attribute that specify the stages
136-
of the pipeline where they must be attached. Dependencies will be
137-
resolved first in the post-setup phase if not assigned elsewhere.
138-
'''
139-
140-
local_hooks = util.OrderedSet()
141-
for v in namespace.values():
142-
if hasattr(v, '_rfm_attach'):
143-
local_hooks.add(Hook(v))
144-
elif hasattr(v, '_rfm_resolve_deps'):
145-
v._rfm_attach = ['post_setup']
146-
local_hooks.add(Hook(v))
147-
148-
return cls(local_hooks)
149-
150131
def __init__(self, hooks=None):
151132
self.__hooks = util.OrderedSet()
152133
if hooks is not None:
@@ -161,6 +142,20 @@ def __getattr__(self, name):
161142
def __iter__(self):
162143
return iter(self.__hooks)
163144

145+
def add(self, v):
146+
'''Add value to the hook registry if it meets the conditions.
147+
148+
Hook functions have an `_rfm_attach` attribute that specifies the stages
149+
of the pipeline where they must be attached. Dependencies will be
150+
resolved first in the post-setup phase if not assigned elsewhere.
151+
'''
152+
153+
if hasattr(v, '_rfm_attach'):
154+
self.__hooks.add(Hook(v))
155+
elif hasattr(v, '_rfm_resolve_deps'):
156+
v._rfm_attach = ['post_setup']
157+
self.__hooks.add(Hook(v))
158+
164159
def update(self, hooks, *, denied_hooks=None):
165160
'''Update the hook registry with the hooks from another hook registry.
166161

0 commit comments

Comments
 (0)