Skip to content

Commit 1adb384

Browse files
author
Vasileios Karakasis
authored
Merge branch 'master' into ci-cli-options
2 parents 6124e80 + 0a6decc commit 1adb384

File tree

7 files changed

+78
-62
lines changed

7 files changed

+78
-62
lines changed

cscs-checks/system/jobreport/gpu_report.py

Lines changed: 18 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,11 @@
88
import time
99

1010
from reframe.core.exceptions import SanityError
11-
from hpctestlib.microbenchmarks.gpu.gpu_burn import GpuBurn
11+
from hpctestlib.microbenchmarks.gpu.gpu_burn import gpu_burn_check
1212

1313

1414
@rfm.simple_test
15-
class gpu_usage_report_check(GpuBurn):
15+
class gpu_usage_report_check(gpu_burn_check):
1616
'''Check the output from the job report.
1717
1818
This check uses the gpu burn app and checks that the job report produces
@@ -23,14 +23,10 @@ class gpu_usage_report_check(GpuBurn):
2323
'''
2424

2525
valid_systems = ['daint:gpu', 'dom:gpu']
26-
valid_prog_environs = ['PrgEnv-gnu']
2726
descr = 'Check GPU usage from job report'
2827
gpu_build = 'cuda'
29-
modules = ['craype-accel-nvidia60', 'cdt-cuda']
30-
num_tasks = 0
28+
num_tasks = 2
3129
num_gpus_per_node = 1
32-
burn_time = variable(int, value=10)
33-
executable_opts = ['-d', f'{burn_time}']
3430
perf_floor = variable(float, value=-0.2)
3531
tags = {'production'}
3632

@@ -44,7 +40,7 @@ def set_launcher_opts(self):
4440
self.job.launcher.options = ['-u']
4541

4642
@sanity_function
47-
def set_sanity_patterns(self):
43+
def assert_jobreport_success(self):
4844
'''Extend sanity and wait for the jobreport.
4945
5046
If a large number of nodes is used, the final jobreport output happens
@@ -59,57 +55,35 @@ def set_sanity_patterns(self):
5955
except SanityError:
6056
time.sleep(25)
6157

62-
return sn.all([
63-
self.count_successful_burns(), self.gpu_usage_sanity()
64-
])
58+
return self.assert_successful_burn_count(), self.gpu_usage_sanity()
6559

6660
@deferrable
6761
def gpu_usage_sanity(self):
6862
'''Verify that the jobreport output has sensible numbers.
6963
70-
This function asserts that the nodes reported are at least a subset of
71-
all nodes used by the gpu burn app. Also, the GPU usage is verified by
72-
assuming that in the worst case scenario, the usage is near 100% during
73-
the burn, and 0% outside the burn period. Lastly, the GPU usage time
74-
for each node is also asserted to be greater or equal than the burn
75-
time.
64+
The GPU usage is verified by assuming that in the worst case scenario,
65+
the usage is near 100% during the burn, and 0% outside the burn period.
66+
Lastly, the GPU usage time for each node is also asserted to be greater
67+
or equal than the burn time.
7668
'''
7769

78-
# Get set with all nodes
79-
patt = r'^\s*\[([^\]]*)\]\s*GPU\s*\d+\(OK\)'
80-
full_node_set = set(sn.extractall(patt, self.stdout, 1))
81-
8270
# Parse job report data
8371
patt = r'^\s*(\w*)\s*(\d+)\s*%\s*\d+\s*MiB\s*\d+:\d+:(\d+)'
8472
self.nodes_reported = sn.extractall(patt, self.stdout, 1)
73+
self.num_tasks_assigned = self.num_tasks * self.num_gpus_per_node
8574
usage = sn.extractall(patt, self.stdout, 2, int)
8675
time_reported = sn.extractall(patt, self.stdout, 3, int)
8776
return sn.all([
8877
sn.assert_ge(sn.count(self.nodes_reported), 1),
89-
set(self.nodes_reported).issubset(full_node_set),
9078
sn.all(
91-
map(lambda x, y: self.burn_time/x <= y, time_reported, usage)
79+
map(lambda x, y: self.duration/x <= y/100, time_reported, usage)
9280
),
93-
sn.assert_ge(sn.min(time_reported), self.burn_time)
81+
sn.assert_ge(sn.min(time_reported), self.duration)
9482
])
9583

96-
@performance_function('nodes')
97-
def total_nodes_reported(self):
98-
return sn.count(self.nodes_reported)
99-
100-
@run_before('performance')
101-
def set_perf_variables(self):
102-
'''The number of reported nodes can be used as a perf metric.
103-
104-
For now, the low limit can go to zero, but this can be set to a more
105-
restrictive value.
106-
'''
107-
108-
self.reference = {
109-
'*': {
110-
'nodes_reported': (self.num_tasks, self.perf_floor, 0)
111-
},
112-
}
113-
self.perf_variables = {
114-
'nodes_reported': self.total_nodes_reported()
115-
}
84+
@deferrable
85+
def assert_successful_burn_count(self):
86+
'''Assert that the expected successful burn count is reported.'''
87+
return sn.assert_eq(sn.count(sn.findall(r'^GPU\s*\d+\(OK\)',
88+
self.stdout)),
89+
self.num_tasks_assigned)

docs/manpage.rst

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -484,6 +484,10 @@ Options controlling ReFrame execution
484484

485485
Set variable ``VAR`` in all tests or optionally only in test ``TEST`` to ``VAL``.
486486

487+
The ``TEST`` prefix can also take the form ``[TEST.][FIXT.]*``, in which case ``VAR`` will be set in fixture ``FIXT`` of ``TEST``.
488+
Note that this syntax is recursive on fixtures, so a variable can be set in a fixture nested arbitrarily deep.
489+
The ``TEST`` prefix refers to the test class name, *not* the test name, but ``FIXT`` refers to the fixture name *inside* the referenced test.
490+
487491
Multiple variables can be set at the same time by passing this option multiple times.
488492
This option *cannot* change arbitrary test attributes, but only test variables declared with the :attr:`~reframe.core.pipeline.RegressionMixin.variable` built-in.
489493
If an attempt is made to change a nonexistent variable or a test parameter, a warning will be issued.
@@ -511,8 +515,6 @@ Options controlling ReFrame execution
511515
Conversions to arbitrary objects are also supported.
512516
See :class:`~reframe.utility.typecheck.ConvertibleType` for more details.
513517

514-
The optional ``TEST.`` prefix refers to the test class name, *not* the test name.
515-
516518
Variable assignments passed from the command line happen *before* the test is instantiated and are exactly equivalent to assigning a new value to the variable *at the end* of the test class body.
517519
This has a number of implications that users of this feature should be aware of:
518520

@@ -561,6 +563,10 @@ Options controlling ReFrame execution
561563

562564
Proper handling of boolean variables.
563565

566+
.. versionchanged:: 3.11.1
567+
568+
Allow setting variables in fixtures.
569+
564570

565571
.. option:: --skip-performance-check
566572

reframe/core/meta.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -535,6 +535,17 @@ def setvar(cls, name, value):
535535
536536
'''
537537

538+
if '.' in name:
539+
# `name` refers to a fixture variable
540+
fixtname, varname = name.split('.', maxsplit=1)
541+
try:
542+
fixt_space = super().__getattribute__('_rfm_fixture_space')
543+
except AttributeError:
544+
'''Catch early access attempt to the variable space.'''
545+
546+
if fixtname in fixt_space:
547+
return fixt_space[fixtname].cls.setvar(varname, value)
548+
538549
try:
539550
var_space = super().__getattribute__('_rfm_var_space')
540551
if name in var_space:

reframe/core/schedulers/pbs.py

Lines changed: 17 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -181,6 +181,13 @@ def _update_nodelist(self, job, nodespec):
181181
job._nodelist.sort()
182182

183183
def poll(self, *jobs):
184+
def output_ready(job):
185+
# We report a job as finished only when its stdout/stderr are
186+
# written back to the working directory
187+
stdout = os.path.join(job.workdir, job.stdout)
188+
stderr = os.path.join(job.workdir, job.stderr)
189+
return os.path.exists(stdout) and os.path.exists(stderr)
190+
184191
if jobs:
185192
# Filter out non-jobs
186193
jobs = [job for job in jobs if job is not None]
@@ -198,11 +205,12 @@ def poll(self, *jobs):
198205
# Otherwise, it will return with return code 0 and print information
199206
# only for the jobs it could find.
200207
if completed.returncode in (153, 35):
201-
self.log(f'Return code is {completed.returncode}: '
202-
f'assuming all jobs completed')
208+
self.log(f'Return code is {completed.returncode}')
203209
for job in jobs:
204210
job._state = 'COMPLETED'
205-
job._completed = True
211+
if job.cancelled or output_ready(job):
212+
self.log(f'Assuming job {job.jobid} completed')
213+
job._completed = True
206214

207215
return
208216

@@ -224,10 +232,12 @@ def poll(self, *jobs):
224232

225233
for job in jobs:
226234
if job.jobid not in jobinfo:
227-
self.log(f'Job {job.jobid} not known to scheduler, '
228-
f'assuming job completed')
235+
self.log(f'Job {job.jobid} not known to scheduler')
229236
job._state = 'COMPLETED'
230-
job._completed = True
237+
if job.cancelled or output_ready(job):
238+
self.log(f'Assuming job {job.jobid} completed')
239+
job._completed = True
240+
231241
continue
232242

233243
info = jobinfo[job.jobid]
@@ -259,10 +269,7 @@ def poll(self, *jobs):
259269

260270
# We report a job as finished only when its stdout/stderr are
261271
# written back to the working directory
262-
stdout = os.path.join(job.workdir, job.stdout)
263-
stderr = os.path.join(job.workdir, job.stderr)
264-
out_ready = os.path.exists(stdout) and os.path.exists(stderr)
265-
done = job.cancelled or out_ready
272+
done = job.cancelled or output_ready(job)
266273
if done:
267274
job._completed = True
268275
elif (job.state in ['QUEUED', 'HELD', 'WAITING'] and

reframe/core/variables.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -281,7 +281,7 @@ def __getattr__(self, name):
281281
def _check_is_defined(self):
282282
if not self.is_defined():
283283
raise ReframeSyntaxError(
284-
f'variable {self._name} is not assigned a value'
284+
f'variable {self._name!r} is not assigned a value'
285285
)
286286

287287
def __repr__(self):

unittests/resources/checks_unlisted/externalvars.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,19 +3,33 @@
33
import reframe.utility.typecheck as typ
44

55

6+
class Bacon(rfm.RunOnlyRegressionTest):
7+
bacon = variable(int, value=-1)
8+
executable = 'echo'
9+
sanity_patterns = sn.assert_true(1)
10+
11+
12+
class Eggs(rfm.RunOnlyRegressionTest):
13+
eggs = fixture(Bacon)
14+
executable = 'echo'
15+
sanity_patterns = sn.assert_true(1)
16+
17+
618
@rfm.simple_test
719
class external_x(rfm.RunOnlyRegressionTest):
820
valid_systems = ['*']
921
valid_prog_environs = ['*']
1022
foo = variable(int, value=1)
1123
ham = variable(typ.Bool, value=False)
24+
spam = fixture(Eggs)
1225
executable = 'echo'
1326

1427
@sanity_function
1528
def assert_foo(self):
1629
return sn.all([
1730
sn.assert_eq(self.foo, 3),
18-
sn.assert_true(self.ham)
31+
sn.assert_true(self.ham),
32+
sn.assert_eq(self.spam.eggs.bacon, 10)
1933
])
2034

2135

unittests/test_cli.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -837,13 +837,17 @@ def test_detect_host_topology_file(run_reframe, tmp_path):
837837
def test_external_vars(run_reframe):
838838
returncode, stdout, stderr = run_reframe(
839839
checkpath=['unittests/resources/checks_unlisted/externalvars.py'],
840-
more_options=['-S', 'external_x.foo=3', '-S', 'external_y.foo=2',
841-
'-S', 'foolist=3,4', '-S', 'bar=@none',
840+
more_options=['-S', 'external_x.foo=3',
842841
'-S', 'external_x.ham=true',
843-
'-S', 'external_y.baz=false']
842+
'-S', 'external_x.spam.eggs.bacon=10',
843+
'-S', 'external_y.foo=2',
844+
'-S', 'external_y.baz=false',
845+
'-S', 'foolist=3,4',
846+
'-S', 'bar=@none']
844847
)
848+
assert 'PASSED' in stdout
849+
assert 'Ran 6/6 test case(s)' in stdout
845850
assert 'Traceback' not in stdout
846-
assert 'Ran 2/2 test case(s)' in stdout
847851
assert 'Traceback' not in stderr
848852
assert returncode == 0
849853

0 commit comments

Comments
 (0)