Skip to content

Commit 6e1f162

Browse files
authored
Merge pull request #2839 from vkarak/feat/stress-testing
[feat] Add support for repeated testing through two new options: `--reruns` and `--duration`
2 parents 767b86d + 7c9449e commit 6e1f162

File tree

10 files changed

+324
-96
lines changed

10 files changed

+324
-96
lines changed

docs/manpage.rst

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -459,6 +459,20 @@ Options controlling ReFrame execution
459459
.. versionadded:: 3.11.0
460460

461461

462+
.. option:: --duration=TIMEOUT
463+
464+
Run the test session repeatedly until the specified timeout expires. This option cannot be combined with :option:`--reruns`.
465+
466+
``TIMEOUT`` can be specified in one of the following forms:
467+
468+
- ``<int>`` or ``<float>``: number of seconds
469+
- ``<days>d<hours>h<minutes>m<seconds>s``: a string denoting days, hours, minutes and/or seconds.
470+
471+
At the end, failures from every run will be reported and, similarly, the failure statistics printed by the :option:`--failure-stats` option will include all runs.
472+
473+
.. versionadded:: 4.2
474+
475+
462476
.. option:: --exec-order=ORDER
463477

464478
Impose an execution order for the independent tests.
@@ -531,6 +545,24 @@ Options controlling ReFrame execution
531545

532546
.. versionadded:: 3.12.0
533547

548+
549+
.. option:: --reruns=N
550+
551+
Rerun the whole test session ``N`` times.
552+
553+
In total, the selected tests will run ``N+1`` times, since the first run does not count as a rerun.
554+
555+
At the end, failures from every run will be reported and, similarly, the failure statistics printed by the :option:`--failure-stats` option will include all runs.
556+
557+
Although similar to :option:`--repeat`, this option behaves differently.
558+
This option repeats the *whole* test session multiple times.
559+
All the tests of the session will finish before a new run is started.
560+
The :option:`--repeat` option, on the other hand, generates clones of the selected tests and schedules them for running in a single session.
561+
As a result, all the test clones will run (by default) concurrently.
562+
563+
.. versionadded:: 4.2
564+
565+
534566
.. option:: --restore-session [REPORT1[,REPORT2,...]]
535567

536568
Restore a testing session that has run previously.

reframe/core/exceptions.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,10 @@ class FailureLimitError(ReframeError):
101101
'''Raised when the limit of test failures has been reached.'''
102102

103103

104+
class RunSessionTimeout(ReframeError):
105+
'''Raised when the maximum duration for a test session expires.'''
106+
107+
104108
class AbortTaskError(ReframeError):
105109
'''Raised by the runtime inside a regression task to denote that it has
106110
been aborted due to an external reason (e.g., keyboard interrupt, fatal
@@ -355,6 +359,12 @@ def is_severe(exc_type, exc_value, tb):
355359
return not is_user_error(exc_type, exc_value, tb)
356360

357361

362+
def is_warning(exc_type, exc_value, tb):
363+
'''Check whether this exception can be treated as a warning.'''
364+
365+
return isinstance(exc_value, (RunSessionTimeout, KeyboardInterrupt))
366+
367+
358368
def what(exc_type, exc_value, tb):
359369
'''A short description of the error.'''
360370

reframe/core/schedulers/local.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@ def _kill_all(self, job):
9898
except (ProcessLookupError, PermissionError):
9999
# The process group may already be dead or assigned to a different
100100
# group, so ignore this error
101-
self.log(f'pid {job.jobid} already dead or assigned elsewhere')
101+
self.log(f'pid {job.jobid} already dead')
102102
finally:
103103
# Close file handles
104104
job.f_stdout.close()
@@ -107,8 +107,15 @@ def _kill_all(self, job):
107107

108108
def _term_all(self, job):
109109
'''Send SIGTERM to all the processes of the spawned job.'''
110-
os.killpg(job.jobid, signal.SIGTERM)
111-
job._signal = signal.SIGTERM
110+
try:
111+
os.killpg(job.jobid, signal.SIGTERM)
112+
job._signal = signal.SIGTERM
113+
except (ProcessLookupError, PermissionError):
114+
# Job has finished already, close file handles
115+
self.log(f'pid {job.jobid} already dead')
116+
job.f_stdout.close()
117+
job.f_stderr.close()
118+
job._state = 'FAILURE'
112119

113120
def cancel(self, job):
114121
'''Cancel job.

reframe/frontend/cli.py

Lines changed: 32 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,6 @@
3131
import reframe.utility.osext as osext
3232
import reframe.utility.typecheck as typ
3333

34-
3534
from reframe.frontend.testgenerators import (distribute_tests,
3635
getallnodes, repeat_tests)
3736
from reframe.frontend.executors.policies import (SerialExecutionPolicy,
@@ -403,6 +402,10 @@ def main():
403402
help=('Distribute the selected single-node jobs on every node that'
404403
'is in STATE (default: "idle"')
405404
)
405+
run_options.add_argument(
406+
'--duration', action='store', metavar='TIMEOUT',
407+
help='Run the test session repeatedly for the specified duration'
408+
)
406409
run_options.add_argument(
407410
'--exec-order', metavar='ORDER', action='store',
408411
choices=['name', 'random', 'rname', 'ruid', 'uid'],
@@ -439,6 +442,10 @@ def main():
439442
'--repeat', action='store', metavar='N',
440443
help='Repeat selected tests N times'
441444
)
445+
run_options.add_argument(
446+
'--reruns', action='store', metavar='N', default=0,
447+
help='Rerun the whole test session N times', type=int
448+
)
442449
run_options.add_argument(
443450
'--restore-session', action='store', nargs='?', const='',
444451
metavar='REPORT',
@@ -1300,11 +1307,21 @@ def module_unuse(*paths):
13001307
if options.maxfail < 0:
13011308
raise errors.CommandLineError(
13021309
f'--maxfail should be a non-negative integer: '
1303-
f'{options.maxfail!r}'
1310+
f'{options.maxfail}'
1311+
)
1312+
1313+
if options.reruns and options.duration:
1314+
raise errors.CommandLineError(
1315+
f"'--reruns' option cannot be combined with '--duration'"
1316+
)
1317+
1318+
if options.reruns < 0:
1319+
raise errors.CommandLineError(
1320+
f"'--reruns' should be a non-negative integer: {options.reruns}"
13041321
)
13051322

13061323
runner = Runner(exec_policy, printer, options.max_retries,
1307-
options.maxfail)
1324+
options.maxfail, options.reruns, options.duration)
13081325
try:
13091326
time_start = time.time()
13101327
session_info['time_start'] = time.strftime(
@@ -1319,18 +1336,21 @@ def module_unuse(*paths):
13191336
session_info['time_elapsed'] = time_end - time_start
13201337

13211338
# Print a retry report if we did any retries
1322-
if runner.stats.failed(run=0):
1339+
if options.max_retries and runner.stats.failed(run=0):
13231340
printer.info(runner.stats.retry_report())
13241341

13251342
# Print a failure report if we had failures in the last run
13261343
success = True
13271344
if runner.stats.failed():
13281345
success = False
13291346
runner.stats.print_failure_report(
1330-
printer, not options.distribute
1347+
printer, not options.distribute,
1348+
options.duration or options.reruns
13311349
)
13321350
if options.failure_stats:
1333-
runner.stats.print_failure_stats(printer)
1351+
runner.stats.print_failure_stats(
1352+
printer, options.duration or options.reruns
1353+
)
13341354

13351355
if options.performance_report:
13361356
printer.info(runner.stats.performance_report())
@@ -1412,7 +1432,12 @@ def module_unuse(*paths):
14121432
except (Exception, KeyboardInterrupt, errors.ReframeFatalError):
14131433
exc_info = sys.exc_info()
14141434
tb = ''.join(traceback.format_exception(*exc_info))
1415-
printer.error(f'run session stopped: {errors.what(*exc_info)}')
1435+
message = f'run session stopped: {errors.what(*exc_info)}'
1436+
if errors.is_warning(*exc_info):
1437+
printer.warning(message)
1438+
else:
1439+
printer.error(message)
1440+
14161441
if errors.is_exit_request(*exc_info):
14171442
# Print stack traces for exit requests only when TOO verbose
14181443
printer.debug2(tb)

0 commit comments

Comments
 (0)