Skip to content

Commit 2727d8c

Browse files
author
Vasileios Karakasis
authored
Merge pull request #1538 from rsarm/feat/restart-from-jsonreport
[feat] Add ability to restore a test session and rerun selected tests
2 parents 39aa59a + b8eacc4 commit 2727d8c

23 files changed

+812
-233
lines changed

docs/manpage.rst

Lines changed: 32 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,8 @@ After all tests in the search path have been loaded, they are first filtered by
6262
Any test that is not valid for the current system will be filtered out.
6363
The current system is either auto-selected or explicitly specified with the :option:`--system` option.
6464
Tests can be filtered by different attributes and there are specific command line options for achieving this.
65-
65+
A common characteristic of all test filtering options is that if a test is selected, then all its dependencies will be selected, too, regardless of whether they match the filtering criteria or not.
66+
This happens recursively so that if test ``T1`` depends on ``T2`` and ``T2`` depends on ``T3``, then selecting ``T1`` would also select ``T2`` and ``T3``.
6667

6768
.. option:: -t, --tag=TAG
6869

@@ -116,6 +117,15 @@ Tests can be filtered by different attributes and there are specific command lin
116117
Tests may or may not make use of it.
117118

118119

120+
.. option:: --failed
121+
122+
Select only the failed test cases from a previous run.
123+
This option can only be used in combination with the :option:`--restore-session` option.
124+
To rerun the failed cases from the last run, you can use ``reframe --restore-session --failed -r``.
125+
126+
.. versionadded:: 3.4
127+
128+
119129
.. option:: --skip-system-check
120130

121131
Do not filter tests against the selected system.
@@ -196,7 +206,7 @@ Options controlling ReFrame output
196206

197207
This option can also be set using the :envvar:`RFM_STAGE_DIR` environment variable or the :js:attr:`stagedir` system configuration parameter.
198208

199-
.. option:: --timestamp[=TIMEFMT]
209+
.. option:: --timestamp [TIMEFMT]
200210

201211
Append a timestamp to the output and stage directory prefixes.
202212
``TIMEFMT`` can be any valid :manpage:`strftime(3)` time format.
@@ -312,6 +322,25 @@ Options controlling ReFrame execution
312322
.. versionadded:: 3.2
313323

314324

325+
.. option:: --restore-session [REPORT]
326+
327+
Restore a testing session that has run previously.
328+
``REPORT`` is a run report file generated by ReFrame.
329+
If ``REPORT`` is not given, ReFrame will pick the last report file found in the default location of report files (see the :option:`--report-file` option).
330+
If passed alone, this option will simply rerun all the test cases that have run previously based on the report file data.
331+
It is more useful to combine this option with any of the `test filtering <#test-filtering>`__ options, in which case only the selected test cases will be executed.
332+
The difference in the test selection process when using this option is that the dependencies of the selected tests will not be selected for execution, as they normally would, but will be restored instead.
333+
For example, if test ``T1`` depends on ``T2`` and ``T2`` depends on ``T3``, then running ``reframe -n T1 -r`` would cause both ``T2`` and ``T3`` to run.
334+
However, by doing ``reframe -n T1 --restore-session -r``, only ``T1`` would run and its immediate dependency ``T2`` would be restored.
335+
This is useful when you have deep test dependencies or some of the tests in the dependency chain are very time consuming.
336+
337+
.. note::
338+
In order for a test case to be restored, its stage directory must be present.
339+
This is not a problem when rerunning a failed case, since the stage directories of its dependencies are automatically kept, but if you want to rerun a successful test case, you should make sure to have run with the :option:`--keep-stage-files` option.
340+
341+
.. versionadded:: 3.4
342+
343+
315344
----------------------------------
316345
Options controlling job submission
317346
----------------------------------
@@ -463,7 +492,7 @@ Miscellaneous options
463492

464493
This option can also be set using the :envvar:`RFM_CONFIG_FILE` environment variable.
465494

466-
.. option:: --show-config[=PARAM]
495+
.. option:: --show-config [PARAM]
467496

468497
Show the value of configuration parameter ``PARAM`` as this is defined for the currently selected system and exit.
469498
The parameter value is printed in JSON format.

reframe/core/config.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,9 @@ def add_sticky_option(self, option, value):
107107
def remove_sticky_option(self, option):
108108
self._sticky_options.pop(option, None)
109109

110+
def is_sticky_option(self, option):
111+
return option in self._sticky_options
112+
110113
@_normalize_syntax({'.*/.*modules$': normalize_module_list})
111114
def get(self, option, default=None):
112115
'''Retrieve value of option.

reframe/core/environments.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
import reframe.core.fields as fields
1010
import reframe.utility as util
11+
import reframe.utility.jsonext as jsonext
1112
import reframe.utility.typecheck as typ
1213

1314

@@ -26,7 +27,7 @@ def normalize_module_list(modules):
2627
return ret
2728

2829

29-
class Environment:
30+
class Environment(jsonext.JSONSerializable):
3031
'''This class abstracts away an environment to run regression tests.
3132
3233
It is simply a collection of modules to be loaded and environment variables

reframe/core/pipeline.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
import reframe.core.logging as logging
2727
import reframe.core.runtime as rt
2828
import reframe.utility as util
29+
import reframe.utility.jsonext as jsonext
2930
import reframe.utility.osext as osext
3031
import reframe.utility.sanity as sn
3132
import reframe.utility.typecheck as typ
@@ -125,7 +126,7 @@ def _wrapped(*args, **kwargs):
125126
return _wrapped
126127

127128

128-
class RegressionTest(metaclass=RegressionTestMeta):
129+
class RegressionTest(jsonext.JSONSerializable, metaclass=RegressionTestMeta):
129130
'''Base class for regression tests.
130131
131132
All regression tests must eventually inherit from this class.
@@ -1809,6 +1810,10 @@ def __eq__(self, other):
18091810
def __hash__(self):
18101811
return hash(self.name)
18111812

1813+
def __rfm_json_decode__(self, json):
1814+
# 'tags' are decoded as list, so we convert them to a set
1815+
self.tags = set(json['tags'])
1816+
18121817

18131818
class RunOnlyRegressionTest(RegressionTest, special=True):
18141819
'''Base class for run-only regression tests.

reframe/core/schedulers/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
import reframe.core.fields as fields
1414
import reframe.core.runtime as runtime
1515
import reframe.core.shell as shell
16+
import reframe.utility.jsonext as jsonext
1617
import reframe.utility.typecheck as typ
1718
from reframe.core.exceptions import JobError, JobNotStartedError
1819
from reframe.core.launchers import JobLauncher
@@ -111,7 +112,7 @@ def log(self, message, level=DEBUG2):
111112
getlogger().log(level, f'[S] {self.registered_name}: {message}')
112113

113114

114-
class Job:
115+
class Job(jsonext.JSONSerializable):
115116
'''A job descriptor.
116117
117118
A job descriptor is created by the framework after the "setup" phase and

reframe/core/systems.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,16 +4,16 @@
44
# SPDX-License-Identifier: BSD-3-Clause
55

66
import json
7-
import re
87

98
import reframe.utility as utility
9+
import reframe.utility.jsonext as jsonext
1010
from reframe.core.backends import (getlauncher, getscheduler)
1111
from reframe.core.logging import getlogger
1212
from reframe.core.modules import ModulesSystem
1313
from reframe.core.environments import (Environment, ProgEnvironment)
1414

1515

16-
class SystemPartition:
16+
class SystemPartition(jsonext.JSONSerializable):
1717
'''A representation of a system partition inside ReFrame.
1818
1919
.. warning::
@@ -237,7 +237,7 @@ def __str__(self):
237237
return json.dumps(self.json(), indent=2)
238238

239239

240-
class System:
240+
class System(jsonext.JSONSerializable):
241241
'''A representation of a system inside ReFrame.
242242
243243
.. warning::

reframe/frontend/cli.py

Lines changed: 110 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -23,13 +23,16 @@
2323
import reframe.frontend.argparse as argparse
2424
import reframe.frontend.dependencies as dependencies
2525
import reframe.frontend.filters as filters
26+
import reframe.frontend.runreport as runreport
2627
import reframe.utility.jsonext as jsonext
2728
import reframe.utility.osext as osext
28-
from reframe.frontend.executors import Runner, generate_testcases
29+
30+
31+
from reframe.frontend.printer import PrettyPrinter
32+
from reframe.frontend.loader import RegressionCheckLoader
2933
from reframe.frontend.executors.policies import (SerialExecutionPolicy,
3034
AsynchronousExecutionPolicy)
31-
from reframe.frontend.loader import RegressionCheckLoader
32-
from reframe.frontend.printer import PrettyPrinter
35+
from reframe.frontend.executors import Runner, generate_testcases
3336

3437

3538
def format_check(check, check_deps, detailed=False):
@@ -119,23 +122,6 @@ def list_checks(testcases, printer, detailed=False):
119122
printer.info(f'Found {len(checks)} check(s)')
120123

121124

122-
def generate_report_filename(filepatt):
123-
if '{sessionid}' not in filepatt:
124-
return filepatt
125-
126-
search_patt = os.path.basename(filepatt).replace('{sessionid}', r'(\d+)')
127-
new_id = -1
128-
basedir = os.path.dirname(filepatt) or '.'
129-
for filename in os.listdir(basedir):
130-
match = re.match(search_patt, filename)
131-
if match:
132-
found_id = int(match.group(1))
133-
new_id = max(found_id, new_id)
134-
135-
new_id += 1
136-
return filepatt.format(sessionid=new_id)
137-
138-
139125
def logfiles_message():
140126
log_files = logging.log_files()
141127
msg = 'Log file(s) saved in: '
@@ -260,6 +246,10 @@ def main():
260246
help=('Select checks with at least one '
261247
'programming environment matching PATTERN')
262248
)
249+
select_options.add_argument(
250+
'--failed', action='store_true',
251+
help="Select failed test cases (only when '--restore-session' is used)"
252+
)
263253
select_options.add_argument(
264254
'--gpu-only', action='store_true',
265255
help='Select only GPU checks'
@@ -326,6 +316,11 @@ def main():
326316
help='Set the maximum number of times a failed regression test '
327317
'may be retried (default: 0)'
328318
)
319+
run_options.add_argument(
320+
'--restore-session', action='store', nargs='?', const='',
321+
metavar='REPORT',
322+
help='Restore a testing session from REPORT file'
323+
)
329324
run_options.add_argument(
330325
'--flex-alloc-nodes', action='store',
331326
dest='flex_alloc_nodes', metavar='{all|STATE|NUM}', default=None,
@@ -586,10 +581,53 @@ def main():
586581
printer.debug(format_env(options.env_vars))
587582

588583
# Setup the check loader
584+
if options.restore_session is not None:
585+
# We need to load the failed checks only from a report
586+
if options.restore_session:
587+
filename = options.restore_session
588+
else:
589+
filename = runreport.next_report_filename(
590+
osext.expandvars(site_config.get('general/0/report_file')),
591+
new=False
592+
)
593+
594+
report = runreport.load_report(filename)
595+
check_search_path = list(report.slice('filename', unique=True))
596+
check_search_recursive = False
597+
598+
# If `-c` or `-R` are passed explicitly outside the configuration
599+
# file, override the values set from the report file
600+
if site_config.is_sticky_option('general/check_search_path'):
601+
printer.warning(
602+
'Ignoring check search path set in the report file: '
603+
'search path set explicitly in the command-line or '
604+
'the environment'
605+
)
606+
check_search_path = site_config.get(
607+
'general/0/check_search_path'
608+
)
609+
610+
if site_config.is_sticky_option('general/check_search_recursive'):
611+
printer.warning(
612+
'Ignoring check search recursive option from the report file: '
613+
'option set explicitly in the command-line or the environment'
614+
)
615+
check_search_recursive = site_config.get(
616+
'general/0/check_search_recursive'
617+
)
618+
619+
else:
620+
check_search_recursive = site_config.get(
621+
'general/0/check_search_recursive'
622+
)
623+
check_search_path = site_config.get('general/0/check_search_path')
624+
589625
loader = RegressionCheckLoader(
590-
load_path=site_config.get('general/0/check_search_path'),
591-
recurse=site_config.get('general/0/check_search_recursive'),
592-
ignore_conflicts=site_config.get('general/0/ignore_check_conflicts')
626+
load_path=check_search_path,
627+
recurse=check_search_recursive,
628+
ignore_conflicts=site_config.get(
629+
'general/0/ignore_check_conflicts'
630+
)
593631
)
594632

595633
def print_infoline(param, value):
@@ -599,7 +637,7 @@ def print_infoline(param, value):
599637
session_info = {
600638
'cmdline': ' '.join(sys.argv),
601639
'config_file': rt.site_config.filename,
602-
'data_version': '1.1',
640+
'data_version': runreport.DATA_VERSION,
603641
'hostname': socket.gethostname(),
604642
'prefix_output': rt.output_prefix,
605643
'prefix_stage': rt.stage_prefix,
@@ -683,6 +721,34 @@ def print_infoline(param, value):
683721
elif options.cpu_only:
684722
testcases = filter(filters.have_cpu_only(), testcases)
685723

724+
testcases = list(testcases)
725+
printer.verbose(
726+
f'Filtering test cases(s) by other attributes: '
727+
f'{len(testcases)} remaining'
728+
)
729+
730+
# Filter in failed cases
731+
if options.failed:
732+
if options.restore_session is None:
733+
printer.error(
734+
"the option '--failed' can only be used "
735+
"in combination with the '--restore-session' option"
736+
)
737+
sys.exit(1)
738+
739+
def _case_failed(t):
740+
rec = report.case(*t)
741+
if rec and rec['result'] == 'failure':
742+
return True
743+
else:
744+
return False
745+
746+
testcases = list(filter(_case_failed, testcases))
747+
printer.verbose(
748+
f'Filtering successful test case(s): '
749+
f'{len(testcases)} remaining'
750+
)
751+
686752
# Prepare for running
687753
printer.debug('Building and validating the full test DAG')
688754
testgraph, skipped_cases = dependencies.build_deps(testcases_all)
@@ -697,12 +763,22 @@ def print_infoline(param, value):
697763
dependencies.validate_deps(testgraph)
698764
printer.debug('Full test DAG:')
699765
printer.debug(dependencies.format_deps(testgraph))
766+
767+
restored_cases = []
700768
if len(testcases) != len(testcases_all):
701-
testgraph = dependencies.prune_deps(testgraph, testcases)
769+
testgraph = dependencies.prune_deps(
770+
testgraph, testcases,
771+
max_depth=1 if options.restore_session is not None else None
772+
)
702773
printer.debug('Pruned test DAG')
703774
printer.debug(dependencies.format_deps(testgraph))
775+
if options.restore_session is not None:
776+
testgraph, restored_cases = report.restore_dangling(testgraph)
704777

705-
testcases = dependencies.toposort(testgraph)
778+
testcases = dependencies.toposort(
779+
testgraph,
780+
is_subgraph=options.restore_session is not None
781+
)
706782
printer.verbose(f'Final number of test cases: {len(testcases)}')
707783

708784
# Disable hooks
@@ -848,7 +924,7 @@ def module_unuse(*paths):
848924
session_info['time_start'] = time.strftime(
849925
'%FT%T%z', time.localtime(time_start),
850926
)
851-
runner.runall(testcases)
927+
runner.runall(testcases, restored_cases)
852928
finally:
853929
time_end = time.time()
854930
session_info['time_end'] = time.strftime(
@@ -887,9 +963,14 @@ def module_unuse(*paths):
887963
})
888964
json_report = {
889965
'session_info': session_info,
890-
'runs': run_stats
966+
'runs': run_stats,
967+
'restored_cases': []
891968
}
892-
report_file = generate_report_filename(report_file)
969+
if options.restore_session is not None:
970+
for c in restored_cases:
971+
json_report['restored_cases'].append(report.case(*c))
972+
973+
report_file = runreport.next_report_filename(report_file)
893974
try:
894975
with open(report_file, 'w') as fp:
895976
jsonext.dump(json_report, fp, indent=2)

0 commit comments

Comments
 (0)