Skip to content

Commit 004d3a1

Browse files
author
Vasileios Karakasis
authored
Merge pull request #1124 from ekouts/bugfix/failed_cleanup
[bugfix] Fix infinite loop bug when cleanup phase fails
2 parents 4dc7d2e + 4ada425 commit 004d3a1

File tree

5 files changed

+62
-57
lines changed

5 files changed

+62
-57
lines changed

reframe/frontend/executors/policies.py

Lines changed: 26 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
1+
import contextlib
12
import itertools
23
import math
34
import sys
45
import time
6+
57
from datetime import datetime
68

79
from reframe.core.exceptions import (TaskDependencyError, TaskExit)
@@ -10,6 +12,16 @@
1012
TaskEventListener, ABORT_REASONS)
1113

1214

15+
def _cleanup_all(tasks, *args, **kwargs):
16+
for task in tasks:
17+
if task.ref_count == 0:
18+
with contextlib.suppress(TaskExit):
19+
task.cleanup(*args, **kwargs)
20+
21+
# Remove cleaned up tests
22+
tasks[:] = [t for t in tasks if t.ref_count]
23+
24+
1325
class SerialExecutionPolicy(ExecutionPolicy, TaskEventListener):
1426
def __init__(self):
1527
super().__init__()
@@ -66,19 +78,6 @@ def runcase(self, case):
6678
raise
6779
except BaseException:
6880
task.fail(sys.exc_info())
69-
finally:
70-
self.printer.status('FAIL' if task.failed else 'OK',
71-
task.check.info(), just='right')
72-
73-
def _cleanup_all(self):
74-
for task in self._retired_tasks:
75-
if task.ref_count == 0:
76-
task.cleanup(not self.keep_stage_files)
77-
78-
# Remove cleaned up tests
79-
self._retired_tasks[:] = [
80-
t for t in self._retired_tasks if t.ref_count
81-
]
8281

8382
def on_task_setup(self, task):
8483
pass
@@ -90,18 +89,22 @@ def on_task_exit(self, task):
9089
pass
9190

9291
def on_task_failure(self, task):
93-
pass
92+
if task.failed_stage == 'cleanup':
93+
self.printer.status('ERROR', task.check.info(), just='right')
94+
else:
95+
self.printer.status('FAIL', task.check.info(), just='right')
9496

9597
def on_task_success(self, task):
98+
self.printer.status('OK', task.check.info(), just='right')
9699
# update reference count of dependencies
97100
for c in task.testcase.deps:
98101
self._task_index[c].ref_count -= 1
99102

100-
self._cleanup_all()
103+
_cleanup_all(self._retired_tasks, not self.keep_stage_files)
101104

102105
def exit(self):
103106
# Clean up all remaining tasks
104-
self._cleanup_all()
107+
_cleanup_all(self._retired_tasks, not self.keep_stage_files)
105108

106109

107110
class PollRateFunction:
@@ -196,8 +199,11 @@ def on_task_run(self, task):
196199
self._running_tasks.append(task)
197200

198201
def on_task_failure(self, task):
199-
self._remove_from_running(task)
200-
self.printer.status('FAIL', task.check.info(), just='right')
202+
if task.failed_stage == 'cleanup':
203+
self.printer.status('ERROR', task.check.info(), just='right')
204+
else:
205+
self._remove_from_running(task)
206+
self.printer.status('FAIL', task.check.info(), just='right')
201207

202208
def on_task_success(self, task):
203209
self.printer.status('OK', task.check.info(), just='right')
@@ -290,16 +296,6 @@ def runcase(self, case):
290296
self._failall(e)
291297
raise
292298

293-
def _cleanup_all(self):
294-
for task in self._retired_tasks:
295-
if task.ref_count == 0:
296-
task.cleanup(not self.keep_stage_files)
297-
298-
# Remove cleaned up tests
299-
self._retired_tasks[:] = [
300-
t for t in self._retired_tasks if t.ref_count
301-
]
302-
303299
def _poll_tasks(self):
304300
'''Update the counts of running checks per partition.'''
305301
getlogger().debug('updating counts for running test cases')
@@ -324,10 +320,8 @@ def _finalize_all(self):
324320
break
325321

326322
getlogger().debug('finalizing task: %s' % task.check.info())
327-
try:
323+
with contextlib.suppress(TaskExit):
328324
self._finalize_task(task)
329-
except TaskExit:
330-
pass
331325

332326
def _finalize_task(self, task):
333327
if not self.skip_sanity_check:
@@ -395,7 +389,7 @@ def exit(self):
395389
self._finalize_all()
396390
self._setup_all()
397391
self._reschedule_all()
398-
self._cleanup_all()
392+
_cleanup_all(self._retired_tasks, not self.keep_stage_files)
399393
t_elapsed = (datetime.now() - t_start).total_seconds()
400394
real_rate = num_polls / t_elapsed
401395
getlogger().debug(

reframe/frontend/printer.py

Lines changed: 1 addition & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -41,23 +41,13 @@ def status(self, status, message='', just=None, level=logging.INFO):
4141
status_stripped = status.strip().lower()
4242
if status_stripped == 'skip':
4343
status = color.colorize(status, color.YELLOW)
44-
elif status_stripped in ['fail', 'failed']:
44+
elif status_stripped in ['fail', 'failed', 'error']:
4545
status = color.colorize(status, color.RED)
4646
else:
4747
status = color.colorize(status, color.GREEN)
4848

4949
logging.getlogger().log(level, '[ %s ] %s' % (status, message))
5050

51-
def result(self, check, partition, environ, success):
52-
if success:
53-
result_str = 'OK'
54-
else:
55-
result_str = 'FAIL'
56-
57-
self.status(
58-
result_str, '%s on %s using %s' %
59-
(check.name, partition.fullname, environ.name), just='right')
60-
6151
def timestamp(self, msg='', separator=None):
6252
msg = '%s %s' % (msg, datetime.datetime.today().strftime('%c %Z'))
6353
if separator:

unittests/resources/checks/frontend_checks.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,21 @@ def wait(self):
130130
sys.exit(1)
131131

132132

133+
@rfm.simple_test
134+
class CleanupFailTest(rfm.RunOnlyRegressionTest):
135+
def __init__(self):
136+
self.valid_systems = ['*']
137+
self.valid_prog_environs = ['*']
138+
self.sourcesdir = None
139+
self.executable = 'echo foo'
140+
self.sanity_patterns = sn.assert_found(r'foo', self.stdout)
141+
142+
@rfm.run_before('cleanup')
143+
def fail(self):
144+
# Make this test fail on purpose
145+
raise Exception
146+
147+
133148
class SleepCheck(BaseFrontendCheck):
134149
_next_id = 0
135150

unittests/test_loader.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,11 +32,11 @@ def test_load_file_absolute(self):
3232
def test_load_recursive(self):
3333
checks = self.loader.load_from_dir('unittests/resources/checks',
3434
recurse=True)
35-
self.assertEqual(11, len(checks))
35+
self.assertEqual(12, len(checks))
3636

3737
def test_load_all(self):
3838
checks = self.loader_with_path.load_all()
39-
self.assertEqual(10, len(checks))
39+
self.assertEqual(11, len(checks))
4040

4141
def test_load_all_with_prefix(self):
4242
checks = self.loader_with_prefix.load_all()

unittests/test_policies.py

Lines changed: 18 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -81,70 +81,76 @@ def test_runall(self):
8181
self.runall(self.checks)
8282

8383
stats = self.runner.stats
84-
self.assertEqual(7, stats.num_cases())
84+
self.assertEqual(8, stats.num_cases())
8585
self.assertRunall()
86-
self.assertEqual(4, len(stats.failures()))
86+
self.assertEqual(5, len(stats.failures()))
8787
self.assertEqual(2, self._num_failures_stage('setup'))
8888
self.assertEqual(1, self._num_failures_stage('sanity'))
8989
self.assertEqual(1, self._num_failures_stage('performance'))
90+
self.assertEqual(1, self._num_failures_stage('cleanup'))
9091

9192
def test_runall_skip_system_check(self):
9293
self.runall(self.checks, skip_system_check=True)
9394

9495
stats = self.runner.stats
95-
self.assertEqual(8, stats.num_cases())
96+
self.assertEqual(9, stats.num_cases())
9697
self.assertRunall()
97-
self.assertEqual(4, len(stats.failures()))
98+
self.assertEqual(5, len(stats.failures()))
9899
self.assertEqual(2, self._num_failures_stage('setup'))
99100
self.assertEqual(1, self._num_failures_stage('sanity'))
100101
self.assertEqual(1, self._num_failures_stage('performance'))
102+
self.assertEqual(1, self._num_failures_stage('cleanup'))
101103

102104
def test_runall_skip_prgenv_check(self):
103105
self.runall(self.checks, skip_environ_check=True)
104106

105107
stats = self.runner.stats
106-
self.assertEqual(8, stats.num_cases())
108+
self.assertEqual(9, stats.num_cases())
107109
self.assertRunall()
108-
self.assertEqual(4, len(stats.failures()))
110+
self.assertEqual(5, len(stats.failures()))
109111
self.assertEqual(2, self._num_failures_stage('setup'))
110112
self.assertEqual(1, self._num_failures_stage('sanity'))
111113
self.assertEqual(1, self._num_failures_stage('performance'))
114+
self.assertEqual(1, self._num_failures_stage('cleanup'))
112115

113116
def test_runall_skip_sanity_check(self):
114117
self.runner.policy.skip_sanity_check = True
115118
self.runall(self.checks)
116119

117120
stats = self.runner.stats
118-
self.assertEqual(7, stats.num_cases())
121+
self.assertEqual(8, stats.num_cases())
119122
self.assertRunall()
120-
self.assertEqual(3, len(stats.failures()))
123+
self.assertEqual(4, len(stats.failures()))
121124
self.assertEqual(2, self._num_failures_stage('setup'))
122125
self.assertEqual(0, self._num_failures_stage('sanity'))
123126
self.assertEqual(1, self._num_failures_stage('performance'))
127+
self.assertEqual(1, self._num_failures_stage('cleanup'))
124128

125129
def test_runall_skip_performance_check(self):
126130
self.runner.policy.skip_performance_check = True
127131
self.runall(self.checks)
128132

129133
stats = self.runner.stats
130-
self.assertEqual(7, stats.num_cases())
134+
self.assertEqual(8, stats.num_cases())
131135
self.assertRunall()
132-
self.assertEqual(3, len(stats.failures()))
136+
self.assertEqual(4, len(stats.failures()))
133137
self.assertEqual(2, self._num_failures_stage('setup'))
134138
self.assertEqual(1, self._num_failures_stage('sanity'))
135139
self.assertEqual(0, self._num_failures_stage('performance'))
140+
self.assertEqual(1, self._num_failures_stage('cleanup'))
136141

137142
def test_strict_performance_check(self):
138143
self.runner.policy.strict_check = True
139144
self.runall(self.checks)
140145

141146
stats = self.runner.stats
142-
self.assertEqual(7, stats.num_cases())
147+
self.assertEqual(8, stats.num_cases())
143148
self.assertRunall()
144-
self.assertEqual(5, len(stats.failures()))
149+
self.assertEqual(6, len(stats.failures()))
145150
self.assertEqual(2, self._num_failures_stage('setup'))
146151
self.assertEqual(1, self._num_failures_stage('sanity'))
147152
self.assertEqual(2, self._num_failures_stage('performance'))
153+
self.assertEqual(1, self._num_failures_stage('cleanup'))
148154

149155
def test_force_local_execution(self):
150156
self.runner.policy.force_local = True

0 commit comments

Comments
 (0)