Skip to content

Commit 5ae74c9

Browse files
authored
Merge pull request #4351 from boegel/run_shell_cmd_error
improve error reporting for failing shell commands (and EasyBuild crashes)
2 parents 37d7a0e + ece71c2 commit 5ae74c9

File tree

11 files changed

+283
-57
lines changed

11 files changed

+283
-57
lines changed

easybuild/framework/easyblock.py

Lines changed: 10 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@
8787
from easybuild.tools.hooks import MODULE_STEP, MODULE_WRITE, PACKAGE_STEP, PATCH_STEP, PERMISSIONS_STEP, POSTITER_STEP
8888
from easybuild.tools.hooks import POSTPROC_STEP, PREPARE_STEP, READY_STEP, SANITYCHECK_STEP, SOURCE_STEP
8989
from easybuild.tools.hooks import SINGLE_EXTENSION, TEST_STEP, TESTCASES_STEP, load_hooks, run_hook
90-
from easybuild.tools.run import check_async_cmd, run_cmd
90+
from easybuild.tools.run import RunShellCmdError, check_async_cmd, run_cmd
9191
from easybuild.tools.jenkins import write_to_xml
9292
from easybuild.tools.module_generator import ModuleGeneratorLua, ModuleGeneratorTcl, module_generator, dependencies_for
9393
from easybuild.tools.module_naming_scheme.utilities import det_full_ec_version
@@ -4124,6 +4124,11 @@ def run_all_steps(self, run_test_cases):
41244124
start_time = datetime.now()
41254125
try:
41264126
self.run_step(step_name, step_methods)
4127+
except RunShellCmdError as err:
4128+
err.print()
4129+
ec_path = os.path.basename(self.cfg.path)
4130+
error_msg = f"shell command '{err.cmd_name} ...' failed in {step_name} step for {ec_path}"
4131+
raise EasyBuildError(error_msg)
41274132
finally:
41284133
if not self.dry_run:
41294134
step_duration = datetime.now() - start_time
@@ -4225,7 +4230,7 @@ def build_and_install_one(ecdict, init_env):
42254230
app.cfg['skip'] = skip
42264231

42274232
# build easyconfig
4228-
errormsg = '(no error)'
4233+
error_msg = '(no error)'
42294234
# timing info
42304235
start_time = time.time()
42314236
try:
@@ -4263,9 +4268,7 @@ def build_and_install_one(ecdict, init_env):
42634268
adjust_permissions(app.installdir, stat.S_IWUSR, add=False, recursive=True)
42644269

42654270
except EasyBuildError as err:
4266-
first_n = 300
4267-
errormsg = "build failed (first %d chars): %s" % (first_n, err.msg[:first_n])
4268-
_log.warning(errormsg)
4271+
error_msg = err.msg
42694272
result = False
42704273

42714274
ended = 'ended'
@@ -4387,11 +4390,7 @@ def ensure_writable_log_dir(log_dir):
43874390
# build failed
43884391
success = False
43894392
summary = 'FAILED'
4390-
4391-
build_dir = ''
4392-
if app.builddir:
4393-
build_dir = " (build directory: %s)" % (app.builddir)
4394-
succ = "unsuccessfully%s: %s" % (build_dir, errormsg)
4393+
succ = "unsuccessfully: " + error_msg
43954394

43964395
# cleanup logs
43974396
app.close_log()
@@ -4424,7 +4423,7 @@ def ensure_writable_log_dir(log_dir):
44244423

44254424
del app
44264425

4427-
return (success, application_log, errormsg)
4426+
return (success, application_log, error_msg)
44284427

44294428

44304429
def copy_easyblocks_for_reprod(easyblock_instances, reprod_dir):

easybuild/main.py

Lines changed: 18 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@
7070
from easybuild.tools.github import add_pr_labels, install_github_token, list_prs, merge_pr, new_branch_github, new_pr
7171
from easybuild.tools.github import new_pr_from_branch
7272
from easybuild.tools.github import sync_branch_with_develop, sync_pr_with_develop, update_branch, update_pr
73-
from easybuild.tools.hooks import BUILD_AND_INSTALL_LOOP, PRE_PREF, POST_PREF, START, END, CANCEL, FAIL
73+
from easybuild.tools.hooks import BUILD_AND_INSTALL_LOOP, PRE_PREF, POST_PREF, START, END, CANCEL, CRASH, FAIL
7474
from easybuild.tools.hooks import load_hooks, run_hook
7575
from easybuild.tools.modules import modules_tool
7676
from easybuild.tools.options import opts_dict_to_eb_opts, set_up_configuration, use_color
@@ -149,11 +149,11 @@ def build_and_install_software(ecs, init_session_state, exit_on_failure=True):
149149

150150
# keep track of success/total count
151151
if ec_res['success']:
152-
test_msg = "Successfully built %s" % ec['spec']
152+
test_msg = "Successfully installed %s" % ec['spec']
153153
else:
154-
test_msg = "Build of %s failed" % ec['spec']
154+
test_msg = "Installation of %s failed" % os.path.basename(ec['spec'])
155155
if 'err' in ec_res:
156-
test_msg += " (err: %s)" % ec_res['err']
156+
test_msg += ": %s" % ec_res['err']
157157

158158
# dump test report next to log file
159159
test_report_txt = create_test_report(test_msg, [(ec, ec_res)], init_session_state)
@@ -169,8 +169,8 @@ def build_and_install_software(ecs, init_session_state, exit_on_failure=True):
169169
adjust_permissions(parent_dir, stat.S_IWUSR, add=False, recursive=False)
170170

171171
if not ec_res['success'] and exit_on_failure:
172-
if 'traceback' in ec_res:
173-
raise EasyBuildError(ec_res['traceback'])
172+
if not isinstance(ec_res['err'], EasyBuildError):
173+
raise ec_res['err']
174174
else:
175175
raise EasyBuildError(test_msg)
176176

@@ -747,14 +747,22 @@ def prepare_main(args=None, logfile=None, testing=None):
747747
return init_session_state, eb_go, cfg_settings
748748

749749

750-
if __name__ == "__main__":
751-
init_session_state, eb_go, cfg_settings = prepare_main()
750+
def main_with_hooks(args=None):
751+
init_session_state, eb_go, cfg_settings = prepare_main(args=args)
752752
hooks = load_hooks(eb_go.options.hooks)
753753
try:
754-
main(prepared_cfg_data=(init_session_state, eb_go, cfg_settings))
754+
main(args=args, prepared_cfg_data=(init_session_state, eb_go, cfg_settings))
755755
except EasyBuildError as err:
756756
run_hook(FAIL, hooks, args=[err])
757-
print_error(err.msg)
757+
sys.exit(1)
758758
except KeyboardInterrupt as err:
759759
run_hook(CANCEL, hooks, args=[err])
760760
print_error("Cancelled by user: %s" % err)
761+
except Exception as err:
762+
run_hook(CRASH, hooks, args=[err])
763+
sys.stderr.write("EasyBuild crashed! Please consider reporting a bug, this should not happen...\n\n")
764+
raise
765+
766+
767+
if __name__ == "__main__":
768+
main_with_hooks()

easybuild/tools/build_log.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -167,7 +167,7 @@ def nosupport(self, msg, ver):
167167

168168
def error(self, msg, *args, **kwargs):
169169
"""Print error message and raise an EasyBuildError."""
170-
ebmsg = "EasyBuild crashed with an error %s: " % self.caller_info()
170+
ebmsg = "EasyBuild encountered an error %s: " % self.caller_info()
171171
fancylogger.FancyLogger.error(self, ebmsg + msg, *args, **kwargs)
172172

173173
def devel(self, msg, *args, **kwargs):

easybuild/tools/hooks.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@
6767
END = 'end'
6868

6969
CANCEL = 'cancel'
70+
CRASH = 'crash'
7071
FAIL = 'fail'
7172

7273
RUN_SHELL_CMD = 'run_shell_cmd'
@@ -107,6 +108,7 @@
107108
POST_PREF + BUILD_AND_INSTALL_LOOP,
108109
END,
109110
CANCEL,
111+
CRASH,
110112
FAIL,
111113
PRE_PREF + RUN_SHELL_CMD,
112114
POST_PREF + RUN_SHELL_CMD,

easybuild/tools/run.py

Lines changed: 95 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
"""
3838
import contextlib
3939
import functools
40+
import inspect
4041
import os
4142
import re
4243
import signal
@@ -75,7 +76,81 @@
7576
]
7677

7778

78-
RunResult = namedtuple('RunResult', ('cmd', 'exit_code', 'output', 'stderr', 'work_dir'))
79+
RunShellCmdResult = namedtuple('RunShellCmdResult', ('cmd', 'exit_code', 'output', 'stderr', 'work_dir'))
80+
81+
82+
class RunShellCmdError(BaseException):
    """
    Error raised for a shell command that failed (exited with a non-zero exit code).

    Keeps all details of the failed command as attributes, so they can be reported via the print() method.
    Note that this class derives from BaseException, so it is not caught by 'except Exception' clauses.
    """

    def __init__(self, cmd, exit_code, work_dir, output, stderr, caller_info, *args, **kwargs):
        """
        Constructor for RunShellCmdError.

        :param cmd: full shell command that was run (string)
        :param exit_code: exit code of the shell command
        :param work_dir: directory in which the shell command was run
        :param output: output produced by the shell command (stdout only if stderr is not None)
        :param stderr: separately captured stderr output, or None if it was not captured separately
        :param caller_info: 3-tuple with file name, line number, and function name of the call site
        """
        self.cmd = cmd
        # first word of the command; may include a directory part (for example '/usr/bin/make' or './configure')
        self.cmd_name = cmd.split(' ')[0]
        self.exit_code = exit_code
        self.work_dir = work_dir
        self.output = output
        self.stderr = stderr
        self.caller_info = caller_info

        msg = f"Shell command '{self.cmd_name}' failed!"
        super().__init__(msg, *args, **kwargs)

    def print(self):
        """
        Report failed shell command for this RunShellCmdError instance

        Command output is written to files in a new temporary directory,
        and an overview that points to these files is written to stderr.
        """

        def pad_4_spaces(msg):
            return ' ' * 4 + msg

        error_info = [
            '',
            "ERROR: Shell command failed!",
            pad_4_spaces(f"full command -> {self.cmd}"),
            pad_4_spaces(f"exit code -> {self.exit_code}"),
            pad_4_spaces(f"working directory -> {self.work_dir}"),
        ]

        # only use the last path component of the command name for the names of the output files:
        # os.path.join discards the temporary directory if a later component is an absolute path,
        # and a relative path like './configure' would require a subdirectory that does not exist
        file_stem = os.path.basename(self.cmd_name) or 'cmd'

        tmpdir = tempfile.mkdtemp(prefix='shell-cmd-error-')
        output_fp = os.path.join(tmpdir, f"{file_stem}.out")
        with open(output_fp, 'w') as fp:
            fp.write(self.output or '')

        if self.stderr is None:
            error_info.append(pad_4_spaces(f"output (stdout + stderr) -> {output_fp}"))
        else:
            stderr_fp = os.path.join(tmpdir, f"{file_stem}.err")
            with open(stderr_fp, 'w') as fp:
                fp.write(self.stderr)
            error_info.extend([
                pad_4_spaces(f"output (stdout) -> {output_fp}"),
                pad_4_spaces(f"error/warnings (stderr) -> {stderr_fp}"),
            ])

        caller_file_name, caller_line_nr, caller_function_name = self.caller_info
        called_from_info = f"'{caller_function_name}' function in {caller_file_name} (line {caller_line_nr})"
        error_info.extend([
            pad_4_spaces(f"called from -> {called_from_info}"),
            '',
        ])

        sys.stderr.write('\n'.join(error_info) + '\n')
137+
138+
139+
def raise_run_shell_cmd_error(cmd, exit_code, work_dir, output, stderr):
    """
    Collect information on where the failing shell command was run from, then raise RunShellCmdError for it
    """

    # the list of outer frames starts with the frame of this function (index 0);
    # walking outwards from there we expect to pass:
    #   index 1: the run_shell_cmd function
    #   index 2: the run_cmd_cache decorator
    #   index 3: the location where the shell command was actually run from
    outer_frames = inspect.getouterframes(inspect.currentframe())
    call_site = outer_frames[3]

    raise RunShellCmdError(
        cmd,
        exit_code,
        work_dir,
        output,
        stderr,
        (call_site.filename, call_site.lineno, call_site.function),
    )
79154

80155

81156
def run_cmd_cache(func):
@@ -178,7 +253,7 @@ def to_cmd_str(cmd):
178253
msg += f" (in {work_dir})"
179254
dry_run_msg(msg, silent=silent)
180255

181-
return RunResult(cmd=cmd_str, exit_code=0, output='', stderr=None, work_dir=work_dir)
256+
return RunShellCmdResult(cmd=cmd_str, exit_code=0, output='', stderr=None, work_dir=work_dir)
182257

183258
start_time = datetime.now()
184259
if not hidden:
@@ -204,14 +279,29 @@ def to_cmd_str(cmd):
204279
_log.info("Command to run was changed by pre-%s hook: '%s' (was: '%s')", RUN_SHELL_CMD, cmd, old_cmd)
205280

206281
_log.info(f"Running command '{cmd_str}' in {work_dir}")
207-
proc = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=stderr, check=fail_on_error,
282+
proc = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=stderr, check=False,
208283
cwd=work_dir, env=env, input=stdin, shell=shell, executable=executable)
209284

210285
# return output as a regular string rather than a byte sequence (and non-UTF-8 characters get stripped out)
211286
output = proc.stdout.decode('utf-8', 'ignore')
212-
stderr_output = proc.stderr.decode('utf-8', 'ignore') if split_stderr else None
287+
stderr = proc.stderr.decode('utf-8', 'ignore') if split_stderr else None
288+
289+
res = RunShellCmdResult(cmd=cmd_str, exit_code=proc.returncode, output=output, stderr=stderr, work_dir=work_dir)
213290

214-
res = RunResult(cmd=cmd_str, exit_code=proc.returncode, output=output, stderr=stderr_output, work_dir=work_dir)
291+
# always log command output
292+
cmd_name = cmd_str.split(' ')[0]
293+
if split_stderr:
294+
_log.info(f"Output of '{cmd_name} ...' shell command (stdout only):\n{res.output}")
295+
_log.info(f"Warnings and errors of '{cmd_name} ...' shell command (stderr only):\n{res.stderr}")
296+
else:
297+
_log.info(f"Output of '{cmd_name} ...' shell command (stdout + stderr):\n{res.output}")
298+
299+
if res.exit_code == 0:
300+
_log.info(f"Shell command completed successfully (see output above): {cmd_str}")
301+
else:
302+
_log.warning(f"Shell command FAILED (exit code {res.exit_code}, see output above): {cmd_str}")
303+
if fail_on_error:
304+
raise_run_shell_cmd_error(res.cmd, res.exit_code, res.work_dir, output=res.output, stderr=res.stderr)
215305

216306
if with_hooks:
217307
run_hook_kwargs = {
@@ -222,13 +312,6 @@ def to_cmd_str(cmd):
222312
}
223313
run_hook(RUN_SHELL_CMD, hooks, post_step_hook=True, args=[cmd], kwargs=run_hook_kwargs)
224314

225-
if split_stderr:
226-
log_msg = f"Command '{cmd_str}' exited with exit code {res.exit_code}, "
227-
log_msg += f"with stdout:\n{res.output}\nstderr:\n{res.stderr}"
228-
else:
229-
log_msg = f"Command '{cmd_str}' exited with exit code {res.exit_code} and output:\n{res.output}"
230-
_log.info(log_msg)
231-
232315
if not hidden:
233316
time_since_start = time_str_since(start_time)
234317
trace_msg(f"command completed: exit {res.exit_code}, ran in {time_since_start}")

test/framework/build_log.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -139,8 +139,8 @@ def test_easybuildlog(self):
139139
r"fancyroot.test_easybuildlog \[WARNING\] :: Deprecated functionality.*onemorewarning.*",
140140
r"fancyroot.test_easybuildlog \[WARNING\] :: Deprecated functionality.*lastwarning.*",
141141
r"fancyroot.test_easybuildlog \[WARNING\] :: Deprecated functionality.*thisisnotprinted.*",
142-
r"fancyroot.test_easybuildlog \[ERROR\] :: EasyBuild crashed with an error \(at .* in .*\): kaput",
143-
r"fancyroot.test_easybuildlog \[ERROR\] :: EasyBuild crashed with an error \(at .* in .*\): err: msg: %s",
142+
r"fancyroot.test_easybuildlog \[ERROR\] :: EasyBuild encountered an error \(at .* in .*\): kaput",
143+
r"fancyroot.test_easybuildlog \[ERROR\] :: EasyBuild encountered an error \(at .* in .*\): err: msg: %s",
144144
r"fancyroot.test_easybuildlog \[ERROR\] :: .*EasyBuild encountered an exception \(at .* in .*\): oops",
145145
'',
146146
])
@@ -168,7 +168,7 @@ def test_easybuildlog(self):
168168
r"fancyroot.test_easybuildlog \[WARNING\] :: bleh",
169169
r"fancyroot.test_easybuildlog \[INFO\] :: 4\+2 = 42",
170170
r"fancyroot.test_easybuildlog \[DEBUG\] :: this is just a test",
171-
r"fancyroot.test_easybuildlog \[ERROR\] :: EasyBuild crashed with an error \(at .* in .*\): foo baz baz",
171+
r"fancyroot.test_easybuildlog \[ERROR\] :: EasyBuild encountered an error \(at .* in .*\): foo baz baz",
172172
'',
173173
])
174174
logtxt_regex = re.compile(r'^%s' % expected_logtxt, re.M)
@@ -223,7 +223,7 @@ def test_log_levels(self):
223223
info_msg = r"%s \[INFO\] :: fyi" % prefix
224224
warning_msg = r"%s \[WARNING\] :: this is a warning" % prefix
225225
deprecated_msg = r"%s \[WARNING\] :: Deprecated functionality, .*: almost kaput; see .*" % prefix
226-
error_msg = r"%s \[ERROR\] :: EasyBuild crashed with an error \(at .* in .*\): kaput" % prefix
226+
error_msg = r"%s \[ERROR\] :: EasyBuild encountered an error \(at .* in .*\): kaput" % prefix
227227

228228
expected_logtxt = '\n'.join([
229229
error_msg,

test/framework/hooks.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,9 @@ def setUp(self):
7979
'',
8080
'def fail_hook(err):',
8181
' print("EasyBuild FAIL: %s" % err)',
82+
'',
83+
'def crash_hook(err):',
84+
' print("EasyBuild CRASHED, oh no! => %s" % err)',
8285
])
8386
write_file(self.test_hooks_pymod, test_hooks_pymod_txt)
8487

@@ -97,8 +100,9 @@ def test_load_hooks(self):
97100

98101
hooks = load_hooks(self.test_hooks_pymod)
99102

100-
self.assertEqual(len(hooks), 8)
103+
self.assertEqual(len(hooks), 9)
101104
expected = [
105+
'crash_hook',
102106
'fail_hook',
103107
'parse_hook',
104108
'post_configure_hook',
@@ -140,6 +144,7 @@ def test_find_hook(self):
140144
pre_single_extension_hook = [hooks[k] for k in hooks if k == 'pre_single_extension_hook'][0]
141145
start_hook = [hooks[k] for k in hooks if k == 'start_hook'][0]
142146
pre_run_shell_cmd_hook = [hooks[k] for k in hooks if k == 'pre_run_shell_cmd_hook'][0]
147+
crash_hook = [hooks[k] for k in hooks if k == 'crash_hook'][0]
143148
fail_hook = [hooks[k] for k in hooks if k == 'fail_hook'][0]
144149
pre_build_and_install_loop_hook = [hooks[k] for k in hooks if k == 'pre_build_and_install_loop_hook'][0]
145150

@@ -175,6 +180,10 @@ def test_find_hook(self):
175180
self.assertEqual(find_hook('fail', hooks, pre_step_hook=True), None)
176181
self.assertEqual(find_hook('fail', hooks, post_step_hook=True), None)
177182

183+
self.assertEqual(find_hook('crash', hooks), crash_hook)
184+
self.assertEqual(find_hook('crash', hooks, pre_step_hook=True), None)
185+
self.assertEqual(find_hook('crash', hooks, post_step_hook=True), None)
186+
178187
hook_name = 'build_and_install_loop'
179188
self.assertEqual(find_hook(hook_name, hooks), None)
180189
self.assertEqual(find_hook(hook_name, hooks, pre_step_hook=True), pre_build_and_install_loop_hook)
@@ -209,6 +218,7 @@ def run_hooks():
209218
run_hook('single_extension', hooks, post_step_hook=True, args=[None])
210219
run_hook('extensions', hooks, post_step_hook=True, args=[None])
211220
run_hook('fail', hooks, args=[EasyBuildError('oops')])
221+
run_hook('crash', hooks, args=[RuntimeError('boom!')])
212222
stdout = self.get_stdout()
213223
stderr = self.get_stderr()
214224
self.mock_stdout(False)
@@ -244,6 +254,8 @@ def run_hooks():
244254
"this is run before installing an extension",
245255
"== Running fail hook...",
246256
"EasyBuild FAIL: 'oops'",
257+
"== Running crash hook...",
258+
"EasyBuild CRASHED, oh no! => boom!",
247259
]
248260
expected_stdout = '\n'.join(expected_stdout_lines)
249261

test/framework/options.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -737,6 +737,7 @@ def test_avail_hooks(self):
737737
" post_build_and_install_loop_hook",
738738
" end_hook",
739739
" cancel_hook",
740+
" crash_hook",
740741
" fail_hook",
741742
" pre_run_shell_cmd_hook",
742743
" post_run_shell_cmd_hook",

0 commit comments

Comments
 (0)