Skip to content

Commit fc36d0d

Browse files
committed
Copy build log and artifacts to permanent locations after a failure
Packages are built in some selected build path (--buildpath), and the logs of successful compilations are then collected in some other location for permanent storage (--logfile-format). Logs of failed builds remain in the build path location so that they can be inspected. However, this setup is problematic when building software in HPC jobs. Quite often in HPC systems the build path is set to some fast storage local to the node, like an NVMe RAID mounted on `/tmp` or `/dev/shm` (as suggested in the documentation: https://docs.easybuild.io/configuration/#buildpath). The node storage is often wiped after the end of a job, so the log files and the artifacts are no longer available after the termination of the job. This commit adds options (--log-error-path and --artifact-error-path) to accumulate error logs and artifacts in some more permanent locations, so that they can be easily inspected after a failed build.
1 parent 3165a6c commit fc36d0d

File tree

6 files changed

+288
-21
lines changed

6 files changed

+288
-21
lines changed

easybuild/framework/easyblock.py

Lines changed: 101 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -76,15 +76,17 @@
7676
from easybuild.tools.config import EASYBUILD_SOURCES_URL # noqa
7777
from easybuild.tools.config import build_option, build_path, get_log_filename, get_repository, get_repositorypath
7878
from easybuild.tools.config import install_path, log_path, package_path, source_paths
79+
from easybuild.tools.config import get_log_error_path, get_artifact_error_path
7980
from easybuild.tools.environment import restore_env, sanitize_env
8081
from easybuild.tools.filetools import CHECKSUM_TYPE_MD5, CHECKSUM_TYPE_SHA256
8182
from easybuild.tools.filetools import adjust_permissions, apply_patch, back_up_file, change_dir, check_lock
82-
from easybuild.tools.filetools import compute_checksum, convert_name, copy_file, create_lock, create_patch_info
83-
from easybuild.tools.filetools import derive_alt_pypi_url, diff_files, dir_contains_files, download_file
84-
from easybuild.tools.filetools import encode_class_name, extract_file
85-
from easybuild.tools.filetools import find_backup_name_candidate, get_source_tarball_from_git, is_alt_pypi_url
86-
from easybuild.tools.filetools import is_binary, is_sha256_checksum, mkdir, move_file, move_logs, read_file, remove_dir
87-
from easybuild.tools.filetools import remove_file, remove_lock, verify_checksum, weld_paths, write_file, symlink
83+
from easybuild.tools.filetools import convert_name, copy_file, copy_dir, create_lock, create_patch_info
84+
from easybuild.tools.filetools import create_unused_dirs, derive_alt_pypi_url, diff_files, dir_contains_files
85+
from easybuild.tools.filetools import download_file, encode_class_name, extract_file, compute_checksum
86+
from easybuild.tools.filetools import find_backup_name_candidate, get_source_tarball_from_git, det_common_path_prefix
87+
from easybuild.tools.filetools import is_alt_pypi_url,is_binary, is_predecesor, is_readable, is_sha256_checksum, mkdir
88+
from easybuild.tools.filetools import move_file, move_logs, read_file, remove_dir, remove_file, remove_lock
89+
from easybuild.tools.filetools import verify_checksum, weld_paths, write_file, symlink
8890
from easybuild.tools.hooks import BUILD_STEP, CLEANUP_STEP, CONFIGURE_STEP, EXTENSIONS_STEP, FETCH_STEP, INSTALL_STEP
8991
from easybuild.tools.hooks import MODULE_STEP, MODULE_WRITE, PACKAGE_STEP, PATCH_STEP, PERMISSIONS_STEP, POSTITER_STEP
9092
from easybuild.tools.hooks import POSTPROC_STEP, PREPARE_STEP, READY_STEP, SANITYCHECK_STEP, SOURCE_STEP
@@ -1045,16 +1047,23 @@ def moduleGenerator(self):
10451047
#
10461048
# DIRECTORY UTILITY FUNCTIONS
10471049
#
1048-
def gen_builddir(self):
1049-
"""Generate the (unique) name for the builddir"""
1050+
def get_relative_builddir_base_path(self):
    """
    Return the build directory location relative to the build path.

    The result is composed of three path components: the software name with
    unwanted characters removed, the software version, and a last component
    built from version prefix, toolchain name, toolchain version and version suffix.
    """
    sanitized_name = remove_unwanted_chars(self.name)

    toolchain = self.cfg['toolchain']
    # strip leading dashes from the toolchain version to avoid a '--' in the path name
    toolchain_version = toolchain['version'].lstrip('-')

    leaf_dir = "%s%s-%s%s" % (
        self.cfg['versionprefix'],
        toolchain['name'],
        toolchain_version,
        self.cfg['versionsuffix'],
    )

    return os.path.join(sanitized_name, self.version, leaf_dir)
1062+
1063+
def gen_builddir(self):
1064+
"""Generate the (unique) name for the builddir"""
1065+
relative_builddir = self.get_relative_builddir_base_path()
1066+
builddir = os.path.join(os.path.abspath(build_path()), relative_builddir)
10581067

10591068
# make sure build dir is unique if cleanupoldbuild is False or not set
10601069
if not self.cfg.get('cleanupoldbuild', False):
@@ -4198,6 +4207,88 @@ def print_dry_run_note(loc, silent=True):
41984207
dry_run_msg(msg, silent=silent)
41994208

42004209

4210+
def create_persistence_paths(operation_args):
    """
    Replace the target path of every persistence operation with the path returned by create_unused_dirs.

    :param operation_args: list of 4-tuples (operation, source paths, target path, message)
    :return: the same list object, updated in place, where the target path of each tuple is
             replaced by the entry at the same position in the list returned by create_unused_dirs
    """
    # nothing to persist: do not call create_unused_dirs without any paths
    if not operation_args:
        return operation_args

    requested_paths = [target_path for _, _, target_path, _ in operation_args]
    persistence_paths = create_unused_dirs(requested_paths)

    # only elements are replaced (the list length is unchanged), so updating while enumerating is safe
    for idx, (operation, sources, _, msg) in enumerate(operation_args):
        operation_args[idx] = (operation, sources, persistence_paths[idx], msg)

    return operation_args
4223+
4224+
def execute_persistence_operation(operation, source_paths, target_dir, msg, silent):
    """
    Apply a persistence operation to every source path and report the result.

    :param operation: callable invoked as operation(source_path, target_dir)
    :param source_paths: iterable of paths handed to the operation one by one
    :param target_dir: destination passed as second argument to the operation
    :param msg: message passed to print_msg
    :param silent: passed on to print_msg
    """
    for source_path in source_paths:
        operation(source_path, target_dir)

    # NOTE(review): message is assumed to be printed once, after all paths were processed - confirm
    print_msg(msg, log=_log, silent=silent)
4228+
4229+
def persist_failed_compilation_log_and_artifacts(build_successful, application_log, silent, app, easyconfig):
    """
    Report the location of the build log(s) and, for a failed build, copy the logs and the
    build directory to the configured error locations.

    :param build_successful: whether the build succeeded; nothing is copied if this is true
    :param application_log: path (prefix) of the application log; nothing is done if it is empty
    :param silent: passed on to print_msg
    :param app: object whose 'builddir' attribute is the build directory
    :param easyconfig: passed as 'ec' to get_log_error_path and get_artifact_error_path
    """
    if not application_log:
        return

    # there may be multiple log files, or the file name may be different due to zipping
    logs = glob.glob('%s*' % application_log)
    print_msg(
        "Results of the build can be found in the temporary log file(s) %s" % ', '.join(logs),
        log=_log,
        silent=silent
    )

    if build_successful:
        return

    # use a single strftime call, so that the date and time parts always stem from the same instant
    datetime_stamp = time.strftime("%Y%m%d-%H%M%S")
    operation_args = []

    log_error_path = get_log_error_path(ec=easyconfig)
    if log_error_path is not None:
        log_error_path = os.path.join(log_error_path, datetime_stamp)

        if not is_predecesor(app.builddir, log_error_path):
            operation_args.append(
                (
                    copy_file,
                    logs,
                    log_error_path,
                    "Logs of failed build copied to permanent storage: %s" % log_error_path
                )
            )
        else:
            print_msg(
                "Persistent log directory is subdirectory of build directory; not copying logs.",
                log=_log,
                silent=silent
            )

    artifact_error_path = get_artifact_error_path(ec=easyconfig)
    if artifact_error_path is not None:
        artifact_error_path = os.path.join(artifact_error_path, datetime_stamp)

        if not is_predecesor(app.builddir, artifact_error_path):
            operation_args.append(
                (
                    lambda source, destination: copy_dir(source, destination, dirs_exist_ok=True),
                    [app.builddir],
                    artifact_error_path,
                    "Artifacts of failed build copied to permanent storage: %s" % artifact_error_path
                )
            )
        else:
            print_msg(
                "Persistent artifact directory is subdirectory of build directory; not copying artifacts.",
                log=_log,
                silent=silent
            )

    # target directories are replaced by the paths returned by create_persistence_paths before copying
    operation_args = create_persistence_paths(operation_args)
    for op in operation_args:
        execute_persistence_operation(*op, silent=silent)
4290+
4291+
42014292
def build_and_install_one(ecdict, init_env):
42024293
"""
42034294
Build the software
@@ -4456,10 +4547,7 @@ def ensure_writable_log_dir(log_dir):
44564547
else:
44574548
dry_run_msg("(no ignored errors during dry run)\n", silent=silent)
44584549

4459-
if application_log:
4460-
# there may be multiple log files, or the file name may be different due to zipping
4461-
logs = glob.glob('%s*' % application_log)
4462-
print_msg("Results of the build can be found in the log file(s) %s" % ', '.join(logs), log=_log, silent=silent)
4550+
persist_failed_compilation_log_and_artifacts(success, application_log, silent, app, ecdict['ec'])
44634551

44644552
del app
44654553

easybuild/tools/config.py

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,9 +104,11 @@
104104
DEFAULT_MODULE_SYNTAX = 'Lua'
105105
DEFAULT_MODULES_TOOL = 'Lmod'
106106
DEFAULT_PATH_SUBDIRS = {
107+
'artifact_error_path' : 'error_artifacts',
107108
'buildpath': 'build',
108109
'containerpath': 'containers',
109110
'installpath': '',
111+
'log_error_path': 'error_logs',
110112
'packagepath': 'packages',
111113
'repositorypath': 'ebfiles_repo',
112114
'sourcepath': 'sources',
@@ -475,13 +477,15 @@ class ConfigurationVariables(BaseConfigurationVariables):
475477

476478
# list of known/required keys
477479
REQUIRED = [
480+
'artifact_error_path',
478481
'buildpath',
479482
'config',
480483
'containerpath',
481484
'installpath',
482485
'installpath_modules',
483486
'installpath_software',
484487
'job_backend',
488+
'log_error_path',
485489
'logfile_format',
486490
'moduleclasses',
487491
'module_naming_scheme',
@@ -843,6 +847,58 @@ def log_path(ec=None):
843847
return log_file_format(return_directory=True, ec=ec, date=date, timestamp=timestamp)
844848

845849

850+
def get_log_error_path(ec=None):
    """
    Determine the directory to which logs are copied in case of a failure.

    :param ec: dict-like value with at least the keys 'name' and 'version' defined
    :return: '<log_error_path>/<name>-<version>', or None if 'log_error_path' is not set
    """
    base_dir = ConfigurationVariables()['log_error_path']

    # no location configured: signal to the caller that there is nowhere to copy logs to
    if not base_dir:
        return None

    if ec is None:
        raise EasyBuildError("Easyconfig cannot be empty.")

    name = ec.get('name')
    version = ec.get('version')

    if name is None:
        raise EasyBuildError("The 'name' key is not defined.")
    if version is None:
        raise EasyBuildError("The 'version' key is not defined.")

    return os.path.join(base_dir, '-'.join((name, version)))
874+
875+
876+
def get_artifact_error_path(ec=None):
    """
    Determine the directory to which build directories are copied in case of a failure.

    :param ec: dict-like value with at least the keys 'name' and 'version' defined
    :return: '<artifact_error_path>/<name>-<version>', or None if 'artifact_error_path' is not set
    """
    base_dir = ConfigurationVariables()['artifact_error_path']

    # no location configured: signal to the caller that there is nowhere to copy artifacts to
    if not base_dir:
        return None

    if ec is None:
        raise EasyBuildError("Easyconfig cannot be empty.")

    name = ec.get('name')
    version = ec.get('version')

    if name is None:
        raise EasyBuildError("The 'name' key is not defined.")
    if version is None:
        raise EasyBuildError("The 'version' key is not defined.")

    return os.path.join(base_dir, '-'.join((name, version)))
900+
901+
846902
def get_build_log_path():
847903
"""
848904
Return (temporary) directory for build log

easybuild/tools/options.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -559,6 +559,9 @@ def config_options(self):
559559
descr = ("Configuration options", "Configure EasyBuild behavior.")
560560

561561
opts = OrderedDict({
562+
'artifact-error-path': ("Location where artifacts are copied in case of an error, "
563+
"leave empty to avoid copying artifacts",
564+
None, 'store', mk_full_default_path('artifact_error_path')),
562565
'avail-module-naming-schemes': ("Show all supported module naming schemes",
563566
None, 'store_true', False,),
564567
'avail-modules-tools': ("Show all supported module tools",
@@ -589,6 +592,9 @@ def config_options(self):
589592
None, 'store', None),
590593
'job-backend': ("Backend to use for submitting jobs", 'choice', 'store',
591594
DEFAULT_JOB_BACKEND, sorted(avail_job_backends().keys())),
595+
'log-error-path': ("Location where logs and artifacts are copied in case of an error, "
596+
"leave empty to avoid copying logs",
597+
None, 'store', mk_full_default_path('log_error_path')),
592598
# purposely take a copy for the default logfile format
593599
'logfile-format': ("Directory name and format of the log file",
594600
'strtuple', 'store', DEFAULT_LOGFILE_FORMAT[:], {'metavar': 'DIR,FORMAT'}),
@@ -1153,8 +1159,8 @@ def _postprocess_config(self):
11531159
# - the <path> could also specify the location of a *remote* (Git( repository,
11541160
# which can be done in variety of formats (git@<url>:<org>/<repo>), https://<url>, etc.)
11551161
# (see also https://github.com/easybuilders/easybuild-framework/issues/3892);
1156-
path_opt_names = ['buildpath', 'containerpath', 'git_working_dirs_path', 'installpath',
1157-
'installpath_modules', 'installpath_software', 'prefix', 'packagepath',
1162+
path_opt_names = ['artifact_error_path', 'buildpath', 'containerpath', 'git_working_dirs_path', 'installpath',
1163+
'installpath_modules', 'installpath_software', 'log_error_path', 'prefix', 'packagepath',
11581164
'robot_paths', 'sourcepath']
11591165

11601166
for opt_name in path_opt_names:
@@ -1163,8 +1169,8 @@ def _postprocess_config(self):
11631169
if self.options.prefix is not None:
11641170
# prefix applies to all paths, and repository has to be reinitialised to take new repositorypath in account
11651171
# in the legacy-style configuration, repository is initialised in configuration file itself
1166-
path_opts = ['buildpath', 'containerpath', 'installpath', 'packagepath', 'repository', 'repositorypath',
1167-
'sourcepath']
1172+
path_opts = ['artifact_error_path', 'buildpath', 'containerpath', 'installpath', 'log_error_path',
1173+
'packagepath', 'repository', 'repositorypath', 'sourcepath']
11681174
for dest in path_opts:
11691175
if not self.options._action_taken.get(dest, False):
11701176
if dest == 'repository':

test/framework/options.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5283,7 +5283,16 @@ def test_prefix_option(self):
52835283

52845284
regex = re.compile(r"(?P<cfg_opt>\S*).*%s.*" % self.test_prefix, re.M)
52855285

5286-
expected = ['buildpath', 'containerpath', 'installpath', 'packagepath', 'prefix', 'repositorypath']
5286+
expected = [
5287+
'artifact-error-path',
5288+
'buildpath',
5289+
'containerpath',
5290+
'installpath',
5291+
'log-error-path',
5292+
'packagepath',
5293+
'prefix',
5294+
'repositorypath',
5295+
]
52875296
self.assertEqual(sorted(regex.findall(txt)), expected)
52885297

52895298
def test_dump_env_script(self):
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
--- a/toy-0.0.orig/toy.source 2014-03-06 18:48:16.000000000 +0100
2+
+++ b/toy-0.0/toy.source 2020-08-18 12:19:35.000000000 +0200
3+
@@ -2,6 +2,6 @@
4+
5+
int main(int argc, char* argv[]){
6+
7+
- printf("I'm a toy, and proud of it.\n");
8+
+ printf("I'm a toy, and proud of it.\n")
9+
return 0;
10+
}

0 commit comments

Comments
 (0)