Skip to content

Commit b01f21a

Browse files
committed
Copy build log and artifacts to a permanent location after failures
The software can be built in a selected build path (--buildpath), and the logs of successful builds are then collected in some other location for permanent storage (--logfile-format). Logs of failed builds remain in the build path so that they can be inspected. However, this setup is problematic when building software in HPC jobs. Quite often in HPC systems the build path is set to some fast storage local to the node, like an NVMe RAID mounted on `/tmp` or `/dev/shm` (as suggested in the documentation: https://docs.easybuild.io/configuration/#buildpath). The node storage is often wiped after the end of a job, so the log files and the build artifacts are no longer available once the job has terminated. This commit adds options (--log-error-path and --artifact-error-path) to accumulate error logs and artifacts in more permanent locations, so that they can be easily inspected after a failed build.
1 parent f7717a5 commit b01f21a

File tree

6 files changed

+294
-20
lines changed

6 files changed

+294
-20
lines changed

easybuild/framework/easyblock.py

Lines changed: 110 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -76,15 +76,17 @@
7676
from easybuild.tools.config import EASYBUILD_SOURCES_URL # noqa
7777
from easybuild.tools.config import build_option, build_path, get_log_filename, get_repository, get_repositorypath
7878
from easybuild.tools.config import install_path, log_path, package_path, source_paths
79+
from easybuild.tools.config import get_log_error_path, get_artifact_error_path
7980
from easybuild.tools.environment import restore_env, sanitize_env
8081
from easybuild.tools.filetools import CHECKSUM_TYPE_MD5, CHECKSUM_TYPE_SHA256
8182
from easybuild.tools.filetools import adjust_permissions, apply_patch, back_up_file, change_dir, check_lock
82-
from easybuild.tools.filetools import compute_checksum, convert_name, copy_file, create_lock, create_patch_info
83-
from easybuild.tools.filetools import derive_alt_pypi_url, diff_files, dir_contains_files, download_file
84-
from easybuild.tools.filetools import encode_class_name, extract_file
83+
from easybuild.tools.filetools import convert_name, copy_file, copy_dir, create_lock, create_patch_info
84+
from easybuild.tools.filetools import create_unused_path, derive_alt_pypi_url, diff_files, dir_contains_files
85+
from easybuild.tools.filetools import download_file, encode_class_name, extract_file, compute_checksum
8586
from easybuild.tools.filetools import find_backup_name_candidate, get_source_tarball_from_git, is_alt_pypi_url
86-
from easybuild.tools.filetools import is_binary, is_sha256_checksum, mkdir, move_file, move_logs, read_file, remove_dir
87-
from easybuild.tools.filetools import remove_file, remove_lock, verify_checksum, weld_paths, write_file, symlink
87+
from easybuild.tools.filetools import is_binary, is_readable, is_sha256_checksum, mkdir, move_file, move_logs
88+
from easybuild.tools.filetools import read_file, remove_dir, remove_file, remove_lock, verify_checksum, weld_paths
89+
from easybuild.tools.filetools import write_file, symlink
8890
from easybuild.tools.hooks import BUILD_STEP, CLEANUP_STEP, CONFIGURE_STEP, EXTENSIONS_STEP, FETCH_STEP, INSTALL_STEP
8991
from easybuild.tools.hooks import MODULE_STEP, MODULE_WRITE, PACKAGE_STEP, PATCH_STEP, PERMISSIONS_STEP, POSTITER_STEP
9092
from easybuild.tools.hooks import POSTPROC_STEP, PREPARE_STEP, READY_STEP, SANITYCHECK_STEP, SOURCE_STEP
@@ -1045,16 +1047,23 @@ def moduleGenerator(self):
10451047
#
10461048
# DIRECTORY UTILITY FUNCTIONS
10471049
#
1048-
def gen_builddir(self):
1049-
"""Generate the (unique) name for the builddir"""
1050+
def get_relative_builddir_base_path(self):
    """
    Return the location of the build directory relative to the build path.

    The result has the form
    <name>/<version>/<versionprefix><toolchain name>-<toolchain version><versionsuffix>,
    where <name> is the software name passed through remove_unwanted_chars.
    """
    name_component = remove_unwanted_chars(self.name)

    toolchain = self.cfg['toolchain']
    # strip leading dashes from the toolchain version, to prevent a '--' in the path name
    toolchain_version = toolchain['version'].lstrip('-')

    toolchain_component = "%s%s-%s%s" % (
        self.cfg['versionprefix'],
        toolchain['name'],
        toolchain_version,
        self.cfg['versionsuffix'],
    )

    return os.path.join(name_component, self.version, toolchain_component)
1062+
1063+
def gen_builddir(self):
1064+
"""Generate the (unique) name for the builddir"""
1065+
relative_builddir = self.get_relative_builddir_base_path()
1066+
builddir = os.path.join(os.path.abspath(build_path()), relative_builddir)
10581067

10591068
# make sure build dir is unique if cleanupoldbuild is False or not set
10601069
if not self.cfg.get('cleanupoldbuild', False):
@@ -4198,6 +4207,98 @@ def print_dry_run_note(loc, silent=True):
41984207
dry_run_msg(msg, silent=silent)
41994208

42004209

4210+
def persist_failed_compilation_log_and_artifacts(build_successful, application_log, silent, app, easyconfig):
    """
    Report where the build log(s) can be found and, after a failed build, copy the log(s) and
    the build directory to the configured error locations.

    Nothing is done when application_log is empty. After a successful build only the location
    of the log file(s) is printed.

    :param build_successful: whether the build succeeded; if so, nothing is copied
    :param application_log: path of the build log; every path matching '<application_log>*' is handled
    :param silent: passed on to print_msg
    :param app: easyblock instance; its 'builddir' attribute and get_relative_builddir_base_path() are used
    :param easyconfig: easyconfig passed to get_log_error_path and get_artifact_error_path
    """
    if not application_log:
        return

    # there may be multiple log files, or the file name may be different due to zipping
    logs = glob.glob('%s*' % application_log)
    print_msg(
        "Results of the build can be found in the temporary log file(s) %s" % ', '.join(logs),
        log=_log,
        silent=silent
    )

    if build_successful:
        return

    # directory actually used per kind of data ('log', 'artifact'); filled in on first use so that
    # all files of one kind end up in the same directory
    persistent_dirs = {}

    def replicate(kind, replication_operation, source_path, destination_base_path, destination_relative_path):
        """Copy source_path into the persistent directory for 'kind', unless it is missing or already there."""
        abs_source_path = os.path.abspath(source_path)
        requested_destination_path = os.path.join(os.path.abspath(destination_base_path), destination_relative_path)

        if not os.path.exists(abs_source_path):
            return
        # source and destination are the same location: nothing to copy
        if os.path.realpath(abs_source_path) == os.path.realpath(requested_destination_path):
            return

        if persistent_dirs.get(kind) is None:
            # NOTE(review): create_unused_path presumably returns a not yet existing variant of the
            # requested path, which may differ from destination_base_path - confirm against filetools
            persistent_dirs[kind] = create_unused_path(destination_base_path)

        replication_operation(abs_source_path, os.path.join(persistent_dirs[kind], destination_relative_path))

    # one single clock read, so that the date and time parts always belong to the same instant
    datetime_stamp = time.strftime("%Y%m%d-%H%M%S")

    errorlog_path = get_log_error_path(ec=easyconfig)
    if errorlog_path is not None:
        errorlog_path = os.path.join(errorlog_path, datetime_stamp)

        for log_file in logs:
            replicate('log', copy_file, log_file, errorlog_path, os.path.basename(log_file))

        # report the directory that was really used, falling back to the requested one if nothing was copied
        used_log_path = persistent_dirs.get('log') or errorlog_path
        print_msg(
            "Build logs of failed build copied to permanent storage: %s" % used_log_path,
            log=_log,
            silent=silent
        )

    errorartifact_path = get_artifact_error_path(ec=easyconfig)
    if errorartifact_path is not None:
        errorartifact_path = os.path.join(errorartifact_path, datetime_stamp)

        builddir = app.builddir
        if is_readable(builddir):
            replicate('artifact', copy_dir, builddir, errorartifact_path, app.get_relative_builddir_base_path())

        # report the directory that was really used, falling back to the requested one if nothing was copied
        used_artifact_path = persistent_dirs.get('artifact') or errorartifact_path
        print_msg(
            "Build artifacts of failed build copied to permanent storage: %s" % used_artifact_path,
            log=_log,
            silent=silent
        )
4300+
4301+
42014302
def build_and_install_one(ecdict, init_env):
42024303
"""
42034304
Build the software
@@ -4456,10 +4557,7 @@ def ensure_writable_log_dir(log_dir):
44564557
else:
44574558
dry_run_msg("(no ignored errors during dry run)\n", silent=silent)
44584559

4459-
if application_log:
4460-
# there may be multiple log files, or the file name may be different due to zipping
4461-
logs = glob.glob('%s*' % application_log)
4462-
print_msg("Results of the build can be found in the log file(s) %s" % ', '.join(logs), log=_log, silent=silent)
4560+
persist_failed_compilation_log_and_artifacts(success, application_log, silent, app, ecdict['ec'])
44634561

44644562
del app
44654563

easybuild/tools/config.py

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,9 +104,11 @@
104104
DEFAULT_MODULE_SYNTAX = 'Lua'
105105
DEFAULT_MODULES_TOOL = 'Lmod'
106106
DEFAULT_PATH_SUBDIRS = {
107+
'artifact_error_path': 'error_artifacts',
107108
'buildpath': 'build',
108109
'containerpath': 'containers',
109110
'installpath': '',
111+
'log_error_path': 'error_log',
110112
'packagepath': 'packages',
111113
'repositorypath': 'ebfiles_repo',
112114
'sourcepath': 'sources',
@@ -475,13 +477,15 @@ class ConfigurationVariables(BaseConfigurationVariables):
475477

476478
# list of known/required keys
477479
REQUIRED = [
480+
'artifact_error_path',
478481
'buildpath',
479482
'config',
480483
'containerpath',
481484
'installpath',
482485
'installpath_modules',
483486
'installpath_software',
484487
'job_backend',
488+
'log_error_path',
485489
'logfile_format',
486490
'moduleclasses',
487491
'module_naming_scheme',
@@ -843,6 +847,58 @@ def log_path(ec=None):
843847
return log_file_format(return_directory=True, ec=ec, date=date, timestamp=timestamp)
844848

845849

850+
def _get_error_path(config_key, ec):
    """
    Return the software-specific subdirectory of the path configured under the given key.

    :param config_key: name of the configuration variable that holds the base path
    :param ec: dict-like value with at least the keys 'name' and 'version' defined
    :return: '<base path>/<name>-<version>', or None if the configuration variable is empty
    :raises EasyBuildError: if ec is None, or if its 'name' or 'version' is None
    """
    base_path = ConfigurationVariables()[config_key]

    # an empty path yields None, before the easyconfig is looked at
    if not base_path:
        return None

    if ec is None:
        raise EasyBuildError("Easyconfig cannot be empty.")

    name, version = ec.get('name'), ec.get('version')

    if name is None:
        raise EasyBuildError("The 'name' key is not defined.")
    if version is None:
        raise EasyBuildError("The 'version' key is not defined.")

    return os.path.join(base_path, name + '-' + version)


def get_log_error_path(ec=None):
    """
    Return the 'log_error_path', the location where logs are copied in case of failure

    :param ec: dict-like value with at least the keys 'name' and 'version' defined
    :return: '<log_error_path>/<name>-<version>', or None if 'log_error_path' is empty
    """
    return _get_error_path('log_error_path', ec)


def get_artifact_error_path(ec=None):
    """
    Return the 'artifact_error_path', the location where build directories are copied in case of failure

    :param ec: dict-like value with at least the keys 'name' and 'version' defined
    :return: '<artifact_error_path>/<name>-<version>', or None if 'artifact_error_path' is empty
    """
    return _get_error_path('artifact_error_path', ec)
900+
901+
846902
def get_build_log_path():
847903
"""
848904
Return (temporary) directory for build log

easybuild/tools/options.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -559,6 +559,8 @@ def config_options(self):
559559
descr = ("Configuration options", "Configure EasyBuild behavior.")
560560

561561
opts = OrderedDict({
562+
'artifact-error-path': ("Location where artifacts are copied in case of an error",
563+
None, 'store', mk_full_default_path('artifact_error_path')),
562564
'avail-module-naming-schemes': ("Show all supported module naming schemes",
563565
None, 'store_true', False,),
564566
'avail-modules-tools': ("Show all supported module tools",
@@ -589,6 +591,8 @@ def config_options(self):
589591
None, 'store', None),
590592
'job-backend': ("Backend to use for submitting jobs", 'choice', 'store',
591593
DEFAULT_JOB_BACKEND, sorted(avail_job_backends().keys())),
594+
'log-error-path': ("Location where logs and artifacts are copied in case of an error",
595+
None, 'store', mk_full_default_path('log_error_path')),
592596
# purposely take a copy for the default logfile format
593597
'logfile-format': ("Directory name and format of the log file",
594598
'strtuple', 'store', DEFAULT_LOGFILE_FORMAT[:], {'metavar': 'DIR,FORMAT'}),
@@ -1153,8 +1157,8 @@ def _postprocess_config(self):
11531157
# - the <path> could also specify the location of a *remote* (Git( repository,
11541158
# which can be done in variety of formats (git@<url>:<org>/<repo>), https://<url>, etc.)
11551159
# (see also https://github.com/easybuilders/easybuild-framework/issues/3892);
1156-
path_opt_names = ['buildpath', 'containerpath', 'git_working_dirs_path', 'installpath',
1157-
'installpath_modules', 'installpath_software', 'prefix', 'packagepath',
1160+
path_opt_names = ['artifact_error_path', 'buildpath', 'containerpath', 'git_working_dirs_path', 'installpath',
1161+
'installpath_modules', 'installpath_software', 'log_error_path', 'prefix', 'packagepath',
11581162
'robot_paths', 'sourcepath']
11591163

11601164
for opt_name in path_opt_names:
@@ -1163,8 +1167,8 @@ def _postprocess_config(self):
11631167
if self.options.prefix is not None:
11641168
# prefix applies to all paths, and repository has to be reinitialised to take new repositorypath in account
11651169
# in the legacy-style configuration, repository is initialised in configuration file itself
1166-
path_opts = ['buildpath', 'containerpath', 'installpath', 'packagepath', 'repository', 'repositorypath',
1167-
'sourcepath']
1170+
path_opts = ['artifact_error_path', 'buildpath', 'containerpath', 'installpath', 'log_error_path',
1171+
'packagepath', 'repository', 'repositorypath', 'sourcepath']
11681172
for dest in path_opts:
11691173
if not self.options._action_taken.get(dest, False):
11701174
if dest == 'repository':

test/framework/options.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5283,7 +5283,16 @@ def test_prefix_option(self):
52835283

52845284
regex = re.compile(r"(?P<cfg_opt>\S*).*%s.*" % self.test_prefix, re.M)
52855285

5286-
expected = ['buildpath', 'containerpath', 'installpath', 'packagepath', 'prefix', 'repositorypath']
5286+
expected = [
5287+
'artifact-error-path',
5288+
'buildpath',
5289+
'containerpath',
5290+
'installpath',
5291+
'log-error-path',
5292+
'packagepath',
5293+
'prefix',
5294+
'repositorypath',
5295+
]
52875296
self.assertEqual(sorted(regex.findall(txt)), expected)
52885297

52895298
def test_dump_env_script(self):
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
--- a/toy-0.0.orig/toy.source 2014-03-06 18:48:16.000000000 +0100
2+
+++ b/toy-0.0/toy.source 2020-08-18 12:19:35.000000000 +0200
3+
@@ -2,6 +2,6 @@
4+
5+
int main(int argc, char* argv[]){
6+
7+
- printf("I'm a toy, and proud of it.\n");
8+
+ printf("I'm a toy, and proud of it.\n")
9+
return 0;
10+
}

0 commit comments

Comments
 (0)