Skip to content

Commit 4b29b70

Browse files
authored
Merge pull request #3291 from boegel/lock_cleanup
clean up locks when EasyBuild session is cancelled with signal like SIGTERM
2 parents f0164b5 + 27ab7e0 commit 4b29b70

File tree

7 files changed

+379
-66
lines changed

7 files changed

+379
-66
lines changed

easybuild/framework/easyblock.py

Lines changed: 11 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -71,12 +71,12 @@
7171
from easybuild.tools.config import install_path, log_path, package_path, source_paths
7272
from easybuild.tools.environment import restore_env, sanitize_env
7373
from easybuild.tools.filetools import CHECKSUM_TYPE_MD5, CHECKSUM_TYPE_SHA256
74-
from easybuild.tools.filetools import adjust_permissions, apply_patch, back_up_file
75-
from easybuild.tools.filetools import change_dir, convert_name, compute_checksum, copy_file, derive_alt_pypi_url
76-
from easybuild.tools.filetools import diff_files, download_file, encode_class_name, extract_file
74+
from easybuild.tools.filetools import adjust_permissions, apply_patch, back_up_file, change_dir, convert_name
75+
from easybuild.tools.filetools import compute_checksum, copy_file, check_lock, create_lock, derive_alt_pypi_url
76+
from easybuild.tools.filetools import diff_files, dir_contains_files, download_file, encode_class_name, extract_file
7777
from easybuild.tools.filetools import find_backup_name_candidate, get_source_tarball_from_git, is_alt_pypi_url
7878
from easybuild.tools.filetools import is_binary, is_sha256_checksum, mkdir, move_file, move_logs, read_file, remove_dir
79-
from easybuild.tools.filetools import remove_file, verify_checksum, weld_paths, write_file, dir_contains_files
79+
from easybuild.tools.filetools import remove_file, remove_lock, verify_checksum, weld_paths, write_file
8080
from easybuild.tools.hooks import BUILD_STEP, CLEANUP_STEP, CONFIGURE_STEP, EXTENSIONS_STEP, FETCH_STEP, INSTALL_STEP
8181
from easybuild.tools.hooks import MODULE_STEP, PACKAGE_STEP, PATCH_STEP, PERMISSIONS_STEP, POSTITER_STEP, POSTPROC_STEP
8282
from easybuild.tools.hooks import PREPARE_STEP, READY_STEP, SANITYCHECK_STEP, SOURCE_STEP, TEST_STEP, TESTCASES_STEP
@@ -3096,30 +3096,14 @@ def run_all_steps(self, run_test_cases):
30963096
if ignore_locks:
30973097
self.log.info("Ignoring locks...")
30983098
else:
3099-
locks_dir = build_option('locks_dir') or os.path.join(install_path('software'), '.locks')
3100-
lock_path = os.path.join(locks_dir, '%s.lock' % self.installdir.replace('/', '_'))
3101-
3102-
# if lock already exists, either abort or wait until it disappears
3103-
if os.path.exists(lock_path):
3104-
wait_on_lock = build_option('wait_on_lock')
3105-
if wait_on_lock:
3106-
while os.path.exists(lock_path):
3107-
print_msg("lock %s exists, waiting %d seconds..." % (lock_path, wait_on_lock),
3108-
silent=self.silent)
3109-
time.sleep(wait_on_lock)
3110-
else:
3111-
raise EasyBuildError("Lock %s already exists, aborting!", lock_path)
3099+
lock_name = self.installdir.replace('/', '_')
31123100

3113-
# create lock to avoid that another installation running in parallel messes things up;
3114-
# we use a directory as a lock, since that's atomically created
3115-
try:
3116-
mkdir(lock_path, parents=True)
3117-
except EasyBuildError as err:
3118-
# clean up the error message a bit, get rid of the "Failed to create directory" part + quotes
3119-
stripped_err = str(err).split(':', 1)[1].strip().replace("'", '').replace('"', '')
3120-
raise EasyBuildError("Failed to create lock %s: %s", lock_path, stripped_err)
3101+
# check if lock already exists;
3102+
# either aborts with an error or waits until it disappears (depends on --wait-on-lock)
3103+
check_lock(lock_name)
31213104

3122-
self.log.info("Lock created: %s", lock_path)
3105+
# create lock to avoid that another installation running in parallel messes things up
3106+
create_lock(lock_name)
31233107

31243108
try:
31253109
for (step_name, descr, step_methods, skippable) in steps:
@@ -3137,8 +3121,7 @@ def run_all_steps(self, run_test_cases):
31373121
pass
31383122
finally:
31393123
if not ignore_locks:
3140-
remove_dir(lock_path)
3141-
self.log.info("Lock removed: %s", lock_path)
3124+
remove_lock(lock_name)
31423125

31433126
# return True for successfull build (or stopped build)
31443127
return True

easybuild/main.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@
5757
from easybuild.tools.containers.common import containerize
5858
from easybuild.tools.docs import list_software
5959
from easybuild.tools.filetools import adjust_permissions, cleanup, copy_file, copy_files, dump_index, load_index
60-
from easybuild.tools.filetools import read_file, write_file
60+
from easybuild.tools.filetools import read_file, register_lock_cleanup_signal_handlers, write_file
6161
from easybuild.tools.github import check_github, close_pr, new_branch_github, find_easybuild_easyconfig
6262
from easybuild.tools.github import install_github_token, list_prs, new_pr, new_pr_from_branch, merge_pr
6363
from easybuild.tools.github import sync_branch_with_develop, sync_pr_with_develop, update_branch, update_pr
@@ -189,6 +189,9 @@ def main(args=None, logfile=None, do_build=None, testing=False, modtool=None):
189189
:param do_build: whether or not to actually perform the build
190190
:param testing: enable testing mode
191191
"""
192+
193+
register_lock_cleanup_signal_handlers()
194+
192195
# if $CDPATH is set, unset it, it'll only cause trouble...
193196
# see https://github.com/easybuilders/easybuild-framework/issues/2944
194197
if 'CDPATH' in os.environ:
@@ -518,5 +521,5 @@ def main(args=None, logfile=None, do_build=None, testing=False, modtool=None):
518521
main()
519522
except EasyBuildError as err:
520523
print_error(err.msg)
521-
except KeyboardInterrupt:
522-
print_error("Cancelled by user (keyboard interrupt)")
524+
except KeyboardInterrupt as err:
525+
print_error("Cancelled by user: %s" % err)

easybuild/tools/config.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,8 @@
102102
DEFAULT_PNS = 'EasyBuildPNS'
103103
DEFAULT_PREFIX = os.path.join(os.path.expanduser('~'), ".local", "easybuild")
104104
DEFAULT_REPOSITORY = 'FileRepository'
105+
DEFAULT_WAIT_ON_LOCK_INTERVAL = 60
106+
DEFAULT_WAIT_ON_LOCK_LIMIT = 0
105107

106108
EBROOT_ENV_VAR_ACTIONS = [ERROR, IGNORE, UNSET, WARN]
107109
LOADED_MODULES_ACTIONS = [ERROR, IGNORE, PURGE, UNLOAD, WARN]
@@ -211,6 +213,7 @@ def mk_full_default_path(name, prefix=DEFAULT_PREFIX):
211213
'subdir_user_modules',
212214
'test_report_env_filter',
213215
'testoutput',
216+
'wait_on_lock',
214217
'umask',
215218
'zip_logs',
216219
],
@@ -256,7 +259,7 @@ def mk_full_default_path(name, prefix=DEFAULT_PREFIX):
256259
'use_f90cache',
257260
'use_existing_modules',
258261
'set_default_module',
259-
'wait_on_lock',
262+
'wait_on_lock_limit',
260263
],
261264
True: [
262265
'cleanup_builddir',
@@ -305,6 +308,9 @@ def mk_full_default_path(name, prefix=DEFAULT_PREFIX):
305308
DEFAULT_ALLOW_LOADED_MODULES: [
306309
'allow_loaded_modules',
307310
],
311+
DEFAULT_WAIT_ON_LOCK_INTERVAL: [
312+
'wait_on_lock_interval',
313+
],
308314
}
309315
# build option that do not have a perfectly matching command line option
310316
BUILD_OPTIONS_OTHER = {

easybuild/tools/filetools.py

Lines changed: 130 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@
4848
import os
4949
import re
5050
import shutil
51+
import signal
5152
import stat
5253
import sys
5354
import tempfile
@@ -59,7 +60,7 @@
5960
from easybuild.tools import run
6061
# import build_log must stay, to use of EasyBuildLog
6162
from easybuild.tools.build_log import EasyBuildError, dry_run_msg, print_msg, print_warning
62-
from easybuild.tools.config import GENERIC_EASYBLOCK_PKG, build_option
63+
from easybuild.tools.config import DEFAULT_WAIT_ON_LOCK_INTERVAL, GENERIC_EASYBLOCK_PKG, build_option, install_path
6364
from easybuild.tools.py2vs3 import std_urllib, string_type
6465
from easybuild.tools.utilities import nub, remove_unwanted_chars
6566

@@ -155,6 +156,9 @@
155156
'.tar.z': "tar xzf %(filepath)s",
156157
}
157158

159+
# global set of names of locks that were created in this session
160+
global_lock_names = set()
161+
158162

159163
class ZlibChecksum(object):
160164
"""
@@ -1513,6 +1517,131 @@ def mkdir(path, parents=False, set_gid=None, sticky=None):
15131517
_log.debug("Not creating existing path %s" % path)
15141518

15151519

1520+
def det_lock_path(lock_name):
    """
    Determine full path for lock with specified name.

    The lock lives in the directory given by the 'locks_dir' build option;
    if that option is not set, the '.locks' subdirectory of the software install path is used.

    :param lock_name: name of the lock (without the '.lock' suffix)
    :return: path to the lock, i.e. <locks dir>/<lock_name>.lock
    """
    locks_parent = build_option('locks_dir')
    if not locks_parent:
        # no custom location configured, fall back to default location for locks
        locks_parent = os.path.join(install_path('software'), '.locks')

    lock_basename = lock_name + '.lock'
    return os.path.join(locks_parent, lock_basename)
1526+
1527+
1528+
def create_lock(lock_name):
    """
    Create lock with specified name.

    The lock is a directory located at the path returned by det_lock_path;
    once created, the lock name is recorded in global_lock_names.

    :param lock_name: name of the lock to create
    :raises EasyBuildError: if creating the lock directory failed
    """
    lock_path = det_lock_path(lock_name)
    _log.info("Creating lock at %s...", lock_path)
    try:
        # we use a directory as a lock, since that's atomically created
        # NOTE(review): mkdir appears to silently skip a path that already exists, in which case
        # a lock created by another session would be registered here as ours - confirm
        mkdir(lock_path, parents=True)
        global_lock_names.add(lock_name)
    except EasyBuildError as err:
        # clean up the error message a bit, get rid of the "Failed to create directory" part + quotes;
        # if the message has no ':' separator the full message is used instead,
        # so the actual problem is never masked by an IndexError
        full_err = str(err)
        _, separator, details = full_err.partition(':')
        if not separator:
            details = full_err
        stripped_err = details.strip().replace("'", '').replace('"', '')
        raise EasyBuildError("Failed to create lock %s: %s", lock_path, stripped_err)
    _log.info("Lock created: %s", lock_path)
1542+
1543+
1544+
def check_lock(lock_name):
    """
    Check whether a lock with specified name already exists.

    If it exists, either wait until it's released, or raise an error
    (depending on the --wait-on-lock-limit and --wait-on-lock-interval configuration options,
    and on the deprecated --wait-on-lock configuration option).

    :param lock_name: name of the lock to check (path is determined via det_lock_path)
    :raises EasyBuildError: if the lock exists and waiting is disabled (wait limit is 0),
                            or if the lock still exists when the wait limit is reached
    """
    lock_path = det_lock_path(lock_name)
    if not os.path.exists(lock_path):
        _log.info("Lock %s does not exist", lock_path)
        return

    _log.info("Lock %s exists!", lock_path)

    wait_interval = build_option('wait_on_lock_interval')
    wait_limit = build_option('wait_on_lock_limit')

    # --wait-on-lock is deprecated, should use --wait-on-lock-limit and --wait-on-lock-interval instead
    wait_on_lock = build_option('wait_on_lock')
    if wait_on_lock is not None:
        depr_msg = "Use of --wait-on-lock is deprecated, use --wait-on-lock-limit and --wait-on-lock-interval"
        _log.deprecated(depr_msg, '5.0')

        # an explicit value of 0 for --wait-on-lock means "no waiting, exit with an error",
        # so only a non-zero value is allowed to alter the waiting interval/limit
        if wait_on_lock > 0:
            # if --wait-on-lock-interval has default value and --wait-on-lock is specified too, the latter wins
            # (required for backwards compatibility)
            if wait_interval == DEFAULT_WAIT_ON_LOCK_INTERVAL:
                wait_interval = wait_on_lock

            # if --wait-on-lock-limit is not specified we need to wait indefinitely if --wait-on-lock is specified,
            # since the original semantics of --wait-on-lock was that it specified the waiting time interval (no limit)
            if not wait_limit:
                wait_limit = -1

    # wait limit could be zero (no waiting), -1 (no waiting limit) or non-zero value (waiting limit in seconds)
    if wait_limit == 0:
        raise EasyBuildError("Lock %s already exists, aborting!", lock_path)

    wait_time = 0
    while os.path.exists(lock_path) and (wait_limit == -1 or wait_time < wait_limit):
        print_msg("lock %s exists, waiting %d seconds..." % (lock_path, wait_interval),
                  silent=build_option('silent'))
        time.sleep(wait_interval)
        wait_time += wait_interval

    if os.path.exists(lock_path) and wait_limit != -1 and wait_time >= wait_limit:
        error_msg = "Maximum wait time for lock %s to be released reached: %s sec >= %s sec"
        raise EasyBuildError(error_msg, lock_path, wait_time, wait_limit)
    else:
        _log.info("Lock %s was released!", lock_path)
1592+
1593+
1594+
def remove_lock(lock_name):
    """
    Remove lock with specified name.

    The lock directory is removed, and the lock name is dropped from global_lock_names
    (if it is in there).

    :param lock_name: name of the lock to remove
    """
    lock_path = det_lock_path(lock_name)
    _log.info("Removing lock %s...", lock_path)
    remove_dir(lock_path)

    # discard is a no-op for lock names that are not in the set
    global_lock_names.discard(lock_name)

    _log.info("Lock removed: %s", lock_path)
1604+
1605+
1606+
def clean_up_locks():
    """
    Clean up all still existing locks that were created in this session.
    """
    # work on a snapshot of the lock names, since remove_lock changes global_lock_names while we go
    leftover_locks = tuple(global_lock_names)
    for leftover_lock in leftover_locks:
        remove_lock(leftover_lock)
1612+
1613+
1614+
def clean_up_locks_signal_handler(signum, frame):
    """
    Signal handler, cleans up locks & exits with received signal number.

    :param signum: number of the signal that was received
    :param frame: current stack frame (not used)
    :raises KeyboardInterrupt: if the received signal is SIGINT
    :raises SystemExit: for any other signal, using the signal number as exit code
    """
    silent = build_option('silent')
    if not silent:
        lock_names = ', '.join(global_lock_names)
        print_warning("signal received (%s), cleaning up locks (%s)..." % (signum, lock_names))

    clean_up_locks()

    if signum != signal.SIGINT:
        sys.exit(signum)

    # by default, a KeyboardInterrupt is raised with SIGINT, so keep doing so
    raise KeyboardInterrupt("keyboard interrupt")
1628+
1629+
1630+
def register_lock_cleanup_signal_handlers():
    """
    Register signal handler for signals that cancel the current EasyBuild session,
    so we can clean up the locks that were created first.

    The same handler (clean_up_locks_signal_handler) is installed for each of the signals listed below.
    """
    cancel_signals = (
        signal.SIGABRT,
        signal.SIGINT,  # Ctrl-C
        signal.SIGTERM,  # signal 15, soft kill (like when Slurm job is cancelled or received timeout)
        signal.SIGQUIT,  # kinda like Ctrl-C
    )
    for cancel_signal in cancel_signals:
        signal.signal(cancel_signal, clean_up_locks_signal_handler)
1643+
1644+
15161645
def expand_glob_paths(glob_paths):
15171646
"""Expand specified glob paths to a list of unique non-glob paths to only files."""
15181647
paths = []

easybuild/tools/options.py

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -64,10 +64,10 @@
6464
from easybuild.tools.config import DEFAULT_JOB_BACKEND, DEFAULT_LOGFILE_FORMAT, DEFAULT_MAX_FAIL_RATIO_PERMS
6565
from easybuild.tools.config import DEFAULT_MNS, DEFAULT_MODULE_SYNTAX, DEFAULT_MODULES_TOOL, DEFAULT_MODULECLASSES
6666
from easybuild.tools.config import DEFAULT_PATH_SUBDIRS, DEFAULT_PKG_RELEASE, DEFAULT_PKG_TOOL, DEFAULT_PKG_TYPE
67-
from easybuild.tools.config import DEFAULT_PNS, DEFAULT_PREFIX, DEFAULT_REPOSITORY, EBROOT_ENV_VAR_ACTIONS, ERROR
68-
from easybuild.tools.config import FORCE_DOWNLOAD_CHOICES, GENERAL_CLASS, IGNORE, JOB_DEPS_TYPE_ABORT_ON_ERROR
69-
from easybuild.tools.config import JOB_DEPS_TYPE_ALWAYS_RUN, LOADED_MODULES_ACTIONS, WARN
70-
from easybuild.tools.config import LOCAL_VAR_NAMING_CHECK_WARN, LOCAL_VAR_NAMING_CHECKS
67+
from easybuild.tools.config import DEFAULT_PNS, DEFAULT_PREFIX, DEFAULT_REPOSITORY, DEFAULT_WAIT_ON_LOCK_INTERVAL
68+
from easybuild.tools.config import DEFAULT_WAIT_ON_LOCK_LIMIT, EBROOT_ENV_VAR_ACTIONS, ERROR, FORCE_DOWNLOAD_CHOICES
69+
from easybuild.tools.config import GENERAL_CLASS, IGNORE, JOB_DEPS_TYPE_ABORT_ON_ERROR, JOB_DEPS_TYPE_ALWAYS_RUN
70+
from easybuild.tools.config import LOADED_MODULES_ACTIONS, LOCAL_VAR_NAMING_CHECK_WARN, LOCAL_VAR_NAMING_CHECKS, WARN
7171
from easybuild.tools.config import get_pretend_installpath, init, init_build_options, mk_full_default_path
7272
from easybuild.tools.configobj import ConfigObj, ConfigObjError
7373
from easybuild.tools.docs import FORMAT_TXT, FORMAT_RST
@@ -76,9 +76,8 @@
7676
from easybuild.tools.docs import list_easyblocks, list_toolchains
7777
from easybuild.tools.environment import restore_env, unset_env_vars
7878
from easybuild.tools.filetools import CHECKSUM_TYPE_SHA256, CHECKSUM_TYPES, install_fake_vsc, move_file, which
79-
from easybuild.tools.github import GITHUB_EB_MAIN, GITHUB_EASYCONFIGS_REPO
80-
from easybuild.tools.github import GITHUB_PR_DIRECTION_DESC, GITHUB_PR_ORDER_CREATED, GITHUB_PR_STATE_OPEN
81-
from easybuild.tools.github import GITHUB_PR_STATES, GITHUB_PR_ORDERS, GITHUB_PR_DIRECTIONS
79+
from easybuild.tools.github import GITHUB_EB_MAIN, GITHUB_PR_DIRECTION_DESC, GITHUB_PR_ORDER_CREATED
80+
from easybuild.tools.github import GITHUB_PR_STATE_OPEN, GITHUB_PR_STATES, GITHUB_PR_ORDERS, GITHUB_PR_DIRECTIONS
8281
from easybuild.tools.github import HAVE_GITHUB_API, HAVE_KEYRING, VALID_CLOSE_PR_REASONS
8382
from easybuild.tools.github import fetch_easyblocks_from_pr, fetch_github_token
8483
from easybuild.tools.hooks import KNOWN_HOOKS
@@ -442,8 +441,15 @@ def override_options(self):
442441
None, 'store_true', False),
443442
'verify-easyconfig-filenames': ("Verify whether filename of specified easyconfigs matches with contents",
444443
None, 'store_true', False),
445-
'wait-on-lock': ("Wait interval (in seconds) to use when waiting for existing lock to be removed "
446-
"(0: implies no waiting, but exiting with an error)", int, 'store', 0),
444+
'wait-on-lock': ("Wait for lock to be released; 0 implies no waiting (exit with an error if the lock "
445+
"already exists), non-zero value specified waiting interval [DEPRECATED: "
446+
"use --wait-on-lock-interval and --wait-on-lock-limit instead]",
447+
int, 'store_or_None', None),
448+
'wait-on-lock-interval': ("Wait interval (in seconds) to use when waiting for existing lock to be removed",
449+
int, 'store', DEFAULT_WAIT_ON_LOCK_INTERVAL),
450+
'wait-on-lock-limit': ("Maximum amount of time (in seconds) to wait until lock is released (0 means no "
451+
"waiting at all, exit with error; -1 means no waiting limit, keep waiting)",
452+
int, 'store', DEFAULT_WAIT_ON_LOCK_LIMIT),
447453
'zip-logs': ("Zip logs that are copied to install directory, using specified command",
448454
None, 'store_or_None', 'gzip'),
449455

0 commit comments

Comments
 (0)