Skip to content

Commit 2655a07

Browse files
authored
Merge pull request #2 from boegel/cuda-device-code-sanity-check
rename `--cuda-sanity-check-error-on-fail` to `--cuda-sanity-check-error-on-failed-checks` + improve help text for `--cuda-sanity-check-*` configuration options
2 parents b6eb063 + 22858ec commit 2655a07

File tree

5 files changed

+46
-43
lines changed

5 files changed

+46
-43
lines changed

easybuild/framework/easyblock.py

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -3398,7 +3398,7 @@ def sanity_check_cuda(self, cuda_dirs=None):
33983398

33993399
fail_msgs = []
34003400
cfg_ccs = build_option('cuda_compute_capabilities') or self.cfg.get('cuda_compute_capabilities', None)
3401-
ignore_failures = not build_option('cuda_sanity_check_error_on_fail')
3401+
ignore_failures = not build_option('cuda_sanity_check_error_on_failed_checks')
34023402
strict_cc_check = build_option('cuda_sanity_check_strict')
34033403
accept_ptx_as_devcode = build_option('cuda_sanity_check_accept_ptx_as_devcode')
34043404
accept_missing_ptx = build_option('cuda_sanity_check_accept_missing_ptx')
@@ -3484,13 +3484,13 @@ def format_file_list(files_list):
34843484
missing_devcodes = list(set(cfg_ccs) - set(found_dev_code_ccs))
34853485

34863486
# There are two reasons for ignoring failures:
3487-
# - We are running with --disable-cuda-sanity-check-error-on-fail
3487+
# - We are running with --disable-cuda-sanity-check-error-on-failed-checks
34883488
# - The specific {path} is on the cuda_sanity_ignore_files list in the easyconfig
34893489
# In case we run with both, we'll just report that we're running with
3490-
# --disable-cuda-sanity-check-error-on-fail
3490+
# --disable-cuda-sanity-check-error-on-failed-checks
34913491
if ignore_failures:
34923492
ignore_msg = f"Failure for {path} will be ignored since we are not running with "
3493-
ignore_msg += "--cuda-sanity-check-error-on-fail"
3493+
ignore_msg += "--cuda-sanity-check-error-on-failed-checks"
34943494
else:
34953495
ignore_msg = f"This failure will be ignored as '{path}' is listed in "
34963496
ignore_msg += "'cuda_sanity_ignore_files'."
@@ -3511,8 +3511,8 @@ def format_file_list(files_list):
35113511
if strict_cc_check:
35123512
# cuda-sanity-check-strict, so no additional compute capabilities allowed
35133513
if path in ignore_file_list or ignore_failures:
3514-
# No error, because either path is on the cuda_sanity_ignore_files list in the
3515-
# easyconfig, or we are running with --disable-cuda-sanity-check-error-on-fail
3514+
# No error, either path is in cuda_sanity_ignore_files list in easyconfig,
3515+
# or we are running with --disable-cuda-sanity-check-error-on-failed-checks
35163516
files_additional_devcode_ignored.append(os.path.relpath(path, self.installdir))
35173517
fail_msg += ignore_msg
35183518
else:
@@ -3551,7 +3551,7 @@ def format_file_list(files_list):
35513551
if path in ignore_file_list or ignore_failures:
35523552
# No error, because either path is on the cuda_sanity_ignore_files list in
35533553
# the easyconfig, or we are running with
3554-
# --disable-cuda-sanity-check-error-on-fail
3554+
# --disable-cuda-sanity-check-error-on-failed-checks
35553555
files_missing_devcode_ignored.append(os.path.relpath(path, self.installdir))
35563556
fail_msg += ignore_msg
35573557
else:
@@ -3562,8 +3562,8 @@ def format_file_list(files_list):
35623562
# This is considered a failure
35633563
files_missing_devcode.append(os.path.relpath(path, self.installdir))
35643564
if path in ignore_file_list or ignore_failures:
3565-
# No error, because either path is on the cuda_sanity_ignore_files list in the
3566-
# easyconfig, or we are running with --disable-cuda-sanity-check-error-on-fail
3565+
# No error, either path is in cuda_sanity_ignore_files list in easyconfig,
3566+
# or we are running with --disable-cuda-sanity-check-error-on-failed-checks
35673567
files_missing_devcode_ignored.append(os.path.relpath(path, self.installdir))
35683568
fail_msg += ignore_msg
35693569
else:
@@ -3585,7 +3585,7 @@ def format_file_list(files_list):
35853585
fail_msg += "(PTX architectures supported in that file: %s). "
35863586
if path in ignore_file_list or ignore_failures:
35873587
# No error, because either path is on the cuda_sanity_ignore_files list in the
3588-
# easyconfig, or we are running with --disable-cuda-sanity-check-error-on-fail
3588+
# easyconfig, or we are running with --disable-cuda-sanity-check-error-on-failed-checks
35893589
files_missing_ptx_ignored.append(os.path.relpath(path, self.installdir))
35903590
fail_msg += ignore_msg
35913591
self.log.warning(fail_msg, highest_cc[0], path, found_ptx_ccs)
@@ -3616,7 +3616,7 @@ def trace_and_log(msg):
36163616
elif ignore_failures:
36173617
msg = f"Number of files missing one or more CUDA Compute Capabilities: {len(files_missing_devcode)}"
36183618
trace_and_log(msg)
3619-
trace_and_log("(not running with --cuda-sanity-check-error-on-fail, so not considered failures)")
3619+
trace_and_log("(not running with --cuda-sanity-check-error-on-failed-checks, so not considered failures)")
36203620
else:
36213621
msg = f"Number of files missing one or more CUDA Compute Capabilities: {len(files_missing_devcode)}"
36223622
msg += f" (ignored: {len(files_missing_devcode_ignored)}, "
@@ -3633,7 +3633,7 @@ def trace_and_log(msg):
36333633
msg = "Number of files with device code for more CUDA Compute Capabilities than requested: "
36343634
msg += f"{len(files_additional_devcode)}"
36353635
trace_and_log(msg)
3636-
trace_and_log("(not running with --cuda-sanity-check-error-on-fail, so not considered failures)")
3636+
trace_and_log("(not running with --cuda-sanity-check-error-on-failed-checks, so not considered failures)")
36373637
elif strict_cc_check:
36383638
msg = "Number of files with device code for more CUDA Compute Capabilities than requested: "
36393639
msg += f"{len(files_additional_devcode)} (ignored: {len(files_additional_devcode_ignored)}, "
@@ -3650,7 +3650,7 @@ def trace_and_log(msg):
36503650
msg = "Number of files missing PTX code for the highest configured CUDA Compute Capability: "
36513651
msg += f"{len(files_missing_ptx)}"
36523652
trace_and_log(msg)
3653-
trace_and_log("(not running with --cuda-sanity-check-error-on-fail, so not considered failures)")
3653+
trace_and_log("(not running with --cuda-sanity-check-error-on-failed-checks, so not considered failures)")
36543654
elif accept_missing_ptx:
36553655
msg = "Number of files missing PTX code for the highest configured CUDA Compute Capability: "
36563656
msg += f"{len(files_missing_ptx)}"

easybuild/tools/config.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -307,7 +307,7 @@ def mk_full_default_path(name, prefix=DEFAULT_PREFIX):
307307
'container_build_image',
308308
'cuda_sanity_check_accept_ptx_as_devcode',
309309
'cuda_sanity_check_accept_missing_ptx',
310-
'cuda_sanity_check_error_on_fail',
310+
'cuda_sanity_check_error_on_failed_checks',
311311
'cuda_sanity_check_strict',
312312
'debug',
313313
'debug_lmod',

easybuild/tools/options.py

Lines changed: 15 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -406,30 +406,31 @@ def override_options(self):
406406
"--cuda-sanity-check-accept-ptx-as-devcode, "
407407
"or made more stringent using --cuda-sanity-check-strict.",
408408
'strlist', 'extend', None),
409-
'cuda-sanity-check-accept-missing-ptx': ("CUDA sanity check also passes if PTX code for the highest "
409+
'cuda-sanity-check-accept-missing-ptx': ("Relax CUDA sanity check to accept that PTX code for the highest "
410410
"requested CUDA compute capability is not present (but will "
411411
"print a warning)",
412412
None, 'store_true', False),
413-
'cuda-sanity-check-accept-ptx-as-devcode': ("CUDA sanity check also passes if requested device code is "
414-
"not present, as long as PTX code is present that can be "
415-
"JIT-compiled for each target in --cuda-compute-capabilities "
416-
"E.g. if --cuda-compute-capabilities=8.0 and a binary is "
417-
"found in the installation that does not have device code for "
418-
"8.0, but it does have PTX code for 7.0, the sanity check "
419-
"will pass if, and only if, this option is True. "
413+
'cuda-sanity-check-accept-ptx-as-devcode': ("Relax CUDA sanity check to accept that requested device code "
414+
"is not present, as long as PTX code is present that can be "
415+
"JIT-compiled for each target in --cuda-compute-capabilities. "
416+
"For example, if --cuda-compute-capabilities=8.0 and a binary "
417+
"is found in the installation that does not have device code "
418+
"for 8.0, but it does have PTX code for 7.0, the sanity check "
419+
"will pass if, and only if, this option is enabled. "
420420
"Note that JIT-compiling means the binary will work on the "
421421
"requested architecture, but it is not necessarily as well "
422422
"optimized as when actual device code is present for the "
423423
"requested architecture ",
424424
None, 'store_true', False),
425-
'cuda-sanity-check-error-on-fail': ("If True, failures in the CUDA sanity check will produce an error. "
426-
"If False, the CUDA sanity check will be performed, and failures will "
427-
"be reported, but they will not result in an error",
428-
None, 'store_true', False),
425+
'cuda-sanity-check-error-on-failed-checks': ("If enabled, failures in the CUDA sanity check will produce "
426+
"an error. If disabled, the CUDA sanity check will be "
427+
"performed and failures will be reported through warnings, "
428+
"but they will not result in an error",
429+
None, 'store_true', False),
429430
'cuda-sanity-check-strict': ("Perform strict CUDA sanity check. Without this option, the CUDA sanity "
430431
"check will fail if the CUDA binaries don't contain code for (at least) "
431-
"all compute capabilities defined in --cude-compute-capabilities, but will "
432-
"accept if code for additional compute capabilities is present. "
432+
"all compute capabilities defined in --cuda-compute-capabilities, "
433+
"but will accept if code for additional compute capabilities is present. "
433434
"With this setting, the sanity check will also fail if code is present for "
434435
"more compute capabilities than defined in --cuda-compute-capabilities.",
435436
None, 'store_true', False),

easybuild/tools/systemtools.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,10 +27,12 @@
2727
2828
Authors:
2929
30+
* Kenneth Hoste (Ghent University)
3031
* Jens Timmerman (Ghent University)
3132
* Ward Poelmans (Ghent University)
3233
* Jasper Grimm (UoY)
3334
* Jan Andre Reuter (Forschungszentrum Juelich GmbH)
35+
* Caspar van Leeuwen (SURF)
3436
"""
3537
import csv
3638
import ctypes

0 commit comments

Comments
 (0)