Skip to content

Commit 5cef2e0

Browse files
authored
Merge pull request #4 from boegel/cuda-device-code-sanity-check
extend `test_toy_cuda_sanity_check` to also verify that shared libraries under `lib/python*/site-packages` are covered by the CUDA sanity check
2 parents e73900c + 7e92cd5 commit 5cef2e0

File tree

1 file changed

+69
-50
lines changed

1 file changed

+69
-50
lines changed

test/framework/toy_build.py

Lines changed: 69 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -3121,9 +3121,22 @@ def test_toy_cuda_sanity_check(self):
31213121
topdir = os.path.dirname(os.path.abspath(__file__))
31223122
toy_ec = os.path.join(topdir, 'easyconfigs', 'test_ecs', 't', 'toy', 'toy-0.0.eb')
31233123

3124+
toy_bin = '%(installdir)s/bin/toy'
3125+
py_site_pkgs = '%(installdir)s/lib/python3.9/site-packages'
3126+
shlib_ext = get_shared_lib_ext()
3127+
31243128
toy_ec_cuda = os.path.join(self.test_prefix, 'toy-0.0-cuda.eb')
3125-
write_file(toy_ec_cuda, read_file(toy_ec) + "\ndependencies = [('CUDA', '5.5.22', '', SYSTEM)]")
3126-
toy_ec = toy_ec_cuda
3129+
toy_ec_txt = read_file(toy_ec)
3130+
toy_ec_txt += '\n' + '\n'.join([
3131+
"dependencies = [('CUDA', '5.5.22', '', SYSTEM)]",
3132+
"postinstallcmds += [",
3133+
" 'mkdir -p %(installdir)s/lib/python3.9/site-packages/plugins',",
3134+
# copy 'toy' binary, must be something that passes 'file' check in get_cuda_object_dump_raw
3135+
" 'cp %s %s/pytoy-cuda.cpython-39-x86_64-linux-gnu.%s'," % (toy_bin, py_site_pkgs, shlib_ext),
3136+
" 'cp %s %s/plugins/libpytoy_cuda.%s'," % (toy_bin, py_site_pkgs, shlib_ext),
3137+
"]",
3138+
])
3139+
write_file(toy_ec_cuda, toy_ec_txt)
31273140

31283141
# Create mock cuobjdump
31293142
# First, let's define sections of echo commands for cuobjdump for various scenarios
@@ -3246,10 +3259,10 @@ def test_toy_cuda_sanity_check(self):
32463259
# If either of these fails its assert, print an informative, standardized message
32473260
def assert_regex(pattern, log, stdout=None):
32483261
regex = re.compile(pattern, re.M)
3249-
msg = "Pattern %s not found in full build log: %s" % (pattern, log)
3262+
msg = "Pattern '%s' not found in full build log: %s" % (pattern, log)
32503263
self.assertTrue(regex.search(log), msg)
32513264
if stdout is not None:
3252-
msg2 = "Pattern %s not found in standard output: %s" % (pattern, stdout)
3265+
msg2 = "Pattern '%s' not found in standard output: %s" % (pattern, stdout)
32533266
self.assertTrue(regex.search(stdout), msg2)
32543267

32553268
def assert_cuda_report(missing_cc, additional_cc, missing_ptx, log, stdout=None, missing_cc_but_ptx=None,
@@ -3281,9 +3294,9 @@ def assert_cuda_report(missing_cc, additional_cc, missing_ptx, log, stdout=None,
32813294
args = ['--cuda-compute-capabilities=8.0']
32823295
# We expect this to pass, so no need to check errors
32833296
with self.mocked_stdout_stderr():
3284-
outtxt = self._test_toy_build(ec_file=toy_ec, extra_args=args, raise_error=True)
3297+
outtxt = self._test_toy_build(ec_file=toy_ec_cuda, extra_args=args, raise_error=True)
32853298
stdout = self.get_stdout()
3286-
assert_cuda_report(missing_cc=0, additional_cc=0, missing_ptx=1, log=outtxt, stdout=stdout)
3299+
assert_cuda_report(missing_cc=0, additional_cc=0, missing_ptx=3, log=outtxt, stdout=stdout)
32873300

32883301
# Test case 1b: test with default options, --cuda-compute-capabilities=8.0 and a binary that contains
32893302
# 7.0 and 9.0 device code and 8.0 PTX code.
@@ -3300,29 +3313,29 @@ def assert_cuda_report(missing_cc, additional_cc, missing_ptx, log, stdout=None,
33003313
args = ['--cuda-compute-capabilities=8.0']
33013314
# We expect this to pass, so no need to check errors
33023315
with self.mocked_stdout_stderr():
3303-
outtxt = self._test_toy_build(ec_file=toy_ec, extra_args=args, raise_error=True)
3316+
outtxt = self._test_toy_build(ec_file=toy_ec_cuda, extra_args=args, raise_error=True)
33043317
stdout = self.get_stdout()
3305-
msg = "Pattern %s not found in full build log: %s" % (device_additional_70_90_code_regex.pattern, outtxt)
3318+
msg = "Pattern '%s' not found in full build log: %s" % (device_additional_70_90_code_regex.pattern, outtxt)
33063319
self.assertTrue(device_additional_70_90_code_regex.search(outtxt), msg)
3307-
msg = "Pattern %s not found in full build log: %s" % (device_missing_80_code_regex.pattern, outtxt)
3320+
msg = "Pattern '%s' not found in full build log: %s" % (device_missing_80_code_regex.pattern, outtxt)
33083321
self.assertTrue(device_missing_80_code_regex.search(outtxt), msg)
3309-
assert_cuda_report(missing_cc=1, additional_cc=1, missing_ptx=0, log=outtxt, stdout=stdout)
3322+
assert_cuda_report(missing_cc=3, additional_cc=3, missing_ptx=0, log=outtxt, stdout=stdout)
33103323

33113324
# Test case 2: same as Test case 1, but add --cuda-sanity-check-error-on-failed-checks
33123325
# This is expected to fail since there is missing device code for CC80
33133326
args = ['--cuda-compute-capabilities=8.0', '--cuda-sanity-check-error-on-failed-checks']
33143327
# We expect this to fail, so first check error, then run again to check output
3315-
error_pattern = r"Files missing CUDA device code: 1."
3328+
error_pattern = r"Files missing CUDA device code: 3."
33163329
with self.mocked_stdout_stderr():
3317-
self.assertErrorRegex(EasyBuildError, error_pattern, self._test_toy_build, ec_file=toy_ec,
3330+
self.assertErrorRegex(EasyBuildError, error_pattern, self._test_toy_build, ec_file=toy_ec_cuda,
33183331
extra_args=args, raise_error=True)
3319-
outtxt = self._test_toy_build(ec_file=toy_ec, extra_args=args, raise_error=False, verify=False)
3332+
outtxt = self._test_toy_build(ec_file=toy_ec_cuda, extra_args=args, raise_error=False, verify=False)
33203333
stdout = self.get_stdout()
3321-
msg = "Pattern %s not found in full build log: %s" % (device_additional_70_90_code_regex.pattern, outtxt)
3334+
msg = "Pattern '%s' not found in full build log: %s" % (device_additional_70_90_code_regex.pattern, outtxt)
33223335
self.assertTrue(device_additional_70_90_code_regex.search(outtxt), msg)
3323-
msg = "Pattern %s not found in full build log: %s" % (device_missing_80_code_regex.pattern, outtxt)
3336+
msg = "Pattern '%s' not found in full build log: %s" % (device_missing_80_code_regex.pattern, outtxt)
33243337
self.assertTrue(device_missing_80_code_regex.search(outtxt), msg)
3325-
assert_cuda_report(missing_cc=1, additional_cc=1, missing_ptx=0, log=outtxt, stdout=stdout)
3338+
assert_cuda_report(missing_cc=3, additional_cc=3, missing_ptx=0, log=outtxt, stdout=stdout)
33263339

33273340
# Test case 3: same as Test case 2, but add --cuda-sanity-check-accept-ptx-as-devcode
33283341
# This is expected to succeed, since now the PTX code for CC80 will be accepted as
@@ -3332,28 +3345,28 @@ def assert_cuda_report(missing_cc, additional_cc, missing_ptx, log, stdout=None,
33323345
'--cuda-sanity-check-accept-ptx-as-devcode']
33333346
# We expect this to pass, so no need to check errors
33343347
with self.mocked_stdout_stderr():
3335-
outtxt = self._test_toy_build(ec_file=toy_ec, extra_args=args, raise_error=True)
3348+
outtxt = self._test_toy_build(ec_file=toy_ec_cuda, extra_args=args, raise_error=True)
33363349
stdout = self.get_stdout()
3337-
msg = "Pattern %s not found in full build log: %s" % (device_additional_70_90_code_regex.pattern, outtxt)
3350+
msg = "Pattern '%s' not found in full build log: %s" % (device_additional_70_90_code_regex.pattern, outtxt)
33383351
self.assertTrue(device_additional_70_90_code_regex.search(outtxt), msg)
3339-
msg = "Pattern %s not found in full build log: %s" % (device_missing_80_code_regex.pattern, outtxt)
3352+
msg = "Pattern '%s' not found in full build log: %s" % (device_missing_80_code_regex.pattern, outtxt)
33403353
self.assertTrue(device_missing_80_code_regex.search(outtxt), msg)
3341-
assert_cuda_report(missing_cc=0, additional_cc=1, missing_ptx=0, log=outtxt, stdout=stdout,
3342-
missing_cc_but_ptx=1)
3354+
assert_cuda_report(missing_cc=0, additional_cc=3, missing_ptx=0, log=outtxt, stdout=stdout,
3355+
missing_cc_but_ptx=3)
33433356

33443357
# Test case 4: same as Test case 2, but run with --cuda-compute-capabilities=9.0
33453358
# This is expected to fail: device code is present, but PTX code for the highest CC (9.0) is missing
33463359
args = ['--cuda-compute-capabilities=9.0', '--cuda-sanity-check-error-on-failed-checks']
33473360
# We expect this to fail, so first check error, then run again to check output
3348-
error_pattern = r"Files missing CUDA PTX code: 1"
3361+
error_pattern = r"Files missing CUDA PTX code: 3"
33493362
with self.mocked_stdout_stderr():
3350-
self.assertErrorRegex(EasyBuildError, error_pattern, self._test_toy_build, ec_file=toy_ec,
3363+
self.assertErrorRegex(EasyBuildError, error_pattern, self._test_toy_build, ec_file=toy_ec_cuda,
33513364
extra_args=args, raise_error=True)
3352-
outtxt = self._test_toy_build(ec_file=toy_ec, extra_args=args, raise_error=False, verify=False)
3365+
outtxt = self._test_toy_build(ec_file=toy_ec_cuda, extra_args=args, raise_error=False, verify=False)
33533366
stdout = self.get_stdout()
3354-
msg = "Pattern %s not found in full build log: %s" % (device_additional_70_code_regex.pattern, outtxt)
3367+
msg = "Pattern '%s' not found in full build log: %s" % (device_additional_70_code_regex.pattern, outtxt)
33553368
self.assertTrue(device_additional_70_code_regex.search(outtxt), msg)
3356-
assert_cuda_report(missing_cc=0, additional_cc=1, missing_ptx=1, log=outtxt, stdout=stdout)
3369+
assert_cuda_report(missing_cc=0, additional_cc=3, missing_ptx=3, log=outtxt, stdout=stdout)
33573370

33583371
# Test case 5: same as Test case 4, but add --cuda-sanity-check-accept-missing-ptx
33593372
# This is expected to succeed: device code is present, PTX code is missing, but that's accepted
@@ -3365,43 +3378,49 @@ def assert_cuda_report(missing_cc, additional_cc, missing_ptx, log, stdout=None,
33653378
warning_pattern += r"\(PTX architectures supported in that file: \['8\.0'\]\)"
33663379
warning_pattern_regex = re.compile(warning_pattern, re.M)
33673380
with self.mocked_stdout_stderr():
3368-
outtxt = self._test_toy_build(ec_file=toy_ec, extra_args=args, raise_error=True)
3381+
outtxt = self._test_toy_build(ec_file=toy_ec_cuda, extra_args=args, raise_error=True)
33693382
stdout = self.get_stdout()
3370-
msg = "Pattern %s not found in full build log: %s" % (device_additional_70_code_regex.pattern, outtxt)
3383+
msg = "Pattern '%s' not found in full build log: %s" % (device_additional_70_code_regex.pattern, outtxt)
33713384
self.assertTrue(device_additional_70_code_regex.search(outtxt), msg)
3372-
msg = "Pattern %s not found in full build log: %s" % (warning_pattern, outtxt)
3385+
msg = "Pattern '%s' not found in full build log: %s" % (warning_pattern, outtxt)
33733386
self.assertTrue(warning_pattern_regex.search(outtxt), msg)
3374-
assert_cuda_report(missing_cc=0, additional_cc=1, missing_ptx=1, log=outtxt, stdout=stdout)
3387+
assert_cuda_report(missing_cc=0, additional_cc=3, missing_ptx=3, log=outtxt, stdout=stdout)
33753388

33763389
# Test case 6: same as Test case 5, but add --cuda-sanity-check-strict
33773390
# This is expected to fail: device code is present, PTX code is missing (but accepted due to option)
33783391
# but additional device code is present, which is not allowed by --cuda-sanity-check-strict
33793392
args = ['--cuda-compute-capabilities=9.0', '--cuda-sanity-check-error-on-failed-checks',
33803393
'--cuda-sanity-check-accept-missing-ptx', '--cuda-sanity-check-strict']
33813394
# We expect this to fail, so first check error, then run again to check output
3382-
error_pattern = r"Files with additional CUDA device code: 1"
3395+
error_pattern = r"Files with additional CUDA device code: 3"
33833396
with self.mocked_stdout_stderr():
3384-
self.assertErrorRegex(EasyBuildError, error_pattern, self._test_toy_build, ec_file=toy_ec,
3397+
self.assertErrorRegex(EasyBuildError, error_pattern, self._test_toy_build, ec_file=toy_ec_cuda,
33853398
extra_args=args, raise_error=True)
3386-
outtxt = self._test_toy_build(ec_file=toy_ec, extra_args=args, raise_error=False, verify=False)
3399+
outtxt = self._test_toy_build(ec_file=toy_ec_cuda, extra_args=args, raise_error=False, verify=False)
33873400
stdout = self.get_stdout()
3388-
msg = "Pattern %s not found in full build log: %s" % (device_additional_70_code_regex.pattern, outtxt)
3401+
msg = "Pattern '%s' not found in full build log: %s" % (device_additional_70_code_regex.pattern, outtxt)
33893402
self.assertTrue(device_additional_70_code_regex.search(outtxt), msg)
3390-
assert_cuda_report(missing_cc=0, additional_cc=1, missing_ptx=1, log=outtxt, stdout=stdout)
3403+
assert_cuda_report(missing_cc=0, additional_cc=3, missing_ptx=3, log=outtxt, stdout=stdout)
33913404

33923405
# Test case 7: same as Test case 6, but add the failing file to the cuda_sanity_ignore_files
33933406
# This is expected to succeed: the individual file which _would_ cause the sanity check to fail is
33943407
# now on the ignore list
33953408
toy_whitelist_ec = os.path.join(self.test_prefix, 'toy-0.0-cuda-whitelist.eb')
3396-
write_file(toy_whitelist_ec, read_file(toy_ec) + '\ncuda_sanity_ignore_files = ["bin/toy"]')
3409+
toy_ec_txt = read_file(toy_ec)
3410+
toy_ec_txt += '\n' + '\n'.join([
3411+
"dependencies = [('CUDA', '5.5.22', '', SYSTEM)]",
3412+
"cuda_sanity_ignore_files = ['bin/toy']",
3413+
])
3414+
write_file(toy_ec_cuda, toy_ec_txt)
3415+
write_file(toy_whitelist_ec, toy_ec_txt)
33973416

33983417
args = ['--cuda-compute-capabilities=9.0', '--cuda-sanity-check-error-on-failed-checks',
33993418
'--cuda-sanity-check-accept-missing-ptx', '--cuda-sanity-check-strict']
34003419
# We expect this to succeed, so check output for expected patterns
34013420
with self.mocked_stdout_stderr():
34023421
outtxt = self._test_toy_build(ec_file=toy_whitelist_ec, extra_args=args, raise_error=True, verify=False)
34033422
stdout = self.get_stdout()
3404-
msg = "Pattern %s not found in full build log: %s" % (device_additional_70_code_regex.pattern, outtxt)
3423+
msg = "Pattern '%s' not found in full build log: %s" % (device_additional_70_code_regex.pattern, outtxt)
34053424
self.assertTrue(device_additional_70_code_regex.search(outtxt), msg)
34063425
assert_cuda_report(missing_cc=0, additional_cc=1, missing_ptx=1, log=outtxt, stdout=stdout)
34073426

@@ -3420,15 +3439,15 @@ def assert_cuda_report(missing_cc, additional_cc, missing_ptx, log, stdout=None,
34203439
'--cuda-sanity-check-strict']
34213440
# We expect this to pass, so no need to check errors
34223441
with self.mocked_stdout_stderr():
3423-
outtxt = self._test_toy_build(ec_file=toy_ec, extra_args=args, raise_error=True)
3442+
outtxt = self._test_toy_build(ec_file=toy_ec_cuda, extra_args=args, raise_error=True)
34243443
stdout = self.get_stdout()
3425-
msg = "Pattern %s not found in full build log: %s" % (device_code_regex_success.pattern, outtxt)
3444+
msg = "Pattern '%s' not found in full build log: %s" % (device_code_regex_success.pattern, outtxt)
34263445
self.assertTrue(device_code_regex_success.search(outtxt), msg)
3427-
msg = "Pattern %s not found in full build log: %s" % (ptx_code_regex_success.pattern, outtxt)
3446+
msg = "Pattern '%s' not found in full build log: %s" % (ptx_code_regex_success.pattern, outtxt)
34283447
self.assertTrue(ptx_code_regex_success.search(outtxt), msg)
34293448
expected_result_pattern = "INFO Sanity check for toy successful"
34303449
expected_result = re.compile(expected_result_pattern, re.M)
3431-
msg = "Pattern %s not found in full build log: %s" % (expected_result, outtxt)
3450+
msg = "Pattern '%s' not found in full build log: %s" % (expected_result, outtxt)
34323451
self.assertTrue(expected_result.search(outtxt), msg)
34333452
assert_cuda_report(missing_cc=0, additional_cc=0, missing_ptx=0, log=outtxt, stdout=stdout)
34343453

@@ -3437,15 +3456,15 @@ def assert_cuda_report(missing_cc, additional_cc, missing_ptx, log, stdout=None,
34373456
args = ['--cuda-sanity-check-error-on-failed-checks', '--cuda-sanity-check-strict']
34383457
# We expect this to pass, so no need to check errors
34393458
with self.mocked_stdout_stderr():
3440-
outtxt = self._test_toy_build(ec_file=toy_ec, extra_args=args, raise_error=True)
3459+
outtxt = self._test_toy_build(ec_file=toy_ec_cuda, extra_args=args, raise_error=True)
34413460
stdout = self.get_stdout()
34423461
cuda_sanity_skipped = r"INFO Skipping CUDA sanity check, as no CUDA compute capabilities were configured"
34433462
cuda_sanity_skipped_regex = re.compile(cuda_sanity_skipped, re.M)
3444-
msg = "Pattern %s not found in full build log: %s" % (cuda_sanity_skipped, outtxt)
3463+
msg = "Pattern '%s' not found in full build log: %s" % (cuda_sanity_skipped, outtxt)
34453464
self.assertTrue(cuda_sanity_skipped_regex.search(outtxt), msg)
34463465
expected_result_pattern = "INFO Sanity check for toy successful"
34473466
expected_result = re.compile(expected_result_pattern, re.M)
3448-
msg = "Pattern %s not found in full build log: %s" % (expected_result, outtxt)
3467+
msg = "Pattern '%s' not found in full build log: %s" % (expected_result, outtxt)
34493468
self.assertTrue(expected_result.search(outtxt), msg)
34503469

34513470
# Test case 10: running with default options and a binary that does not contain ANY CUDA device code
@@ -3456,16 +3475,16 @@ def assert_cuda_report(missing_cc, additional_cc, missing_ptx, log, stdout=None,
34563475
args = ['--cuda-compute-capabilities=9.0']
34573476
# We expect this to pass, so no need to check errors
34583477
with self.mocked_stdout_stderr():
3459-
outtxt = self._test_toy_build(ec_file=toy_ec, extra_args=args, raise_error=True)
3478+
outtxt = self._test_toy_build(ec_file=toy_ec_cuda, extra_args=args, raise_error=True)
34603479
stdout = self.get_stdout()
34613480
no_cuda_pattern = r".*/bin/toy does not appear to be a CUDA executable \(no CUDA device code found\), "
34623481
no_cuda_pattern += r"so skipping CUDA sanity check"
34633482
no_cuda_regex = re.compile(no_cuda_pattern, re.M)
3464-
msg = "Pattern %s not found in full build log: %s" % (no_cuda_pattern, outtxt)
3483+
msg = "Pattern '%s' not found in full build log: %s" % (no_cuda_pattern, outtxt)
34653484
self.assertTrue(no_cuda_regex.search(outtxt), msg)
34663485
expected_result_pattern = "INFO Sanity check for toy successful"
34673486
expected_result = re.compile(expected_result_pattern, re.M)
3468-
msg = "Pattern %s not found in full build log: %s" % (expected_result, outtxt)
3487+
msg = "Pattern '%s' not found in full build log: %s" % (expected_result, outtxt)
34693488
self.assertTrue(expected_result.search(outtxt), msg)
34703489
assert_cuda_report(missing_cc=0, additional_cc=0, missing_ptx=0, log=outtxt, stdout=stdout, num_checked=0)
34713490

@@ -3474,16 +3493,16 @@ def assert_cuda_report(missing_cc, additional_cc, missing_ptx, log, stdout=None,
34743493
args = ['--cuda-compute-capabilities=9.0', '--cuda-sanity-check-error-on-failed-checks']
34753494
# We expect this to pass, so no need to check errors
34763495
with self.mocked_stdout_stderr():
3477-
outtxt = self._test_toy_build(ec_file=toy_ec, extra_args=args, raise_error=True)
3496+
outtxt = self._test_toy_build(ec_file=toy_ec_cuda, extra_args=args, raise_error=True)
34783497
stdout = self.get_stdout()
34793498
no_cuda_pattern = r".*/bin/toy does not appear to be a CUDA executable \(no CUDA device code found\), "
34803499
no_cuda_pattern += r"so skipping CUDA sanity check"
34813500
no_cuda_regex = re.compile(no_cuda_pattern, re.M)
3482-
msg = "Pattern %s not found in full build log: %s" % (no_cuda_pattern, outtxt)
3501+
msg = "Pattern '%s' not found in full build log: %s" % (no_cuda_pattern, outtxt)
34833502
self.assertTrue(no_cuda_regex.search(outtxt), msg)
34843503
expected_result_pattern = "INFO Sanity check for toy successful"
34853504
expected_result = re.compile(expected_result_pattern, re.M)
3486-
msg = "Pattern %s not found in full build log: %s" % (expected_result, outtxt)
3505+
msg = "Pattern '%s' not found in full build log: %s" % (expected_result, outtxt)
34873506
self.assertTrue(expected_result.search(outtxt), msg)
34883507
assert_cuda_report(missing_cc=0, additional_cc=0, missing_ptx=0, log=outtxt, stdout=stdout, num_checked=0)
34893508

0 commit comments

Comments
 (0)