Skip to content

Commit e30995a

Browse files
committed
Create CUDA cache (for JIT compiled PTX code) in build dir instead of $HOME
Add option to control the cache size and to disable it
1 parent 5c695af commit e30995a

File tree

6 files changed

+98
-1
lines changed

6 files changed

+98
-1
lines changed

easybuild/framework/easyblock.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2150,6 +2150,22 @@ def prepare_step(self, start_dir=True, load_tc_deps_modules=True):
21502150
self.log.info("Loading extra modules: %s", extra_modules)
21512151
self.modules_tool.load(extra_modules)
21522152

2153+
# Set up CUDA cache if required. If we don't do this, CUDA will use $HOME for its cache files
2154+
if get_software_root('CUDA') or get_software_root('CUDAcore'):
2155+
cuda_cache_maxsize = build_option('cuda_cache_maxsize')
2156+
if cuda_cache_maxsize is None:
2157+
cuda_cache_maxsize = 1 * 1024 * 1024 * 1024 # 1 GB default value
2158+
if cuda_cache_maxsize == 0:
2159+
self.log.info('Disabling CUDA PTX cache as per request')
2160+
env.setvar('CUDA_CACHE_DISABLE', '1')
2161+
else:
2162+
cuda_cache_dir = os.path.join(self.builddir, 'eb-cuda-cache')
2163+
self.log.info('Enabling CUDA PTX cache of size %s MB at %s',
2164+
cuda_cache_maxsize / 1024 / 1024, cuda_cache_dir)
2165+
env.setvar('CUDA_CACHE_DISABLE', '0')
2166+
env.setvar('CUDA_CACHE_PATH', cuda_cache_dir)
2167+
env.setvar('CUDA_CACHE_MAXSIZE', str(cuda_cache_maxsize))
2168+
21532169
# guess directory to start configure/build/install process in, and move there
21542170
if start_dir:
21552171
self.guess_start_dir()

easybuild/tools/config.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,7 @@ def mk_full_default_path(name, prefix=DEFAULT_PREFIX):
169169
'container_image_name',
170170
'container_template_recipe',
171171
'container_tmpdir',
172+
'cuda_cache_maxsize',
172173
'cuda_compute_capabilities',
173174
'download_timeout',
174175
'dump_test_report',

easybuild/tools/options.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -357,6 +357,9 @@ def override_options(self):
357357
'consider-archived-easyconfigs': ("Also consider archived easyconfigs", None, 'store_true', False),
358358
'containerize': ("Generate container recipe/image", None, 'store_true', False, 'C'),
359359
'copy-ec': ("Copy specified easyconfig(s) to specified location", None, 'store_true', False),
360+
'cuda-cache-maxsize': ("Maximum size of the CUDA cache (in bytes) used for JIT compilation of PTX code. "
361+
"Leave value empty to let EasyBuild choose a value or '0' to disable the cache",
362+
int, 'store_or_None', None),
360363
'cuda-compute-capabilities': ("List of CUDA compute capabilities to use when building GPU software; "
361364
"values should be specified as digits separated by a dot, "
362365
"for example: 3.5,5.0,7.2", 'strlist', 'extend', None),

test/framework/easyblock.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1761,6 +1761,57 @@ def test_prepare_step_hmns(self):
17611761
self.assertEqual(len(loaded_modules), 1)
17621762
self.assertEqual(loaded_modules[0]['mod_name'], 'GCC/6.4.0-2.28')
17631763

1764+
def test_prepare_step_cuda_cache(self):
    """
    Test handling of the cuda-cache-maxsize build option in prepare_step.

    Four scenarios are covered:
    * no CUDA involved: neither the 'Enabling' nor the 'Disabling' message is logged;
    * CUDA, option unset: cache is enabled in the build directory with the 1 GB default size;
    * CUDA, option set to 0: cache is disabled;
    * CUDA, option set to an explicit size: cache is enabled with exactly that size.
    """

    def run_prepare_step(easyblock):
        """Empty the log of the given easyblock, run its prepare step, return the resulting log text."""
        # start from an empty log, so only messages from this particular run are checked
        write_file(easyblock.logfile, '')
        easyblock.prepare_step(start_dir=False)
        return read_file(easyblock.logfile)

    init_config(build_options={'cuda_cache_maxsize': None})  # Automatic mode

    test_ecs = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'easyconfigs', 'test_ecs')
    toy_ec = os.path.join(test_ecs, 't', 'toy', 'toy-0.0.eb')
    ec = process_easyconfig(toy_ec)[0]
    eb = EasyBlock(ec['ec'])
    eb.silent = True
    eb.make_builddir()

    # Without CUDA, the CUDA cache should not be mentioned at all
    logtxt = run_prepare_step(eb)
    self.assertNotIn('Disabling CUDA PTX cache', logtxt)
    self.assertNotIn('Enabling CUDA PTX cache', logtxt)

    # Now with CUDA
    test_ec = os.path.join(self.test_prefix, 'test.eb')
    test_ectxt = re.sub('^toolchain = .*', "toolchain = {'name': 'gcccuda', 'version': '2018a'}",
                        read_file(toy_ec), flags=re.M)
    write_file(test_ec, test_ectxt)
    ec = process_easyconfig(test_ec)[0]
    eb = EasyBlock(ec['ec'])
    eb.silent = True
    eb.make_builddir()

    # location where prepare_step is expected to put the cache: inside the build directory, not $HOME
    cuda_cache_dir = os.path.join(eb.builddir, 'eb-cuda-cache')

    logtxt = run_prepare_step(eb)
    self.assertNotIn('Disabling CUDA PTX cache', logtxt)
    self.assertIn('Enabling CUDA PTX cache', logtxt)
    self.assertEqual(os.environ['CUDA_CACHE_DISABLE'], '0')
    self.assertEqual(os.environ['CUDA_CACHE_PATH'], cuda_cache_dir)
    # option was left unset, so the 1 GB default size should be used
    self.assertEqual(os.environ['CUDA_CACHE_MAXSIZE'], str(1024 ** 3))

    init_config(build_options={'cuda_cache_maxsize': 0})  # Disable
    logtxt = run_prepare_step(eb)
    self.assertIn('Disabling CUDA PTX cache', logtxt)
    self.assertNotIn('Enabling CUDA PTX cache', logtxt)
    self.assertEqual(os.environ['CUDA_CACHE_DISABLE'], '1')

    init_config(build_options={'cuda_cache_maxsize': 1234567890})  # Specified size
    logtxt = run_prepare_step(eb)
    self.assertNotIn('Disabling CUDA PTX cache', logtxt)
    self.assertIn('Enabling CUDA PTX cache', logtxt)
    self.assertEqual(os.environ['CUDA_CACHE_DISABLE'], '0')
    self.assertEqual(os.environ['CUDA_CACHE_PATH'], cuda_cache_dir)
    self.assertEqual(os.environ['CUDA_CACHE_MAXSIZE'], '1234567890')

17641815
def test_checksum_step(self):
17651816
"""Test checksum step"""
17661817
testdir = os.path.abspath(os.path.dirname(__file__))

test/framework/modules.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@
5454

5555

5656
# number of modules included for testing purposes
57-
TEST_MODULES_COUNT = 81
57+
TEST_MODULES_COUNT = 82
5858

5959

6060
class ModulesTest(EnhancedTestCase):
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
#%Module

# Modulefile for the gcccuda/2018a toolchain: loads GCC and CUDA,
# and exposes the installation prefix, version and devel file via $EB* environment variables.

# Help text; kept in line with the module-whatis description below
# (this toolchain only consists of GCC and CUDA).
proc ModulesHelp { } {
    puts stderr {   GNU Compiler Collection (GCC) based compiler toolchain, along with CUDA toolkit. - Homepage: (none)
}
}

module-whatis {GNU Compiler Collection (GCC) based compiler toolchain, along with CUDA toolkit. - Homepage: (none)}

set root    /prefix/software/gcccuda/2018a

# only one version of this toolchain can be loaded at a time
conflict    gcccuda

# load the toolchain components, unless they are loaded already
if { ![is-loaded GCC/6.4.0-2.28] } {
    module load GCC/6.4.0-2.28
}

if { ![is-loaded CUDA/9.1.85] } {
    module load CUDA/9.1.85
}


setenv	EBROOTGCCCUDA		"$root"
setenv	EBVERSIONGCCCUDA		"2018a"
setenv	EBDEVELGCCCUDA		"$root/easybuild/gcccuda-2018a-easybuild-devel"

0 commit comments

Comments
 (0)