Skip to content

Commit e1ee851

Browse files
authored
Merge pull request #4474 from smoors/data
add support for data installations
2 parents 79e458a + fea2ae1 commit e1ee851

File tree

7 files changed

+78
-33
lines changed

7 files changed

+78
-33
lines changed

easybuild/framework/easyblock.py

Lines changed: 22 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,8 @@
8282
from easybuild.tools.config import MOD_SEARCH_PATH_HEADERS, PYTHONPATH, SEARCH_PATH_BIN_DIRS, SEARCH_PATH_LIB_DIRS
8383
from easybuild.tools.config import build_option, build_path, get_failed_install_build_dirs_path
8484
from easybuild.tools.config import get_failed_install_logs_path, get_log_filename, get_repository, get_repositorypath
85-
from easybuild.tools.config import install_path, log_path, package_path, source_paths
85+
from easybuild.tools.config import install_path, log_path, package_path, source_paths, source_paths_data
86+
from easybuild.tools.config import DATA, SOFTWARE
8687
from easybuild.tools.environment import restore_env, sanitize_env
8788
from easybuild.tools.filetools import CHECKSUM_TYPE_SHA256
8889
from easybuild.tools.filetools import adjust_permissions, apply_patch, back_up_file, change_dir, check_lock
@@ -167,12 +168,13 @@ def __init__(self, ec, logfile=None):
167168
# list of patch/source files, along with checksums
168169
self.patches = []
169170
self.src = []
171+
self.data_src = []
170172
self.checksums = []
171173
self.json_checksums = None
172174

173175
# build/install directories
174176
self.builddir = None
175-
self.installdir = None # software
177+
self.installdir = None # software or data
176178
self.installdir_mod = None # module file
177179

178180
# extensions
@@ -522,11 +524,11 @@ def fetch_sources(self, sources=None, checksums=None):
522524
Add a list of source files (can be tarballs, isos, urls).
523525
Each source file is checked to make sure it exists (or can be located)
524526
525-
:param sources: list of sources to fetch (if None, use 'sources' easyconfig parameter)
527+
:param sources: list of sources to fetch (if None, use 'sources' or 'data_sources' easyconfig parameter)
526528
:param checksums: list of checksums for sources
527529
"""
528530
if sources is None:
529-
sources = self.cfg['sources']
531+
sources = self.cfg['sources'] or self.cfg['data_sources']
530532
if checksums is None:
531533
checksums = self.cfg['checksums']
532534

@@ -804,7 +806,10 @@ def obtain_file(self, filename, extension=False, urls=None, download_filename=No
804806
:param download_instructions: instructions to manually add source (used for complex cases)
805807
:param alt_location: alternative location to use instead of self.name
806808
"""
807-
srcpaths = source_paths()
809+
if self.cfg['data_sources']:
810+
srcpaths = source_paths_data()
811+
else:
812+
srcpaths = source_paths()
808813

809814
# We don't account for the checksums file in the progress bar
810815
if filename != 'checksum.json':
@@ -1169,7 +1174,10 @@ def gen_installdir(self):
11691174
"""
11701175
Generate the name of the installation directory.
11711176
"""
1172-
basepath = install_path()
1177+
if self.cfg['data_sources']:
1178+
basepath = install_path(DATA)
1179+
else:
1180+
basepath = install_path(SOFTWARE)
11731181
if basepath:
11741182
self.install_subdir = ActiveMNS().det_install_subdir(self.cfg)
11751183
self.installdir = os.path.join(os.path.abspath(basepath), self.install_subdir)
@@ -2598,8 +2606,10 @@ def fetch_step(self, skip_checksums=False):
25982606
# fetch sources
25992607
if self.cfg['sources']:
26002608
self.fetch_sources(self.cfg['sources'], checksums=self.cfg['checksums'])
2609+
elif self.cfg['data_sources']:
2610+
self.fetch_sources(self.cfg['data_sources'], checksums=self.cfg['checksums'])
26012611
else:
2602-
self.log.info('no sources provided')
2612+
self.log.info('no sources or data_sources provided')
26032613

26042614
if self.dry_run:
26052615
# actual list of patches is printed via _obtain_file_dry_run method
@@ -5108,8 +5118,8 @@ def make_checksum_lines(checksums, indent_level):
51085118
if app.src:
51095119
placeholder = '# PLACEHOLDER FOR SOURCES/PATCHES WITH CHECKSUMS'
51105120

5111-
# grab raw lines for source_urls, sources, patches
5112-
keys = ['patches', 'source_urls', 'sources']
5121+
# grab raw lines for source_urls, sources, data_sources, patches
5122+
keys = ['data_sources', 'patches', 'source_urls', 'sources']
51135123
raw = {}
51145124
for key in keys:
51155125
regex = re.compile(r'^(%s(?:.|\n)*?\])\s*$' % key, re.M)
@@ -5123,10 +5133,12 @@ def make_checksum_lines(checksums, indent_level):
51235133
# inject combination of source_urls/sources/data_sources/patches/checksums into easyconfig
51245134
# by replacing first occurrence of placeholder that was put in place
51255135
sources_raw = raw.get('sources', '')
5136+
data_sources_raw = raw.get('data_sources', '')
51265137
source_urls_raw = raw.get('source_urls', '')
51275138
patches_raw = raw.get('patches', '')
51285139
regex = re.compile(placeholder + '\n', re.M)
5129-
ectxt = regex.sub(source_urls_raw + sources_raw + patches_raw + checksums_txt + '\n', ectxt, count=1)
5140+
ectxt = regex.sub(source_urls_raw + sources_raw + data_sources_raw + patches_raw + checksums_txt + '\n',
5141+
ectxt, count=1)
51305142

51315143
# get rid of potential remaining placeholders
51325144
ectxt = regex.sub('', ectxt)

easybuild/framework/easyconfig/default.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,7 @@
9292
'checksums': [[], "Checksums for sources and patches", BUILD],
9393
'configopts': ['', 'Extra options passed to configure (default already has --prefix)', BUILD],
9494
'cuda_compute_capabilities': [[], "List of CUDA compute capabilities to build with (if supported)", BUILD],
95+
'data_sources': [[], "List of source files for data", BUILD],
9596
'download_instructions': ['', "Specify steps to acquire necessary file, if obtaining it is difficult", BUILD],
9697
'easyblock': [None, "EasyBlock to use for building; if set to None, an easyblock is selected "
9798
"based on the software name", BUILD],
@@ -132,7 +133,7 @@
132133
'skip_mod_files_sanity_check': [False, "Skip the check for .mod files in a GCCcore level install", BUILD],
133134
'skipsteps': [[], "Skip these steps", BUILD],
134135
'source_urls': [[], "List of URLs for source files", BUILD],
135-
'sources': [[], "List of source files", BUILD],
136+
'sources': [[], "List of source files for software", BUILD],
136137
'stop': [None, 'Keyword to halt the build process after a certain step.', BUILD],
137138
'testopts': ['', 'Extra options for test.', BUILD],
138139
'tests': [[], ("List of test-scripts to run after install. A test script should return a "

easybuild/framework/easyconfig/format/format.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@
6262
['name', 'version', 'versionprefix', 'versionsuffix'],
6363
['homepage', 'description'],
6464
['toolchain', 'toolchainopts'],
65-
['source_urls', 'sources', 'patches', 'checksums'],
65+
['source_urls', 'sources', 'data_sources', 'patches', 'checksums'],
6666
DEPENDENCY_PARAMETERS + ['multi_deps'],
6767
['osdependencies'],
6868
['preconfigopts', 'configopts'],

easybuild/tools/config.py

Lines changed: 39 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,10 @@
7171

7272
EMPTY_LIST = 'empty_list'
7373

74+
DATA = 'data'
75+
MODULES = 'modules'
76+
SOFTWARE = 'software'
77+
7478
PKG_TOOL_FPM = 'fpm'
7579
PKG_TYPE_RPM = 'rpm'
7680

@@ -112,8 +116,10 @@
112116
'packagepath': 'packages',
113117
'repositorypath': 'ebfiles_repo',
114118
'sourcepath': 'sources',
115-
'subdir_modules': 'modules',
116-
'subdir_software': 'software',
119+
'sourcepath_data': 'sources',
120+
'subdir_data': DATA,
121+
'subdir_modules': MODULES,
122+
'subdir_software': SOFTWARE,
117123
}
118124
DEFAULT_PKG_RELEASE = '1'
119125
DEFAULT_PKG_TOOL = PKG_TOOL_FPM
@@ -478,6 +484,7 @@ def mk_full_default_path(name, prefix=DEFAULT_PREFIX):
478484
('chem', "Chemistry, Computational Chemistry and Quantum Chemistry"),
479485
('compiler', "Compilers"),
480486
('data', "Data management & processing tools"),
487+
('dataset', "Datasets"),
481488
('debugger', "Debuggers"),
482489
('devel', "Development tools"),
483490
('geo', "Earth Sciences"),
@@ -512,6 +519,7 @@ class ConfigurationVariables(BaseConfigurationVariables):
512519
'failed_install_build_dirs_path',
513520
'failed_install_logs_path',
514521
'installpath',
522+
'installpath_data',
515523
'installpath_modules',
516524
'installpath_software',
517525
'job_backend',
@@ -526,6 +534,8 @@ class ConfigurationVariables(BaseConfigurationVariables):
526534
'repository',
527535
'repositorypath',
528536
'sourcepath',
537+
'sourcepath_data',
538+
'subdir_data',
529539
'subdir_modules',
530540
'subdir_software',
531541
'tmp_logdir',
@@ -569,16 +579,20 @@ def init(options, config_options_dict):
569579
"""
570580
tmpdict = copy.deepcopy(config_options_dict)
571581

572-
# make sure source path is a list
573-
sourcepath = tmpdict['sourcepath']
574-
if isinstance(sourcepath, str):
575-
tmpdict['sourcepath'] = sourcepath.split(':')
576-
_log.debug("Converted source path ('%s') to a list of paths: %s" % (sourcepath, tmpdict['sourcepath']))
577-
elif not isinstance(sourcepath, (tuple, list)):
578-
raise EasyBuildError(
579-
"Value for sourcepath has invalid type (%s): %s", type(sourcepath), sourcepath,
580-
exit_code=EasyBuildExit.OPTION_ERROR
581-
)
582+
if tmpdict['sourcepath_data'] is None:
583+
tmpdict['sourcepath_data'] = tmpdict['sourcepath'][:]
584+
585+
for srcpath in ['sourcepath', 'sourcepath_data']:
586+
# make sure source path is a list
587+
sourcepath = tmpdict[srcpath]
588+
if isinstance(sourcepath, str):
589+
tmpdict[srcpath] = sourcepath.split(':')
590+
_log.debug("Converted source path ('%s') to a list of paths: %s" % (sourcepath, tmpdict[srcpath]))
591+
elif not isinstance(sourcepath, (tuple, list)):
592+
raise EasyBuildError(
593+
"Value for %s has invalid type (%s): %s", srcpath, type(sourcepath), sourcepath,
594+
exit_code=EasyBuildExit.OPTION_ERROR
595+
)
582596

583597
# initialize configuration variables (any future calls to ConfigurationVariables() will yield the same instance
584598
variables = ConfigurationVariables(tmpdict, ignore_unknown_keys=True)
@@ -704,11 +718,18 @@ def build_path():
704718

705719
def source_paths():
706720
"""
707-
Return the list of source paths
721+
Return the list of source paths for software
708722
"""
709723
return ConfigurationVariables()['sourcepath']
710724

711725

726+
def source_paths_data():
727+
"""
728+
Return the list of source paths for data
729+
"""
730+
return ConfigurationVariables()['sourcepath_data']
731+
732+
712733
def source_path():
713734
"""NO LONGER SUPPORTED: use source_paths instead"""
714735
_log.nosupport("source_path() is replaced by source_paths()", '2.0')
@@ -717,15 +738,16 @@ def source_path():
717738
def install_path(typ=None):
718739
"""
719740
Returns the install path
720-
- subdir 'software' for actual installation (default)
741+
- subdir 'software' for actual software installation (default)
721742
- subdir 'modules' for environment modules (typ='mod')
743+
- subdir 'data' for data installation (typ='data')
722744
"""
723745
if typ is None:
724-
typ = 'software'
746+
typ = SOFTWARE
725747
elif typ == 'mod':
726-
typ = 'modules'
748+
typ = MODULES
727749

728-
known_types = ['modules', 'software']
750+
known_types = [MODULES, SOFTWARE, DATA]
729751
if typ not in known_types:
730752
raise EasyBuildError(
731753
"Unknown type specified in install_path(): %s (known: %s)", typ, ', '.join(known_types),

easybuild/tools/options.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -611,6 +611,8 @@ def config_options(self):
611611
'strlist', 'store', []),
612612
'installpath': ("Install path for software and modules",
613613
None, 'store', mk_full_default_path('installpath')),
614+
'installpath-data': ("Install path for data (if None, combine --installpath and --subdir-data)",
615+
None, 'store', None),
614616
'installpath-modules': ("Install path for modules (if None, combine --installpath and --subdir-modules)",
615617
None, 'store', None),
616618
'installpath-software': ("Install path for software (if None, combine --installpath and --subdir-software)",
@@ -644,7 +646,7 @@ def config_options(self):
644646
None, 'store', mk_full_default_path('packagepath')),
645647
'package-naming-scheme': ("Packaging naming scheme choice",
646648
'choice', 'store', DEFAULT_PNS, sorted(avail_package_naming_schemes().keys())),
647-
'prefix': (("Change prefix for buildpath, installpath, sourcepath and repositorypath "
649+
'prefix': (("Change prefix for buildpath, installpath, sourcepath, sourcepath-data, and repositorypath "
648650
"(used prefix for defaults %s)" % DEFAULT_PREFIX),
649651
None, 'store', None),
650652
'recursive-module-unload': ("Enable generating of modules that unload recursively.",
@@ -659,8 +661,12 @@ def config_options(self):
659661
'store', DEFAULT_SEARCH_PATH_CPP_HEADERS, [*SEARCH_PATH["cpp_headers"]]),
660662
'search-path-linker': ("Search path used at build time by the linker for libraries", 'choice',
661663
'store', DEFAULT_SEARCH_PATH_LINKER, [*SEARCH_PATH["linker"]]),
662-
'sourcepath': ("Path(s) to where sources should be downloaded (string, colon-separated)",
664+
'sourcepath': ("Path(s) to where software sources should be downloaded (string, colon-separated)",
663665
None, 'store', mk_full_default_path('sourcepath')),
666+
'sourcepath-data': ("Path(s) to where data sources should be downloaded (string, colon-separated) "
667+
"(same as sourcepath if not specified)", None, 'store', None),
668+
'subdir-data': ("Installpath subdir for data",
669+
None, 'store', DEFAULT_PATH_SUBDIRS['subdir_data']),
664670
'subdir-modules': ("Installpath subdir for modules", None, 'store', DEFAULT_PATH_SUBDIRS['subdir_modules']),
665671
'subdir-software': ("Installpath subdir for software",
666672
None, 'store', DEFAULT_PATH_SUBDIRS['subdir_software']),
@@ -1234,7 +1240,7 @@ def _postprocess_config(self):
12341240
# (see also https://github.com/easybuilders/easybuild-framework/issues/3892);
12351241
path_opt_names = ['buildpath', 'containerpath', 'failed_install_build_dirs_path', 'failed_install_logs_path',
12361242
'git_working_dirs_path', 'installpath', 'installpath_modules', 'installpath_software',
1237-
'prefix', 'packagepath', 'robot_paths', 'sourcepath']
1243+
'installpath_data', 'prefix', 'packagepath', 'robot_paths', 'sourcepath', 'sourcepath_data']
12381244

12391245
for opt_name in path_opt_names:
12401246
self._ensure_abs_path(opt_name)
@@ -1244,7 +1250,7 @@ def _postprocess_config(self):
12441250
# repository has to be reinitialised to take new repositorypath into account;
12451251
# in the legacy-style configuration, repository is initialised in configuration file itself;
12461252
path_opts = ['buildpath', 'containerpath', 'installpath', 'packagepath', 'repository', 'repositorypath',
1247-
'sourcepath']
1253+
'sourcepath', 'sourcepath_data']
12481254
for dest in path_opts:
12491255
if not self.options._action_taken.get(dest, False):
12501256
if dest == 'repository':

test/framework/options.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5335,6 +5335,7 @@ def test_show_config(self):
53355335
'EASYBUILD_INSTALLPATH',
53365336
'EASYBUILD_ROBOT_PATHS',
53375337
'EASYBUILD_SOURCEPATH',
5338+
'EASYBUILD_SOURCEPATH_DATA',
53385339
]
53395340
for key in os.environ.keys():
53405341
if key.startswith('EASYBUILD_') and key not in retained_eb_env_vars:
@@ -5368,6 +5369,7 @@ def test_show_config(self):
53685369
r"robot-paths\s* \(E\) = " + os.path.join(test_dir, 'easyconfigs', 'test_ecs'),
53695370
r"rpath\s* \(D\) = " + ('False' if get_os_type() == DARWIN else 'True'),
53705371
r"sourcepath\s* \(E\) = " + os.path.join(test_dir, 'sandbox', 'sources'),
5372+
r"sourcepath-data\s* \(E\) = " + os.path.join(test_dir, 'sandbox', 'data_sources'),
53715373
r"subdir-modules\s* \(F\) = mods",
53725374
]
53735375

test/framework/utilities.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,9 @@ def setUp(self):
115115
testdir = os.path.dirname(os.path.abspath(__file__))
116116

117117
self.test_sourcepath = os.path.join(testdir, 'sandbox', 'sources')
118+
self.test_sourcepath_data = os.path.join(testdir, 'sandbox', 'data_sources')
118119
os.environ['EASYBUILD_SOURCEPATH'] = self.test_sourcepath
120+
os.environ['EASYBUILD_SOURCEPATH_DATA'] = self.test_sourcepath_data
119121
os.environ['EASYBUILD_PREFIX'] = self.test_prefix
120122
self.test_buildpath = tempfile.mkdtemp()
121123
os.environ['EASYBUILD_BUILDPATH'] = self.test_buildpath

0 commit comments

Comments
 (0)