diff --git a/easybuild/easyblocks/generic/cargo.py b/easybuild/easyblocks/generic/cargo.py index b92bc9bdd6..fd190ba96b 100755 --- a/easybuild/easyblocks/generic/cargo.py +++ b/easybuild/easyblocks/generic/cargo.py @@ -37,6 +37,7 @@ import tempfile from glob import glob from pathlib import Path +from typing import Dict, List, Union import easybuild.tools.environment as env import easybuild.tools.systemtools as systemtools @@ -78,19 +79,136 @@ CARGO_CHECKSUM_JSON = '{{"files": {{}}, "package": "{checksum}"}}' -def get_workspace_members(crate_dir: Path): - """Find all members of a cargo workspace in crate_dir. +def parse_toml_list(value: str) -> List[str]: + """Split a TOML list value""" + if not value.startswith('[') or not value.endswith(']'): + raise ValueError(f"'{value}' is not a TOML list") + value = value[1:-1].strip() + simple_str_markers = ('"""', "'''", "'") + current_value = '' + result = [] + while value: + for marker in simple_str_markers: + if value.startswith(marker): + idx = value.index(marker, len(marker)) + current_value += value[:idx + len(marker)] + value = value[idx + len(marker):].lstrip() + break + else: + if value.startswith('"'): + m = re.match(r'".*?(? str: + """Remove comments and trim line""" + if '#' not in line: + return line.strip() + if expected_end is not None and expected_end[0] in ("'", '"'): + try: + idx = line.index(expected_end) + len(expected_end) + except ValueError: + return line.strip() # Ignore #-sign in multi-line string + else: + idx = 0 + in_str = False + escaped = False + while idx < len(line): + c = line[idx] + if in_str: + if escaped: + if c == '\\': + escaped = False + elif c == '"': + in_str = False + elif c == '\\': + escaped = True + elif c == '#': + break + elif c == '"': + in_str = True + elif c == "'": + try: + idx = line.index("'", idx + 1) + except ValueError: + idx = len(line) + idx += 1 + return line[:idx].strip() + + +def parse_toml(file_or_content: Union[Path, str]) -> Dict[str, str]: + """Minimally parse a TOML file into sections, keys and values + + Values will be the raw strings (including quotes for string-typed values)""" + + result: Dict[str, Union[str, List[str]]] = {} + pending_key = None + pending_value = None + expected_end = None + current_section = None + content = read_file(file_or_content) if isinstance(file_or_content, Path) else file_or_content + line_num = raw_line = None + start_end = { + '[': ']', + '{': '}', + '"""': '"""', + "'''": "'''", + } + try: + for line_num, raw_line in enumerate(content.splitlines()): # noqa B007: line_num used in error only + line: str = _clean_line(raw_line, expected_end) + if not line: + continue + if pending_key is None and line.startswith("[") and line.endswith("]"): + current_section = line.strip()[1:-1].strip() + result.setdefault(current_section, {}) + continue + if pending_key is None: + key, val = line.split("=", 1) + pending_key = key.strip() + pending_value = val.strip() + for start, end in start_end.items(): + if pending_value.startswith(start): + expected_end = end + break + else: + expected_end = None + else: + pending_value += '\n' + line + if expected_end is None or (pending_value != expected_end and pending_value.endswith(expected_end)): + result[current_section][pending_key] = pending_value.strip() + pending_key = None + except Exception as e: + raise ValueError(f'Failed to parse {file_or_content}, error {e} at line {line_num}: {raw_line}') + return result - (Minimally) parse the Cargo.toml file. + +def get_workspace_members(cargo_toml: Dict[str, str]): + """Find all members of a cargo workspace in the parsed the Cargo.toml file. Return a tuple: (has_package, workspace-members). has_package determines if it is a virtual workspace ([workspace] and no [package]) workspace-members are all members (subfolder names) if it is a workspace, otherwise None """ - cargo_toml = crate_dir / 'Cargo.toml' - lines = [line.strip() for line in read_file(cargo_toml).splitlines()] # A virtual (workspace) manifest has no [package], but only a [workspace] section. - has_package = '[package]' in lines + has_package = 'package' in cargo_toml # We are looking for this: # [workspace] @@ -101,30 +219,15 @@ def get_workspace_members(crate_dir: Path): # ] try: - start_idx = lines.index('[workspace]') - except ValueError: + workspace = cargo_toml['workspace'] + except KeyError: return has_package, None - # Find "members = [" and concatenate the value, stop at end of section or file - member_str = None - for line in lines[start_idx + 1:]: - if line.startswith('#'): - continue # Skip comments - if re.match(r'\[\w+\]', line): - break # New section - if member_str is None: - m = re.match(r'members\s+=\s+\[', line) - if m: - member_str = line[m.end():] - else: - member_str += line - # Stop if we reach the end of the list - if member_str is not None and member_str.endswith(']'): - member_str = member_str[:-1] - break - if member_str is None: + try: + member_strs = parse_toml_list(workspace['members']) + except (KeyError, ValueError): raise EasyBuildError('Failed to find members in %s', cargo_toml) - # Split at commas after removing possibly trailing ones and remove the quotes - members = [member.strip().strip('"') for member in member_str.rstrip(',').split(',')] + # Remove the quotes + members = [member.strip('"') for member in member_strs] # Sanity check that we didn't pick up anything unexpected invalid_members = [member for member in members if not re.match(r'(\w|-)+', member)] if invalid_members: @@ -133,6 +236,35 @@ def get_workspace_members(crate_dir: Path): return has_package, members +def merge_sub_crate(cargo_toml_path: Path, workspace_toml: Dict[str, str]): + """Resolve workspace references in the Cargo.toml file""" + # Lines such as 'authors.workspace = true' must be replaced by 'authors = ' + content: str = read_file(cargo_toml_path) + SUFFIX = '.workspace' + if SUFFIX not in content: + return + cargo_toml = parse_toml(content) + lines = content.splitlines() + + def do_replacement(section, workspace_section): + if not section or not workspace_section: + return + + for key, value in section.items(): + if key.endswith(SUFFIX) and value == 'true': + real_key = key[:-len(SUFFIX)] + value = workspace_section[real_key] + idx = next(idx for idx, line in enumerate(lines) if key in line) + lines[idx] = f'{real_key} = {value}' + + do_replacement(cargo_toml.get('package'), workspace_toml.get('workspace.package')) + do_replacement(cargo_toml.get('dependencies'), workspace_toml.get('workspace.dependencies')) + do_replacement(cargo_toml.get('build-dependencies'), workspace_toml.get('workspace.dependencies')) + do_replacement(cargo_toml.get('dev-dependencies'), workspace_toml.get('workspace.dependencies')) + + write_file(cargo_toml_path, '\n'.join(lines)) + + def get_checksum(src, log): """Get the checksum from an extracted source""" checksum = src['checksum'] @@ -354,7 +486,8 @@ def _setup_offline_config(self, git_sources): tmp_dir = Path(tempfile.mkdtemp(dir=self.builddir, prefix='tmp_crate_')) # Add checksum file for each crate such that it is recognized by cargo. # Glob to catch multiple folders in a source archive. - for crate_dir in (p.parent for p in Path(self.vendor_dir).glob('*/Cargo.toml')): + for cargo_toml in Path(self.vendor_dir).glob('*/Cargo.toml'): + crate_dir = cargo_toml.parent src = path_to_source.get(str(crate_dir)) if src: try: @@ -372,7 +505,8 @@ def _setup_offline_config(self, git_sources): # otherwise (Only "[workspace]" section and no "[package]" section) # we have to remove the top-level folder or cargo fails with: # "found a virtual manifest at [...]Cargo.toml instead of a package manifest" - has_package, members = get_workspace_members(crate_dir) + parsed_toml = parse_toml(cargo_toml) + has_package, members = get_workspace_members(parsed_toml) if members: self.log.info(f'Found workspace in {crate_dir}. Members: ' + ', '.join(members)) if not any((crate_dir / crate).is_dir() for crate in members): @@ -397,6 +531,8 @@ def _setup_offline_config(self, git_sources): # Use copy_dir to resolve symlinks that might point to the parent folder copy_dir(tmp_crate_dir / member, target_path, symlinks=False) cargo_pkg_dirs.append(target_path) + self.log.info(f'Resolving workspace values for crate {member}') + merge_sub_crate(target_path / 'Cargo.toml', parsed_toml) if has_package: # Remove the copied crate folders for member in members: diff --git a/test/easyblocks/easyblock_specific.py b/test/easyblocks/easyblock_specific.py index e18c7b93ee..e4ee2c066f 100644 --- a/test/easyblocks/easyblock_specific.py +++ b/test/easyblocks/easyblock_specific.py @@ -36,10 +36,12 @@ import textwrap from io import StringIO from unittest import TestLoader, TextTestRunner +from pathlib import Path from test.easyblocks.module import cleanup import easybuild.tools.options as eboptions import easybuild.easyblocks.generic.pythonpackage as pythonpackage +import easybuild.easyblocks.generic.cargo as cargo import easybuild.easyblocks.l.lammps as lammps import easybuild.easyblocks.p.python as python from easybuild.base.testing import TestCase @@ -51,7 +53,7 @@ from easybuild.tools.build_log import EasyBuildError from easybuild.tools.config import GENERAL_CLASS, get_module_syntax from easybuild.tools.environment import modify_env -from easybuild.tools.filetools import adjust_permissions, mkdir, move_file, remove_dir, symlink, write_file +from easybuild.tools.filetools import adjust_permissions, mkdir, move_file, read_file, remove_dir, symlink, write_file from easybuild.tools.modules import modules_tool from easybuild.tools.options import set_tmpdir from easybuild.tools.run import RunShellCmdResult @@ -323,6 +325,158 @@ def test_det_py_install_scheme(self): res = pythonpackage.det_py_install_scheme() self.assertTrue(isinstance(res, str)) + def test_cargo_toml_parsers(self): + """Test get_workspace_members in the Cargo easyblock""" + crate_dir = Path(tempfile.mkdtemp()) + cargo_toml = crate_dir / 'Cargo.toml' + # Simple crate + write_file(cargo_toml, textwrap.dedent(""" + [package] + #[dummy] + # ignore = this + name = 'my_crate\\' #comment1' # comment2 + version = "0.1.0" + edition = "2021#2" + description = ''' + Line 1 + Line 2 + ''' + documentation = "url?\\"#anchor" + readme = \""" + README.md + \""" + license = \"""MIT\""" + authors = [ + '''Name d'Or Si''', + ] + empty='''''' + """)) + parsed = cargo.parse_toml(cargo_toml) + self.assertEqual(parsed, { + 'package': { + 'name': "'my_crate\\'", + 'version': '"0.1.0"', + 'edition': '"2021#2"', + 'description': "'''\nLine 1\nLine 2\n'''", + 'documentation': '"url?\\"#anchor"', + 'readme': '"""\nREADME.md\n"""', + 'license': '"""MIT"""', + 'authors': "[\n'''Name d'Or Si''',\n]", + 'empty': "''''''", + } + }) + has_package, members = cargo.get_workspace_members(parsed) + self.assertTrue(has_package) + self.assertIsNone(members) + + # Virtual manifest + write_file(cargo_toml, textwrap.dedent(""" + [workspace] + members = [ + "reqwest-middleware", + "reqwest-tracing", + "reqwest-retry", + ] + """)) + parsed = cargo.parse_toml(cargo_toml) + self.assertEqual(parsed, { + 'workspace': { + 'members': '[\n"reqwest-middleware",\n"reqwest-tracing",\n"reqwest-retry",\n]', + } + }) + has_package, members = cargo.get_workspace_members(parsed) + self.assertFalse(has_package) + self.assertEqual(members, ["reqwest-middleware", "reqwest-tracing", "reqwest-retry"]) + + # Workspace (root is a package too) + write_file(cargo_toml, textwrap.dedent(""" + [package] + name = "nothing-linux-ui" + version = "0.0.2" + edition = "2021" + authors = ["sn99"] + + [workspace] + members = ["nothing", "src-tauri"] + + [dependencies] + leptos = { version = "0.6", features = ["csr"] } + """)) + parsed = cargo.parse_toml(cargo_toml) + self.assertEqual(parsed, { + 'package': { + "name": '"nothing-linux-ui"', + "version": '"0.0.2"', + "edition": '"2021"', + "authors": '["sn99"]', + }, + 'workspace': { + "members": '["nothing", "src-tauri"]', + }, + 'dependencies': { + "leptos": '{ version = "0.6", features = ["csr"] }', + }, + }) + has_package, members = cargo.get_workspace_members(parsed) + self.assertTrue(has_package) + self.assertEqual(members, ["nothing", "src-tauri"]) + + def test_cargo_merge_sub_crate(self): + """Test merge_sub_crate in the Cargo easyblock""" + crate_dir = Path(tempfile.mkdtemp()) + cargo_toml = crate_dir / 'Cargo.toml' + write_file(cargo_toml, textwrap.dedent(""" + [workspace] + members = ["bar"] + + [workspace.package] + version = "1.2.3" + authors = ["Nice Folks"] + description = "A short description of my package" + documentation = "https://example.com/bar" + + [workspace.dependencies] + regex = { version = "1.6.0", default-features = false, features = ["std"] } + cc = "1.0.73" + rand = "0.8.5" + """)) + ws_parsed = cargo.parse_toml(cargo_toml) + write_file(cargo_toml, textwrap.dedent(""" + [package] + name = "bar" + version.workspace = true + authors.workspace = true + description.workspace = true + documentation.workspace = true + + [dependencies] + regex.workspace = true + + [build-dependencies] + cc.workspace = true + + [dev-dependencies] + rand.workspace = true + """)) + cargo.merge_sub_crate(cargo_toml, ws_parsed) + self.assertEqual(read_file(cargo_toml).strip(), textwrap.dedent(""" + [package] + name = "bar" + version = "1.2.3" + authors = ["Nice Folks"] + description = "A short description of my package" + documentation = "https://example.com/bar" + + [dependencies] + regex = { version = "1.6.0", default-features = false, features = ["std"] } + + [build-dependencies] + cc = "1.0.73" + + [dev-dependencies] + rand = "0.8.5" + """).strip()) + def test_handle_local_py_install_scheme(self): """Test handle_local_py_install_scheme function provided by PythonPackage easyblock."""