Skip to content

Commit 25ac4a6

Browse files
authored
Merge pull request #3665 from Flamefire/20250314170220_new_pr_cargo
Unify handling of vendored crates in cargo EasyBlock and add support for non-virtual workspaces
2 parents 0e690d3 + 73bf121 commit 25ac4a6

File tree

1 file changed

+107
-88
lines changed

1 file changed

+107
-88
lines changed

easybuild/easyblocks/generic/cargo.py

Lines changed: 107 additions & 88 deletions
Original file line numberDiff line numberDiff line change
@@ -33,16 +33,19 @@
3333

3434
import os
3535
import re
36+
import shutil
37+
import tempfile
3638
from glob import glob
39+
from pathlib import Path
3740

3841
import easybuild.tools.environment as env
3942
import easybuild.tools.systemtools as systemtools
4043
from easybuild.framework.easyconfig import CUSTOM
4144
from easybuild.framework.extensioneasyblock import ExtensionEasyBlock
4245
from easybuild.tools.build_log import EasyBuildError, print_warning
4346
from easybuild.tools.config import build_option
44-
from easybuild.tools.filetools import CHECKSUM_TYPE_SHA256, compute_checksum, extract_file, mkdir, move_file
45-
from easybuild.tools.filetools import read_file, write_file, which
47+
from easybuild.tools.filetools import CHECKSUM_TYPE_SHA256, compute_checksum, copy_dir, extract_file, mkdir
48+
from easybuild.tools.filetools import read_file, remove_dir, write_file, which
4649
from easybuild.tools.run import run_shell_cmd
4750
from easybuild.tools.toolchain.compiler import OPTARCH_GENERIC
4851

@@ -72,28 +75,22 @@
7275
replace-with = "vendored-sources"
7376
"""
7477

75-
CONFIG_TOML_SOURCE_GIT_WORKSPACE = """
76-
[source."real-{url}?rev={rev}"]
77-
directory = "{workspace_dir}"
78-
79-
[source."{url}?rev={rev}"]
80-
git = "{url}"
81-
rev = "{rev}"
82-
replace-with = "real-{url}?rev={rev}"
83-
84-
"""
85-
8678
CARGO_CHECKSUM_JSON = '{{"files": {{}}, "package": "{checksum}"}}'
8779

8880

89-
def get_workspace_members(crate_dir):
81+
def get_workspace_members(crate_dir: Path):
9082
"""Find all members of a cargo workspace in crate_dir.
9183
9284
(Minimally) parse the Cargo.toml file.
93-
If it is a workspace return all members (subfolder names).
94-
Otherwise return None.
85+
86+
Return a tuple: (has_package, workspace-members).
87+
has_package is True if the manifest has a [package] section; it is False for a virtual workspace ([workspace] and no [package])
88+
workspace-members are all members (subfolder names) if it is a workspace, otherwise None
9589
"""
96-
cargo_toml = os.path.join(crate_dir, 'Cargo.toml')
90+
cargo_toml = crate_dir / 'Cargo.toml'
91+
lines = [line.strip() for line in read_file(cargo_toml).splitlines()]
92+
# A virtual (workspace) manifest has no [package], but only a [workspace] section.
93+
has_package = '[package]' in lines
9794

9895
# We are looking for this:
9996
# [workspace]
@@ -103,11 +100,10 @@ def get_workspace_members(crate_dir):
103100
# "reqwest-retry",
104101
# ]
105102

106-
lines = [line.strip() for line in read_file(cargo_toml).splitlines()]
107103
try:
108104
start_idx = lines.index('[workspace]')
109105
except ValueError:
110-
return None
106+
return has_package, None
111107
# Find "members = [" and concatenate the value, stop at end of section or file
112108
member_str = None
113109
for line in lines[start_idx + 1:]:
@@ -134,7 +130,7 @@ def get_workspace_members(crate_dir):
134130
if invalid_members:
135131
raise EasyBuildError('Failed to parse %s: Found seemingly invalid members: %s',
136132
cargo_toml, ', '.join(invalid_members))
137-
return [os.path.join(crate_dir, m) for m in members]
133+
return has_package, members
138134

139135

140136
def get_checksum(src, log):
@@ -165,6 +161,10 @@ def extra_options(extra_vars=None):
165161

166162
return extra_vars
167163

164+
@classmethod
def src_parameter_names(cls):
    """Return the names of the easyconfig parameters that contain sources.

    Extends the list reported by the parent class with 'crates'.
    """
    # Zero-argument super() binds to the first positional argument of the
    # enclosing function. A parameterless @staticmethod has none, so calling it
    # raises "RuntimeError: super(): no arguments". As a classmethod it can still
    # be called on the class or an instance without passing any arguments.
    return super().src_parameter_names() + ['crates']
167+
168168
@staticmethod
169169
def crate_src_filename(pkg_name, pkg_version, _url=None, rev=None):
170170
"""Crate tarball filename based on package name, version and optionally git revision"""
@@ -279,12 +279,6 @@ def extract_step(self):
279279
"""
280280
self.vendor_dir = os.path.join(self.builddir, 'easybuild_vendor')
281281
mkdir(self.vendor_dir)
282-
# Sources from git repositories might contain multiple crates/folders in a so-called "workspace".
283-
# If we put such a workspace into the vendor folder, cargo fails with
284-
# "found a virtual manifest at [...]Cargo.toml instead of a package manifest".
285-
# Hence we put those in a separate folder and only move "regular" crates into the vendor folder.
286-
self.git_vendor_dir = os.path.join(self.builddir, 'easybuild_vendor_git')
287-
mkdir(self.git_vendor_dir)
288282

289283
vendor_crates = {self.crate_src_filename(*crate): crate for crate in self.crates}
290284
# Track git sources for building the cargo config and avoiding duplicated folders
@@ -325,63 +319,99 @@ def extract_step(self):
325319
continue
326320

327321
# Extract dependency crates into vendor subdirectory, separate from sources of main package
328-
extraction_dir = self.builddir
329-
if is_vendor_crate:
330-
extraction_dir = self.git_vendor_dir if git_key else self.vendor_dir
322+
extraction_dir = self.vendor_dir if is_vendor_crate else self.builddir
331323

332324
self.log.info("Unpacking source of %s", src['name'])
333325
existing_dirs = set(os.listdir(extraction_dir))
334-
src_dir = extract_file(src['path'], extraction_dir, cmd=src['cmd'],
335-
extra_options=self.cfg['unpack_options'], change_into_dir=False, trace=False)
326+
extract_file(src['path'], extraction_dir, cmd=src['cmd'],
327+
extra_options=self.cfg['unpack_options'], change_into_dir=False, trace=False)
336328
new_extracted_dirs = set(os.listdir(extraction_dir)) - existing_dirs
337329

338330
if len(new_extracted_dirs) == 0:
339331
# Extraction went wrong
340332
raise EasyBuildError("Unpacking sources of '%s' failed", src['name'])
341-
# Expected crate tarball with 1 folder
342-
# TODO: properly handle case with multiple extracted folders
343-
# this is currently in a grey area, might still be used by cargo
344-
if len(new_extracted_dirs) == 1:
345-
src_dir = os.path.join(extraction_dir, new_extracted_dirs.pop())
346-
self.log.debug("Unpacked sources of %s into: %s", src['name'], src_dir)
347-
348-
if is_vendor_crate and self.cfg['offline']:
349-
# Create checksum file for extracted sources required by vendored crates
350-
351-
# By default there is only a single crate
352-
crate_dirs = [src_dir]
353-
# For git sources determine the folders that contain crates by taking workspaces into account
354-
if git_key:
355-
member_dirs = get_workspace_members(src_dir)
356-
if member_dirs:
357-
crate_dirs = member_dirs
358-
359-
try:
360-
checksum = src[CHECKSUM_TYPE_SHA256]
361-
except KeyError:
362-
checksum = compute_checksum(src['path'], checksum_type=CHECKSUM_TYPE_SHA256)
363-
for crate_dir in crate_dirs:
364-
self.log.info('creating .cargo-checksums.json file for %s', os.path.basename(crate_dir))
365-
chkfile = os.path.join(src_dir, crate_dir, '.cargo-checksum.json')
366-
write_file(chkfile, CARGO_CHECKSUM_JSON.format(checksum=checksum))
367-
# Move non-workspace git crates to the vendor folder
368-
if git_key and member_dirs is None:
369-
src_dir = os.path.join(self.vendor_dir, os.path.basename(crate_dirs[0]))
370-
self.log.debug('Moving crate %s without workspaces to vendor folder', crate_name)
371-
move_file(crate_dirs[0], src_dir)
333+
# There can be multiple folders but we just use the first new one as the finalpath
334+
if len(new_extracted_dirs) > 1:
335+
self.log.warning(f"Found multiple folders when extracting {src['name']}: "
336+
f"{', '.join(new_extracted_dirs)}.")
337+
src_dir = os.path.join(extraction_dir, new_extracted_dirs.pop())
338+
self.log.debug("Unpacked sources of %s into: %s", src['name'], src_dir)
372339

373340
src['finalpath'] = src_dir
374341

375-
self._setup_offline_config(git_sources)
342+
if self.cfg['offline']:
343+
self._setup_offline_config(git_sources)
376344

377345
def _setup_offline_config(self, git_sources):
378346
"""
379347
Setup the configuration required for offline builds
380348
:param git_sources: dict mapping (git_repo, rev) to extracted source
381349
"""
382-
if not self.cfg['offline']:
383-
return
384350
self.log.info("Setting up vendored crates for offline operation")
351+
352+
self.log.debug("Setting up checksum files and unpacking workspaces with virtual manifest")
353+
path_to_source = {src['finalpath']: src for src in self.src}
354+
tmp_dir = Path(tempfile.mkdtemp(dir=self.builddir, prefix='tmp_crate_'))
355+
# Add checksum file for each crate such that it is recognized by cargo.
356+
# Glob to catch multiple folders in a source archive.
357+
for crate_dir in (p.parent for p in Path(self.vendor_dir).glob('*/Cargo.toml')):
358+
src = path_to_source.get(str(crate_dir))
359+
if src:
360+
try:
361+
checksum = src[CHECKSUM_TYPE_SHA256]
362+
except KeyError:
363+
self.log.debug(f"Computing checksum for {src['path']}.")
364+
checksum = compute_checksum(src['path'], checksum_type=CHECKSUM_TYPE_SHA256)
365+
else:
366+
self.log.debug(f'No source found for {crate_dir}. Using nul-checksum for vendoring')
367+
checksum = 'null'
368+
cargo_pkg_dirs = [crate_dir] # Default case: Single crate
369+
# Sources might contain multiple crates/folders in a so-called "workspace".
370+
# We have to move the individual packages out of the workspace so cargo can find them.
371+
# If there is a main package it should to used too,
372+
# otherwise (Only "[workspace]" section and no "[package]" section)
373+
# we have to remove the top-level folder or cargo fails with:
374+
# "found a virtual manifest at [...]Cargo.toml instead of a package manifest"
375+
has_package, members = get_workspace_members(crate_dir)
376+
if members:
377+
self.log.info(f'Found workspace in {crate_dir}. Members: ' + ', '.join(members))
378+
if not any((crate_dir / crate).is_dir() for crate in members):
379+
if not has_package:
380+
raise EasyBuildError(f'Virtual manifest found in {crate_dir} but none of the member folders '
381+
'exist. This cannot be handled by the build.')
382+
# Packages from crates.io contain only a single crate even if the Cargo.toml file lists multiple
383+
# members. Those members are in separate packages on crates.io, so this is a fairly common case.
384+
self.log.debug(f"Member folders of {crate_dir} don't exist so assuming they are in individual "
385+
"crates, e.g. from/on crates.io")
386+
else:
387+
cargo_pkg_dirs = []
388+
tmp_crate_dir = tmp_dir / crate_dir.name
389+
shutil.move(crate_dir, tmp_crate_dir)
390+
for member in members:
391+
# A member crate might be in a subfolder, e.g. 'components/foo',
392+
# which we need to ignore and make the crate a top-level folder.
393+
target_path = Path(self.vendor_dir, os.path.basename(member))
394+
if target_path.exists():
395+
raise EasyBuildError(f'Cannot move {member} out of {crate_dir.name} '
396+
f'as target path {target_path} exists')
397+
# Use copy_dir to resolve symlinks that might point to the parent folder
398+
copy_dir(tmp_crate_dir / member, target_path, symlinks=False)
399+
cargo_pkg_dirs.append(target_path)
400+
if has_package:
401+
# Remove the copied crate folders
402+
for member in members:
403+
remove_dir(tmp_crate_dir / member)
404+
# Keep the main package in the original location
405+
shutil.move(tmp_crate_dir, crate_dir)
406+
cargo_pkg_dirs.append(crate_dir)
407+
else:
408+
self.log.info(f'Virtual manifest found in {crate_dir}, removing it')
409+
remove_dir(tmp_crate_dir)
410+
for pkg_dir in cargo_pkg_dirs:
411+
self.log.info('creating .cargo-checksums.json file for %s', pkg_dir.name)
412+
chkfile = os.path.join(pkg_dir, '.cargo-checksum.json')
413+
write_file(chkfile, CARGO_CHECKSUM_JSON.format(checksum=checksum))
414+
385415
self.log.debug("Writting config.toml entry for vendored crates from crate.io")
386416
config_toml = os.path.join(self.cargo_home, 'config.toml')
387417
# Replace crates-io with vendored sources using build dir wide toml file in CARGO_HOME
@@ -390,29 +420,12 @@ def _setup_offline_config(self, git_sources):
390420
# Tell cargo about the vendored git sources to avoid it failing with:
391421
# Unable to update https://github.com/[...]
392422
# can't checkout from 'https://github.com/[...]]': you are in the offline mode (--offline)
393-
394423
for (git_repo, rev), src in git_sources.items():
395424
crate_name = src['crate'][0]
396-
src_dir = src['finalpath']
397-
if os.path.dirname(src_dir) == self.vendor_dir:
398-
# Non-workspace sources are in vendor_dir
399-
git_branch = self._get_crate_git_repo_branch(crate_name)
400-
template = CONFIG_TOML_SOURCE_GIT_BRANCH if git_branch else CONFIG_TOML_SOURCE_GIT
401-
self.log.debug(f"Writing config.toml entry for git repo: {git_repo} branch {git_branch}, rev {rev}")
402-
write_file(
403-
config_toml,
404-
template.format(url=git_repo, rev=rev, branch=git_branch),
405-
append=True
406-
)
407-
else:
408-
self.log.debug("Writing config.toml entry for git repo: %s rev %s", git_repo, rev)
409-
# Workspace sources stay in their own separate folder.
410-
# We cannot have a `directory = "<dir>"` entry where a folder containing a workspace is inside
411-
write_file(
412-
config_toml,
413-
CONFIG_TOML_SOURCE_GIT_WORKSPACE.format(url=git_repo, rev=rev, workspace_dir=src_dir),
414-
append=True
415-
)
425+
git_branch = self._get_crate_git_repo_branch(crate_name)
426+
template = CONFIG_TOML_SOURCE_GIT_BRANCH if git_branch else CONFIG_TOML_SOURCE_GIT
427+
self.log.debug(f"Writing config.toml entry for git repo: {git_repo} branch {git_branch}, rev {rev}")
428+
write_file(config_toml, template.format(url=git_repo, rev=rev, branch=git_branch), append=True)
416429

417430
def _get_crate_git_repo_branch(self, crate_name):
418431
"""
@@ -421,7 +434,7 @@ def _get_crate_git_repo_branch(self, crate_name):
421434
"""
422435
# Search all Cargo.toml files in main source and vendored crates
423436
cargo_toml_files = []
424-
for cargo_source_dir in (self.src[0]['finalpath'], self.vendor_dir, self.git_vendor_dir):
437+
for cargo_source_dir in (self.src[0]['finalpath'], self.vendor_dir):
425438
cargo_toml_files.extend(glob(os.path.join(cargo_source_dir, '**', 'Cargo.toml'), recursive=True))
426439

427440
if not cargo_toml_files:
@@ -561,7 +574,13 @@ def generate_crate_list(sourcedir):
561574
qs = parse_qs(parsed_url.query)
562575
rev_qs = qs.get('rev', [None])[0]
563576
if rev_qs is not None and rev_qs != rev:
564-
raise ValueError(f"Found different revision in query of URL {url}: {rev_qs} (expected: {rev})")
577+
# It is not an error if one is the short version of the other
578+
# E.g. https://github.com/astral/lsp-types.git?rev=3512a9f#3512a9f33eadc5402cfab1b8f7340824c8ca1439
579+
if (rev_qs and rev.startswith(rev_qs)) or rev_qs.startswith(rev):
580+
# The query value is the relevant one if both are present
581+
rev = rev_qs
582+
else:
583+
raise ValueError(f"Found different revision in query of URL {url}: {rev_qs} (expected: {rev})")
565584
crates.append((name, version, url, rev))
566585
return app_in_cratesio, crates, other_crates
567586

0 commit comments

Comments
 (0)