Skip to content

Commit 5c3dee5

Browse files
committed
Unify handling of vendored crates in cargo EasyBlock
1 parent b1bdebb commit 5c3dee5

File tree

1 file changed

+71
-85
lines changed

1 file changed

+71
-85
lines changed

easybuild/easyblocks/generic/cargo.py

Lines changed: 71 additions & 85 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,8 @@
3333

3434
import os
3535
import re
36+
import shutil
37+
import tempfile
3638
from glob import glob
3739

3840
import easybuild.tools.environment as env
@@ -41,8 +43,8 @@
4143
from easybuild.framework.extensioneasyblock import ExtensionEasyBlock
4244
from easybuild.tools.build_log import EasyBuildError, print_warning
4345
from easybuild.tools.config import build_option
44-
from easybuild.tools.filetools import CHECKSUM_TYPE_SHA256, compute_checksum, extract_file, mkdir, move_file
45-
from easybuild.tools.filetools import read_file, write_file, which
46+
from easybuild.tools.filetools import CHECKSUM_TYPE_SHA256, compute_checksum, copy_dir, extract_file, mkdir
47+
from easybuild.tools.filetools import read_file, remove_dir, write_file, which
4648
from easybuild.tools.run import run_shell_cmd
4749
from easybuild.tools.toolchain.compiler import OPTARCH_GENERIC
4850

@@ -72,28 +74,20 @@
7274
replace-with = "vendored-sources"
7375
"""
7476

75-
CONFIG_TOML_SOURCE_GIT_WORKSPACE = """
76-
[source."real-{url}?rev={rev}"]
77-
directory = "{workspace_dir}"
78-
79-
[source."{url}?rev={rev}"]
80-
git = "{url}"
81-
rev = "{rev}"
82-
replace-with = "real-{url}?rev={rev}"
83-
84-
"""
85-
8677
CARGO_CHECKSUM_JSON = '{{"files": {{}}, "package": "{checksum}"}}'
8778

8879

89-
def get_workspace_members(crate_dir):
90-
"""Find all members of a cargo workspace in crate_dir.
80+
def get_virtual_workspace_members(crate_dir):
81+
"""Find all members of a cargo virtual workspace in crate_dir.
9182
9283
(Minimally) parse the Cargo.toml file.
93-
If it is a workspace return all members (subfolder names).
84+
If it is a virtual workspace ([workspace] and no [package]) return all members (subfolder names).
9485
Otherwise return None.
9586
"""
9687
cargo_toml = os.path.join(crate_dir, 'Cargo.toml')
88+
lines = [line.strip() for line in read_file(cargo_toml).splitlines()]
89+
if '[package]' in lines:
90+
return None
9791

9892
# We are looking for this:
9993
# [workspace]
@@ -103,7 +97,6 @@ def get_workspace_members(crate_dir):
10397
# "reqwest-retry",
10498
# ]
10599

106-
lines = [line.strip() for line in read_file(cargo_toml).splitlines()]
107100
try:
108101
start_idx = lines.index('[workspace]')
109102
except ValueError:
@@ -134,7 +127,7 @@ def get_workspace_members(crate_dir):
134127
if invalid_members:
135128
raise EasyBuildError('Failed to parse %s: Found seemingly invalid members: %s',
136129
cargo_toml, ', '.join(invalid_members))
137-
return [os.path.join(crate_dir, m) for m in members]
130+
return members
138131

139132

140133
def get_checksum(src, log):
@@ -279,12 +272,6 @@ def extract_step(self):
279272
"""
280273
self.vendor_dir = os.path.join(self.builddir, 'easybuild_vendor')
281274
mkdir(self.vendor_dir)
282-
# Sources from git repositories might contain multiple crates/folders in a so-called "workspace".
283-
# If we put such a workspace into the vendor folder, cargo fails with
284-
# "found a virtual manifest at [...]Cargo.toml instead of a package manifest".
285-
# Hence we put those in a separate folder and only move "regular" crates into the vendor folder.
286-
self.git_vendor_dir = os.path.join(self.builddir, 'easybuild_vendor_git')
287-
mkdir(self.git_vendor_dir)
288275

289276
vendor_crates = {self.crate_src_filename(*crate): crate for crate in self.crates}
290277
# Track git sources for building the cargo config and avoiding duplicated folders
@@ -325,63 +312,79 @@ def extract_step(self):
325312
continue
326313

327314
# Extract dependency crates into vendor subdirectory, separate from sources of main package
328-
extraction_dir = self.builddir
329-
if is_vendor_crate:
330-
extraction_dir = self.git_vendor_dir if git_key else self.vendor_dir
315+
extraction_dir = self.vendor_dir if is_vendor_crate else self.builddir
331316

332317
self.log.info("Unpacking source of %s", src['name'])
333318
existing_dirs = set(os.listdir(extraction_dir))
334-
src_dir = extract_file(src['path'], extraction_dir, cmd=src['cmd'],
335-
extra_options=self.cfg['unpack_options'], change_into_dir=False, trace=False)
319+
extract_file(src['path'], extraction_dir, cmd=src['cmd'],
320+
extra_options=self.cfg['unpack_options'], change_into_dir=False, trace=False)
336321
new_extracted_dirs = set(os.listdir(extraction_dir)) - existing_dirs
337322

338323
if len(new_extracted_dirs) == 0:
339324
# Extraction went wrong
340325
raise EasyBuildError("Unpacking sources of '%s' failed", src['name'])
341-
# Expected crate tarball with 1 folder
342-
# TODO: properly handle case with multiple extracted folders
343-
# this is currently in a grey area, might still be used by cargo
344-
if len(new_extracted_dirs) == 1:
345-
src_dir = os.path.join(extraction_dir, new_extracted_dirs.pop())
346-
self.log.debug("Unpacked sources of %s into: %s", src['name'], src_dir)
347-
348-
if is_vendor_crate and self.cfg['offline']:
349-
# Create checksum file for extracted sources required by vendored crates
350-
351-
# By default there is only a single crate
352-
crate_dirs = [src_dir]
353-
# For git sources determine the folders that contain crates by taking workspaces into account
354-
if git_key:
355-
member_dirs = get_workspace_members(src_dir)
356-
if member_dirs:
357-
crate_dirs = member_dirs
358-
359-
try:
360-
checksum = src[CHECKSUM_TYPE_SHA256]
361-
except KeyError:
362-
checksum = compute_checksum(src['path'], checksum_type=CHECKSUM_TYPE_SHA256)
363-
for crate_dir in crate_dirs:
364-
self.log.info('creating .cargo-checksums.json file for %s', os.path.basename(crate_dir))
365-
chkfile = os.path.join(src_dir, crate_dir, '.cargo-checksum.json')
366-
write_file(chkfile, CARGO_CHECKSUM_JSON.format(checksum=checksum))
367-
# Move non-workspace git crates to the vendor folder
368-
if git_key and member_dirs is None:
369-
src_dir = os.path.join(self.vendor_dir, os.path.basename(crate_dirs[0]))
370-
self.log.debug('Moving crate %s without workspaces to vendor folder', crate_name)
371-
move_file(crate_dirs[0], src_dir)
326+
# There can be multiple folders, but we just use one of the new ones (arbitrary, as sets are unordered) as the finalpath
327+
if len(new_extracted_dirs) > 1:
328+
self.log.warning(f"Found multiple folders when extracting {src['name']}: "
329+
f"{', '.join(new_extracted_dirs)}.")
330+
src_dir = os.path.join(extraction_dir, new_extracted_dirs.pop())
331+
self.log.debug("Unpacked sources of %s into: %s", src['name'], src_dir)
372332

373333
src['finalpath'] = src_dir
374334

375-
self._setup_offline_config(git_sources)
335+
if self.cfg['offline']:
336+
self._setup_offline_config(git_sources)
376337

377338
def _setup_offline_config(self, git_sources):
378339
"""
379340
Setup the configuration required for offline builds
380341
:param git_sources: dict mapping (git_repo, rev) to extracted source
381342
"""
382-
if not self.cfg['offline']:
383-
return
384343
self.log.info("Setting up vendored crates for offline operation")
344+
345+
self.log.debug("Setting up checksum files and unpacking workspaces with virtual manifest")
346+
path_to_source = {src['finalpath']: src for src in self.src}
347+
tmp_dir = tempfile.mkdtemp(dir=self.builddir, prefix='tmp_crate_')
348+
# Add checksum file for each crate such that it is recognized by cargo.
349+
# Glob to catch multiple folders in a source archive.
350+
crate_dirs = [os.path.dirname(p) for p in glob(os.path.join(self.vendor_dir, '*', 'Cargo.toml'))]
351+
for crate_dir in crate_dirs:
352+
src = path_to_source.get(crate_dir)
353+
if src:
354+
try:
355+
checksum = src[CHECKSUM_TYPE_SHA256]
356+
except KeyError:
357+
self.log.debug(f"Computing checksum for {src['path']}.")
358+
checksum = compute_checksum(src['path'], checksum_type=CHECKSUM_TYPE_SHA256)
359+
else:
360+
self.log.debug(f'No source found for {crate_dir}. Using nul-checksum for vendoring')
361+
checksum = 'null'
362+
# Sources might contain multiple crates/folders in a so-called "workspace".
363+
# If there isn't a main package (only a "[workspace]" section and no "[package]" section) we have to move
364+
# the individual packages out of the workspace or cargo fails with
365+
# "found a virtual manifest at [...]Cargo.toml instead of a package manifest"
366+
member_dirs = get_virtual_workspace_members(crate_dir)
367+
if member_dirs:
368+
self.log.info(f'Found virtual manifest in {crate_dir}. Members: ' + ', '.join(member_dirs))
369+
crate_dirs = []
370+
tmp_crate_dir = os.path.join(tmp_dir, os.path.basename(crate_dir))
371+
shutil.move(crate_dir, tmp_crate_dir)
372+
for crate in member_dirs:
373+
target_path = os.path.join(self.vendor_dir, crate)
374+
if os.path.exists(target_path):
375+
raise EasyBuildError(f'Cannot move {crate} out of {os.path.basename(crate_dir)} '
376+
f'as target path {target_path} exists')
377+
# Use copy_dir to resolve symlinks that might point to the parent folder
378+
copy_dir(os.path.join(tmp_crate_dir, crate), target_path, symlinks=False)
379+
crate_dirs.append(target_path)
380+
remove_dir(tmp_crate_dir)
381+
else:
382+
crate_dirs = [crate_dir]
383+
for crate_dir in crate_dirs:
384+
self.log.info('creating .cargo-checksums.json file for %s', os.path.basename(crate_dir))
385+
chkfile = os.path.join(crate_dir, '.cargo-checksum.json')
386+
write_file(chkfile, CARGO_CHECKSUM_JSON.format(checksum=checksum))
387+
385388
self.log.debug("Writting config.toml entry for vendored crates from crate.io")
386389
config_toml = os.path.join(self.cargo_home, 'config.toml')
387390
# Replace crates-io with vendored sources using build dir wide toml file in CARGO_HOME
@@ -390,29 +393,12 @@ def _setup_offline_config(self, git_sources):
390393
# Tell cargo about the vendored git sources to avoid it failing with:
391394
# Unable to update https://github.com/[...]
392395
# can't checkout from 'https://github.com/[...]]': you are in the offline mode (--offline)
393-
394396
for (git_repo, rev), src in git_sources.items():
395397
crate_name = src['crate'][0]
396-
src_dir = src['finalpath']
397-
if os.path.dirname(src_dir) == self.vendor_dir:
398-
# Non-workspace sources are in vendor_dir
399-
git_branch = self._get_crate_git_repo_branch(crate_name)
400-
template = CONFIG_TOML_SOURCE_GIT_BRANCH if git_branch else CONFIG_TOML_SOURCE_GIT
401-
self.log.debug(f"Writing config.toml entry for git repo: {git_repo} branch {git_branch}, rev {rev}")
402-
write_file(
403-
config_toml,
404-
template.format(url=git_repo, rev=rev, branch=git_branch),
405-
append=True
406-
)
407-
else:
408-
self.log.debug("Writing config.toml entry for git repo: %s rev %s", git_repo, rev)
409-
# Workspace sources stay in their own separate folder.
410-
# We cannot have a `directory = "<dir>"` entry where a folder containing a workspace is inside
411-
write_file(
412-
config_toml,
413-
CONFIG_TOML_SOURCE_GIT_WORKSPACE.format(url=git_repo, rev=rev, workspace_dir=src_dir),
414-
append=True
415-
)
398+
git_branch = self._get_crate_git_repo_branch(crate_name)
399+
template = CONFIG_TOML_SOURCE_GIT_BRANCH if git_branch else CONFIG_TOML_SOURCE_GIT
400+
self.log.debug(f"Writing config.toml entry for git repo: {git_repo} branch {git_branch}, rev {rev}")
401+
write_file(config_toml, template.format(url=git_repo, rev=rev, branch=git_branch), append=True)
416402

417403
def _get_crate_git_repo_branch(self, crate_name):
418404
"""
@@ -421,7 +407,7 @@ def _get_crate_git_repo_branch(self, crate_name):
421407
"""
422408
# Search all Cargo.toml files in main source and vendored crates
423409
cargo_toml_files = []
424-
for cargo_source_dir in (self.src[0]['finalpath'], self.vendor_dir, self.git_vendor_dir):
410+
for cargo_source_dir in (self.src[0]['finalpath'], self.vendor_dir):
425411
cargo_toml_files.extend(glob(os.path.join(cargo_source_dir, '**', 'Cargo.toml'), recursive=True))
426412

427413
if not cargo_toml_files:

0 commit comments

Comments
 (0)