3333
3434import os
3535import re
36+ import shutil
37+ import tempfile
3638from glob import glob
39+ from pathlib import Path
3740
3841import easybuild .tools .environment as env
3942import easybuild .tools .systemtools as systemtools
4043from easybuild .framework .easyconfig import CUSTOM
4144from easybuild .framework .extensioneasyblock import ExtensionEasyBlock
4245from easybuild .tools .build_log import EasyBuildError , print_warning
4346from easybuild .tools .config import build_option
44- from easybuild .tools .filetools import CHECKSUM_TYPE_SHA256 , compute_checksum , extract_file , mkdir , move_file
45- from easybuild .tools .filetools import read_file , write_file , which
47+ from easybuild .tools .filetools import CHECKSUM_TYPE_SHA256 , compute_checksum , copy_dir , extract_file , mkdir
48+ from easybuild .tools .filetools import read_file , remove_dir , write_file , which
4649from easybuild .tools .run import run_shell_cmd
4750from easybuild .tools .toolchain .compiler import OPTARCH_GENERIC
4851
7275replace-with = "vendored-sources"
7376"""
7477
75- CONFIG_TOML_SOURCE_GIT_WORKSPACE = """
76- [source."real-{url}?rev={rev}"]
77- directory = "{workspace_dir}"
78-
79- [source."{url}?rev={rev}"]
80- git = "{url}"
81- rev = "{rev}"
82- replace-with = "real-{url}?rev={rev}"
83-
84- """
85-
8678CARGO_CHECKSUM_JSON = '{{"files": {{}}, "package": "{checksum}"}}'
8779
8880
89- def get_workspace_members (crate_dir ):
81+ def get_workspace_members (crate_dir : Path ):
9082 """Find all members of a cargo workspace in crate_dir.
9183
9284 (Minimally) parse the Cargo.toml file.
93- If it is a workspace return all members (subfolder names).
94- Otherwise return None.
85+
86+ Return a tuple: (has_package, workspace-members).
87+ has_package is True if the manifest has a [package] section; False together with members means a virtual workspace ([workspace] and no [package])
88+ workspace-members are all members (subfolder names) if it is a workspace, otherwise None
9589 """
96- cargo_toml = os .path .join (crate_dir , 'Cargo.toml' )
90+ cargo_toml = crate_dir / 'Cargo.toml'
91+ lines = [line .strip () for line in read_file (cargo_toml ).splitlines ()]
92+ # A virtual (workspace) manifest has no [package], but only a [workspace] section.
93+ has_package = '[package]' in lines
9794
9895 # We are looking for this:
9996 # [workspace]
@@ -103,11 +100,10 @@ def get_workspace_members(crate_dir):
103100 # "reqwest-retry",
104101 # ]
105102
106- lines = [line .strip () for line in read_file (cargo_toml ).splitlines ()]
107103 try :
108104 start_idx = lines .index ('[workspace]' )
109105 except ValueError :
110- return None
106+ return has_package , None
111107 # Find "members = [" and concatenate the value, stop at end of section or file
112108 member_str = None
113109 for line in lines [start_idx + 1 :]:
@@ -134,7 +130,7 @@ def get_workspace_members(crate_dir):
134130 if invalid_members :
135131 raise EasyBuildError ('Failed to parse %s: Found seemingly invalid members: %s' ,
136132 cargo_toml , ', ' .join (invalid_members ))
137- return [ os . path . join ( crate_dir , m ) for m in members ]
133+ return has_package , members
138134
139135
140136def get_checksum (src , log ):
@@ -165,6 +161,10 @@ def extra_options(extra_vars=None):
165161
166162 return extra_vars
167163
@staticmethod
def src_parameter_names():
    """Return the parent class's source parameter names extended by 'crates'."""
    # Zero-argument super() cannot be used here: a staticmethod receives no first
    # argument to bind, so calling it raises "RuntimeError: super(): no arguments".
    # The compiler-provided __class__ cell refers to the class this method is defined in,
    # so the explicit two-argument form resolves the parent implementation without an instance.
    return super(__class__, __class__).src_parameter_names() + ['crates']
167+
168168 @staticmethod
169169 def crate_src_filename (pkg_name , pkg_version , _url = None , rev = None ):
170170 """Crate tarball filename based on package name, version and optionally git revision"""
@@ -279,12 +279,6 @@ def extract_step(self):
279279 """
280280 self .vendor_dir = os .path .join (self .builddir , 'easybuild_vendor' )
281281 mkdir (self .vendor_dir )
282- # Sources from git repositories might contain multiple crates/folders in a so-called "workspace".
283- # If we put such a workspace into the vendor folder, cargo fails with
284- # "found a virtual manifest at [...]Cargo.toml instead of a package manifest".
285- # Hence we put those in a separate folder and only move "regular" crates into the vendor folder.
286- self .git_vendor_dir = os .path .join (self .builddir , 'easybuild_vendor_git' )
287- mkdir (self .git_vendor_dir )
288282
289283 vendor_crates = {self .crate_src_filename (* crate ): crate for crate in self .crates }
290284 # Track git sources for building the cargo config and avoiding duplicated folders
@@ -325,63 +319,99 @@ def extract_step(self):
325319 continue
326320
327321 # Extract dependency crates into vendor subdirectory, separate from sources of main package
328- extraction_dir = self .builddir
329- if is_vendor_crate :
330- extraction_dir = self .git_vendor_dir if git_key else self .vendor_dir
322+ extraction_dir = self .vendor_dir if is_vendor_crate else self .builddir
331323
332324 self .log .info ("Unpacking source of %s" , src ['name' ])
333325 existing_dirs = set (os .listdir (extraction_dir ))
334- src_dir = extract_file (src ['path' ], extraction_dir , cmd = src ['cmd' ],
335- extra_options = self .cfg ['unpack_options' ], change_into_dir = False , trace = False )
326+ extract_file (src ['path' ], extraction_dir , cmd = src ['cmd' ],
327+ extra_options = self .cfg ['unpack_options' ], change_into_dir = False , trace = False )
336328 new_extracted_dirs = set (os .listdir (extraction_dir )) - existing_dirs
337329
338330 if len (new_extracted_dirs ) == 0 :
339331 # Extraction went wrong
340332 raise EasyBuildError ("Unpacking sources of '%s' failed" , src ['name' ])
341- # Expected crate tarball with 1 folder
342- # TODO: properly handle case with multiple extracted folders
343- # this is currently in a grey area, might still be used by cargo
344- if len (new_extracted_dirs ) == 1 :
345- src_dir = os .path .join (extraction_dir , new_extracted_dirs .pop ())
346- self .log .debug ("Unpacked sources of %s into: %s" , src ['name' ], src_dir )
347-
348- if is_vendor_crate and self .cfg ['offline' ]:
349- # Create checksum file for extracted sources required by vendored crates
350-
351- # By default there is only a single crate
352- crate_dirs = [src_dir ]
353- # For git sources determine the folders that contain crates by taking workspaces into account
354- if git_key :
355- member_dirs = get_workspace_members (src_dir )
356- if member_dirs :
357- crate_dirs = member_dirs
358-
359- try :
360- checksum = src [CHECKSUM_TYPE_SHA256 ]
361- except KeyError :
362- checksum = compute_checksum (src ['path' ], checksum_type = CHECKSUM_TYPE_SHA256 )
363- for crate_dir in crate_dirs :
364- self .log .info ('creating .cargo-checksums.json file for %s' , os .path .basename (crate_dir ))
365- chkfile = os .path .join (src_dir , crate_dir , '.cargo-checksum.json' )
366- write_file (chkfile , CARGO_CHECKSUM_JSON .format (checksum = checksum ))
367- # Move non-workspace git crates to the vendor folder
368- if git_key and member_dirs is None :
369- src_dir = os .path .join (self .vendor_dir , os .path .basename (crate_dirs [0 ]))
370- self .log .debug ('Moving crate %s without workspaces to vendor folder' , crate_name )
371- move_file (crate_dirs [0 ], src_dir )
333+ # There can be multiple folders but we just use one of the new ones (arbitrary, as it is popped from a set) as the finalpath
334+ if len (new_extracted_dirs ) > 1 :
335+ self .log .warning (f"Found multiple folders when extracting { src ['name' ]} : "
336+ f"{ ', ' .join (new_extracted_dirs )} ." )
337+ src_dir = os .path .join (extraction_dir , new_extracted_dirs .pop ())
338+ self .log .debug ("Unpacked sources of %s into: %s" , src ['name' ], src_dir )
372339
373340 src ['finalpath' ] = src_dir
374341
375- self ._setup_offline_config (git_sources )
342+ if self .cfg ['offline' ]:
343+ self ._setup_offline_config (git_sources )
376344
377345 def _setup_offline_config (self , git_sources ):
378346 """
379347 Setup the configuration required for offline builds
380348 :param git_sources: dict mapping (git_repo, rev) to extracted source
381349 """
382- if not self .cfg ['offline' ]:
383- return
384350 self .log .info ("Setting up vendored crates for offline operation" )
351+
352+ self .log .debug ("Setting up checksum files and unpacking workspaces with virtual manifest" )
353+ path_to_source = {src ['finalpath' ]: src for src in self .src }
354+ tmp_dir = Path (tempfile .mkdtemp (dir = self .builddir , prefix = 'tmp_crate_' ))
355+ # Add checksum file for each crate such that it is recognized by cargo.
356+ # Glob to catch multiple folders in a source archive.
357+ for crate_dir in (p .parent for p in Path (self .vendor_dir ).glob ('*/Cargo.toml' )):
358+ src = path_to_source .get (str (crate_dir ))
359+ if src :
360+ try :
361+ checksum = src [CHECKSUM_TYPE_SHA256 ]
362+ except KeyError :
363+ self .log .debug (f"Computing checksum for { src ['path' ]} ." )
364+ checksum = compute_checksum (src ['path' ], checksum_type = CHECKSUM_TYPE_SHA256 )
365+ else :
366+ self .log .debug (f'No source found for { crate_dir } . Using nul-checksum for vendoring' )
367+ checksum = 'null'
368+ cargo_pkg_dirs = [crate_dir ] # Default case: Single crate
369+ # Sources might contain multiple crates/folders in a so-called "workspace".
370+ # We have to move the individual packages out of the workspace so cargo can find them.
371+ # If there is a main package it should be used too,
372+ # otherwise (Only "[workspace]" section and no "[package]" section)
373+ # we have to remove the top-level folder or cargo fails with:
374+ # "found a virtual manifest at [...]Cargo.toml instead of a package manifest"
375+ has_package , members = get_workspace_members (crate_dir )
376+ if members :
377+ self .log .info (f'Found workspace in { crate_dir } . Members: ' + ', ' .join (members ))
378+ if not any ((crate_dir / crate ).is_dir () for crate in members ):
379+ if not has_package :
380+ raise EasyBuildError (f'Virtual manifest found in { crate_dir } but none of the member folders '
381+ 'exist. This cannot be handled by the build.' )
382+ # Packages from crates.io contain only a single crate even if the Cargo.toml file lists multiple
383+ # members. Those members are in separate packages on crates.io, so this is a fairly common case.
384+ self .log .debug (f"Member folders of { crate_dir } don't exist so assuming they are in individual "
385+ "crates, e.g. from/on crates.io" )
386+ else :
387+ cargo_pkg_dirs = []
388+ tmp_crate_dir = tmp_dir / crate_dir .name
389+ shutil .move (crate_dir , tmp_crate_dir )
390+ for member in members :
391+ # A member crate might be in a subfolder, e.g. 'components/foo',
392+ # which we need to ignore and make the crate a top-level folder.
393+ target_path = Path (self .vendor_dir , os .path .basename (member ))
394+ if target_path .exists ():
395+ raise EasyBuildError (f'Cannot move { member } out of { crate_dir .name } '
396+ f'as target path { target_path } exists' )
397+ # Use copy_dir to resolve symlinks that might point to the parent folder
398+ copy_dir (tmp_crate_dir / member , target_path , symlinks = False )
399+ cargo_pkg_dirs .append (target_path )
400+ if has_package :
401+ # Remove the copied crate folders
402+ for member in members :
403+ remove_dir (tmp_crate_dir / member )
404+ # Keep the main package in the original location
405+ shutil .move (tmp_crate_dir , crate_dir )
406+ cargo_pkg_dirs .append (crate_dir )
407+ else :
408+ self .log .info (f'Virtual manifest found in { crate_dir } , removing it' )
409+ remove_dir (tmp_crate_dir )
410+ for pkg_dir in cargo_pkg_dirs :
411+ self .log .info ('creating .cargo-checksums.json file for %s' , pkg_dir .name )
412+ chkfile = os .path .join (pkg_dir , '.cargo-checksum.json' )
413+ write_file (chkfile , CARGO_CHECKSUM_JSON .format (checksum = checksum ))
414+
385415 self .log .debug ("Writting config.toml entry for vendored crates from crate.io" )
386416 config_toml = os .path .join (self .cargo_home , 'config.toml' )
387417 # Replace crates-io with vendored sources using build dir wide toml file in CARGO_HOME
@@ -390,29 +420,12 @@ def _setup_offline_config(self, git_sources):
390420 # Tell cargo about the vendored git sources to avoid it failing with:
391421 # Unable to update https://github.com/[...]
392422 # can't checkout from 'https://github.com/[...]]': you are in the offline mode (--offline)
393-
394423 for (git_repo , rev ), src in git_sources .items ():
395424 crate_name = src ['crate' ][0 ]
396- src_dir = src ['finalpath' ]
397- if os .path .dirname (src_dir ) == self .vendor_dir :
398- # Non-workspace sources are in vendor_dir
399- git_branch = self ._get_crate_git_repo_branch (crate_name )
400- template = CONFIG_TOML_SOURCE_GIT_BRANCH if git_branch else CONFIG_TOML_SOURCE_GIT
401- self .log .debug (f"Writing config.toml entry for git repo: { git_repo } branch { git_branch } , rev { rev } " )
402- write_file (
403- config_toml ,
404- template .format (url = git_repo , rev = rev , branch = git_branch ),
405- append = True
406- )
407- else :
408- self .log .debug ("Writing config.toml entry for git repo: %s rev %s" , git_repo , rev )
409- # Workspace sources stay in their own separate folder.
410- # We cannot have a `directory = "<dir>"` entry where a folder containing a workspace is inside
411- write_file (
412- config_toml ,
413- CONFIG_TOML_SOURCE_GIT_WORKSPACE .format (url = git_repo , rev = rev , workspace_dir = src_dir ),
414- append = True
415- )
425+ git_branch = self ._get_crate_git_repo_branch (crate_name )
426+ template = CONFIG_TOML_SOURCE_GIT_BRANCH if git_branch else CONFIG_TOML_SOURCE_GIT
427+ self .log .debug (f"Writing config.toml entry for git repo: { git_repo } branch { git_branch } , rev { rev } " )
428+ write_file (config_toml , template .format (url = git_repo , rev = rev , branch = git_branch ), append = True )
416429
417430 def _get_crate_git_repo_branch (self , crate_name ):
418431 """
@@ -421,7 +434,7 @@ def _get_crate_git_repo_branch(self, crate_name):
421434 """
422435 # Search all Cargo.toml files in main source and vendored crates
423436 cargo_toml_files = []
424- for cargo_source_dir in (self .src [0 ]['finalpath' ], self .vendor_dir , self . git_vendor_dir ):
437+ for cargo_source_dir in (self .src [0 ]['finalpath' ], self .vendor_dir ):
425438 cargo_toml_files .extend (glob (os .path .join (cargo_source_dir , '**' , 'Cargo.toml' ), recursive = True ))
426439
427440 if not cargo_toml_files :
@@ -561,7 +574,13 @@ def generate_crate_list(sourcedir):
561574 qs = parse_qs (parsed_url .query )
562575 rev_qs = qs .get ('rev' , [None ])[0 ]
563576 if rev_qs is not None and rev_qs != rev :
564- raise ValueError (f"Found different revision in query of URL { url } : { rev_qs } (expected: { rev } )" )
577+ # It is not an error if one is the short version of the other
578+ # E.g. https://github.com/astral/lsp-types.git?rev=3512a9f#3512a9f33eadc5402cfab1b8f7340824c8ca1439
579+ if (rev_qs and rev .startswith (rev_qs )) or rev_qs .startswith (rev ):
580+ # The query value is the relevant one if both are present
581+ rev = rev_qs
582+ else :
583+ raise ValueError (f"Found different revision in query of URL { url } : { rev_qs } (expected: { rev } )" )
565584 crates .append ((name , version , url , rev ))
566585 return app_in_cratesio , crates , other_crates
567586
0 commit comments