@@ -3,14 +3,18 @@ use crate::core::SourceId;
33use crate :: core:: { GitReference , Package , Workspace } ;
44use crate :: ops;
55use crate :: sources:: path:: PathSource ;
6- use crate :: sources:: PathEntry ;
6+ use crate :: sources:: RegistrySource ;
77use crate :: sources:: SourceConfigMap ;
88use crate :: sources:: CRATES_IO_REGISTRY ;
99use crate :: util:: cache_lock:: CacheLockMode ;
1010use crate :: util:: { try_canonicalize, CargoResult , GlobalContext } ;
11+
1112use anyhow:: { bail, Context as _} ;
1213use cargo_util:: { paths, Sha256 } ;
14+ use cargo_util_schemas:: core:: SourceKind ;
1315use serde:: Serialize ;
16+ use walkdir:: WalkDir ;
17+
1418use std:: collections:: HashSet ;
1519use std:: collections:: { BTreeMap , BTreeSet , HashMap } ;
1620use std:: ffi:: OsStr ;
@@ -35,7 +39,7 @@ pub fn vendor(ws: &Workspace<'_>, opts: &VendorOptions<'_>) -> CargoResult<()> {
3539 extra_workspaces. push ( ws) ;
3640 }
3741 let workspaces = extra_workspaces. iter ( ) . chain ( Some ( ws) ) . collect :: < Vec < _ > > ( ) ;
38- let _lock = gctx. acquire_package_cache_lock ( CacheLockMode :: MutateExclusive ) ?;
42+ let _lock = gctx. acquire_package_cache_lock ( CacheLockMode :: DownloadExclusive ) ?;
3943 let vendor_config = sync ( gctx, & workspaces, opts) . context ( "failed to sync" ) ?;
4044
4145 if gctx. shell ( ) . verbosity ( ) != Verbosity :: Quiet {
@@ -86,9 +90,16 @@ struct SourceReplacementCache<'gctx> {
8690}
8791
8892impl SourceReplacementCache < ' _ > {
89- fn new ( gctx : & GlobalContext ) -> CargoResult < SourceReplacementCache < ' _ > > {
93+ fn new (
94+ gctx : & GlobalContext ,
95+ respect_source_config : bool ,
96+ ) -> CargoResult < SourceReplacementCache < ' _ > > {
9097 Ok ( SourceReplacementCache {
91- map : SourceConfigMap :: new ( gctx) ?,
98+ map : if respect_source_config {
99+ SourceConfigMap :: new ( gctx)
100+ } else {
101+ SourceConfigMap :: empty ( gctx)
102+ } ?,
92103 cache : Default :: default ( ) ,
93104 } )
94105 }
@@ -111,14 +122,14 @@ fn sync(
111122 opts : & VendorOptions < ' _ > ,
112123) -> CargoResult < VendorConfig > {
113124 let dry_run = false ;
114- let canonical_destination = try_canonicalize ( opts. destination ) ;
115- let canonical_destination = canonical_destination . as_deref ( ) . unwrap_or ( opts. destination ) ;
116- let dest_dir_already_exists = canonical_destination . exists ( ) ;
125+ let vendor_dir = try_canonicalize ( opts. destination ) ;
126+ let vendor_dir = vendor_dir . as_deref ( ) . unwrap_or ( opts. destination ) ;
127+ let vendor_dir_already_exists = vendor_dir . exists ( ) ;
117128
118- paths:: create_dir_all ( & canonical_destination ) ?;
129+ paths:: create_dir_all ( & vendor_dir ) ?;
119130 let mut to_remove = HashSet :: new ( ) ;
120131 if !opts. no_delete {
121- for entry in canonical_destination . read_dir ( ) ? {
132+ for entry in vendor_dir . read_dir ( ) ? {
122133 let entry = entry?;
123134 if !entry
124135 . file_name ( )
@@ -130,19 +141,13 @@ fn sync(
130141 }
131142 }
132143
133- let mut source_replacement_cache = SourceReplacementCache :: new ( gctx) ?;
134-
135- // First up attempt to work around rust-lang/cargo#5956. Apparently build
136- // artifacts sprout up in Cargo's global cache for whatever reason, although
137- // it's unsure what tool is causing these issues at this time. For now we
138- // apply a heavy-hammer approach which is to delete Cargo's unpacked version
139- // of each crate to start off with. After we do this we'll re-resolve and
140- // redownload again, which should trigger Cargo to re-extract all the
141- // crates.
142- //
143- // Note that errors are largely ignored here as this is a best-effort
144- // attempt. If anything fails here we basically just move on to the next
145- // crate to work with.
144+ let mut source_replacement_cache =
145+ SourceReplacementCache :: new ( gctx, opts. respect_source_config ) ?;
146+
147+ let mut checksums = HashMap :: new ( ) ;
148+ let mut ids = BTreeMap :: new ( ) ;
149+
150+ // Let's download all crates and start storing internal tables about them.
146151 for ws in workspaces {
147152 let ( packages, resolve) = ops:: resolve_ws ( ws, dry_run)
148153 . with_context ( || format ! ( "failed to load lockfile for {}" , ws. root( ) . display( ) ) ) ?;
@@ -152,54 +157,19 @@ fn sync(
152157 . with_context ( || format ! ( "failed to download packages for {}" , ws. root( ) . display( ) ) ) ?;
153158
154159 for pkg in resolve. iter ( ) {
155- let sid = if opts. respect_source_config {
156- source_replacement_cache. get ( pkg. source_id ( ) ) ?
157- } else {
158- pkg. source_id ( )
159- } ;
160+ let sid = source_replacement_cache. get ( pkg. source_id ( ) ) ?;
160161
161- // Don't delete actual source code!
162+ // Don't vendor path crates since they're already in the repository
162163 if sid. is_path ( ) {
164+ // And don't delete actual source code!
163165 if let Ok ( path) = sid. url ( ) . to_file_path ( ) {
164166 if let Ok ( path) = try_canonicalize ( path) {
165167 to_remove. remove ( & path) ;
166168 }
167169 }
168170 continue ;
169171 }
170- if sid. is_git ( ) {
171- continue ;
172- }
173172
174- // Only delete sources that are safe to delete, i.e. they are caches.
175- if sid. is_registry ( ) {
176- if let Ok ( pkg) = packages. get_one ( pkg) {
177- drop ( fs:: remove_dir_all ( pkg. root ( ) ) ) ;
178- }
179- continue ;
180- }
181- }
182- }
183-
184- let mut checksums = HashMap :: new ( ) ;
185- let mut ids = BTreeMap :: new ( ) ;
186-
187- // Next up let's actually download all crates and start storing internal
188- // tables about them.
189- for ws in workspaces {
190- let ( packages, resolve) = ops:: resolve_ws ( ws, dry_run)
191- . with_context ( || format ! ( "failed to load lockfile for {}" , ws. root( ) . display( ) ) ) ?;
192-
193- packages
194- . get_many ( resolve. iter ( ) )
195- . with_context ( || format ! ( "failed to download packages for {}" , ws. root( ) . display( ) ) ) ?;
196-
197- for pkg in resolve. iter ( ) {
198- // No need to vendor path crates since they're already in the
199- // repository
200- if pkg. source_id ( ) . is_path ( ) {
201- continue ;
202- }
203173 ids. insert (
204174 pkg,
205175 packages
@@ -247,7 +217,7 @@ fn sync(
247217 } ;
248218
249219 sources. insert ( id. source_id ( ) ) ;
250- let dst = canonical_destination . join ( & dst_name) ;
220+ let dst = vendor_dir . join ( & dst_name) ;
251221 to_remove. remove ( & dst) ;
252222 let cksum = dst. join ( ".cargo-checksum.json" ) ;
253223 // Registries are the only immutable sources,
@@ -263,16 +233,89 @@ fn sync(
263233 ) ?;
264234
265235 let _ = fs:: remove_dir_all ( & dst) ;
266- let pathsource = PathSource :: new ( src, id. source_id ( ) , gctx) ;
267- let paths = pathsource. list_files ( pkg) ?;
268- let mut map = BTreeMap :: new ( ) ;
269- cp_sources ( pkg, src, & paths, & dst, & mut map, & mut tmp_buf, gctx)
270- . with_context ( || format ! ( "failed to copy over vendored sources for: {}" , id) ) ?;
236+
237+ let mut file_cksums = BTreeMap :: new ( ) ;
238+
239+ // Need this mapping anyway because we will directly consult registry sources,
240+ // otherwise builtin source replacement (sparse registry) won't be respected.
241+ let sid = source_replacement_cache. get ( id. source_id ( ) ) ?;
242+
243+ if sid. is_registry ( ) {
244+ // To keep the unpacked source from registry in a pristine state,
245+ // we'll do a direct extraction into the vendor directory.
246+ let registry = match sid. kind ( ) {
247+ SourceKind :: Registry | SourceKind :: SparseRegistry => {
248+ RegistrySource :: remote ( sid, & Default :: default ( ) , gctx) ?
249+ }
250+ SourceKind :: LocalRegistry => {
251+ let path = sid. url ( ) . to_file_path ( ) . expect ( "local path" ) ;
252+ RegistrySource :: local ( sid, & path, & Default :: default ( ) , gctx)
253+ }
254+ _ => unreachable ! ( "not registry source: {sid}" ) ,
255+ } ;
256+
257+ let walkdir = |root| {
258+ WalkDir :: new ( root)
259+ . into_iter ( )
260+ // It is safe to skip errors,
261+ // since we'll hit them during copying/reading later anyway.
262+ . filter_map ( |e| e. ok ( ) )
263+ // There should be no symlink in tarballs on crates.io,
264+ // but might be wrong for local registries.
265+ // Hence here be conservative and include symlinks.
266+ . filter ( |e| e. file_type ( ) . is_file ( ) || e. file_type ( ) . is_symlink ( ) )
267+ } ;
268+ let mut compute_file_cksums = |root| {
269+ for e in walkdir ( root) {
270+ let path = e. path ( ) ;
271+ let relative = path. strip_prefix ( & dst) . unwrap ( ) ;
272+ let cksum = Sha256 :: new ( )
273+ . update_path ( path)
274+ . map ( Sha256 :: finish_hex)
275+ . with_context ( || format ! ( "failed to checksum `{}`" , path. display( ) ) ) ?;
276+ file_cksums. insert ( relative. to_str ( ) . unwrap ( ) . replace ( "\\ " , "/" ) , cksum) ;
277+ }
278+ Ok :: < _ , anyhow:: Error > ( ( ) )
279+ } ;
280+ if dir_has_version_suffix {
281+ registry. unpack_package_in ( id, & vendor_dir, & vendor_this) ?;
282+ compute_file_cksums ( & dst) ?;
283+ } else {
284+ // Due to the extra sanity check in registry unpack
285+ // (ensure it contain only one top-level directory with name `pkg-version`),
286+ // we can only unpack a directory with version suffix,
287+ // and move it to the no suffix directory.
288+ let staging_dir = tempfile:: Builder :: new ( )
289+ . prefix ( ".vendor-staging" )
290+ . tempdir_in ( vendor_dir) ?;
291+ let unpacked_src =
292+ registry. unpack_package_in ( id, staging_dir. path ( ) , & vendor_this) ?;
293+ if let Err ( e) = fs:: rename ( & unpacked_src, & dst) {
294+ // This fallback is mainly for Windows 10 versions earlier than 1607.
295+ // The destination of `fs::rename` can't be a diretory in older versions.
296+ // Can be removed once the minimal supported Windows version gets bumped.
297+ tracing:: warn!( "failed to `mv {unpacked_src:?} {dst:?}`: {e}" ) ;
298+ let paths: Vec < _ > = walkdir ( & unpacked_src) . map ( |e| e. into_path ( ) ) . collect ( ) ;
299+ cp_sources ( pkg, src, & paths, & dst, & mut file_cksums, & mut tmp_buf, gctx)
300+ . with_context ( || format ! ( "failed to copy vendored sources for {id}" ) ) ?;
301+ } else {
302+ compute_file_cksums ( & dst) ?;
303+ }
304+ }
305+ } else {
306+ let paths = PathSource :: new ( src, sid, gctx)
307+ . list_files ( pkg) ?
308+ . into_iter ( )
309+ . map ( |p| p. into_path_buf ( ) )
310+ . collect :: < Vec < _ > > ( ) ;
311+ cp_sources ( pkg, src, & paths, & dst, & mut file_cksums, & mut tmp_buf, gctx)
312+ . with_context ( || format ! ( "failed to copy vendored sources for {id}" ) ) ?;
313+ }
271314
272315 // Finally, emit the metadata about this package
273316 let json = serde_json:: json!( {
274317 "package" : checksums. get( id) ,
275- "files" : map ,
318+ "files" : file_cksums ,
276319 } ) ;
277320
278321 paths:: write ( & cksum, json. to_string ( ) ) ?;
@@ -347,9 +390,9 @@ fn sync(
347390 directory : opts. destination . to_string_lossy ( ) . replace ( "\\ " , "/" ) ,
348391 } ,
349392 ) ;
350- } else if !dest_dir_already_exists {
393+ } else if !vendor_dir_already_exists {
351394 // Nothing to vendor. Remove the destination dir we've just created.
352- paths:: remove_dir ( canonical_destination ) ?;
395+ paths:: remove_dir ( vendor_dir ) ?;
353396 }
354397
355398 Ok ( VendorConfig { source : config } )
@@ -358,36 +401,18 @@ fn sync(
358401fn cp_sources (
359402 pkg : & Package ,
360403 src : & Path ,
361- paths : & [ PathEntry ] ,
404+ paths : & [ PathBuf ] ,
362405 dst : & Path ,
363406 cksums : & mut BTreeMap < String , String > ,
364407 tmp_buf : & mut [ u8 ] ,
365408 gctx : & GlobalContext ,
366409) -> CargoResult < ( ) > {
367410 for p in paths {
368- let p = p. as_ref ( ) ;
369411 let relative = p. strip_prefix ( & src) . unwrap ( ) ;
370412
371- match relative. to_str ( ) {
372- // Skip git config files as they're not relevant to builds most of
373- // the time and if we respect them (e.g. in git) then it'll
374- // probably mess with the checksums when a vendor dir is checked
375- // into someone else's source control
376- Some ( ".gitattributes" | ".gitignore" | ".git" ) => continue ,
377-
378- // Temporary Cargo files
379- Some ( ".cargo-ok" ) => continue ,
380-
381- // Skip patch-style orig/rej files. Published crates on crates.io
382- // have `Cargo.toml.orig` which we don't want to use here and
383- // otherwise these are rarely used as part of the build process.
384- Some ( filename) => {
385- if filename. ends_with ( ".orig" ) || filename. ends_with ( ".rej" ) {
386- continue ;
387- }
388- }
389- _ => { }
390- } ;
413+ if !vendor_this ( relative) {
414+ continue ;
415+ }
391416
392417 // Join pathname components individually to make sure that the joined
393418 // path uses the correct directory separators everywhere, since
@@ -417,7 +442,7 @@ fn cp_sources(
417442 & dst,
418443 & mut dst_opts,
419444 & mut contents. as_bytes ( ) ,
420- "Generated Cargo.toml" ,
445+ Path :: new ( "Generated Cargo.toml" ) ,
421446 tmp_buf,
422447 ) ?
423448 } else {
@@ -430,13 +455,7 @@ fn cp_sources(
430455 . with_context ( || format ! ( "failed to stat {:?}" , p) ) ?;
431456 dst_opts. mode ( src_metadata. mode ( ) ) ;
432457 }
433- copy_and_checksum (
434- & dst,
435- & mut dst_opts,
436- & mut src,
437- & p. display ( ) . to_string ( ) ,
438- tmp_buf,
439- ) ?
458+ copy_and_checksum ( & dst, & mut dst_opts, & mut src, & p, tmp_buf) ?
440459 } ;
441460
442461 cksums. insert ( relative. to_str ( ) . unwrap ( ) . replace ( "\\ " , "/" ) , cksum) ;
@@ -562,7 +581,7 @@ fn copy_and_checksum<T: Read>(
562581 dst_path : & Path ,
563582 dst_opts : & mut OpenOptions ,
564583 contents : & mut T ,
565- contents_path : & str ,
584+ contents_path : & Path ,
566585 buf : & mut [ u8 ] ,
567586) -> CargoResult < String > {
568587 let mut dst = dst_opts
@@ -584,3 +603,25 @@ fn copy_and_checksum<T: Read>(
584603 . with_context ( || format ! ( "failed to write to {:?}" , dst_path) ) ?;
585604 }
586605}
606+
607+ /// Filters files we want to vendor.
608+ ///
609+ /// `relative` is a path relative to the package root.
610+ fn vendor_this ( relative : & Path ) -> bool {
611+ match relative. to_str ( ) {
612+ // Skip git config files as they're not relevant to builds most of
613+ // the time and if we respect them (e.g. in git) then it'll
614+ // probably mess with the checksums when a vendor dir is checked
615+ // into someone else's source control
616+ Some ( ".gitattributes" | ".gitignore" | ".git" ) => false ,
617+
618+ // Temporary Cargo files
619+ Some ( ".cargo-ok" ) => false ,
620+
621+ // Skip patch-style orig/rej files. Published crates on crates.io
622+ // have `Cargo.toml.orig` which we don't want to use here and
623+ // otherwise these are rarely used as part of the build process.
624+ Some ( p) if p. ends_with ( ".orig" ) || p. ends_with ( ".rej" ) => false ,
625+ _ => true ,
626+ }
627+ }
0 commit comments