11//! Helpers to gather the VCS information for `cargo package`.
2+
23use crate :: core:: { Package , Workspace } ;
34use crate :: ops:: PackageOpts ;
45use crate :: sources:: PathEntry ;
@@ -7,11 +8,11 @@ use anyhow::Context;
78use cargo_util:: paths;
89use gix:: bstr:: ByteSlice ;
910use gix:: dir:: walk:: EmissionMode ;
11+ use gix:: dirwalk:: Options ;
1012use gix:: index:: entry:: Mode ;
1113use gix:: status:: tree_index:: TrackRenames ;
1214use gix:: worktree:: stack:: state:: ignore:: Source ;
1315use serde:: Serialize ;
14- use std:: collections:: HashSet ;
1516use std:: path:: { Path , PathBuf } ;
1617use tracing:: debug;
1718
@@ -47,7 +48,7 @@ pub fn check_repo_state(
4748 opts : & PackageOpts < ' _ > ,
4849) -> CargoResult < Option < VcsInfo > > {
4950 let gctx = ws. gctx ( ) ;
50- let Ok ( repo) = gix:: discover ( p. root ( ) ) else {
51+ let Ok ( mut repo) = gix:: discover ( p. root ( ) ) else {
5152 gctx. shell ( ) . verbose ( |shell| {
5253 shell. warn ( format_args ! (
5354 "no (git) VCS found for `{}`" ,
@@ -115,7 +116,7 @@ pub fn check_repo_state(
115116 path. display( ) ,
116117 workdir. display( ) ,
117118 ) ;
118- let Some ( git) = git ( ws, p, src_files, & repo, & opts) ? else {
119+ let Some ( git) = git ( ws, p, src_files, & mut repo, & opts) ? else {
119120 // If the git repo lacks an essential field like `sha1`, and since this field exists from the beginning,
120121 // then don't generate the corresponding file in order to maintain consistency with past behavior.
121122 return Ok ( None ) ;
@@ -181,31 +182,32 @@ fn git(
181182 ws : & Workspace < ' _ > ,
182183 pkg : & Package ,
183184 src_files : & [ PathEntry ] ,
184- repo : & gix:: Repository ,
185+ repo : & mut gix:: Repository ,
185186 opts : & PackageOpts < ' _ > ,
186187) -> CargoResult < Option < GitVcsInfo > > {
188+ {
189+ let mut config = repo. config_snapshot_mut ( ) ;
190+ // This currently is only a very minor speedup for the biggest repositories,
191+ // but might trigger creating many threads.
192+ config. set_value ( & gix:: config:: tree:: Index :: THREADS , "false" ) ?;
193+ }
187194 // This is a collection of any dirty or untracked files. This covers:
188195 // - new/modified/deleted/renamed/type change (index or worktree)
189196 // - untracked files (which are "new" worktree files)
190197 // - ignored (in case the user has an `include` directive that
191198 // conflicts with .gitignore).
192- let ( mut dirty_files, mut dirty_files_outside_package_root ) = ( Vec :: new ( ) , Vec :: new ( ) ) ;
199+ let mut dirty_files = Vec :: new ( ) ;
193200 let workdir = repo. workdir ( ) . unwrap ( ) ;
194201 collect_statuses (
195202 repo,
196203 workdir,
197204 relative_package_root ( repo, pkg. root ( ) ) . as_deref ( ) ,
198205 & mut dirty_files,
199- & mut dirty_files_outside_package_root,
200206 ) ?;
201207
202208 // Include each submodule so that the error message can provide
203209 // specifically *which* files in a submodule are modified.
204- status_submodules (
205- repo,
206- & mut dirty_files,
207- & mut dirty_files_outside_package_root,
208- ) ?;
210+ status_submodules ( repo, & mut dirty_files) ?;
209211
210212 // Find the intersection of dirty in git, and the src_files that would
211213 // be packaged. This is a lazy n^2 check, but seems fine with
@@ -230,10 +232,7 @@ fn git(
230232 }
231233 } )
232234 . map ( |p| p. as_ref ( ) )
233- . chain (
234- dirty_files_outside_pkg_root ( ws, pkg, & dirty_files_outside_package_root, src_files) ?
235- . iter ( ) ,
236- )
235+ . chain ( dirty_files_outside_pkg_root_orig ( ws, pkg, repo, src_files) ?. iter ( ) )
237236 . map ( |path| {
238237 pathdiff:: diff_paths ( path, cwd)
239238 . as_ref ( )
@@ -271,25 +270,17 @@ fn collect_statuses(
271270 workdir : & Path ,
272271 relative_package_root : Option < & Path > ,
273272 dirty_files : & mut Vec < PathBuf > ,
274- dirty_files_outside_package_root : & mut Vec < PathBuf > ,
275273) -> CargoResult < ( ) > {
276274 let statuses = repo
277275 . status ( gix:: progress:: Discard ) ?
278- . dirwalk_options ( |opts| {
279- opts. emit_untracked ( gix:: dir:: walk:: EmissionMode :: Matching )
280- // Also pick up ignored files or whole directories
281- // to specifically catch overzealously ignored source files.
282- // Later we will match these dirs by prefix, which is why collapsing
283- // them is desirable here.
284- . emit_ignored ( Some ( EmissionMode :: CollapseDirectory ) )
285- . emit_tracked ( false )
286- . recurse_repositories ( false )
287- . symlinks_to_directories_are_ignored_like_directories ( true )
288- . emit_empty_directories ( false )
289- } )
276+ . dirwalk_options ( configure_dirwalk)
290277 . tree_index_track_renames ( TrackRenames :: Disabled )
291278 . index_worktree_submodules ( None )
292- . into_iter ( None /* pathspec patterns */ )
279+ . into_iter (
280+ relative_package_root. map ( |rela_pkg_root| {
281+ gix:: path:: into_bstr ( rela_pkg_root) . into_owned ( )
282+ } ) , /* pathspec patterns */
283+ )
293284 . with_context ( || {
294285 format ! (
295286 "failed to begin git status for repo {}" ,
@@ -307,11 +298,6 @@ fn collect_statuses(
307298
308299 let rel_path = gix:: path:: from_bstr ( status. location ( ) ) ;
309300 let path = workdir. join ( & rel_path) ;
310- if relative_package_root. is_some_and ( |pkg_root| !rel_path. starts_with ( pkg_root) ) {
311- dirty_files_outside_package_root. push ( path) ;
312- continue ;
313- }
314-
315301 // It is OK to include Cargo.lock even if it is ignored.
316302 if path. ends_with ( "Cargo.lock" )
317303 && matches ! (
@@ -330,11 +316,7 @@ fn collect_statuses(
330316}
331317
332318/// Helper to collect dirty statuses while recursing into submodules.
333- fn status_submodules (
334- repo : & gix:: Repository ,
335- dirty_files : & mut Vec < PathBuf > ,
336- dirty_files_outside_package_root : & mut Vec < PathBuf > ,
337- ) -> CargoResult < ( ) > {
319+ fn status_submodules ( repo : & gix:: Repository , dirty_files : & mut Vec < PathBuf > ) -> CargoResult < ( ) > {
338320 let Some ( submodules) = repo. submodules ( ) ? else {
339321 return Ok ( ( ) ) ;
340322 } ;
@@ -345,14 +327,8 @@ fn status_submodules(
345327 let Some ( workdir) = sub_repo. workdir ( ) else {
346328 continue ;
347329 } ;
348- status_submodules ( & sub_repo, dirty_files, dirty_files_outside_package_root) ?;
349- collect_statuses (
350- & sub_repo,
351- workdir,
352- None ,
353- dirty_files,
354- dirty_files_outside_package_root,
355- ) ?;
330+ status_submodules ( & sub_repo, dirty_files) ?;
331+ collect_statuses ( & sub_repo, workdir, None , dirty_files) ?;
356332 }
357333 }
358334 Ok ( ( ) )
@@ -374,27 +350,29 @@ fn relative_package_root(repo: &gix::Repository, pkg_root: &Path) -> Option<Path
374350/// This currently looks at
375351///
376352/// * `package.readme` and `package.license-file` pointing to paths outside package root
377- /// * symlinks targets reside outside package root
353+ /// * symlinks targets residing outside package root
378354/// * Any change in the root workspace manifest, regardless of what has changed.
379355///
380356/// This is required because those paths may link to a file outside the
381357/// current package root, but still under the git workdir, affecting the
382358/// final packaged `.crate` file.
383- fn dirty_files_outside_pkg_root (
359+ fn dirty_files_outside_pkg_root_orig (
384360 ws : & Workspace < ' _ > ,
385361 pkg : & Package ,
386- dirty_files_outside_of_package_root : & [ PathBuf ] ,
362+ repo : & gix :: Repository ,
387363 src_files : & [ PathEntry ] ,
388- ) -> CargoResult < HashSet < PathBuf > > {
364+ ) -> CargoResult < Vec < PathBuf > > {
389365 let pkg_root = pkg. root ( ) ;
366+ let workdir = repo. workdir ( ) . unwrap ( ) ;
367+
390368 let meta = pkg. manifest ( ) . metadata ( ) ;
391369 let metadata_paths: Vec < _ > = [ & meta. license_file , & meta. readme ]
392370 . into_iter ( )
393371 . filter_map ( |p| p. as_deref ( ) )
394372 . map ( |path| paths:: normalize_path ( & pkg_root. join ( path) ) )
395373 . collect ( ) ;
396374
397- let dirty_files = src_files
375+ let linked_files_outside_package_root : Vec < _ > = src_files
398376 . iter ( )
399377 . filter ( |p| p. is_symlink_or_under_symlink ( ) )
400378 . map ( |p| p. as_ref ( ) . as_path ( ) )
@@ -403,19 +381,58 @@ fn dirty_files_outside_pkg_root(
403381 // If inside package root. Don't bother checking git status.
404382 . filter ( |p| paths:: strip_prefix_canonical ( p, pkg_root) . is_err ( ) )
405383 // Handle files outside package root but under git workdir,
406- . filter_map ( |src_file| {
407- let canon_src_path = gix:: path:: realpath_opts (
408- src_file,
409- ws. gctx ( ) . cwd ( ) ,
410- gix:: path:: realpath:: MAX_SYMLINKS ,
384+ . filter_map ( |p| paths:: strip_prefix_canonical ( p, workdir) . ok ( ) )
385+ . collect ( ) ;
386+
387+ if linked_files_outside_package_root. is_empty ( ) {
388+ return Ok ( Vec :: new ( ) ) ;
389+ }
390+
391+ let statuses = repo
392+ . status ( gix:: progress:: Discard ) ?
393+ . dirwalk_options ( configure_dirwalk)
394+ // Limit the number of threads used for the worktree status: the pathspec will
395+ // prevent most paths from being visited anyway, so there is not much work.
396+ . index_worktree_options_mut ( |opts| opts. thread_limit = Some ( 1 ) )
397+ . tree_index_track_renames ( TrackRenames :: Disabled )
398+ . index_worktree_submodules ( None )
399+ . into_iter (
400+ linked_files_outside_package_root
401+ . into_iter ( )
402+ . map ( |p| gix:: path:: into_bstr ( p) . into_owned ( ) ) ,
403+ )
404+ . with_context ( || {
405+ format ! (
406+ "failed to begin git status for outfor repo {}" ,
407+ repo. path( ) . display( )
411408 )
412- . unwrap_or_else ( |_| src_file . to_owned ( ) ) ;
409+ } ) ? ;
413410
414- dirty_files_outside_of_package_root
415- . iter ( )
416- . any ( |p| canon_src_path. starts_with ( p) )
417- . then_some ( canon_src_path)
418- } )
419- . collect ( ) ;
411+ let mut dirty_files = Vec :: new ( ) ;
412+ for status in statuses {
413+ let status = status. with_context ( || {
414+ format ! (
415+ "failed to retrieve git status from repo {}" ,
416+ repo. path( ) . display( )
417+ )
418+ } ) ?;
419+
420+ let rel_path = gix:: path:: from_bstr ( status. location ( ) ) ;
421+ let path = workdir. join ( & rel_path) ;
422+ dirty_files. push ( path) ;
423+ }
420424 Ok ( dirty_files)
421425}
426+
/// Applies the directory-walk settings shared by the git status queries in this file.
///
/// It is passed to `dirwalk_options` by both `collect_statuses` and
/// `dirty_files_outside_pkg_root_orig`, so the two status runs use the same options.
///
/// Takes the `Options` by value and returns it with these settings applied:
/// untracked entries are emitted with `EmissionMode::Matching`, ignored entries
/// are emitted with `EmissionMode::CollapseDirectory`, and `emit_tracked`,
/// `recurse_repositories` and `emit_empty_directories` are turned off, while
/// `symlinks_to_directories_are_ignored_like_directories` is turned on.
427+ fn configure_dirwalk ( opts : Options ) -> Options {
428+ opts. emit_untracked ( gix:: dir:: walk:: EmissionMode :: Matching )
429+ // Also pick up ignored files or whole directories
430+ // to specifically catch overzealously ignored source files.
431+ // Later we will match these dirs by prefix, which is why collapsing
432+ // them is desirable here.
433+ . emit_ignored ( Some ( EmissionMode :: CollapseDirectory ) )
434+ . emit_tracked ( false )
435+ . recurse_repositories ( false )
436+ . symlinks_to_directories_are_ignored_like_directories ( true )
437+ . emit_empty_directories ( false )
438+ }
0 commit comments