@@ -41,24 +41,25 @@ impl fmt::Debug for UnitHash {
4141 }
4242}
4343
44- /// The `Metadata` is a hash used to make unique file names for each unit in a
45- /// build. It is also used for symbol mangling .
44+ /// [ `Metadata`] tracks several [`UnitHash`]s, including
45+ /// [`Metadata::unit_id`], [`Metadata::c_metadata`], and [`Metadata::c_extra_filename`] .
4646///
47- /// For example:
47+ /// We use a hash because it is an easy way to guarantee
48+ /// that all the inputs can be converted to a valid path.
49+ ///
50+ /// [`Metadata::unit_id`] is used to uniquely identify a unit in the build graph.
51+ /// This serves as a similar role as [`Metadata::c_extra_filename`] in that it uniquely identifies output
52+ /// on the filesystem except that its always present.
53+ ///
54+ /// [`Metadata::c_extra_filename`] is needed for cases like:
4855/// - A project may depend on crate `A` and crate `B`, so the package name must be in the file name.
4956/// - Similarly a project may depend on two versions of `A`, so the version must be in the file name.
5057///
51- /// In general this must include all things that need to be distinguished in different parts of
58+ /// This also acts as the main layer of caching provided by Cargo
59+ /// so this must include all things that need to be distinguished in different parts of
5260/// the same build. This is absolutely required or we override things before
5361/// we get chance to use them.
5462///
55- /// It is also used for symbol mangling, because if you have two versions of
56- /// the same crate linked together, their symbols need to be differentiated.
57- ///
58- /// We use a hash because it is an easy way to guarantee
59- /// that all the inputs can be converted to a valid path.
60- ///
61- /// This also acts as the main layer of caching provided by Cargo.
6263/// For example, we want to cache `cargo build` and `cargo doc` separately, so that running one
6364/// does not invalidate the artifacts for the other. We do this by including [`CompileMode`] in the
6465/// hash, thus the artifacts go in different folders and do not override each other.
@@ -73,37 +74,41 @@ impl fmt::Debug for UnitHash {
7374/// more space than needed. This makes not including something in `Metadata`
7475/// a form of cache invalidation.
7576///
76- /// You should also avoid anything that would interfere with reproducible
77+ /// Note that the `Fingerprint` is in charge of tracking everything needed to determine if a
78+ /// rebuild is needed.
79+ ///
80+ /// [`Metadata::c_metadata`] is used for symbol mangling, because if you have two versions of
81+ /// the same crate linked together, their symbols need to be differentiated.
82+ ///
83+ /// You should avoid anything that would interfere with reproducible
7784/// builds. For example, *any* absolute path should be avoided. This is one
78- /// reason that `RUSTFLAGS` is not in `Metadata` , because it often has
85+ /// reason that `RUSTFLAGS` is not in [ `Metadata::c_metadata`] , because it often has
7986/// absolute paths (like `--remap-path-prefix` which is fundamentally used for
8087/// reproducible builds and has absolute paths in it). Also, in some cases the
8188/// mangled symbols need to be stable between different builds with different
8289/// settings. For example, profile-guided optimizations need to swap
8390/// `RUSTFLAGS` between runs, but needs to keep the same symbol names.
84- ///
85- /// Note that the `Fingerprint` is in charge of tracking everything needed to determine if a
86- /// rebuild is needed.
8791#[ derive( Copy , Clone , Debug ) ]
8892pub struct Metadata {
89- meta_hash : UnitHash ,
90- use_extra_filename : bool ,
93+ unit_id : UnitHash ,
94+ c_metadata : UnitHash ,
95+ c_extra_filename : Option < UnitHash > ,
9196}
9297
9398impl Metadata {
9499 /// A hash to identify a given [`Unit`] in the build graph
95100 pub fn unit_id ( & self ) -> UnitHash {
96- self . meta_hash
101+ self . unit_id
97102 }
98103
99104 /// A hash to add to symbol naming through `-C metadata`
100105 pub fn c_metadata ( & self ) -> UnitHash {
101- self . meta_hash
106+ self . c_metadata
102107 }
103108
104109 /// A hash to add to file names through `-C extra-filename`
105110 pub fn c_extra_filename ( & self ) -> Option < UnitHash > {
106- self . use_extra_filename . then_some ( self . meta_hash )
111+ self . c_extra_filename
107112 }
108113}
109114
@@ -588,56 +593,54 @@ fn compute_metadata(
588593 metas : & mut HashMap < Unit , Metadata > ,
589594) -> Metadata {
590595 let bcx = & build_runner. bcx ;
591- let mut hasher = StableHasher :: new ( ) ;
596+ let deps_metadata = build_runner
597+ . unit_deps ( unit)
598+ . iter ( )
599+ . map ( |dep| * metadata_of ( & dep. unit , build_runner, metas) )
600+ . collect :: < Vec < _ > > ( ) ;
601+ let use_extra_filename = use_extra_filename ( bcx, unit) ;
592602
593- METADATA_VERSION . hash ( & mut hasher) ;
603+ let mut shared_hasher = StableHasher :: new ( ) ;
604+
605+ METADATA_VERSION . hash ( & mut shared_hasher) ;
594606
595607 // Unique metadata per (name, source, version) triple. This'll allow us
596608 // to pull crates from anywhere without worrying about conflicts.
597609 unit. pkg
598610 . package_id ( )
599611 . stable_hash ( bcx. ws . root ( ) )
600- . hash ( & mut hasher ) ;
612+ . hash ( & mut shared_hasher ) ;
601613
602614 // Also mix in enabled features to our metadata. This'll ensure that
603615 // when changing feature sets each lib is separately cached.
604- unit. features . hash ( & mut hasher) ;
605-
606- // Mix in the target-metadata of all the dependencies of this target.
607- let mut deps_metadata = build_runner
608- . unit_deps ( unit)
609- . iter ( )
610- . map ( |dep| metadata_of ( & dep. unit , build_runner, metas) . meta_hash )
611- . collect :: < Vec < _ > > ( ) ;
612- deps_metadata. sort ( ) ;
613- deps_metadata. hash ( & mut hasher) ;
616+ unit. features . hash ( & mut shared_hasher) ;
614617
615618 // Throw in the profile we're compiling with. This helps caching
616619 // `panic=abort` and `panic=unwind` artifacts, additionally with various
617620 // settings like debuginfo and whatnot.
618- unit. profile . hash ( & mut hasher ) ;
619- unit. mode . hash ( & mut hasher ) ;
620- build_runner. lto [ unit] . hash ( & mut hasher ) ;
621+ unit. profile . hash ( & mut shared_hasher ) ;
622+ unit. mode . hash ( & mut shared_hasher ) ;
623+ build_runner. lto [ unit] . hash ( & mut shared_hasher ) ;
621624
622625 // Artifacts compiled for the host should have a different
623626 // metadata piece than those compiled for the target, so make sure
624627 // we throw in the unit's `kind` as well. Use `fingerprint_hash`
625628 // so that the StableHash doesn't change based on the pathnames
626629 // of the custom target JSON spec files.
627- unit. kind . fingerprint_hash ( ) . hash ( & mut hasher ) ;
630+ unit. kind . fingerprint_hash ( ) . hash ( & mut shared_hasher ) ;
628631
629632 // Finally throw in the target name/kind. This ensures that concurrent
630633 // compiles of targets in the same crate don't collide.
631- unit. target . name ( ) . hash ( & mut hasher ) ;
632- unit. target . kind ( ) . hash ( & mut hasher ) ;
634+ unit. target . name ( ) . hash ( & mut shared_hasher ) ;
635+ unit. target . kind ( ) . hash ( & mut shared_hasher ) ;
633636
634- hash_rustc_version ( bcx, & mut hasher , unit) ;
637+ hash_rustc_version ( bcx, & mut shared_hasher , unit) ;
635638
636639 if build_runner. bcx . ws . is_member ( & unit. pkg ) {
637640 // This is primarily here for clippy. This ensures that the clippy
638641 // artifacts are separate from the `check` ones.
639642 if let Some ( path) = & build_runner. bcx . rustc ( ) . workspace_wrapper {
640- path. hash ( & mut hasher ) ;
643+ path. hash ( & mut shared_hasher ) ;
641644 }
642645 }
643646
@@ -648,7 +651,7 @@ fn compute_metadata(
648651 . gctx
649652 . get_env ( "__CARGO_DEFAULT_LIB_METADATA" )
650653 {
651- channel. hash ( & mut hasher ) ;
654+ channel. hash ( & mut shared_hasher ) ;
652655 }
653656
654657 // std units need to be kept separate from user dependencies. std crates
@@ -658,7 +661,7 @@ fn compute_metadata(
658661 // don't need unstable support. A future experiment might be to set
659662 // `is_std` to false for build dependencies so that they can be shared
660663 // with user dependencies.
661- unit. is_std . hash ( & mut hasher ) ;
664+ unit. is_std . hash ( & mut shared_hasher ) ;
662665
663666 // While we don't hash RUSTFLAGS because it may contain absolute paths that
664667 // hurts reproducibility, we track whether a unit's RUSTFLAGS is from host
@@ -677,15 +680,71 @@ fn compute_metadata(
677680 . target_config ( CompileKind :: Host )
678681 . links_overrides
679682 != unit. links_overrides ;
680- target_configs_are_different. hash ( & mut hasher ) ;
683+ target_configs_are_different. hash ( & mut shared_hasher ) ;
681684 }
682685
686+ let mut c_metadata_hasher = shared_hasher. clone ( ) ;
687+ // Mix in the target-metadata of all the dependencies of this target.
688+ let mut dep_c_metadata_hashes = deps_metadata
689+ . iter ( )
690+ . map ( |m| m. c_metadata )
691+ . collect :: < Vec < _ > > ( ) ;
692+ dep_c_metadata_hashes. sort ( ) ;
693+ dep_c_metadata_hashes. hash ( & mut c_metadata_hasher) ;
694+
695+ let mut c_extra_filename_hasher = shared_hasher. clone ( ) ;
696+ // Mix in the target-metadata of all the dependencies of this target.
697+ let mut dep_c_extra_filename_hashes = deps_metadata
698+ . iter ( )
699+ . map ( |m| m. c_extra_filename )
700+ . collect :: < Vec < _ > > ( ) ;
701+ dep_c_extra_filename_hashes. sort ( ) ;
702+ dep_c_extra_filename_hashes. hash ( & mut c_extra_filename_hasher) ;
703+ // Avoid trashing the caches on RUSTFLAGS changing via `c_extra_filename`
704+ //
705+ // Limited to `c_extra_filename` to help with reproducible build / PGO issues.
706+ let default = Vec :: new ( ) ;
707+ let extra_args = build_runner. bcx . extra_args_for ( unit) . unwrap_or ( & default) ;
708+ if !has_remap_path_prefix ( & extra_args) {
709+ extra_args. hash ( & mut c_extra_filename_hasher) ;
710+ }
711+ if unit. mode . is_doc ( ) || unit. mode . is_doc_scrape ( ) {
712+ if !has_remap_path_prefix ( & unit. rustdocflags ) {
713+ unit. rustdocflags . hash ( & mut c_extra_filename_hasher) ;
714+ }
715+ } else {
716+ if !has_remap_path_prefix ( & unit. rustflags ) {
717+ unit. rustflags . hash ( & mut c_extra_filename_hasher) ;
718+ }
719+ }
720+
721+ let c_metadata = UnitHash ( c_metadata_hasher. finish ( ) ) ;
722+ let c_extra_filename = UnitHash ( c_extra_filename_hasher. finish ( ) ) ;
723+ let unit_id = c_extra_filename;
724+
725+ let c_extra_filename = use_extra_filename. then_some ( c_extra_filename) ;
726+
683727 Metadata {
684- meta_hash : UnitHash ( hasher. finish ( ) ) ,
685- use_extra_filename : use_extra_filename ( bcx, unit) ,
728+ unit_id,
729+ c_metadata,
730+ c_extra_filename,
686731 }
687732}
688733
734+ /// HACK: Detect the *potential* presence of `--remap-path-prefix`
735+ ///
736+ /// As CLI parsing is contextual and dependent on the CLI definition to understand the context, we
737+ /// can't say for sure whether `--remap-path-prefix` is present, so we guess if anything looks like
738+ /// it.
739+ /// If we could, we'd strip it out for hashing.
740+ /// Instead, we use this to avoid hashing rustflags if it might be present to avoid the risk of taking
741+ /// a flag that is trying to make things reproducible and making things less reproducible by the
742+ /// `-Cextra-filename` showing up in the rlib, even with `split-debuginfo`.
743+ fn has_remap_path_prefix ( args : & [ String ] ) -> bool {
744+ args. iter ( )
745+ . any ( |s| s. starts_with ( "--remap-path-prefix=" ) || s == "--remap-path-prefix" )
746+ }
747+
689748/// Hash the version of rustc being used during the build process.
690749fn hash_rustc_version ( bcx : & BuildContext < ' _ , ' _ > , hasher : & mut StableHasher , unit : & Unit ) {
691750 let vers = & bcx. rustc ( ) . version ;
0 commit comments