@@ -63,15 +63,16 @@ pub enum PathFreshness {
6363/// The function behaves differently in CI and outside CI.
6464///
6565/// - Outside CI, we want to find out if `target_paths` were modified in some local commit on
66- /// top of the local master branch .
66+ /// top of the latest upstream commit that is available in local git history.
6767/// If not, we try to find the most recent upstream commit (we assume that upstream commits
6868/// are those made by bors) that modified `target_paths`.
6969/// We don't simply take the latest master commit, because that would change the output of
7070/// this function every time we rebase on the latest master branch, even if `target_paths`
7171/// were not modified upstream in the meantime. In that case we would be redownloading CI
7272/// artifacts unnecessarily.
7373///
74- /// - In CI, we always fetch only a single parent merge commit, so we do not have access
74+ /// - In CI, we use a shallow clone of depth 2, i.e., we fetch only a single parent commit
75+ /// (which will be the most recent bors merge commit) and do not have access
7576/// to the full git history. Luckily, we only need to distinguish between two situations:
7677/// 1) The current PR made modifications to `target_paths`.
7778/// In that case, a build is typically necessary.
@@ -91,22 +92,23 @@ pub fn check_path_modifications(
9192
9293 let upstream_sha = if matches ! ( ci_env, CiEnv :: GitHubActions ) {
9394 // Here the situation is different for PR CI and try/auto CI.
94- // For PR CI, we have the following history:
95- // <merge commit made by GitHub>
96- // 1-N PR commits
97- // upstream merge commit made by bors
9895 //
99- // For try/auto CI, we have the following history:
100- // <**non-upstream** merge commit made by bors>
101- // 1-N PR commits
102- // upstream merge commit made by bors
96+ // <**non-upstream** merge commit made by bors> [On try/auto builds]
97+ // <**non-upstream** merge commit made by GitHub> [On PR CI builds]
98+ // ^
99+ // ----first parent-----|----second parent-----
100+ // | |
101+ // | |
102+ // | |
103+ // v v
104+ // upstream merge commit made by bors 1-N PR commits
105+ // (could include other bors commits)
103106 //
104- // But on both cases, HEAD should be a merge commit.
107+ // But in both cases, HEAD should be a merge commit.
105108 // So if HEAD contains modifications of `target_paths`, our PR has modified
106109 // them. If not, we can use the only available upstream commit for downloading
107110 // artifacts.
108111
109- // Do not include HEAD, as it is never an upstream commit
110112 // If we do not find an upstream commit in CI, something is seriously wrong.
111113 Some (
112114 get_closest_upstream_commit ( Some ( git_dir) , config, ci_env) ?
@@ -117,14 +119,17 @@ pub fn check_path_modifications(
117119 // modified the set of paths, to have an upstream reference that does not change
118120 // unnecessarily often.
119121 // However, if no such commit is found, we fall back to the latest upstream commit.
120- let upstream_with_modifications = get_latest_commit_that_modified_files (
121- git_dir,
122- target_paths,
123- config. git_merge_commit_email ,
124- ) ?;
122+ let upstream_with_modifications =
123+ get_latest_upstream_commit_that_modified_files ( git_dir, config, target_paths) ?;
125124 match upstream_with_modifications {
126125 Some ( sha) => Some ( sha) ,
127- None => get_closest_upstream_commit ( Some ( git_dir) , config, ci_env) ?,
126+ None => {
127+ eprintln ! (
128+ "Warning: no upstream commit that modified `{}` found. Falling back to latest upstream commit." ,
129+ target_paths. join( "," )
130+ ) ;
131+ get_closest_upstream_commit ( Some ( git_dir) , config, ci_env) ?
132+ }
128133 }
129134 } ;
130135
@@ -156,17 +161,38 @@ pub fn has_changed_since(git_dir: &Path, base: &str, paths: &[&str]) -> bool {
156161 !git. status ( ) . expect ( "cannot run git diff-index" ) . success ( )
157162}
158163
159- /// Returns the latest commit that modified `target_paths`, or `None` if no such commit was found.
160- /// If `author` is `Some`, only considers commits made by that author .
161- fn get_latest_commit_that_modified_files (
164+ /// Returns the latest upstream commit that modified `target_paths`, or `None` if no such commit
165+ /// was found.
166+ fn get_latest_upstream_commit_that_modified_files (
162167 git_dir : & Path ,
168+ git_config : & GitConfig < ' _ > ,
163169 target_paths : & [ & str ] ,
164- author : & str ,
165170) -> Result < Option < String > , String > {
166171 let mut git = Command :: new ( "git" ) ;
167172 git. current_dir ( git_dir) ;
168173
169- git. args ( [ "rev-list" , "-n1" , "--first-parent" , "HEAD" , "--author" , author] ) ;
174+ // In theory, we could just use
175+ // `git rev-list --first-parent HEAD --author=<merge-bot> -- <paths>`
176+ // to find the latest upstream commit that modified `<paths>`.
177+ // However, this does not work if you are in a subtree sync branch that contains merge commits
178+ // which have the subtree history as their first parent, and the rustc history as second parent:
179+ // `--first-parent` will just walk up the subtree history and never see a single rustc commit.
180+ // We thus have to take a two-pronged approach. First look up the most recent upstream commit
181+ // by *date* (this should work even in a subtree sync branch), and then search for the latest
182+ // upstream commit that modified the paths, starting from that commit.
183+ //
184+ // See https://github.com/rust-lang/rust/pull/138591#discussion_r2037081858 for more details.
185+ let upstream = get_closest_upstream_commit ( Some ( git_dir) , git_config, CiEnv :: None ) ?
186+ . unwrap_or_else ( || "HEAD" . to_string ( ) ) ;
187+
188+ git. args ( [
189+ "rev-list" ,
190+ "--first-parent" ,
191+ "-n1" ,
192+ & upstream,
193+ "--author" ,
194+ git_config. git_merge_commit_email ,
195+ ] ) ;
170196
171197 if !target_paths. is_empty ( ) {
172198 git. arg ( "--" ) . args ( target_paths) ;
@@ -175,44 +201,65 @@ fn get_latest_commit_that_modified_files(
175201 if output. is_empty ( ) { Ok ( None ) } else { Ok ( Some ( output) ) }
176202}
177203
178- /// Returns the most recent commit found in the local history that should definitely
179- /// exist upstream. We identify upstream commits by the e-mail of the commit author.
204+ /// Returns the most recent (ordered chronologically) commit found in the local history that
205+ /// should exist upstream. We identify upstream commits by the e-mail of the commit
206+ /// author.
180207///
181- /// If `include_head` is false, the HEAD (current) commit will be ignored and only
182- /// its parents will be searched. This is useful for try/auto CI, where HEAD is
183- /// actually a commit made by bors, although it is not upstream yet.
208+ /// If we are in CI, we simply return the first parent of `HEAD` (`HEAD^1`), resolved to its SHA.
184209fn get_closest_upstream_commit (
185210 git_dir : Option < & Path > ,
186211 config : & GitConfig < ' _ > ,
187212 env : CiEnv ,
188213) -> Result < Option < String > , String > {
214+ let base = match env {
215+ CiEnv :: None => "HEAD" ,
216+ CiEnv :: GitHubActions => {
217+ // On CI, we should always have a non-upstream merge commit at the tip,
218+ // and our first parent should be the most recently merged upstream commit.
219+ // We thus simply return our first parent.
220+ return resolve_commit_sha ( git_dir, "HEAD^1" ) . map ( Some ) ;
221+ }
222+ } ;
223+
189224 let mut git = Command :: new ( "git" ) ;
190225
191226 if let Some ( git_dir) = git_dir {
192227 git. current_dir ( git_dir) ;
193228 }
194229
195- let base = match env {
196- CiEnv :: None => "HEAD" ,
197- CiEnv :: GitHubActions => {
198- // On CI, we always have a merge commit at the tip.
199- // We thus skip it, because although it can be created by
200- // `config.git_merge_commit_email`, it should not be upstream.
201- "HEAD^1"
202- }
203- } ;
230+ // We do not use `--first-parent`, because we can be in a situation (outside CI) where we have
231+ // a subtree merge that actually has the main rustc history as its second parent.
232+ // Using `--first-parent` would recurse into the history of the subtree, which could have some
233+ // old bors commits that are not relevant to us.
234+ // With `--author-date-order`, git recurses into all parent subtrees, and returns the most
235+ // chronologically recent bors commit.
236+ // Here we assume that none of our subtrees use bors anymore, and that all their old bors
237+ // commits are way older than recent rustc bors commits!
204238 git. args ( [
205239 "rev-list" ,
240+ "--author-date-order" ,
206241 & format ! ( "--author={}" , config. git_merge_commit_email) ,
207242 "-n1" ,
208- "--first-parent" ,
209243 & base,
210244 ] ) ;
211245
212246 let output = output_result ( & mut git) ?. trim ( ) . to_owned ( ) ;
213247 if output. is_empty ( ) { Ok ( None ) } else { Ok ( Some ( output) ) }
214248}
215249
250+ /// Resolve the commit SHA of `commit_ref`.
251+ fn resolve_commit_sha ( git_dir : Option < & Path > , commit_ref : & str ) -> Result < String , String > {
252+ let mut git = Command :: new ( "git" ) ;
253+
254+ if let Some ( git_dir) = git_dir {
255+ git. current_dir ( git_dir) ;
256+ }
257+
258+ git. args ( [ "rev-parse" , commit_ref] ) ;
259+
260+ Ok ( output_result ( & mut git) ?. trim ( ) . to_owned ( ) )
261+ }
262+
216263/// Returns the files that have been modified in the current branch compared to the master branch.
217264/// This includes committed changes, uncommitted changes, and changes that are not even staged.
218265///
0 commit comments