Skip to content

Commit 0c3dffa

Browse files
committed
Use --author-date-order when looking up upstream commits to support subtree synces
1 parent b99f595 commit 0c3dffa

File tree

3 files changed

+164
-40
lines changed

3 files changed

+164
-40
lines changed

src/bootstrap/src/core/config/tests.rs

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -651,6 +651,7 @@ fn test_local_changes_in_previous_upstream() {
651651
ctx.create_branch("feature");
652652
ctx.modify("d");
653653
ctx.commit();
654+
654655
assert_eq!(
655656
ctx.check_modifications(&["a"], CiEnv::None),
656657
PathFreshness::LastModifiedUpstream { upstream: sha }
@@ -707,3 +708,73 @@ fn test_local_changes_negative_path() {
707708
);
708709
});
709710
}
711+
712+
#[test]
713+
fn test_local_changes_subtree_that_used_bors() {
714+
// Here we simulate a very specific situation related to subtrees.
715+
// When you have merge commits locally, we should ignore them w.r.t. the artifact download
716+
// logic.
717+
// The upstream search code currently uses a simple heuristic:
718+
// - Find commits by bors (or in general an author with the merge commit e-mail)
719+
// - Find the newest such commit
720+
// This should make it work even for subtrees that:
721+
// - Used bors in the past (so they have bors merge commits in their history).
722+
// - Use Josh to merge rustc into the subtree, in a way that the rustc history is the second
723+
// parent, not the first one.
724+
//
725+
// In addition, when searching for modified files, we cannot simply start from HEAD, because
726+
// in this situation git wouldn't find the right commit.
727+
//
728+
// This test checks that this specific scenario will resolve to the right rustc commit, both
729+
// when finding a modified file and when finding a non-existent file (which essentially means
730+
// that we just lookup the most recent upstream commit).
731+
//
732+
// See https://github.com/rust-lang/rust/issues/101907#issuecomment-2697671282 for more details.
733+
git_test(|ctx| {
734+
ctx.create_upstream_merge(&["a"]);
735+
736+
// Start unrelated subtree history
737+
ctx.run_git(&["switch", "--orphan", "subtree"]);
738+
ctx.modify("bar");
739+
ctx.commit();
740+
// Now we need to emulate old bors commits in the subtree.
741+
// Git only has a resolution of one second, which is a problem, since our git logic orders
742+
// merge commits by their date.
743+
// To avoid sleeping in the test, we modify the commit date to be forcefully in the past.
744+
ctx.create_upstream_merge(&["subtree/a"]);
745+
ctx.run_git(&["commit", "--amend", "--date", "Wed Feb 16 14:00 2011 +0100", "--no-edit"]);
746+
747+
// Merge the subtree history into rustc
748+
ctx.switch_to_branch("main");
749+
ctx.run_git(&["merge", "subtree", "--allow-unrelated"]);
750+
751+
// Create a rustc commit that modifies a path that we're interested in (`x`)
752+
let upstream_1 = ctx.create_upstream_merge(&["x"]);
753+
// Create another bors commit
754+
let upstream_2 = ctx.create_upstream_merge(&["a"]);
755+
756+
ctx.switch_to_branch("subtree");
757+
758+
// Create a subtree branch
759+
ctx.create_branch("subtree-pr");
760+
ctx.modify("baz");
761+
ctx.commit();
762+
// We merge rustc into this branch (simulating a "subtree pull")
763+
ctx.merge("main", "committer <[email protected]>");
764+
765+
// And then merge that branch into the subtree (simulating a situation right before a
766+
// "subtree push")
767+
ctx.switch_to_branch("subtree");
768+
ctx.merge("subtree-pr", "committer <[email protected]>");
769+
770+
// And we want to check that we resolve to the right commits.
771+
assert_eq!(
772+
ctx.check_modifications(&["x"], CiEnv::None),
773+
PathFreshness::LastModifiedUpstream { upstream: upstream_1 }
774+
);
775+
assert_eq!(
776+
ctx.check_modifications(&["nonexistent"], CiEnv::None),
777+
PathFreshness::LastModifiedUpstream { upstream: upstream_2 }
778+
);
779+
});
780+
}

src/bootstrap/src/utils/tests/git.rs

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,12 +53,14 @@ impl GitCtx {
5353
modified_files: &[&str],
5454
author: &str,
5555
) -> String {
56+
let current_branch = self.get_current_branch();
57+
5658
self.create_branch(branch);
5759
for file in modified_files {
5860
self.modify(file);
5961
}
6062
self.commit();
61-
self.switch_to_branch("main");
63+
self.switch_to_branch(&current_branch);
6264
self.merge(branch, author);
6365
self.run_git(&["branch", "-d", branch]);
6466
self.get_current_commit()
@@ -68,12 +70,16 @@ impl GitCtx {
6870
self.run_git(&["rev-parse", "HEAD"])
6971
}
7072

73+
pub fn get_current_branch(&self) -> String {
74+
self.run_git(&["rev-parse", "--abbrev-ref", "HEAD"])
75+
}
76+
7177
pub fn merge(&self, branch: &str, author: &str) {
7278
self.run_git(&["merge", "--no-commit", "--no-ff", branch]);
7379
self.run_git(&[
7480
"commit".to_string(),
7581
"-m".to_string(),
76-
"Merge of {branch}".to_string(),
82+
format!("Merge of {branch} into {}", self.get_current_branch()),
7783
"--author".to_string(),
7884
author.to_string(),
7985
]);

src/build_helper/src/git.rs

Lines changed: 85 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -63,15 +63,16 @@ pub enum PathFreshness {
6363
/// The function behaves differently in CI and outside CI.
6464
///
6565
/// - Outside CI, we want to find out if `target_paths` were modified in some local commit on
66-
/// top of the local master branch.
66+
/// top of the latest upstream commit that is available in local git history.
6767
/// If not, we try to find the most recent upstream commit (which we assume are commits
6868
/// made by bors) that modified `target_paths`.
6969
/// We don't want to simply take the latest master commit to avoid changing the output of
7070
/// this function frequently after rebasing on the latest master branch even if `target_paths`
7171
/// were not modified upstream in the meantime. In that case we would be redownloading CI
7272
/// artifacts unnecessarily.
7373
///
74-
/// - In CI, we always fetch only a single parent merge commit, so we do not have access
74+
/// - In CI, we use a shallow clone of depth 2, i.e., we fetch only a single parent commit
75+
/// (which will be the most recent bors merge commit) and do not have access
7576
/// to the full git history. Luckily, we only need to distinguish between two situations:
7677
/// 1) The current PR made modifications to `target_paths`.
7778
/// In that case, a build is typically necessary.
@@ -91,22 +92,23 @@ pub fn check_path_modifications(
9192

9293
let upstream_sha = if matches!(ci_env, CiEnv::GitHubActions) {
9394
// Here the situation is different for PR CI and try/auto CI.
94-
// For PR CI, we have the following history:
95-
// <merge commit made by GitHub>
96-
// 1-N PR commits
97-
// upstream merge commit made by bors
9895
//
99-
// For try/auto CI, we have the following history:
100-
// <**non-upstream** merge commit made by bors>
101-
// 1-N PR commits
102-
// upstream merge commit made by bors
96+
// <**non-upstream** merge commit made by bors> [On try/auto builds]
97+
// <**non-upstream** merge commit made by GitHub> [On PR CI builds]
98+
// ^
99+
// ----first parent-----|----second parent-----
100+
// | |
101+
// | |
102+
// | |
103+
// v v
104+
// upstream merge commit made by bors 1-N PR commits
105+
// (could include other bors commits)
103106
//
104-
// But on both cases, HEAD should be a merge commit.
107+
// But in both cases, HEAD should be a merge commit.
105108
// So if HEAD contains modifications of `target_paths`, our PR has modified
106109
// them. If not, we can use the only available upstream commit for downloading
107110
// artifacts.
108111

109-
// Do not include HEAD, as it is never an upstream commit
110112
// If we do not find an upstream commit in CI, something is seriously wrong.
111113
Some(
112114
get_closest_upstream_commit(Some(git_dir), config, ci_env)?
@@ -117,14 +119,17 @@ pub fn check_path_modifications(
117119
// modified the set of paths, to have an upstream reference that does not change
118120
// unnecessarily often.
119121
// However, if such commit is not found, we can fall back to the latest upstream commit
120-
let upstream_with_modifications = get_latest_commit_that_modified_files(
121-
git_dir,
122-
target_paths,
123-
config.git_merge_commit_email,
124-
)?;
122+
let upstream_with_modifications =
123+
get_latest_upstream_commit_that_modified_files(git_dir, config, target_paths)?;
125124
match upstream_with_modifications {
126125
Some(sha) => Some(sha),
127-
None => get_closest_upstream_commit(Some(git_dir), config, ci_env)?,
126+
None => {
127+
eprintln!(
128+
"Warning: no upstream commit that modified `{}` found. Falling back to latest upstream commit.",
129+
target_paths.join(",")
130+
);
131+
get_closest_upstream_commit(Some(git_dir), config, ci_env)?
132+
}
128133
}
129134
};
130135

@@ -156,17 +161,38 @@ pub fn has_changed_since(git_dir: &Path, base: &str, paths: &[&str]) -> bool {
156161
!git.status().expect("cannot run git diff-index").success()
157162
}
158163

159-
/// Returns the latest commit that modified `target_paths`, or `None` if no such commit was found.
160-
/// If `author` is `Some`, only considers commits made by that author.
161-
fn get_latest_commit_that_modified_files(
164+
/// Returns the latest upstream commit that modified `target_paths`, or `None` if no such commit
165+
/// was found.
166+
fn get_latest_upstream_commit_that_modified_files(
162167
git_dir: &Path,
168+
git_config: &GitConfig<'_>,
163169
target_paths: &[&str],
164-
author: &str,
165170
) -> Result<Option<String>, String> {
166171
let mut git = Command::new("git");
167172
git.current_dir(git_dir);
168173

169-
git.args(["rev-list", "-n1", "--first-parent", "HEAD", "--author", author]);
174+
// In theory, we could just use
175+
// `git rev-list --first-parent HEAD --author=<merge-bot> -- <paths>`
176+
// to find the latest upstream commit that modified `<paths>`.
177+
// However, this does not work if you are in a subtree sync branch that contains merge commits
178+
// which have the subtree history as their first parent, and the rustc history as second parent:
179+
// `--first-parent` will just walk up the subtree history and never see a single rustc commit.
180+
// We thus have to take a two-pronged approach. First lookup the most recent upstream commit
181+
// by *date* (this should work even in a subtree sync branch), and then start the lookup for
182+
// modified paths starting from that commit.
183+
//
184+
// See https://github.com/rust-lang/rust/pull/138591#discussion_r2037081858 for more details.
185+
let upstream = get_closest_upstream_commit(Some(git_dir), git_config, CiEnv::None)?
186+
.unwrap_or_else(|| "HEAD".to_string());
187+
188+
git.args([
189+
"rev-list",
190+
"--first-parent",
191+
"-n1",
192+
&upstream,
193+
"--author",
194+
git_config.git_merge_commit_email,
195+
]);
170196

171197
if !target_paths.is_empty() {
172198
git.arg("--").args(target_paths);
@@ -175,44 +201,65 @@ fn get_latest_commit_that_modified_files(
175201
if output.is_empty() { Ok(None) } else { Ok(Some(output)) }
176202
}
177203

178-
/// Returns the most recent commit found in the local history that should definitely
179-
/// exist upstream. We identify upstream commits by the e-mail of the commit author.
204+
/// Returns the most recent (ordered chronologically) commit found in the local history that
205+
/// should exist upstream. We identify upstream commits by the e-mail of the commit
206+
/// author.
180207
///
181-
/// If `include_head` is false, the HEAD (current) commit will be ignored and only
182-
/// its parents will be searched. This is useful for try/auto CI, where HEAD is
183-
/// actually a commit made by bors, although it is not upstream yet.
208+
/// If we are in CI, we simply return our first parent.
184209
fn get_closest_upstream_commit(
185210
git_dir: Option<&Path>,
186211
config: &GitConfig<'_>,
187212
env: CiEnv,
188213
) -> Result<Option<String>, String> {
214+
let base = match env {
215+
CiEnv::None => "HEAD",
216+
CiEnv::GitHubActions => {
217+
// On CI, we should always have a non-upstream merge commit at the tip,
218+
// and our first parent should be the most recently merged upstream commit.
219+
// We thus simply return our first parent.
220+
return resolve_commit_sha(git_dir, "HEAD^1").map(Some);
221+
}
222+
};
223+
189224
let mut git = Command::new("git");
190225

191226
if let Some(git_dir) = git_dir {
192227
git.current_dir(git_dir);
193228
}
194229

195-
let base = match env {
196-
CiEnv::None => "HEAD",
197-
CiEnv::GitHubActions => {
198-
// On CI, we always have a merge commit at the tip.
199-
// We thus skip it, because although it can be created by
200-
// `config.git_merge_commit_email`, it should not be upstream.
201-
"HEAD^1"
202-
}
203-
};
230+
// We do not use `--first-parent`, because we can be in a situation (outside CI) where we have
231+
// a subtree merge that actually has the main rustc history as its second parent.
232+
// Using `--first-parent` would recurse into the history of the subtree, which could have some
233+
// old bors commits that are not relevant to us.
234+
// With `--author-date-order`, git recurses into all parent subtrees, and returns the most
235+
// chronologically recent bors commit.
236+
// Here we assume that none of our subtrees use bors anymore, and that all their old bors
237+
// commits are way older than recent rustc bors commits!
204238
git.args([
205239
"rev-list",
240+
"--author-date-order",
206241
&format!("--author={}", config.git_merge_commit_email),
207242
"-n1",
208-
"--first-parent",
209243
&base,
210244
]);
211245

212246
let output = output_result(&mut git)?.trim().to_owned();
213247
if output.is_empty() { Ok(None) } else { Ok(Some(output)) }
214248
}
215249

250+
/// Resolve the commit SHA of `commit_ref`.
251+
fn resolve_commit_sha(git_dir: Option<&Path>, commit_ref: &str) -> Result<String, String> {
252+
let mut git = Command::new("git");
253+
254+
if let Some(git_dir) = git_dir {
255+
git.current_dir(git_dir);
256+
}
257+
258+
git.args(["rev-parse", commit_ref]);
259+
260+
Ok(output_result(&mut git)?.trim().to_owned())
261+
}
262+
216263
/// Returns the files that have been modified in the current branch compared to the master branch.
217264
/// This includes committed changes, uncommitted changes, and changes that are not even staged.
218265
///

0 commit comments

Comments
 (0)