1+ #[ cfg( test) ]
2+ mod tests;
3+
14use std:: path:: Path ;
25use std:: process:: { Command , Stdio } ;
36
@@ -165,7 +168,177 @@ pub fn get_closest_merge_commit(
165168 Ok ( output_result ( & mut git) ?. trim ( ) . to_owned ( ) )
166169}
167170
/// Represents the result of checking whether a set of paths
/// has been modified locally or not.
///
/// Both variants carry the SHA of the upstream commit that the check was
/// performed against, so callers can use it as the reference point either way.
#[derive(Clone, PartialEq, Eq, Debug)]
pub enum PathFreshness {
    /// Artifacts should be downloaded from this upstream commit,
    /// there are no local modifications.
    LastModifiedUpstream { upstream: String },
    /// There are local modifications to a certain set of paths.
    /// "Local" essentially means "not-upstream" here.
    /// `upstream` is the latest upstream merge commit that made modifications to the
    /// set of paths.
    HasLocalModifications { upstream: String },
}
184+
185+ /// This function figures out if a set of paths was last modified upstream or
186+ /// if there are some local modifications made to them.
187+ ///
188+ /// It can be used to figure out if we should download artifacts from CI or rather
189+ /// build them locally.
190+ ///
191+ /// `target_paths` should be a non-empty slice of paths (relative to `git_dir` or the
192+ /// current working directory) whose modifications would invalidate the artifact.
193+ /// Each path can also be a negative match, i.e. `:!foo`. This matches changes outside
194+ /// the `foo` directory.
195+ ///
196+ /// The function behaves differently in CI and outside CI.
197+ ///
198+ /// - Outside CI, we want to find out if `target_paths` were modified in some local commit on
199+ /// top of the local master branch.
200+ /// If not, we try to find the most recent upstream commit (which we assume are commits
201+ /// made by bors) that modified `target_paths`.
202+ /// We don't want to simply take the latest master commit to avoid changing the output of
203+ /// this function frequently after rebasing on the latest master branch even if `target_paths`
204+ /// were not modified upstream in the meantime. In that case we would be redownloading CI
205+ /// artifacts unnecessarily.
206+ ///
207+ /// - In CI, we always fetch only a single parent merge commit, so we do not have access
208+ /// to the full git history.
209+ /// Luckily, we only need to distinguish between two situations. The first is that the current
210+ /// PR made modifications to `target_paths`. If not, then we simply take the latest upstream
211+ /// commit, because on CI there is no need to avoid redownloading.
212+ pub fn check_path_modifications (
213+ git_dir : Option < & Path > ,
214+ config : & GitConfig < ' _ > ,
215+ target_paths : & [ & str ] ,
216+ ci_env : CiEnv ,
217+ ) -> Result < PathFreshness , String > {
218+ assert ! ( !target_paths. is_empty( ) ) ;
219+ for path in target_paths {
220+ assert ! ( Path :: new( path. trim_start_matches( ":!" ) ) . is_relative( ) ) ;
221+ }
222+
223+ let upstream_sha = if matches ! ( ci_env, CiEnv :: GitHubActions ) {
224+ // Here the situation is different for PR CI and try/auto CI.
225+ // For PR CI, we have the following history:
226+ // <merge commit made by GitHub>
227+ // 1-N PR commits
228+ // upstream merge commit made by bors
229+ //
230+ // For try/auto CI, we have the following history:
231+ // <**non-upstream** merge commit made by bors>
232+ // 1-N PR commits
233+ // upstream merge commit made by bors
234+ //
235+ // But on both cases, HEAD should be a merge commit.
236+ // So if HEAD contains modifications of `target_paths`, our PR has modified
237+ // them. If not, we can use the only available upstream commit for downloading
238+ // artifacts.
239+
240+ // Do not include HEAD, as it is never an upstream commit
241+ get_closest_upstream_commit ( git_dir, config, ci_env) ?
242+ } else {
243+ // Outside CI, we have to find the most recent upstream commit that
244+ // modified the set of paths, to have an upstream reference.
245+ let upstream_sha = get_latest_commit_that_modified_files (
246+ git_dir,
247+ target_paths,
248+ config. git_merge_commit_email ,
249+ ) ?;
250+ let Some ( upstream_sha) = upstream_sha else {
251+ eprintln ! ( "No upstream commit that modified paths {target_paths:?} found." ) ;
252+ eprintln ! ( "Try to fetch more upstream history." ) ;
253+ return Err ( "No upstream commit with modifications found" . to_string ( ) ) ;
254+ } ;
255+ upstream_sha
256+ } ;
257+
258+ if has_changed_since ( git_dir, & upstream_sha, target_paths) {
259+ Ok ( PathFreshness :: HasLocalModifications { upstream : upstream_sha } )
260+ } else {
261+ Ok ( PathFreshness :: LastModifiedUpstream { upstream : upstream_sha } )
262+ }
263+ }
264+
/// Reports whether any of `paths` differ from the `base` commit.
///
/// Runs `git diff-index --quiet <base> -- <paths>` (inside `git_dir` when it is
/// provided) and interprets its exit status: a successful exit means no changes,
/// anything else is reported as "changed".
///
/// # Panics
///
/// Panics if the `git` process cannot be started.
pub fn has_changed_since(git_dir: Option<&Path>, base: &str, paths: &[&str]) -> bool {
    let mut cmd = Command::new("git");
    if let Some(dir) = git_dir {
        cmd.current_dir(dir);
    }
    cmd.arg("diff-index").arg("--quiet").arg(base).arg("--").args(paths);

    let status = cmd.status().expect("cannot run git diff-index");

    // Exit code 0 => no changes
    // Exit code 1 => some changes were detected
    let unchanged = status.success();
    !unchanged
}
279+
280+ /// Returns the latest commit that modified `target_paths`, or `None` if no such commit was found.
281+ /// If `author` is `Some`, only considers commits made by that author.
282+ fn get_latest_commit_that_modified_files (
283+ git_dir : Option < & Path > ,
284+ target_paths : & [ & str ] ,
285+ author : & str ,
286+ ) -> Result < Option < String > , String > {
287+ let mut git = Command :: new ( "git" ) ;
288+
289+ if let Some ( git_dir) = git_dir {
290+ git. current_dir ( git_dir) ;
291+ }
292+
293+ git. args ( [ "rev-list" , "-n1" , "--first-parent" , "HEAD" , "--author" , author] ) ;
294+
295+ if !target_paths. is_empty ( ) {
296+ git. arg ( "--" ) . args ( target_paths) ;
297+ }
298+ let output = output_result ( & mut git) ?. trim ( ) . to_owned ( ) ;
299+ if output. is_empty ( ) { Ok ( None ) } else { Ok ( Some ( output) ) }
300+ }
301+
302+ /// Returns the most recent commit found in the local history that should definitely
303+ /// exist upstream. We identify upstream commits by the e-mail of the commit author.
304+ ///
305+ /// If `include_head` is false, the HEAD (current) commit will be ignored and only
306+ /// its parents will be searched. This is useful for try/auto CI, where HEAD is
307+ /// actually a commit made by bors, although it is not upstream yet.
308+ fn get_closest_upstream_commit (
309+ git_dir : Option < & Path > ,
310+ config : & GitConfig < ' _ > ,
311+ env : CiEnv ,
312+ ) -> Result < String , String > {
313+ let mut git = Command :: new ( "git" ) ;
314+
315+ if let Some ( git_dir) = git_dir {
316+ git. current_dir ( git_dir) ;
317+ }
318+
319+ let base = match env {
320+ CiEnv :: None => "HEAD" ,
321+ CiEnv :: GitHubActions => {
322+ // On CI, we always have a merge commit at the tip.
323+ // We thus skip it, because although it can be creatd by
324+ // `config.git_merge_commit_email`, it should not be upstream.
325+ "HEAD^1"
326+ }
327+ } ;
328+ git. args ( [
329+ "rev-list" ,
330+ & format ! ( "--author={}" , config. git_merge_commit_email) ,
331+ "-n1" ,
332+ "--first-parent" ,
333+ & base,
334+ ] ) ;
335+
336+ Ok ( output_result ( & mut git) ?. trim ( ) . to_owned ( ) )
337+ }
338+
168339/// Returns the files that have been modified in the current branch compared to the master branch.
340+ /// This includes committed changes, uncommitted changes, and changes that are not even staged.
341+ ///
169342/// The `extensions` parameter can be used to filter the files by their extension.
170343/// Does not include removed files.
171344/// If `extensions` is empty, all files will be returned.
@@ -174,7 +347,7 @@ pub fn get_git_modified_files(
174347 git_dir : Option < & Path > ,
175348 extensions : & [ & str ] ,
176349) -> Result < Vec < String > , String > {
177- let merge_base = get_closest_merge_commit ( git_dir, config, & [ ] ) ?;
350+ let merge_base = get_closest_upstream_commit ( git_dir, config, CiEnv :: None ) ?;
178351
179352 let mut git = Command :: new ( "git" ) ;
180353 if let Some ( git_dir) = git_dir {
0 commit comments