44//!
55//! - it's less sophisticated and doesn't use any ranking of candidates. Instead, it picks the first possible match.
66//! - the set used for copy-detection is probably smaller by default.
7+
8+ use std:: collections:: VecDeque ;
79use std:: ops:: Range ;
810
9- use bstr:: BStr ;
11+ use bstr:: { BStr , ByteSlice } ;
1012use gix_object:: tree:: { EntryKind , EntryMode } ;
1113
14+ use crate :: tree:: visit:: { Action , ChangeId , Relation } ;
1215use crate :: {
1316 blob:: { platform:: prepare_diff:: Operation , DiffLineStats , ResourceKind } ,
1417 rewrites:: { CopySource , Outcome , Tracker } ,
@@ -28,11 +31,22 @@ pub enum ChangeKind {
2831
2932/// A trait providing all functionality to abstract over the concept of a change, as seen by the [`Tracker`].
3033pub trait Change : Clone {
31- /// Return the hash of this change for identification.
34+ /// Return the hash of the object behind this change for identification.
3235 ///
3336 /// Note that this is the id of the object as stored in `git`, i.e. it must have gone through workspace
34- /// conversions.
37+ /// conversions. What matters is that the IDs are comparable.
3538 fn id ( & self ) -> & gix_hash:: oid ;
39+ /// Return the relation that this change may have with other changes.
40+ ///
41+ /// It allows to associate a directory with its children that are added or removed at the same moment.
42+ /// Note that this is ignored for modifications.
43+ ///
44+ /// If rename-tracking should always be on leaf-level, this should be set to `None` consistently.
45+ /// Note that trees will never be looked up by their `id` as their children are assumed to be passed in
46+ /// with the respective relationship.
47+ ///
48+ /// Also note that the tracker only sees what's given to it, it will not lookup trees or match paths itself.
49+ fn relation ( & self ) -> Option < Relation > ;
3650 /// Return the kind of this change.
3751 fn kind ( & self ) -> ChangeKind ;
3852 /// Return more information about the kind of entry affected by this change.
@@ -55,11 +69,11 @@ impl<T: Change> Item<T> {
5569 fn location < ' a > ( & self , backing : & ' a [ u8 ] ) -> & ' a BStr {
5670 backing[ self . path . clone ( ) ] . as_ref ( )
5771 }
58- fn entry_mode_compatible ( & self , mode : EntryMode ) -> bool {
72+ fn entry_mode_compatible ( & self , other : EntryMode ) -> bool {
5973 use EntryKind :: * ;
6074 matches ! (
61- ( mode . kind( ) , self . change. entry_mode( ) . kind( ) ) ,
62- ( Blob | BlobExecutable , Blob | BlobExecutable ) | ( Link , Link )
75+ ( other . kind( ) , self . change. entry_mode( ) . kind( ) ) ,
76+ ( Blob | BlobExecutable , Blob | BlobExecutable ) | ( Link , Link ) | ( Tree , Tree )
6377 )
6478 }
6579
@@ -108,7 +122,7 @@ pub mod visit {
108122 }
109123
110124 /// A change along with a location.
111- #[ derive( Clone ) ]
125+ #[ derive( Debug , Clone ) ]
112126 pub struct Destination < ' a , T : Clone > {
113127 /// The change at the given `location`.
114128 pub change : T ,
@@ -150,23 +164,25 @@ impl<T: Change> Tracker<T> {
150164impl < T : Change > Tracker < T > {
151165 /// We may refuse the push if that information isn't needed for what we have to track.
152166 pub fn try_push_change ( & mut self , change : T , location : & BStr ) -> Option < T > {
153- if !change. entry_mode ( ) . is_blob_or_symlink ( ) {
167+ let change_kind = change. kind ( ) ;
168+ if let ( None , ChangeKind :: Modification { .. } ) = ( self . rewrites . copies , change_kind) {
154169 return Some ( change) ;
155- }
156- let keep = match ( self . rewrites . copies , change. kind ( ) ) {
157- ( Some ( _find_copies) , _) => true ,
158- ( None , ChangeKind :: Modification { .. } ) => false ,
159- ( None , _) => true ,
160170 } ;
161171
162- if !keep {
172+ let relation = change
173+ . relation ( )
174+ . filter ( |_| matches ! ( change_kind, ChangeKind :: Addition | ChangeKind :: Deletion ) ) ;
175+ let entry_kind = change. entry_mode ( ) . kind ( ) ;
176+ if let ( None | Some ( Relation :: ChildOfParent ( _) ) , EntryKind :: Commit | EntryKind :: Tree ) = ( relation, entry_kind) {
163177 return Some ( change) ;
164- }
178+ } ;
165179
166180 let start = self . path_backing . len ( ) ;
167181 self . path_backing . extend_from_slice ( location) ;
182+ let path = start..self . path_backing . len ( ) ;
183+
168184 self . items . push ( Item {
169- path : start.. self . path_backing . len ( ) ,
185+ path,
170186 change,
171187 emitted : false ,
172188 } ) ;
@@ -178,6 +194,8 @@ impl<T: Change> Tracker<T> {
178194 /// `cb(destination, source)` is called for each item, either with `Some(source)` if it's
179195 /// the destination of a copy or rename, or with `None` for source if no relation to other
180196 /// items in the tracked set exist, which is like saying 'no rename or rewrite or copy' happened.
197+ /// Note that directories with [relation](Relation) will be emitted if there is a match, along with all their matching
198+ /// child-items which are similarly bundled as rename.
181199 ///
182200 /// `objects` is used to access blob data for similarity checks if required and is taken directly from the object database.
183201 /// Worktree filters and text conversions will be applied afterwards automatically. Note that object-caching *should not*
@@ -195,7 +213,7 @@ impl<T: Change> Tracker<T> {
195213 /// will panic if `change` is not a modification, and it's valid to not call `push` at all.
196214 pub fn emit < PushSourceTreeFn , E > (
197215 & mut self ,
198- mut cb : impl FnMut ( visit:: Destination < ' _ , T > , Option < visit:: Source < ' _ , T > > ) -> crate :: tree :: visit :: Action ,
216+ mut cb : impl FnMut ( visit:: Destination < ' _ , T > , Option < visit:: Source < ' _ , T > > ) -> Action ,
199217 diff_cache : & mut crate :: blob:: Platform ,
200218 objects : & impl gix_object:: FindObjectOrHeader ,
201219 mut push_source_tree : PushSourceTreeFn ,
@@ -272,7 +290,7 @@ impl<T: Change> Tracker<T> {
272290 change : item. change ,
273291 } ,
274292 None ,
275- ) == crate :: tree :: visit :: Action :: Cancel
293+ ) == Action :: Cancel
276294 {
277295 break ;
278296 }
@@ -285,17 +303,15 @@ impl<T: Change> Tracker<T> {
285303 fn match_pairs_of_kind (
286304 & mut self ,
287305 kind : visit:: SourceKind ,
288- cb : & mut impl FnMut ( visit:: Destination < ' _ , T > , Option < visit:: Source < ' _ , T > > ) -> crate :: tree :: visit :: Action ,
306+ cb : & mut impl FnMut ( visit:: Destination < ' _ , T > , Option < visit:: Source < ' _ , T > > ) -> Action ,
289307 percentage : Option < f32 > ,
290308 out : & mut Outcome ,
291309 diff_cache : & mut crate :: blob:: Platform ,
292310 objects : & impl gix_object:: FindObjectOrHeader ,
293311 ) -> Result < ( ) , emit:: Error > {
294312 // we try to cheaply reduce the set of possibilities first, before possibly looking more exhaustively.
295313 let needs_second_pass = !needs_exact_match ( percentage) ;
296- if self . match_pairs ( cb, None /* by identity */ , kind, out, diff_cache, objects) ?
297- == crate :: tree:: visit:: Action :: Cancel
298- {
314+ if self . match_pairs ( cb, None /* by identity */ , kind, out, diff_cache, objects) ? == Action :: Cancel {
299315 return Ok ( ( ) ) ;
300316 }
301317 if needs_second_pass {
@@ -328,13 +344,13 @@ impl<T: Change> Tracker<T> {
328344
329345 fn match_pairs (
330346 & mut self ,
331- cb : & mut impl FnMut ( visit:: Destination < ' _ , T > , Option < visit:: Source < ' _ , T > > ) -> crate :: tree :: visit :: Action ,
347+ cb : & mut impl FnMut ( visit:: Destination < ' _ , T > , Option < visit:: Source < ' _ , T > > ) -> Action ,
332348 percentage : Option < f32 > ,
333349 kind : visit:: SourceKind ,
334350 stats : & mut Outcome ,
335351 diff_cache : & mut crate :: blob:: Platform ,
336352 objects : & impl gix_object:: FindObjectOrHeader ,
337- ) -> Result < crate :: tree :: visit :: Action , emit:: Error > {
353+ ) -> Result < Action , emit:: Error > {
338354 let mut dest_ofs = 0 ;
339355 while let Some ( ( mut dest_idx, dest) ) = self . items [ dest_ofs..] . iter ( ) . enumerate ( ) . find_map ( |( idx, item) | {
340356 ( !item. emitted && matches ! ( item. change. kind( ) , ChangeKind :: Addition ) ) . then_some ( ( idx, item) )
@@ -368,26 +384,107 @@ impl<T: Change> Tracker<T> {
368384 src_idx,
369385 )
370386 } ) ;
371- if src . is_none ( ) {
387+ let Some ( ( src , src_idx ) ) = src else {
372388 continue ;
373- }
389+ } ;
374390 let location = dest. location ( & self . path_backing ) ;
375391 let change = dest. change . clone ( ) ;
376392 let dest = visit:: Destination { change, location } ;
377- let src_idx = src. as_ref ( ) . map ( |t| t. 1 ) ;
378- let res = cb ( dest, src. map ( |t| t. 0 ) ) ;
393+ let relations = if percentage. is_none ( ) {
394+ src. change . relation ( ) . zip ( dest. change . relation ( ) )
395+ } else {
396+ None
397+ } ;
398+ let res = cb ( dest, Some ( src) ) ;
379399
380400 self . items [ dest_idx] . emitted = true ;
381- if let Some ( src_idx) = src_idx {
382- self . items [ src_idx] . emitted = true ;
401+ self . items [ src_idx] . emitted = true ;
402+
403+ if res == Action :: Cancel {
404+ return Ok ( Action :: Cancel ) ;
383405 }
384406
385- if res == crate :: tree:: visit:: Action :: Cancel {
386- return Ok ( crate :: tree:: visit:: Action :: Cancel ) ;
407+ if let Some ( ( Relation :: Parent ( src) , Relation :: Parent ( dst) ) ) = relations {
408+ let res = self . emit_child_renames_matching_identity ( cb, kind, src, dst) ?;
409+ if res == Action :: Cancel {
410+ return Ok ( Action :: Cancel ) ;
411+ }
387412 }
388413 }
389- Ok ( crate :: tree :: visit :: Action :: Continue )
414+ Ok ( Action :: Continue )
390415 }
416+
417+ /// Emit the children of `src_parent_id` and `dst_parent_id` as pairs of exact matches, which are assumed
418+ /// as `src` and `dst` were an exact match.
419+ fn emit_child_renames_matching_identity (
420+ & mut self ,
421+ cb : & mut impl FnMut ( visit:: Destination < ' _ , T > , Option < visit:: Source < ' _ , T > > ) -> Action ,
422+ kind : visit:: SourceKind ,
423+ src_parent_id : ChangeId ,
424+ dst_parent_id : ChangeId ,
425+ ) -> Result < Action , emit:: Error > {
426+ debug_assert_ne ! (
427+ src_parent_id, dst_parent_id,
428+ "src and destination directories must be distinct"
429+ ) ;
430+ let mut candidates = VecDeque :: with_capacity ( 2 /* at least 1 per directory */ ) ;
431+ for item in self . items . iter_mut ( ) . filter ( |item| !item. emitted ) {
432+ match item. change . relation ( ) {
433+ Some ( Relation :: ChildOfParent ( id) ) if id == src_parent_id => {
434+ candidates. push_front ( ( item. change . id ( ) . to_owned ( ) , item) ) ;
435+ }
436+ Some ( Relation :: ChildOfParent ( id) ) if id == dst_parent_id => {
437+ candidates. push_back ( ( item. change . id ( ) . to_owned ( ) , item) ) ;
438+ }
439+ _ => continue ,
440+ } ;
441+ }
442+ let ( src_items, dst_items) = candidates. as_mut_slices ( ) ;
443+ for list in [ & mut * src_items, & mut * dst_items] {
444+ list. sort_by ( |a, b| {
445+ a. 0 . cmp ( & b. 0 )
446+ . then_with ( || a. 1 . location ( & self . path_backing ) . cmp ( b. 1 . location ( & self . path_backing ) ) )
447+ } ) ;
448+ }
449+
450+ for ( ( src_id, src_item) , ( dst_id, dst_item) ) in src_items. into_iter ( ) . zip ( dst_items) {
451+ // Since the parent items are already identical by ID, we know that the children will also match, we just
452+ // double-check to still have a chance to be correct in case some of that goes wrong.
453+ if src_id == dst_id
454+ && filename ( src_item. location ( & self . path_backing ) ) == filename ( dst_item. location ( & self . path_backing ) )
455+ {
456+ let entry_mode = src_item. change . entry_mode ( ) ;
457+ let location = src_item. location ( & self . path_backing ) ;
458+ let src = visit:: Source {
459+ entry_mode,
460+ id : * src_id,
461+ kind,
462+ location,
463+ change : & src_item. change ,
464+ diff : None ,
465+ } ;
466+ let location = dst_item. location ( & self . path_backing ) ;
467+ let change = dst_item. change . clone ( ) ;
468+ let dst = visit:: Destination { change, location } ;
469+ let res = cb ( dst, Some ( src) ) ;
470+
471+ src_item. emitted = true ;
472+ dst_item. emitted = true ;
473+
474+ if res == Action :: Cancel {
475+ return Ok ( res) ;
476+ }
477+ } else {
478+ gix_trace:: warn!( "Children of parents with change-id {src_parent_id} and {dst_parent_id} were not equal, even though their parents claimed to be" ) ;
479+ break ;
480+ }
481+ }
482+ Ok ( Action :: Continue )
483+ }
484+ }
485+
486+ fn filename ( path : & BStr ) -> & BStr {
487+ path. rfind_byte ( b'/' ) . map_or ( path, |idx| path[ idx + 1 ..] . as_bstr ( ) )
391488}
392489
393490/// Returns the amount of viable sources and destinations for `items` as eligible for the given `kind` of operation.
0 commit comments