44//!
55//! - it's less sophisticated and doesn't use any ranking of candidates. Instead, it picks the first possible match.
66//! - the set used for copy-detection is probably smaller by default.
7+
78use std:: ops:: Range ;
89
9- use bstr:: BStr ;
10+ use bstr:: { BStr , ByteSlice } ;
1011use gix_object:: tree:: { EntryKind , EntryMode } ;
1112
13+ use crate :: tree:: visit:: { Action , ChangeId , Relation } ;
1214use crate :: {
1315 blob:: { platform:: prepare_diff:: Operation , DiffLineStats , ResourceKind } ,
1416 rewrites:: { CopySource , Outcome , Tracker } ,
@@ -28,11 +30,22 @@ pub enum ChangeKind {
2830
2931/// A trait providing all functionality to abstract over the concept of a change, as seen by the [`Tracker`].
3032pub trait Change : Clone {
31- /// Return the hash of this change for identification.
33+ /// Return the hash of the object behind this change for identification.
3234 ///
3335 /// Note that this is the id of the object as stored in `git`, i.e. it must have gone through workspace
34- /// conversions.
36+ /// conversions. What matters is that the IDs are comparable.
3537 fn id ( & self ) -> & gix_hash:: oid ;
38+ /// Return the relation that this change may have with other changes.
39+ ///
40+ /// It allows to associate a directory with its children that are added or removed at the same moment.
41+ /// Note that this is ignored for modifications.
42+ ///
43+ /// If rename-tracking should always be on leaf-level, this should be set to `None` consistently.
44+ /// Note that trees will never be looked up by their `id` as their children are assumed to be passed in
45+ /// with the respective relationship.
46+ ///
47+ /// Also note that the tracker only sees what's given to it, it will not lookup trees or match paths itself.
48+ fn relation ( & self ) -> Option < Relation > ;
3649 /// Return the kind of this change.
3750 fn kind ( & self ) -> ChangeKind ;
3851 /// Return more information about the kind of entry affected by this change.
@@ -55,11 +68,11 @@ impl<T: Change> Item<T> {
5568 fn location < ' a > ( & self , backing : & ' a [ u8 ] ) -> & ' a BStr {
5669 backing[ self . path . clone ( ) ] . as_ref ( )
5770 }
58- fn entry_mode_compatible ( & self , mode : EntryMode ) -> bool {
71+ fn entry_mode_compatible ( & self , other : EntryMode ) -> bool {
5972 use EntryKind :: * ;
6073 matches ! (
61- ( mode . kind( ) , self . change. entry_mode( ) . kind( ) ) ,
62- ( Blob | BlobExecutable , Blob | BlobExecutable ) | ( Link , Link )
74+ ( other . kind( ) , self . change. entry_mode( ) . kind( ) ) ,
75+ ( Blob | BlobExecutable , Blob | BlobExecutable ) | ( Link , Link ) | ( Tree , Tree )
6376 )
6477 }
6578
@@ -108,7 +121,7 @@ pub mod visit {
108121 }
109122
110123 /// A change along with a location.
111- #[ derive( Clone ) ]
124+ #[ derive( Debug , Clone ) ]
112125 pub struct Destination < ' a , T : Clone > {
113126 /// The change at the given `location`.
114127 pub change : T ,
@@ -150,23 +163,25 @@ impl<T: Change> Tracker<T> {
150163impl < T : Change > Tracker < T > {
151164 /// We may refuse the push if that information isn't needed for what we have to track.
152165 pub fn try_push_change ( & mut self , change : T , location : & BStr ) -> Option < T > {
153- if !change. entry_mode ( ) . is_blob_or_symlink ( ) {
166+ let change_kind = change. kind ( ) ;
167+ if let ( None , ChangeKind :: Modification { .. } ) = ( self . rewrites . copies , change_kind) {
154168 return Some ( change) ;
155- }
156- let keep = match ( self . rewrites . copies , change. kind ( ) ) {
157- ( Some ( _find_copies) , _) => true ,
158- ( None , ChangeKind :: Modification { .. } ) => false ,
159- ( None , _) => true ,
160169 } ;
161170
162- if !keep {
171+ let relation = change
172+ . relation ( )
173+ . filter ( |_| matches ! ( change_kind, ChangeKind :: Addition | ChangeKind :: Deletion ) ) ;
174+ let entry_kind = change. entry_mode ( ) . kind ( ) ;
175+ if let ( None | Some ( Relation :: ChildOfParent ( _) ) , EntryKind :: Commit | EntryKind :: Tree ) = ( relation, entry_kind) {
163176 return Some ( change) ;
164- }
177+ } ;
165178
166179 let start = self . path_backing . len ( ) ;
167180 self . path_backing . extend_from_slice ( location) ;
181+ let path = start..self . path_backing . len ( ) ;
182+
168183 self . items . push ( Item {
169- path : start.. self . path_backing . len ( ) ,
184+ path,
170185 change,
171186 emitted : false ,
172187 } ) ;
@@ -178,6 +193,8 @@ impl<T: Change> Tracker<T> {
178193 /// `cb(destination, source)` is called for each item, either with `Some(source)` if it's
179194 /// the destination of a copy or rename, or with `None` for source if no relation to other
180195 /// items in the tracked set exist, which is like saying 'no rename or rewrite or copy' happened.
196+ /// Note that directories with [relation](Relation) will be emitted if there is a match, along with all their matching
197+ /// child-items which are similarly bundled as rename.
181198 ///
182199 /// `objects` is used to access blob data for similarity checks if required and is taken directly from the object database.
183200 /// Worktree filters and text conversions will be applied afterwards automatically. Note that object-caching *should not*
@@ -195,7 +212,7 @@ impl<T: Change> Tracker<T> {
195212 /// will panic if `change` is not a modification, and it's valid to not call `push` at all.
196213 pub fn emit < PushSourceTreeFn , E > (
197214 & mut self ,
198- mut cb : impl FnMut ( visit:: Destination < ' _ , T > , Option < visit:: Source < ' _ , T > > ) -> crate :: tree :: visit :: Action ,
215+ mut cb : impl FnMut ( visit:: Destination < ' _ , T > , Option < visit:: Source < ' _ , T > > ) -> Action ,
199216 diff_cache : & mut crate :: blob:: Platform ,
200217 objects : & impl gix_object:: FindObjectOrHeader ,
201218 mut push_source_tree : PushSourceTreeFn ,
@@ -272,7 +289,7 @@ impl<T: Change> Tracker<T> {
272289 change : item. change ,
273290 } ,
274291 None ,
275- ) == crate :: tree :: visit :: Action :: Cancel
292+ ) == Action :: Cancel
276293 {
277294 break ;
278295 }
@@ -285,17 +302,15 @@ impl<T: Change> Tracker<T> {
285302 fn match_pairs_of_kind (
286303 & mut self ,
287304 kind : visit:: SourceKind ,
288- cb : & mut impl FnMut ( visit:: Destination < ' _ , T > , Option < visit:: Source < ' _ , T > > ) -> crate :: tree :: visit :: Action ,
305+ cb : & mut impl FnMut ( visit:: Destination < ' _ , T > , Option < visit:: Source < ' _ , T > > ) -> Action ,
289306 percentage : Option < f32 > ,
290307 out : & mut Outcome ,
291308 diff_cache : & mut crate :: blob:: Platform ,
292309 objects : & impl gix_object:: FindObjectOrHeader ,
293310 ) -> Result < ( ) , emit:: Error > {
294311 // we try to cheaply reduce the set of possibilities first, before possibly looking more exhaustively.
295312 let needs_second_pass = !needs_exact_match ( percentage) ;
296- if self . match_pairs ( cb, None /* by identity */ , kind, out, diff_cache, objects) ?
297- == crate :: tree:: visit:: Action :: Cancel
298- {
313+ if self . match_pairs ( cb, None /* by identity */ , kind, out, diff_cache, objects) ? == Action :: Cancel {
299314 return Ok ( ( ) ) ;
300315 }
301316 if needs_second_pass {
@@ -328,13 +343,13 @@ impl<T: Change> Tracker<T> {
328343
329344 fn match_pairs (
330345 & mut self ,
331- cb : & mut impl FnMut ( visit:: Destination < ' _ , T > , Option < visit:: Source < ' _ , T > > ) -> crate :: tree :: visit :: Action ,
346+ cb : & mut impl FnMut ( visit:: Destination < ' _ , T > , Option < visit:: Source < ' _ , T > > ) -> Action ,
332347 percentage : Option < f32 > ,
333348 kind : visit:: SourceKind ,
334349 stats : & mut Outcome ,
335350 diff_cache : & mut crate :: blob:: Platform ,
336351 objects : & impl gix_object:: FindObjectOrHeader ,
337- ) -> Result < crate :: tree :: visit :: Action , emit:: Error > {
352+ ) -> Result < Action , emit:: Error > {
338353 let mut dest_ofs = 0 ;
339354 while let Some ( ( mut dest_idx, dest) ) = self . items [ dest_ofs..] . iter ( ) . enumerate ( ) . find_map ( |( idx, item) | {
340355 ( !item. emitted && matches ! ( item. change. kind( ) , ChangeKind :: Addition ) ) . then_some ( ( idx, item) )
@@ -368,28 +383,102 @@ impl<T: Change> Tracker<T> {
368383 src_idx,
369384 )
370385 } ) ;
371- if src . is_none ( ) {
386+ let Some ( ( src , src_idx ) ) = src else {
372387 continue ;
373- }
388+ } ;
374389 let location = dest. location ( & self . path_backing ) ;
375390 let change = dest. change . clone ( ) ;
376391 let dest = visit:: Destination { change, location } ;
377- let src_idx = src. as_ref ( ) . map ( |t| t. 1 ) ;
378- let res = cb ( dest, src. map ( |t| t. 0 ) ) ;
392+ let relations = if percentage. is_none ( ) {
393+ src. change . relation ( ) . zip ( dest. change . relation ( ) )
394+ } else {
395+ None
396+ } ;
397+ let res = cb ( dest, Some ( src) ) ;
379398
380399 self . items [ dest_idx] . emitted = true ;
381- if let Some ( src_idx) = src_idx {
382- self . items [ src_idx] . emitted = true ;
400+ self . items [ src_idx] . emitted = true ;
401+
402+ if res == Action :: Cancel {
403+ return Ok ( Action :: Cancel ) ;
404+ }
405+
406+ if let Some ( ( Relation :: Parent ( src) , Relation :: Parent ( dst) ) ) = relations {
407+ let res = self . emit_child_renames_matching_identity ( cb, kind, src, dst) ?;
408+ if res == Action :: Cancel {
409+ return Ok ( Action :: Cancel ) ;
410+ }
383411 }
412+ }
413+ Ok ( Action :: Continue )
414+ }
384415
385- if res == crate :: tree:: visit:: Action :: Cancel {
386- return Ok ( crate :: tree:: visit:: Action :: Cancel ) ;
416+ /// Emit the children of `src_parent_id` and `dst_parent_id` as pairs of exact matches, which are assumed
417+ /// as `src` and `dst` were an exact match (so all children have to match exactly).
418+ fn emit_child_renames_matching_identity (
419+ & mut self ,
420+ cb : & mut impl FnMut ( visit:: Destination < ' _ , T > , Option < visit:: Source < ' _ , T > > ) -> Action ,
421+ kind : visit:: SourceKind ,
422+ src_parent_id : ChangeId ,
423+ dst_parent_id : ChangeId ,
424+ ) -> Result < Action , emit:: Error > {
425+ debug_assert_ne ! (
426+ src_parent_id, dst_parent_id,
427+ "src and destination directories must be distinct"
428+ ) ;
429+ let ( mut src_items, mut dst_items) = ( Vec :: with_capacity ( 1 ) , Vec :: with_capacity ( 1 ) ) ;
430+ for item in self . items . iter_mut ( ) . filter ( |item| !item. emitted ) {
431+ match item. change . relation ( ) {
432+ Some ( Relation :: ChildOfParent ( id) ) if id == src_parent_id => {
433+ src_items. push ( ( item. change . id ( ) . to_owned ( ) , item) ) ;
434+ }
435+ Some ( Relation :: ChildOfParent ( id) ) if id == dst_parent_id => {
436+ dst_items. push ( ( item. change . id ( ) . to_owned ( ) , item) ) ;
437+ }
438+ _ => continue ,
439+ } ;
440+ }
441+
442+ for ( ( src_id, src_item) , ( dst_id, dst_item) ) in src_items. into_iter ( ) . zip ( dst_items) {
443+ // Since the parent items are already identical by ID, we know that the children will also match, we just
444+ // double-check to still have a chance to be correct in case some of that goes wrong.
445+ if src_id == dst_id
446+ && filename ( src_item. location ( & self . path_backing ) ) == filename ( dst_item. location ( & self . path_backing ) )
447+ {
448+ let entry_mode = src_item. change . entry_mode ( ) ;
449+ let location = src_item. location ( & self . path_backing ) ;
450+ let src = visit:: Source {
451+ entry_mode,
452+ id : src_id,
453+ kind,
454+ location,
455+ change : & src_item. change ,
456+ diff : None ,
457+ } ;
458+ let location = dst_item. location ( & self . path_backing ) ;
459+ let change = dst_item. change . clone ( ) ;
460+ let dst = visit:: Destination { change, location } ;
461+ let res = cb ( dst, Some ( src) ) ;
462+
463+ src_item. emitted = true ;
464+ dst_item. emitted = true ;
465+
466+ if res == Action :: Cancel {
467+ return Ok ( res) ;
468+ }
469+ } else {
470+ gix_trace:: warn!( "Children of parents with change-id {src_parent_id} and {dst_parent_id} were not equal, even though their parents claimed to be" ) ;
471+ break ;
387472 }
388473 }
389- Ok ( crate :: tree :: visit :: Action :: Continue )
474+ Ok ( Action :: Continue )
390475 }
391476}
392477
478+ fn filename ( path : & BStr ) -> & BStr {
479+ path. rfind_byte ( b'/' ) . map_or ( path, |idx| path[ idx + 1 ..] . as_bstr ( ) )
480+ }
481+
393482/// Returns the amount of viable sources and destinations for `items` as eligible for the given `kind` of operation.
394483fn estimate_involved_items (
395484 items : impl IntoIterator < Item = ( bool , ChangeKind ) > ,
@@ -473,13 +562,9 @@ fn find_match<'a, T: Change>(
473562 if let Some ( src) = res {
474563 return Ok ( Some ( src) ) ;
475564 }
476- } else {
565+ } else if item_mode . is_blob ( ) {
477566 let mut has_new = false ;
478567 let percentage = percentage. expect ( "it's set to something below 1.0 and we assured this" ) ;
479- debug_assert ! (
480- item_mode. is_blob( ) ,
481- "symlinks are matched exactly, and trees aren't used here"
482- ) ;
483568
484569 for ( can_idx, src) in items
485570 . iter ( )
0 commit comments