Skip to content

Commit e12e772

Browse files
committed
feat!: optional rename tracking for directories.
Depending on the source of the rename-information, items that are children of renamed parents may be provided to enable rename tracking based on these containers, instead of always resorting to tracking leaf nodes (i.e. blobs).
1 parent e058f27 commit e12e772

File tree

5 files changed

+402
-48
lines changed

5 files changed

+402
-48
lines changed

crate-status.md

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -311,10 +311,13 @@ Check out the [performance discussion][gix-diff-performance] as well.
311311
* **lines**
312312
* [x] Simple line-by-line diffs powered by the `imara-diff` crate.
313313
* **generic rename tracker to find renames and copies**
314-
* [x] find by exact match
315-
* [x] find by similarity check
314+
* [x] find blobs by exact match
315+
* [x] find blobs by similarity check
316316
* [ ] heuristics to find best candidate
317-
* [ ] find by basename to help detecting simple moves
317+
* [ ] find by basename to support similarity check
318+
* [x] directory tracking
319+
- [x] by identity
320+
- [ ] by similarity
318321
* **blob**
319322
* [x] a choice of to-worktree, to-git and to-worktree-if-needed conversions
320323
* [x] `textconv` filters

gix-diff/src/rewrites/tracker.rs

Lines changed: 131 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,14 @@
44
//!
55
//! - it's less sophisticated and doesn't use any ranking of candidates. Instead, it picks the first possible match.
66
//! - the set used for copy-detection is probably smaller by default.
7+
8+
use std::collections::VecDeque;
79
use std::ops::Range;
810

9-
use bstr::BStr;
11+
use bstr::{BStr, ByteSlice};
1012
use gix_object::tree::{EntryKind, EntryMode};
1113

14+
use crate::tree::visit::{Action, ChangeId, Relation};
1215
use crate::{
1316
blob::{platform::prepare_diff::Operation, DiffLineStats, ResourceKind},
1417
rewrites::{CopySource, Outcome, Tracker},
@@ -28,11 +31,22 @@ pub enum ChangeKind {
2831

2932
/// A trait providing all functionality to abstract over the concept of a change, as seen by the [`Tracker`].
3033
pub trait Change: Clone {
31-
/// Return the hash of this change for identification.
34+
/// Return the hash of the object behind this change for identification.
3235
///
3336
/// Note that this is the id of the object as stored in `git`, i.e. it must have gone through workspace
34-
/// conversions.
37+
/// conversions. What matters is that the IDs are comparable.
3538
fn id(&self) -> &gix_hash::oid;
39+
/// Return the relation that this change may have with other changes.
40+
///
41+
/// It allows to associate a directory with its children that are added or removed at the same moment.
42+
/// Note that this is ignored for modifications.
43+
///
44+
/// If rename-tracking should always be on leaf-level, this should be set to `None` consistently.
45+
/// Note that trees will never be looked up by their `id` as their children are assumed to be passed in
46+
/// with the respective relationship.
47+
///
48+
/// Also note that the tracker only sees what's given to it, it will not lookup trees or match paths itself.
49+
fn relation(&self) -> Option<Relation>;
3650
/// Return the kind of this change.
3751
fn kind(&self) -> ChangeKind;
3852
/// Return more information about the kind of entry affected by this change.
@@ -55,11 +69,11 @@ impl<T: Change> Item<T> {
5569
fn location<'a>(&self, backing: &'a [u8]) -> &'a BStr {
5670
backing[self.path.clone()].as_ref()
5771
}
58-
fn entry_mode_compatible(&self, mode: EntryMode) -> bool {
72+
fn entry_mode_compatible(&self, other: EntryMode) -> bool {
5973
use EntryKind::*;
6074
matches!(
61-
(mode.kind(), self.change.entry_mode().kind()),
62-
(Blob | BlobExecutable, Blob | BlobExecutable) | (Link, Link)
75+
(other.kind(), self.change.entry_mode().kind()),
76+
(Blob | BlobExecutable, Blob | BlobExecutable) | (Link, Link) | (Tree, Tree)
6377
)
6478
}
6579

@@ -108,7 +122,7 @@ pub mod visit {
108122
}
109123

110124
/// A change along with a location.
111-
#[derive(Clone)]
125+
#[derive(Debug, Clone)]
112126
pub struct Destination<'a, T: Clone> {
113127
/// The change at the given `location`.
114128
pub change: T,
@@ -150,23 +164,25 @@ impl<T: Change> Tracker<T> {
150164
impl<T: Change> Tracker<T> {
151165
/// We may refuse the push if that information isn't needed for what we have to track.
152166
pub fn try_push_change(&mut self, change: T, location: &BStr) -> Option<T> {
153-
if !change.entry_mode().is_blob_or_symlink() {
167+
let change_kind = change.kind();
168+
if let (None, ChangeKind::Modification { .. }) = (self.rewrites.copies, change_kind) {
154169
return Some(change);
155-
}
156-
let keep = match (self.rewrites.copies, change.kind()) {
157-
(Some(_find_copies), _) => true,
158-
(None, ChangeKind::Modification { .. }) => false,
159-
(None, _) => true,
160170
};
161171

162-
if !keep {
172+
let relation = change
173+
.relation()
174+
.filter(|_| matches!(change_kind, ChangeKind::Addition | ChangeKind::Deletion));
175+
let entry_kind = change.entry_mode().kind();
176+
if let (None | Some(Relation::ChildOfParent(_)), EntryKind::Commit | EntryKind::Tree) = (relation, entry_kind) {
163177
return Some(change);
164-
}
178+
};
165179

166180
let start = self.path_backing.len();
167181
self.path_backing.extend_from_slice(location);
182+
let path = start..self.path_backing.len();
183+
168184
self.items.push(Item {
169-
path: start..self.path_backing.len(),
185+
path,
170186
change,
171187
emitted: false,
172188
});
@@ -178,6 +194,8 @@ impl<T: Change> Tracker<T> {
178194
/// `cb(destination, source)` is called for each item, either with `Some(source)` if it's
179195
/// the destination of a copy or rename, or with `None` for source if no relation to other
180196
/// items in the tracked set exist, which is like saying 'no rename or rewrite or copy' happened.
197+
/// Note that directories with [relation](Relation) will be emitted if there is a match, along with all their matching
198+
/// child-items which are similarly bundled as rename.
181199
///
182200
/// `objects` is used to access blob data for similarity checks if required and is taken directly from the object database.
183201
/// Worktree filters and text conversions will be applied afterwards automatically. Note that object-caching *should not*
@@ -195,7 +213,7 @@ impl<T: Change> Tracker<T> {
195213
/// will panic if `change` is not a modification, and it's valid to not call `push` at all.
196214
pub fn emit<PushSourceTreeFn, E>(
197215
&mut self,
198-
mut cb: impl FnMut(visit::Destination<'_, T>, Option<visit::Source<'_, T>>) -> crate::tree::visit::Action,
216+
mut cb: impl FnMut(visit::Destination<'_, T>, Option<visit::Source<'_, T>>) -> Action,
199217
diff_cache: &mut crate::blob::Platform,
200218
objects: &impl gix_object::FindObjectOrHeader,
201219
mut push_source_tree: PushSourceTreeFn,
@@ -272,7 +290,7 @@ impl<T: Change> Tracker<T> {
272290
change: item.change,
273291
},
274292
None,
275-
) == crate::tree::visit::Action::Cancel
293+
) == Action::Cancel
276294
{
277295
break;
278296
}
@@ -285,17 +303,15 @@ impl<T: Change> Tracker<T> {
285303
fn match_pairs_of_kind(
286304
&mut self,
287305
kind: visit::SourceKind,
288-
cb: &mut impl FnMut(visit::Destination<'_, T>, Option<visit::Source<'_, T>>) -> crate::tree::visit::Action,
306+
cb: &mut impl FnMut(visit::Destination<'_, T>, Option<visit::Source<'_, T>>) -> Action,
289307
percentage: Option<f32>,
290308
out: &mut Outcome,
291309
diff_cache: &mut crate::blob::Platform,
292310
objects: &impl gix_object::FindObjectOrHeader,
293311
) -> Result<(), emit::Error> {
294312
// we try to cheaply reduce the set of possibilities first, before possibly looking more exhaustively.
295313
let needs_second_pass = !needs_exact_match(percentage);
296-
if self.match_pairs(cb, None /* by identity */, kind, out, diff_cache, objects)?
297-
== crate::tree::visit::Action::Cancel
298-
{
314+
if self.match_pairs(cb, None /* by identity */, kind, out, diff_cache, objects)? == Action::Cancel {
299315
return Ok(());
300316
}
301317
if needs_second_pass {
@@ -328,13 +344,13 @@ impl<T: Change> Tracker<T> {
328344

329345
fn match_pairs(
330346
&mut self,
331-
cb: &mut impl FnMut(visit::Destination<'_, T>, Option<visit::Source<'_, T>>) -> crate::tree::visit::Action,
347+
cb: &mut impl FnMut(visit::Destination<'_, T>, Option<visit::Source<'_, T>>) -> Action,
332348
percentage: Option<f32>,
333349
kind: visit::SourceKind,
334350
stats: &mut Outcome,
335351
diff_cache: &mut crate::blob::Platform,
336352
objects: &impl gix_object::FindObjectOrHeader,
337-
) -> Result<crate::tree::visit::Action, emit::Error> {
353+
) -> Result<Action, emit::Error> {
338354
let mut dest_ofs = 0;
339355
while let Some((mut dest_idx, dest)) = self.items[dest_ofs..].iter().enumerate().find_map(|(idx, item)| {
340356
(!item.emitted && matches!(item.change.kind(), ChangeKind::Addition)).then_some((idx, item))
@@ -368,28 +384,109 @@ impl<T: Change> Tracker<T> {
368384
src_idx,
369385
)
370386
});
371-
if src.is_none() {
387+
let Some((src, src_idx)) = src else {
372388
continue;
373-
}
389+
};
374390
let location = dest.location(&self.path_backing);
375391
let change = dest.change.clone();
376392
let dest = visit::Destination { change, location };
377-
let src_idx = src.as_ref().map(|t| t.1);
378-
let res = cb(dest, src.map(|t| t.0));
393+
let relations = if percentage.is_none() {
394+
src.change.relation().zip(dest.change.relation())
395+
} else {
396+
None
397+
};
398+
let res = cb(dest, Some(src));
379399

380400
self.items[dest_idx].emitted = true;
381-
if let Some(src_idx) = src_idx {
382-
self.items[src_idx].emitted = true;
401+
self.items[src_idx].emitted = true;
402+
403+
if res == Action::Cancel {
404+
return Ok(Action::Cancel);
383405
}
384406

385-
if res == crate::tree::visit::Action::Cancel {
386-
return Ok(crate::tree::visit::Action::Cancel);
407+
if let Some((Relation::Parent(src), Relation::Parent(dst))) = relations {
408+
let res = self.emit_child_renames_matching_identity(cb, kind, src, dst)?;
409+
if res == Action::Cancel {
410+
return Ok(Action::Cancel);
411+
}
412+
}
413+
}
414+
Ok(Action::Continue)
415+
}
416+
417+
/// Emit the children of `src_parent_id` and `dst_parent_id` as pairs of exact matches, which are assumed
418+
/// as `src` and `dst` were an exact match.
419+
fn emit_child_renames_matching_identity(
420+
&mut self,
421+
cb: &mut impl FnMut(visit::Destination<'_, T>, Option<visit::Source<'_, T>>) -> Action,
422+
kind: visit::SourceKind,
423+
src_parent_id: ChangeId,
424+
dst_parent_id: ChangeId,
425+
) -> Result<Action, emit::Error> {
426+
debug_assert_ne!(
427+
src_parent_id, dst_parent_id,
428+
"src and destination directories must be distinct"
429+
);
430+
let mut candidates = VecDeque::with_capacity(2 /* at least 1 per directory */);
431+
for item in self.items.iter_mut().filter(|item| !item.emitted) {
432+
match item.change.relation() {
433+
Some(Relation::ChildOfParent(id)) if id == src_parent_id => {
434+
candidates.push_front((item.change.id().to_owned(), item));
435+
}
436+
Some(Relation::ChildOfParent(id)) if id == dst_parent_id => {
437+
candidates.push_back((item.change.id().to_owned(), item));
438+
}
439+
_ => continue,
440+
};
441+
}
442+
let (src_items, dst_items) = candidates.as_mut_slices();
443+
for list in [&mut *src_items, &mut *dst_items] {
444+
list.sort_by(|a, b| {
445+
a.0.cmp(&b.0)
446+
.then_with(|| a.1.location(&self.path_backing).cmp(b.1.location(&self.path_backing)))
447+
});
448+
}
449+
450+
for ((src_id, src_item), (dst_id, dst_item)) in src_items.iter_mut().zip(dst_items) {
451+
// Since the parent items are already identical by ID, we know that the children will also match, we just
452+
// double-check to still have a chance to be correct in case some of that goes wrong.
453+
if src_id == dst_id
454+
&& filename(src_item.location(&self.path_backing)) == filename(dst_item.location(&self.path_backing))
455+
{
456+
let entry_mode = src_item.change.entry_mode();
457+
let location = src_item.location(&self.path_backing);
458+
let src = visit::Source {
459+
entry_mode,
460+
id: *src_id,
461+
kind,
462+
location,
463+
change: &src_item.change,
464+
diff: None,
465+
};
466+
let location = dst_item.location(&self.path_backing);
467+
let change = dst_item.change.clone();
468+
let dst = visit::Destination { change, location };
469+
let res = cb(dst, Some(src));
470+
471+
src_item.emitted = true;
472+
dst_item.emitted = true;
473+
474+
if res == Action::Cancel {
475+
return Ok(res);
476+
}
477+
} else {
478+
gix_trace::warn!("Children of parents with change-id {src_parent_id} and {dst_parent_id} were not equal, even though their parents claimed to be");
479+
break;
387480
}
388481
}
389-
Ok(crate::tree::visit::Action::Continue)
482+
Ok(Action::Continue)
390483
}
391484
}
392485

486+
fn filename(path: &BStr) -> &BStr {
487+
path.rfind_byte(b'/').map_or(path, |idx| path[idx + 1..].as_bstr())
488+
}
489+
393490
/// Returns the amount of viable sources and destinations for `items` as eligible for the given `kind` of operation.
394491
fn estimate_involved_items(
395492
items: impl IntoIterator<Item = (bool, ChangeKind)>,
@@ -473,13 +570,9 @@ fn find_match<'a, T: Change>(
473570
if let Some(src) = res {
474571
return Ok(Some(src));
475572
}
476-
} else {
573+
} else if item_mode.is_blob() {
477574
let mut has_new = false;
478575
let percentage = percentage.expect("it's set to something below 1.0 and we assured this");
479-
debug_assert!(
480-
item_mode.is_blob(),
481-
"symlinks are matched exactly, and trees aren't used here"
482-
);
483576

484577
for (can_idx, src) in items
485578
.iter()

gix-diff/src/tree/visit.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,7 @@ mod change_impls {
110110
use gix_hash::oid;
111111
use gix_object::tree::EntryMode;
112112

113+
use crate::tree::visit::Relation;
113114
use crate::{rewrites::tracker::ChangeKind, tree::visit::Change};
114115

115116
impl crate::rewrites::tracker::Change for crate::tree::visit::Change {
@@ -119,6 +120,13 @@ mod change_impls {
119120
}
120121
}
121122

123+
fn relation(&self) -> Option<Relation> {
124+
match self {
125+
Change::Addition { relation, .. } | Change::Deletion { relation, .. } => *relation,
126+
Change::Modification { .. } => None,
127+
}
128+
}
129+
122130
fn kind(&self) -> ChangeKind {
123131
match self {
124132
Change::Addition { .. } => ChangeKind::Addition,

0 commit comments

Comments
 (0)