Skip to content

Commit 41481e9

Browse files
committed
feat!: optional rename tracking for directories.
Depending on the source of the rename-information, items that are children of renamed parents may be provided to enable rename tracking based on these containers, instead of always resorting to tracking leaf nodes (i.e. blobs).
1 parent e058f27 commit 41481e9

File tree

5 files changed

+210
-20
lines changed

5 files changed

+210
-20
lines changed

gix-diff/src/rewrites/mod.rs

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,12 @@
1-
use crate::Rewrites;
1+
use crate::{tree, Rewrites};
2+
use std::collections::HashMap;
3+
use std::ops::Range;
24

35
/// Types related to the rename tracker for renames, rewrites and copies.
46
pub mod tracker;
57

8+
type ItemId = (gix_hash::ObjectId, Range<usize>);
9+
610
/// A type to retain state related to an ongoing tracking operation to retain sets of interesting changes
711
/// of which some are retained to at a later stage compute the ones that seem to be renames or copies.
812
pub struct Tracker<T> {
@@ -12,6 +16,9 @@ pub struct Tracker<T> {
1216
path_backing: Vec<u8>,
1317
/// How to track copies and/or rewrites.
1418
rewrites: Rewrites,
19+
/// A mapping of parents (and their IDs) to their children.
20+
/// We use IDs + path-ranges as a cheap way to refer to children so they can be located in `items` quickly.
21+
parent_child_map: HashMap<tree::visit::ChangeId, Vec<ItemId>>,
1522
}
1623

1724
/// Determine in which set of files to search for copies.

gix-diff/src/rewrites/tracker.rs

Lines changed: 42 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ use std::ops::Range;
99
use bstr::BStr;
1010
use gix_object::tree::{EntryKind, EntryMode};
1111

12+
use crate::tree::visit::Relation;
1213
use crate::{
1314
blob::{platform::prepare_diff::Operation, DiffLineStats, ResourceKind},
1415
rewrites::{CopySource, Outcome, Tracker},
@@ -28,11 +29,22 @@ pub enum ChangeKind {
2829

2930
/// A trait providing all functionality to abstract over the concept of a change, as seen by the [`Tracker`].
3031
pub trait Change: Clone {
31-
/// Return the hash of this change for identification.
32+
/// Return the hash of the object behind this change for identification.
3233
///
3334
/// Note that this is the id of the object as stored in `git`, i.e. it must have gone through workspace
34-
/// conversions.
35+
/// conversions. What matters is that the IDs are comparable.
3536
fn id(&self) -> &gix_hash::oid;
37+
/// Return the relation that this change may have with other changes.
38+
///
39+
/// It allows to associate a directory with its children that are added or removed at the same moment.
40+
/// Note that this is ignored for modifications.
41+
///
42+
/// If rename-tracking should always be on leaf-level, this should be set to `None` consistently.
43+
/// Note that trees will never be looked up by their `id` as their children are assumed to be passed in
44+
/// with the respective relationship.
45+
///
46+
/// Also note that the tracker only sees what's given to it, it will not lookup trees or match paths itself.
47+
fn relation(&self) -> Option<crate::tree::visit::Relation>;
3648
/// Return the kind of this change.
3749
fn kind(&self) -> ChangeKind;
3850
/// Return more information about the kind of entry affected by this change.
@@ -142,6 +154,7 @@ impl<T: Change> Tracker<T> {
142154
items: vec![],
143155
path_backing: vec![],
144156
rewrites,
157+
parent_child_map: Default::default(),
145158
}
146159
}
147160
}
@@ -150,23 +163,40 @@ impl<T: Change> Tracker<T> {
150163
impl<T: Change> Tracker<T> {
151164
/// We may refuse the push if that information isn't needed for what we have to track.
152165
pub fn try_push_change(&mut self, change: T, location: &BStr) -> Option<T> {
153-
if !change.entry_mode().is_blob_or_symlink() {
166+
let change_kind = change.kind();
167+
if let (None, ChangeKind::Modification { .. }) = (self.rewrites.copies, change_kind) {
154168
return Some(change);
155-
}
156-
let keep = match (self.rewrites.copies, change.kind()) {
157-
(Some(_find_copies), _) => true,
158-
(None, ChangeKind::Modification { .. }) => false,
159-
(None, _) => true,
160169
};
161170

162-
if !keep {
163-
return Some(change);
164-
}
171+
let relation = change
172+
.relation()
173+
.filter(|_| matches!(change_kind, ChangeKind::Addition | ChangeKind::Deletion));
174+
let entry_kind = change.entry_mode().kind();
175+
let parent_info = match (relation, entry_kind) {
176+
(None | Some(Relation::ChildOfParent(_)), EntryKind::Commit | EntryKind::Tree) => return Some(change),
177+
(Some(Relation::Parent(id)), EntryKind::Tree) => {
178+
let previous = self.parent_child_map.insert(id, vec![]);
179+
assert!(
180+
previous.is_none(),
181+
"change-producer must assure parents only occour once with the same id"
182+
);
183+
None
184+
}
185+
(Some(Relation::ChildOfParent(id)), EntryKind::Blob | EntryKind::BlobExecutable | EntryKind::Link) => {
186+
Some(id)
187+
}
188+
_ => None,
189+
};
165190

166191
let start = self.path_backing.len();
167192
self.path_backing.extend_from_slice(location);
193+
let path = start..self.path_backing.len();
194+
195+
if let Some(children) = parent_info.and_then(|id| self.parent_child_map.get_mut(&id)) {
196+
children.push((change.id().to_owned(), path.clone()));
197+
}
168198
self.items.push(Item {
169-
path: start..self.path_backing.len(),
199+
path,
170200
change,
171201
emitted: false,
172202
});

gix-diff/src/tree/visit.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,7 @@ mod change_impls {
110110
use gix_hash::oid;
111111
use gix_object::tree::EntryMode;
112112

113+
use crate::tree::visit::Relation;
113114
use crate::{rewrites::tracker::ChangeKind, tree::visit::Change};
114115

115116
impl crate::rewrites::tracker::Change for crate::tree::visit::Change {
@@ -119,6 +120,13 @@ mod change_impls {
119120
}
120121
}
121122

123+
fn relation(&self) -> Option<Relation> {
124+
match self {
125+
Change::Addition { relation, .. } | Change::Deletion { relation, .. } => *relation,
126+
Change::Modification { .. } => None,
127+
}
128+
}
129+
122130
fn kind(&self) -> ChangeKind {
123131
match self {
124132
Change::Addition { .. } => ChangeKind::Addition,

gix-diff/tests/rewrites/mod.rs

Lines changed: 46 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,27 @@
11
use gix_diff::rewrites::tracker::ChangeKind;
2+
use gix_diff::tree::visit::{ChangeId, Relation};
23
use gix_hash::{oid, ObjectId};
34
use gix_object::tree::{EntryKind, EntryMode};
45

56
mod tracker;
67

7-
#[derive(Debug, Clone, PartialEq, Eq)]
8+
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
89
pub struct Change {
910
id: ObjectId,
1011
kind: ChangeKind,
1112
mode: EntryMode,
13+
relation: Option<Relation>,
1214
}
1315

1416
impl gix_diff::rewrites::tracker::Change for Change {
1517
fn id(&self) -> &oid {
1618
&self.id
1719
}
1820

21+
fn relation(&self) -> Option<Relation> {
22+
self.relation
23+
}
24+
1925
fn kind(&self) -> ChangeKind {
2026
self.kind
2127
}
@@ -37,20 +43,59 @@ impl Change {
3743
id: NULL_ID,
3844
kind: ChangeKind::Modification,
3945
mode: EntryKind::Blob.into(),
46+
relation: None,
4047
}
4148
}
4249
fn deletion() -> Self {
4350
Change {
4451
id: NULL_ID,
4552
kind: ChangeKind::Deletion,
4653
mode: EntryKind::Blob.into(),
54+
relation: None,
4755
}
4856
}
4957
fn addition() -> Self {
5058
Change {
5159
id: NULL_ID,
5260
kind: ChangeKind::Addition,
5361
mode: EntryKind::Blob.into(),
62+
relation: None,
63+
}
64+
}
65+
66+
fn addition_in_tree(id: ChangeId) -> Self {
67+
Change {
68+
id: NULL_ID,
69+
kind: ChangeKind::Addition,
70+
mode: EntryKind::Blob.into(),
71+
relation: Some(Relation::ChildOfParent(id)),
72+
}
73+
}
74+
75+
fn deletion_in_tree(id: ChangeId) -> Self {
76+
Change {
77+
id: NULL_ID,
78+
kind: ChangeKind::Deletion,
79+
mode: EntryKind::Blob.into(),
80+
relation: Some(Relation::ChildOfParent(id)),
81+
}
82+
}
83+
84+
fn tree_addition(id: ChangeId) -> Self {
85+
Change {
86+
id: NULL_ID,
87+
kind: ChangeKind::Addition,
88+
mode: EntryKind::Tree.into(),
89+
relation: Some(Relation::Parent(id)),
90+
}
91+
}
92+
93+
fn tree_deletion(id: ChangeId) -> Self {
94+
Change {
95+
id: NULL_ID,
96+
kind: ChangeKind::Deletion,
97+
mode: EntryKind::Tree.into(),
98+
relation: Some(Relation::Parent(id)),
5499
}
55100
}
56101
}

gix-diff/tests/rewrites/tracker.rs

Lines changed: 106 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
1+
use crate::{
2+
hex_to_id,
3+
rewrites::{Change, NULL_ID},
4+
};
5+
use gix_diff::tree::visit::Relation;
16
use gix_diff::{
27
blob::DiffLineStats,
38
rewrites,
@@ -14,11 +19,6 @@ use gix_diff::{
1419
use gix_object::tree::EntryKind;
1520
use pretty_assertions::assert_eq;
1621

17-
use crate::{
18-
hex_to_id,
19-
rewrites::{Change, NULL_ID},
20-
};
21-
2222
#[test]
2323
fn rename_by_id() -> crate::Result {
2424
// Limits are only applied when doing rewrite-checks
@@ -522,6 +522,106 @@ fn rename_by_50_percent_similarity() -> crate::Result {
522522
Ok(())
523523
}
524524

525+
#[test]
526+
fn directories_without_relation_are_ignored() -> crate::Result {
527+
let mut track = util::new_tracker(Default::default());
528+
let tree_without_relation = Change {
529+
id: NULL_ID,
530+
kind: ChangeKind::Deletion,
531+
mode: EntryKind::Tree.into(),
532+
relation: None,
533+
};
534+
assert_eq!(
535+
track.try_push_change(tree_without_relation, "dir".into()),
536+
Some(tree_without_relation),
537+
"trees, or non-blobs, are ignored, particularly when they have no relation"
538+
);
539+
Ok(())
540+
}
541+
542+
#[test]
543+
#[ignore = "cannot match directories if their content doesn't match"]
544+
fn simple_directory_rename_with_changes() -> crate::Result {
545+
Ok(())
546+
}
547+
548+
#[test]
549+
#[ignore = "one side has relationships, the other side does not, it still works on leaf-level"]
550+
fn directory_rename_with_ids_one_side_without_on_the_other() -> crate::Result {
551+
Ok(())
552+
}
553+
554+
#[test]
555+
#[ignore = "TBD"]
556+
fn simple_directory_rename_by_id() -> crate::Result {
557+
let rewrites = Rewrites {
558+
copies: None,
559+
percentage: Some(0.5),
560+
limit: 0,
561+
};
562+
let mut track = util::new_tracker(Default::default());
563+
let tree_dst_id = 1;
564+
assert!(track
565+
.try_push_change(Change::tree_addition(tree_dst_id), "d-renamed".into())
566+
.is_none());
567+
let tree_src_id = 3;
568+
assert!(track
569+
.try_push_change(Change::tree_deletion(tree_src_id), "d".into())
570+
.is_none());
571+
assert!(
572+
track
573+
.try_push_change(
574+
Change {
575+
id: NULL_ID, /* does not matter for trees */
576+
kind: ChangeKind::Deletion,
577+
mode: EntryKind::Tree.into(),
578+
relation: Some(Relation::ChildOfParent(tree_src_id)),
579+
},
580+
"d/subdir".into(),
581+
)
582+
.is_some(),
583+
"trees that are children are simply ignored. It's easier to compare views of worktrees (`gix-dirwalk`) \
584+
and trees/indices that way."
585+
);
586+
assert!(track
587+
.try_push_change(
588+
Change {
589+
id: NULL_ID,
590+
kind: ChangeKind::Addition,
591+
mode: EntryKind::Tree.into(),
592+
relation: Some(Relation::ChildOfParent(tree_dst_id)),
593+
},
594+
"d-renamed/subdir".into(),
595+
)
596+
.is_some());
597+
let _odb = util::add_retained_blobs(
598+
&mut track,
599+
[
600+
(Change::deletion_in_tree(tree_src_id), "d/a", "a"),
601+
(Change::deletion_in_tree(tree_src_id), "d/b", "b"),
602+
(Change::deletion_in_tree(tree_src_id), "d/c", "c"),
603+
(Change::deletion_in_tree(tree_src_id), "d/subdir/d", "d"),
604+
(Change::addition_in_tree(tree_dst_id), "d-renamed/a", "a"),
605+
(Change::addition_in_tree(tree_dst_id), "d-renamed/b", "b"),
606+
(Change::addition_in_tree(tree_dst_id), "d-renamed/c", "c"),
607+
(Change::addition_in_tree(tree_dst_id), "d-renamed/subdir/d", "d"),
608+
(Change::deletion(), "a", "first\nsecond\n"),
609+
(Change::addition(), "b", "firt\nsecond\n"),
610+
],
611+
);
612+
let mut called = false;
613+
let out = util::assert_emit(&mut track, |dst, src| {
614+
called = true;
615+
assert_eq!(src, None, "there is just a single deletion, no pair");
616+
assert_eq!(dst.location, "a");
617+
assert_eq!(dst.change.kind, ChangeKind::Deletion);
618+
Action::Continue
619+
});
620+
assert_eq!(out, Default::default());
621+
assert!(called);
622+
Ok(())
623+
}
624+
525625
#[test]
526626
fn remove_only() -> crate::Result {
527627
let mut track = util::new_tracker(Default::default());
@@ -554,7 +654,7 @@ fn add_only() -> crate::Result {
554654
let out = util::assert_emit(&mut track, |dst, src| {
555655
assert!(!called);
556656
called = true;
557-
assert!(src.is_none(), "there is just a single deletion, no pair");
657+
assert!(src.is_none(), "there is just a single addition, no pair");
558658
assert_eq!(dst.location, "a");
559659
assert_eq!(dst.change.kind, ChangeKind::Addition);
560660
Action::Continue

0 commit comments

Comments
 (0)