Skip to content

Commit 436117d

Browse files
committed
fix: improve directory matching
Previously the sorting wasn't accounted for, so an assumption about the order of changes weren't actually true.
1 parent ddc99b5 commit 436117d

File tree

3 files changed

+113
-83
lines changed

3 files changed

+113
-83
lines changed

gix-diff/src/rewrites/tracker.rs

Lines changed: 34 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -172,7 +172,10 @@ impl<T: Change> Tracker<T> {
172172
.relation()
173173
.filter(|_| matches!(change_kind, ChangeKind::Addition | ChangeKind::Deletion));
174174
let entry_kind = change.entry_mode().kind();
175-
if let (None | Some(Relation::ChildOfParent(_)), EntryKind::Commit | EntryKind::Tree) = (relation, entry_kind) {
175+
if entry_kind == EntryKind::Commit {
176+
return Some(change);
177+
}
178+
if let (None, EntryKind::Tree) = (relation, entry_kind) {
176179
return Some(change);
177180
};
178181

@@ -221,27 +224,46 @@ impl<T: Change> Tracker<T> {
221224
PushSourceTreeFn: FnMut(&mut dyn FnMut(T, &BStr)) -> Result<(), E>,
222225
E: std::error::Error + Send + Sync + 'static,
223226
{
227+
fn is_parent(change: &impl Change) -> bool {
228+
matches!(change.relation(), Some(Relation::Parent(_)))
229+
}
224230
diff_cache.options.skip_internal_diff_if_external_is_configured = false;
225231

232+
// The point of this is to optimize for identity-based lookups, which should be easy to find
233+
// by partitioning.
226234
fn by_id_and_location<T: Change>(a: &Item<T>, b: &Item<T>) -> std::cmp::Ordering {
227235
a.change
228236
.id()
229237
.cmp(b.change.id())
230238
.then_with(|| a.path.start.cmp(&b.path.start).then(a.path.end.cmp(&b.path.end)))
231239
}
232-
self.items.sort_by(by_id_and_location);
233240

234241
let mut out = Outcome {
235242
options: self.rewrites,
236243
..Default::default()
237244
};
245+
self.items.sort_by(by_id_and_location);
246+
247+
// Rewrites by directory can be pruned out quickly, quickly pruning candidates
248+
// for the following per-item steps.
249+
self.match_pairs_of_kind(
250+
visit::SourceKind::Rename,
251+
&mut cb,
252+
None, /* by identity for parents */
253+
&mut out,
254+
diff_cache,
255+
objects,
256+
Some(is_parent),
257+
)?;
258+
238259
self.match_pairs_of_kind(
239260
visit::SourceKind::Rename,
240261
&mut cb,
241262
self.rewrites.percentage,
242263
&mut out,
243264
diff_cache,
244265
objects,
266+
None,
245267
)?;
246268

247269
if let Some(copies) = self.rewrites.copies {
@@ -252,6 +274,7 @@ impl<T: Change> Tracker<T> {
252274
&mut out,
253275
diff_cache,
254276
objects,
277+
None,
255278
)?;
256279

257280
match copies.source {
@@ -275,6 +298,7 @@ impl<T: Change> Tracker<T> {
275298
&mut out,
276299
diff_cache,
277300
objects,
301+
None,
278302
)?;
279303
}
280304
}
@@ -307,10 +331,11 @@ impl<T: Change> Tracker<T> {
307331
out: &mut Outcome,
308332
diff_cache: &mut crate::blob::Platform,
309333
objects: &impl gix_object::FindObjectOrHeader,
334+
filter: Option<fn(&T) -> bool>,
310335
) -> Result<(), emit::Error> {
311336
// we try to cheaply reduce the set of possibilities first, before possibly looking more exhaustively.
312337
let needs_second_pass = !needs_exact_match(percentage);
313-
if self.match_pairs(cb, None /* by identity */, kind, out, diff_cache, objects)? == Action::Cancel {
338+
if self.match_pairs(cb, None /* by identity */, kind, out, diff_cache, objects, filter)? == Action::Cancel {
314339
return Ok(());
315340
}
316341
if needs_second_pass {
@@ -335,7 +360,7 @@ impl<T: Change> Tracker<T> {
335360
}
336361
};
337362
if !is_limited {
338-
self.match_pairs(cb, percentage, kind, out, diff_cache, objects)?;
363+
self.match_pairs(cb, percentage, kind, out, diff_cache, objects, None)?;
339364
}
340365
}
341366
Ok(())
@@ -349,10 +374,14 @@ impl<T: Change> Tracker<T> {
349374
stats: &mut Outcome,
350375
diff_cache: &mut crate::blob::Platform,
351376
objects: &impl gix_object::FindObjectOrHeader,
377+
filter: Option<fn(&T) -> bool>,
352378
) -> Result<Action, emit::Error> {
353379
let mut dest_ofs = 0;
354380
while let Some((mut dest_idx, dest)) = self.items[dest_ofs..].iter().enumerate().find_map(|(idx, item)| {
355-
(!item.emitted && matches!(item.change.kind(), ChangeKind::Addition)).then_some((idx, item))
381+
(!item.emitted
382+
&& matches!(item.change.kind(), ChangeKind::Addition)
383+
&& filter.map_or(true, |f| f(&item.change)))
384+
.then_some((idx, item))
356385
}) {
357386
dest_idx += dest_ofs;
358387
dest_ofs = dest_idx + 1;

gix-diff/tests/diff/rewrites/tracker.rs

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -650,9 +650,9 @@ fn simple_directory_rename_by_id() -> crate::Result {
650650
},
651651
"d/subdir".into(),
652652
)
653-
.is_some(),
654-
"trees that are children are simply ignored. It's easier to compare views of worktrees (`gix-dirwalk`) \
655-
and trees/indices that way."
653+
.is_none(),
654+
"trees that are children are kept and matched. That way, they can quickly be pruned which is done first.\
655+
Those who don't need them can prune them in a later step."
656656
);
657657
assert!(track
658658
.try_push_change(
@@ -664,7 +664,7 @@ fn simple_directory_rename_by_id() -> crate::Result {
664664
},
665665
"d-renamed/subdir".into(),
666666
)
667-
.is_some());
667+
.is_none());
668668
let _odb = util::add_retained_blobs(
669669
&mut track,
670670
[
@@ -692,22 +692,23 @@ fn simple_directory_rename_by_id() -> crate::Result {
692692
assert_eq!(dst.change.relation, Some(Relation::Parent(1)));
693693
assert_eq!(dst.change.mode.kind(), EntryKind::Tree);
694694
}
695-
1..=4 => {
695+
1..=5 => {
696696
let src = src.unwrap();
697697
let (expected_src, expected_dst) = &[
698698
("d/a", "d-renamed/a"),
699699
("d/c", "d-renamed/c"),
700700
("d/b", "d-renamed/b"),
701+
("d/subdir", "d-renamed/subdir"),
701702
("d/subdir/d", "d-renamed/subdir/d"),
702703
][calls - 1];
703704
assert_eq!(src.location, expected_src);
704705
assert_eq!(dst.location, expected_dst);
705706
}
706-
5 => {
707+
6 => {
707708
assert_eq!(src, None);
708709
assert_eq!(dst.location, "a");
709710
}
710-
6 => {
711+
7 => {
711712
assert_eq!(src, None);
712713
assert_eq!(dst.location, "b");
713714
}
@@ -723,7 +724,7 @@ fn simple_directory_rename_by_id() -> crate::Result {
723724
..Default::default()
724725
}
725726
);
726-
assert_eq!(calls, 7, "Should not have too few calls");
727+
assert_eq!(calls, 8, "Should not have too few calls");
727728
Ok(())
728729
}
729730

gix/tests/gix/object/tree/diff.rs

Lines changed: 70 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -510,119 +510,95 @@ mod track_rewrites {
510510
.collect();
511511
insta::assert_debug_snapshot!(actual, @r#"
512512
[
513-
Addition {
514-
location: "c/src",
515-
relation: Some(
516-
ChildOfParent(
517-
1,
518-
),
519-
),
520-
entry_mode: EntryMode(
513+
Rewrite {
514+
source_location: "cli",
515+
source_entry_mode: EntryMode(
521516
16384,
522517
),
523-
id: Sha1(80e5b08f25f75c2050afbcb794e8434f4cf082f1),
524-
},
525-
Addition {
526-
location: "c/tests",
527-
relation: Some(
528-
ChildOfParent(
529-
1,
518+
source_relation: Some(
519+
Parent(
520+
2,
530521
),
531522
),
523+
source_id: Sha1(f203064a6a81df47498fb415a2064a8ec568ed67),
524+
diff: None,
532525
entry_mode: EntryMode(
533526
16384,
534527
),
535-
id: Sha1(17be3b367831653883a36a2f2a8dea418b8d96b7),
536-
},
537-
Deletion {
538-
location: "cli/src",
528+
id: Sha1(f203064a6a81df47498fb415a2064a8ec568ed67),
529+
location: "c",
539530
relation: Some(
540-
ChildOfParent(
541-
2,
531+
Parent(
532+
1,
542533
),
543534
),
544-
entry_mode: EntryMode(
545-
16384,
546-
),
547-
id: Sha1(80e5b08f25f75c2050afbcb794e8434f4cf082f1),
535+
copy: false,
548536
},
549-
Deletion {
550-
location: "cli/tests",
551-
relation: Some(
537+
Rewrite {
538+
source_location: "cli/src/commands/file/print.rs",
539+
source_entry_mode: EntryMode(
540+
33188,
541+
),
542+
source_relation: Some(
552543
ChildOfParent(
553544
2,
554545
),
555546
),
547+
source_id: Sha1(081093be2ba0d2be62d14363f43859355bee2aa2),
548+
diff: None,
556549
entry_mode: EntryMode(
557-
16384,
550+
33188,
558551
),
559-
id: Sha1(17be3b367831653883a36a2f2a8dea418b8d96b7),
560-
},
561-
Addition {
562-
location: "c/src/commands",
552+
id: Sha1(081093be2ba0d2be62d14363f43859355bee2aa2),
553+
location: "c/src/commands/file/print.rs",
563554
relation: Some(
564555
ChildOfParent(
565556
1,
566557
),
567558
),
568-
entry_mode: EntryMode(
559+
copy: false,
560+
},
561+
Rewrite {
562+
source_location: "cli/src/commands/file",
563+
source_entry_mode: EntryMode(
569564
16384,
570565
),
571-
id: Sha1(f414de88468352d59c129d0e7686fb1e1f387929),
572-
},
573-
Deletion {
574-
location: "cli/src/commands",
575-
relation: Some(
566+
source_relation: Some(
576567
ChildOfParent(
577568
2,
578569
),
579570
),
571+
source_id: Sha1(0f3bc154b577b84fb5ce31383e25acc99c2f24a5),
572+
diff: None,
580573
entry_mode: EntryMode(
581574
16384,
582575
),
583-
id: Sha1(f414de88468352d59c129d0e7686fb1e1f387929),
584-
},
585-
Addition {
576+
id: Sha1(0f3bc154b577b84fb5ce31383e25acc99c2f24a5),
586577
location: "c/src/commands/file",
587578
relation: Some(
588579
ChildOfParent(
589580
1,
590581
),
591582
),
592-
entry_mode: EntryMode(
593-
16384,
594-
),
595-
id: Sha1(0f3bc154b577b84fb5ce31383e25acc99c2f24a5),
596-
},
597-
Deletion {
598-
location: "cli/src/commands/file",
599-
relation: Some(
600-
ChildOfParent(
601-
2,
602-
),
603-
),
604-
entry_mode: EntryMode(
605-
16384,
606-
),
607-
id: Sha1(0f3bc154b577b84fb5ce31383e25acc99c2f24a5),
583+
copy: false,
608584
},
609585
Rewrite {
610-
source_location: "cli/src/commands/file/print.rs",
586+
source_location: "cli/tests",
611587
source_entry_mode: EntryMode(
612-
33188,
588+
16384,
613589
),
614590
source_relation: Some(
615591
ChildOfParent(
616592
2,
617593
),
618594
),
619-
source_id: Sha1(081093be2ba0d2be62d14363f43859355bee2aa2),
595+
source_id: Sha1(17be3b367831653883a36a2f2a8dea418b8d96b7),
620596
diff: None,
621597
entry_mode: EntryMode(
622-
33188,
598+
16384,
623599
),
624-
id: Sha1(081093be2ba0d2be62d14363f43859355bee2aa2),
625-
location: "c/src/commands/file/print.rs",
600+
id: Sha1(17be3b367831653883a36a2f2a8dea418b8d96b7),
601+
location: "c/tests",
626602
relation: Some(
627603
ChildOfParent(
628604
1,
@@ -702,6 +678,30 @@ mod track_rewrites {
702678
),
703679
copy: false,
704680
},
681+
Rewrite {
682+
source_location: "cli/src",
683+
source_entry_mode: EntryMode(
684+
16384,
685+
),
686+
source_relation: Some(
687+
ChildOfParent(
688+
2,
689+
),
690+
),
691+
source_id: Sha1(80e5b08f25f75c2050afbcb794e8434f4cf082f1),
692+
diff: None,
693+
entry_mode: EntryMode(
694+
16384,
695+
),
696+
id: Sha1(80e5b08f25f75c2050afbcb794e8434f4cf082f1),
697+
location: "c/src",
698+
relation: Some(
699+
ChildOfParent(
700+
1,
701+
),
702+
),
703+
copy: false,
704+
},
705705
Rewrite {
706706
source_location: "cli/tests/test_file_chmod_command.rs",
707707
source_entry_mode: EntryMode(
@@ -943,24 +943,24 @@ mod track_rewrites {
943943
copy: false,
944944
},
945945
Rewrite {
946-
source_location: "cli",
946+
source_location: "cli/src/commands",
947947
source_entry_mode: EntryMode(
948948
16384,
949949
),
950950
source_relation: Some(
951-
Parent(
951+
ChildOfParent(
952952
2,
953953
),
954954
),
955-
source_id: Sha1(f203064a6a81df47498fb415a2064a8ec568ed67),
955+
source_id: Sha1(f414de88468352d59c129d0e7686fb1e1f387929),
956956
diff: None,
957957
entry_mode: EntryMode(
958958
16384,
959959
),
960-
id: Sha1(f203064a6a81df47498fb415a2064a8ec568ed67),
961-
location: "c",
960+
id: Sha1(f414de88468352d59c129d0e7686fb1e1f387929),
961+
location: "c/src/commands",
962962
relation: Some(
963-
Parent(
963+
ChildOfParent(
964964
1,
965965
),
966966
),

0 commit comments

Comments
 (0)