Skip to content

Commit 6e1ea6d

Browse files
committed
Correctly process overlapping unblamed hunks
Previously, `process_changes` assumed that `UnblamedHunk`s would never overlap. This constraint has been lifted. As a consequence, the high-level assumption that two different lines from a *Blamed File* can never map to the same line in a *Source File* has been lifted as well. There is not really a way to enforce this constraint in the blame algorithm alone as it heavily depends on the algorithm used for diffing.
1 parent d46766a commit 6e1ea6d

File tree

6 files changed

+71
-50
lines changed

6 files changed

+71
-50
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

gix-blame/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,3 +31,4 @@ gix-fs = { path = "../gix-fs" }
3131
gix-index = { path = "../gix-index" }
3232
gix-odb = { path = "../gix-odb" }
3333
gix-testtools = { path = "../tests/tools" }
34+
pretty_assertions = "1.4.0"

gix-blame/src/file/function.rs

Lines changed: 0 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -303,33 +303,6 @@ pub fn file(
303303
unblamed_hunk.remove_blame(suspect);
304304
true
305305
});
306-
307-
// This block asserts that line ranges for each suspect never overlap. If they did overlap
308-
// this would mean that the same line in a *Source File* would map to more than one line in
309-
// the *Blamed File* and this is not possible.
310-
#[cfg(debug_assertions)]
311-
{
312-
let ranges = hunks_to_blame.iter().fold(
313-
std::collections::BTreeMap::<ObjectId, Vec<Range<u32>>>::new(),
314-
|mut acc, hunk| {
315-
for (suspect, range) in hunk.suspects.clone() {
316-
acc.entry(suspect).or_default().push(range);
317-
}
318-
319-
acc
320-
},
321-
);
322-
323-
for (_, mut ranges) in ranges {
324-
ranges.sort_by(|a, b| a.start.cmp(&b.start));
325-
326-
for window in ranges.windows(2) {
327-
if let [a, b] = window {
328-
assert!(a.end <= b.start, "#{hunks_to_blame:#?}");
329-
}
330-
}
331-
}
332-
}
333306
}
334307

335308
debug_assert_eq!(

gix-blame/src/file/mod.rs

Lines changed: 29 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -320,36 +320,43 @@ fn process_change(
320320

321321
/// Consume `hunks_to_blame` and `changes` to pair up matches ranges (also overlapping) with each other.
322322
/// Once a match is found, it's pushed onto `out`.
323+
///
324+
/// `process_changes` assumes that ranges coming from the same *Source File* can and do
325+
/// occasionally overlap. If it were a desirable property of the blame algorithm as a whole to
326+
/// never have two different lines from a *Blamed File* mapped to the same line in a *Source File*,
327+
/// this property would need to be enforced at a higher level than `process_changes` if which case
328+
/// the nested loops could potentially be flattened into one.
323329
fn process_changes(
324330
hunks_to_blame: Vec<UnblamedHunk>,
325331
changes: Vec<Change>,
326332
suspect: ObjectId,
327333
parent: ObjectId,
328334
) -> Vec<UnblamedHunk> {
329-
let mut hunks_iter = hunks_to_blame.into_iter();
330-
let mut changes_iter = changes.into_iter();
335+
let mut new_hunks_to_blame = Vec::new();
331336

332-
let mut hunk = hunks_iter.next();
333-
let mut change = changes_iter.next();
337+
for hunk in hunks_to_blame {
338+
let mut hunk = Some(hunk);
334339

335-
let mut new_hunks_to_blame = Vec::new();
336-
let mut offset_in_destination = Offset::Added(0);
337-
338-
loop {
339-
(hunk, change) = process_change(
340-
&mut new_hunks_to_blame,
341-
&mut offset_in_destination,
342-
suspect,
343-
parent,
344-
hunk,
345-
change,
346-
);
347-
348-
hunk = hunk.or_else(|| hunks_iter.next());
349-
change = change.or_else(|| changes_iter.next());
350-
351-
if hunk.is_none() && change.is_none() {
352-
break;
340+
let mut offset_in_destination = Offset::Added(0);
341+
342+
let mut changes_iter = changes.iter();
343+
let mut change: Option<Change> = changes_iter.next().map(|change| change.clone());
344+
345+
loop {
346+
(hunk, change) = process_change(
347+
&mut new_hunks_to_blame,
348+
&mut offset_in_destination,
349+
suspect,
350+
parent,
351+
hunk,
352+
change,
353+
);
354+
355+
change = change.or_else(|| changes_iter.next().map(|change| change.clone()));
356+
357+
if hunk.is_none() {
358+
break;
359+
}
353360
}
354361
}
355362
new_hunks_to_blame

gix-blame/src/file/tests.rs

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -959,6 +959,8 @@ mod process_change {
959959
}
960960

961961
mod process_changes {
962+
use pretty_assertions::assert_eq;
963+
962964
use crate::file::{
963965
process_changes,
964966
tests::{new_unblamed_hunk, one_sha, zero_sha},
@@ -1337,4 +1339,38 @@ mod process_changes {
13371339
]
13381340
);
13391341
}
1342+
1343+
#[test]
1344+
fn subsequent_hunks_overlapping_end_of_addition() {
1345+
let suspect = zero_sha();
1346+
let parent = one_sha();
1347+
let hunks_to_blame = vec![
1348+
new_unblamed_hunk(13..16, suspect, Offset::Added(0)),
1349+
new_unblamed_hunk(10..17, suspect, Offset::Added(0)),
1350+
];
1351+
let changes = vec![Change::AddedOrReplaced(10..14, 0)];
1352+
let new_hunks_to_blame = process_changes(hunks_to_blame, changes, suspect, parent);
1353+
1354+
assert_eq!(
1355+
new_hunks_to_blame,
1356+
[
1357+
UnblamedHunk {
1358+
range_in_blamed_file: 13..14,
1359+
suspects: [(suspect, 13..14)].into()
1360+
},
1361+
UnblamedHunk {
1362+
range_in_blamed_file: 14..16,
1363+
suspects: [(parent, 10..12)].into(),
1364+
},
1365+
UnblamedHunk {
1366+
range_in_blamed_file: 10..14,
1367+
suspects: [(suspect, 10..14)].into(),
1368+
},
1369+
UnblamedHunk {
1370+
range_in_blamed_file: 14..17,
1371+
suspects: [(parent, 10..13)].into(),
1372+
},
1373+
]
1374+
);
1375+
}
13401376
}

gix-blame/src/types.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -353,7 +353,10 @@ pub(crate) enum Either<T, U> {
353353
}
354354

355355
/// A single change between two blobs, or an unchanged region.
356-
#[derive(Debug, PartialEq)]
356+
///
357+
/// Line numbers refer to the file that is referred to as `after` or `NewOrDestination`, depending
358+
/// on the context.
359+
#[derive(Clone, Debug, PartialEq)]
357360
pub enum Change {
358361
/// A range of tokens that wasn't changed.
359362
Unchanged(Range<u32>),

0 commit comments

Comments
 (0)