Skip to content

Commit a590b99

Browse files
committed
make it easy to display blamed lines
Currently it's not quite clear where to get them from.
1 parent 35f94d4 commit a590b99

File tree

2 files changed

+64
-58
lines changed

2 files changed

+64
-58
lines changed

crate-status.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -365,8 +365,9 @@ Check out the [performance discussion][gix-diff-performance] as well.
365365

366366
* [ ] commit-annotations for a single file
367367
- [ ] progress
368-
- [ ] interruptability
368+
- [ ] interruptibility
369369
- [ ] streaming
370+
- [ ] support for worktree changes (creates virtual commit on top of `HEAD`)
370371
* [x] API documentation
371372
* [ ] Examples
372373

gix-blame/src/lib.rs

Lines changed: 62 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -1,58 +1,39 @@
11
//! A crate to implement an algorithm to annotate lines in tracked files with the commits that changed them.
2+
//!
3+
//! ### Terminology
4+
//!
5+
//! * **Original File**
6+
//! - The file as it exists in `HEAD`.
7+
//! - the initial state with all lines that we need to associate with a *Blamed File*.
8+
//! * **Blamed File**
9+
//! - A file at a version (i.e. commit) that introduces hunks into the final 'image'.
10+
//! * **Suspects**
11+
//! - The versions of the files that can contain hunks that we could use in the final 'image'
12+
//! - multiple at the same time as the commit-graph may split up.
13+
//! - turns into *Blamed File* once we have found an association into the *Original File*.
14+
//! - every [`UnblamedHunk`] can have multiple suspects of which we find the best match.
215
#![deny(rust_2018_idioms)]
316
#![forbid(unsafe_code)]
417

518
use std::{
619
collections::BTreeMap,
7-
ops::{Add, AddAssign, Range, SubAssign},
20+
ops::{AddAssign, Range, SubAssign},
821
path::PathBuf,
922
};
1023

1124
use gix_hash::ObjectId;
1225
use gix_object::bstr::BStr;
1326
use gix_object::FindExt;
1427

28+
/// Describes the offset of a particular hunk relative to the *Original File*.
1529
#[derive(Clone, Copy, Debug, PartialEq)]
1630
pub enum Offset {
31+
/// The number of lines to add.
1732
Added(u32),
33+
/// The number of lines to remove.
1834
Deleted(u32),
1935
}
2036

21-
impl Add<u32> for Offset {
22-
type Output = Offset;
23-
24-
fn add(self, rhs: u32) -> Self::Output {
25-
let Self::Added(added) = self else { todo!() };
26-
27-
Self::Added(added + rhs)
28-
}
29-
}
30-
31-
impl Add<Offset> for Offset {
32-
type Output = Offset;
33-
34-
fn add(self, rhs: Offset) -> Self::Output {
35-
match (self, rhs) {
36-
(Self::Added(added), Offset::Added(added_rhs)) => Self::Added(added + added_rhs),
37-
(Self::Added(added), Offset::Deleted(deleted_rhs)) => {
38-
if deleted_rhs > added {
39-
Self::Deleted(deleted_rhs - added)
40-
} else {
41-
Self::Added(added - deleted_rhs)
42-
}
43-
}
44-
(Self::Deleted(deleted), Offset::Added(added_rhs)) => {
45-
if added_rhs > deleted {
46-
Self::Added(added_rhs - deleted)
47-
} else {
48-
Self::Deleted(deleted - added_rhs)
49-
}
50-
}
51-
(Self::Deleted(deleted), Offset::Deleted(deleted_rhs)) => Self::Deleted(deleted + deleted_rhs),
52-
}
53-
}
54-
}
55-
5637
impl AddAssign<u32> for Offset {
5738
fn add_assign(&mut self, rhs: u32) {
5839
match self {
@@ -83,23 +64,33 @@ impl SubAssign<u32> for Offset {
8364
}
8465
}
8566

67+
/// A mapping of a section of the *Original File* to the section in a *Blamed File* that introduced it.
68+
///
69+
/// Both ranges are of the same size, but may use different [starting points](Range::start). Naturally,
70+
/// they have the same content, which is the reason they are in what is returned by [`blame_file()`].
71+
// TODO: see if this can be encoded as `start_in_original_file` and `start_in_blamed_file` and a single `len`.
8672
#[derive(Debug, PartialEq)]
8773
pub struct BlameEntry {
74+
/// The section of tokens in the tokenized version of the *Blamed File* (typically lines).
8875
pub range_in_blamed_file: Range<u32>,
76+
/// The section of tokens in the tokenized version of the *Original File* (typically lines).
8977
pub range_in_original_file: Range<u32>,
78+
/// The commit that introduced the section into the *Blamed File*.
9079
pub commit_id: ObjectId,
9180
}
9281

9382
impl BlameEntry {
83+
/// Create a new instance.
9484
pub fn new(range_in_blamed_file: Range<u32>, range_in_original_file: Range<u32>, commit_id: ObjectId) -> Self {
95-
assert!(
85+
debug_assert!(
9686
range_in_blamed_file.end > range_in_blamed_file.start,
9787
"{range_in_blamed_file:?}"
9888
);
99-
assert!(
89+
debug_assert!(
10090
range_in_original_file.end > range_in_original_file.start,
10191
"{range_in_original_file:?}"
10292
);
93+
debug_assert_eq!(range_in_original_file.len(), range_in_blamed_file.len());
10394

10495
Self {
10596
range_in_blamed_file: range_in_blamed_file.clone(),
@@ -108,8 +99,9 @@ impl BlameEntry {
10899
}
109100
}
110101

102+
/// Create a new instance by creating `range_in_blamed_file` after applying `offset` to `range_in_original_file`.
111103
fn with_offset(range_in_original_file: Range<u32>, commit_id: ObjectId, offset: Offset) -> Self {
112-
assert!(
104+
debug_assert!(
113105
range_in_original_file.end > range_in_original_file.start,
114106
"{range_in_original_file:?}"
115107
);
@@ -121,7 +113,7 @@ impl BlameEntry {
121113
commit_id,
122114
},
123115
Offset::Deleted(deleted) => {
124-
assert!(
116+
debug_assert!(
125117
range_in_original_file.start >= deleted,
126118
"{range_in_original_file:?} {offset:?}"
127119
);
@@ -136,8 +128,9 @@ impl BlameEntry {
136128
}
137129
}
138130

131+
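/// Create a new instance from `unblamed_hunk`, using the range it records for the suspect `commit_id`, which must be present.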
139132
fn from_unblamed_hunk(unblamed_hunk: &UnblamedHunk, commit_id: ObjectId) -> Self {
140-
let range_in_original_file = unblamed_hunk.suspects.get(&commit_id).expect("TODO");
133+
let range_in_original_file = unblamed_hunk.suspects.get(&commit_id).unwrap();
141134

142135
Self {
143136
range_in_blamed_file: unblamed_hunk.range_in_blamed_file.clone(),
@@ -170,6 +163,7 @@ impl LineRange for Range<u32> {
170163
}
171164
}
172165

166+
/// A hunk in the *Original File* which has not been associated with a *Blamed File* yet, along with its suspects.
173167
#[derive(Clone, Debug, PartialEq)]
174168
pub struct UnblamedHunk {
175169
pub range_in_blamed_file: Range<u32>,
@@ -761,6 +755,33 @@ fn coalesce_blame_entries(lines_blamed: Vec<BlameEntry>) -> Vec<BlameEntry> {
761755
}
762756

763757
// TODO: do not instantiate anything, get everything passed as argument.
758+
/// ## The algorithm
759+
///
760+
/// *For brevity, `HEAD` denotes the starting point of the blame operation. It could be any commit, or even commits that
761+
/// represent the worktree state.*
762+
/// We begin with a single [`UnblamedHunk`] and a single suspect, usually `HEAD` as the commit containing the *Original File*.
763+
/// We traverse the commit graph starting at `HEAD`, and see if there have been changes to `worktree_path`. If so, we have found
764+
/// a *Blamed File* and a *Suspect* commit, and have hunks that represent these changes. Now the [`UnblamedHunk`] is split at
765+
/// the boundaries of each matching hunk, creating a new [`UnblamedHunk`] on each side, along with a [`BlameEntry`] to represent
766+
/// the match.
767+
/// This is repeated until there are no non-empty [`UnblamedHunk`]s left.
768+
///
769+
/// At a high level, what we want to do is the following:
770+
///
771+
/// - get the commit that belongs to a commit id
772+
/// - walk through parents
773+
/// - for each parent, do a diff and mark lines that don’t have a suspect (this is the term
774+
/// used in `libgit2`) yet, but that have been changed in this commit
775+
///
776+
/// The algorithm in `libgit2` works by going through parents and keeping a linked list of blame
777+
/// suspects. It can be visualized as follows:
778+
//
779+
// <---------------------------------------->
780+
// <---------------><----------------------->
781+
// <---><----------><----------------------->
782+
// <---><----------><-------><-----><------->
783+
// <---><---><-----><-------><-----><------->
784+
// <---><---><-----><-------><-----><-><-><->
764785
pub fn blame_file<E>(
765786
odb: impl gix_object::Find + gix_object::FindHeader,
766787
traverse: impl IntoIterator<Item = Result<gix_traverse::commit::Info, E>>,
@@ -770,22 +791,6 @@ pub fn blame_file<E>(
770791
file_path: &BStr,
771792
) -> Result<Vec<BlameEntry>, E> {
772793
// TODO
773-
// At a high level, what we want to do is the following:
774-
//
775-
// - get the commit that belongs to a commit id
776-
// - walk through parents
777-
// - for each parent, do a diff and mark lines that don’t have a suspect (this is the term
778-
// used in `libgit2`) yet, but that have been changed in this commit
779-
//
780-
// The algorithm in `libgit2` works by going through parents and keeping a linked list of blame
781-
// suspects. It can be visualized as follows:
782-
//
783-
// <---------------------------------------->
784-
// <---------------><----------------------->
785-
// <---><----------><----------------------->
786-
// <---><----------><-------><-----><------->
787-
// <---><---><-----><-------><-----><------->
788-
// <---><---><-----><-------><-----><-><-><->
789794

790795
// Needed for `to_str`.
791796
use gix_object::bstr::ByteSlice;

0 commit comments

Comments
 (0)