11//! A crate to implement an algorithm to annotate lines in tracked files with the commits that changed them.
2+ //!
3+ //! ### Terminology
4+ //!
5+ //! * **Original File**
6+ //! - The file as it exists in `HEAD`.
7+ //! - The initial state, with all lines that we need to associate with a *Blamed File*.
8+ //! * **Blamed File**
9+ //! - A file at a version (i.e. commit) that introduces hunks into the final 'image'.
10+ //! * **Suspects**
11+ //! - The versions of the files that can contain hunks that we could use in the final 'image'.
12+ //! - There can be multiple suspects at the same time, as the commit-graph may split up.
13+ //! - A suspect turns into a *Blamed File* once we have found an association into the *Original File*.
14+ //! - Every [`UnblamedHunk`] can have multiple suspects, of which we find the best match.
215#![ deny( rust_2018_idioms) ]
316#![ forbid( unsafe_code) ]
417
518use std:: {
619 collections:: BTreeMap ,
7- ops:: { Add , AddAssign , Range , SubAssign } ,
20+ ops:: { AddAssign , Range , SubAssign } ,
821 path:: PathBuf ,
922} ;
1023
1124use gix_hash:: ObjectId ;
1225use gix_object:: bstr:: BStr ;
1326use gix_object:: FindExt ;
1427
28+ /// Describes the offset of a particular hunk relative to the *Original File*.
1529#[ derive( Clone , Copy , Debug , PartialEq ) ]
1630pub enum Offset {
31+ /// The number of lines to add.
1732 Added ( u32 ) ,
33+ /// The number of lines to remove.
1834 Deleted ( u32 ) ,
1935}
2036
21- impl Add < u32 > for Offset {
22- type Output = Offset ;
23-
24- fn add ( self , rhs : u32 ) -> Self :: Output {
25- let Self :: Added ( added) = self else { todo ! ( ) } ;
26-
27- Self :: Added ( added + rhs)
28- }
29- }
30-
31- impl Add < Offset > for Offset {
32- type Output = Offset ;
33-
34- fn add ( self , rhs : Offset ) -> Self :: Output {
35- match ( self , rhs) {
36- ( Self :: Added ( added) , Offset :: Added ( added_rhs) ) => Self :: Added ( added + added_rhs) ,
37- ( Self :: Added ( added) , Offset :: Deleted ( deleted_rhs) ) => {
38- if deleted_rhs > added {
39- Self :: Deleted ( deleted_rhs - added)
40- } else {
41- Self :: Added ( added - deleted_rhs)
42- }
43- }
44- ( Self :: Deleted ( deleted) , Offset :: Added ( added_rhs) ) => {
45- if added_rhs > deleted {
46- Self :: Added ( added_rhs - deleted)
47- } else {
48- Self :: Deleted ( deleted - added_rhs)
49- }
50- }
51- ( Self :: Deleted ( deleted) , Offset :: Deleted ( deleted_rhs) ) => Self :: Deleted ( deleted + deleted_rhs) ,
52- }
53- }
54- }
55-
5637impl AddAssign < u32 > for Offset {
5738 fn add_assign ( & mut self , rhs : u32 ) {
5839 match self {
@@ -83,23 +64,33 @@ impl SubAssign<u32> for Offset {
8364 }
8465}
8566
67+ /// A mapping of a section of the *Original File* to the section in a *Blamed File* that introduced it.
68+ ///
69+ /// Both ranges are of the same size, but may use different [starting points](Range::start). Naturally,
70+ /// they have the same content, which is the reason they are in what is returned by [`blame_file()`].
71+ // TODO: see if this can be encoded as `start_in_original_file` and `start_in_blamed_file` and a single `len`.
8672#[ derive( Debug , PartialEq ) ]
8773pub struct BlameEntry {
74+ /// The section of tokens in the tokenized version of the *Blamed File* (typically lines).
8875 pub range_in_blamed_file : Range < u32 > ,
76+ /// The section of tokens in the tokenized version of the *Original File* (typically lines).
8977 pub range_in_original_file : Range < u32 > ,
78+ /// The commit that introduced the section into the *Blamed File*.
9079 pub commit_id : ObjectId ,
9180}
9281
9382impl BlameEntry {
83+ /// Create a new instance.
9484 pub fn new ( range_in_blamed_file : Range < u32 > , range_in_original_file : Range < u32 > , commit_id : ObjectId ) -> Self {
95- assert ! (
85+ debug_assert ! (
9686 range_in_blamed_file. end > range_in_blamed_file. start,
9787 "{range_in_blamed_file:?}"
9888 ) ;
99- assert ! (
89+ debug_assert ! (
10090 range_in_original_file. end > range_in_original_file. start,
10191 "{range_in_original_file:?}"
10292 ) ;
93+ debug_assert_eq ! ( range_in_original_file. len( ) , range_in_blamed_file. len( ) ) ;
10394
10495 Self {
10596 range_in_blamed_file : range_in_blamed_file. clone ( ) ,
@@ -108,8 +99,9 @@ impl BlameEntry {
10899 }
109100 }
110101
102+ /// Create a new instance by creating `range_in_blamed_file` after applying `offset` to `range_in_original_file`.
111103 fn with_offset ( range_in_original_file : Range < u32 > , commit_id : ObjectId , offset : Offset ) -> Self {
112- assert ! (
104+ debug_assert ! (
113105 range_in_original_file. end > range_in_original_file. start,
114106 "{range_in_original_file:?}"
115107 ) ;
@@ -121,7 +113,7 @@ impl BlameEntry {
121113 commit_id,
122114 } ,
123115 Offset :: Deleted ( deleted) => {
124- assert ! (
116+ debug_assert ! (
125117 range_in_original_file. start >= deleted,
126118 "{range_in_original_file:?} {offset:?}"
127119 ) ;
@@ -136,8 +128,9 @@ impl BlameEntry {
136128 }
137129 }
138130
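131+ /// Create a new instance from `unblamed_hunk`, using the range it stores for the suspect `commit_id`; panics if `commit_id` is not among its suspects.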
139132 fn from_unblamed_hunk ( unblamed_hunk : & UnblamedHunk , commit_id : ObjectId ) -> Self {
140- let range_in_original_file = unblamed_hunk. suspects . get ( & commit_id) . expect ( "TODO" ) ;
133+ let range_in_original_file = unblamed_hunk. suspects . get ( & commit_id) . unwrap ( ) ;
141134
142135 Self {
143136 range_in_blamed_file : unblamed_hunk. range_in_blamed_file . clone ( ) ,
@@ -170,6 +163,7 @@ impl LineRange for Range<u32> {
170163 }
171164}
172165
166+ /// A hunk in the *Original File* which has not been attributed to a commit yet, along with the suspects that may have introduced it.
173167#[ derive( Clone , Debug , PartialEq ) ]
174168pub struct UnblamedHunk {
175169 pub range_in_blamed_file : Range < u32 > ,
@@ -761,6 +755,33 @@ fn coalesce_blame_entries(lines_blamed: Vec<BlameEntry>) -> Vec<BlameEntry> {
761755}
762756
763757// TODO: do not instantiate anything, get everything passed as argument.
758+ /// ## The algorithm
759+ ///
760+ /// *For brevity, `HEAD` denotes the starting point of the blame operation. It could be any commit, or even commits that
761+ /// represent the worktree state.*
762+ /// We begin with a single [`UnblamedHunk`] and a single suspect, usually `HEAD` as the commit containing the *Original File*.
763+ /// We traverse the commit graph starting at `HEAD`, and see if there have been changes to `worktree_path`. If so, we have found
764+ /// a *Blamed File* and a *Suspect* commit, and have hunks that represent these changes. Now the [`UnblamedHunk`] is split at
765+ /// the boundaries of each matching hunk, creating a new [`UnblamedHunk`] on each side, along with a [`BlameEntry`] to represent
766+ /// the match.
767+ /// This is repeated until there are no non-empty [`UnblamedHunk`]s left.
768+ ///
769+ /// At a high level, what we want to do is the following:
770+ ///
771+ /// - get the commit that belongs to a commit id
772+ /// - walk through parents
773+ /// - for each parent, do a diff and mark lines that don’t have a suspect (this is the term
774+ /// used in `libgit2`) yet, but that have been changed in this commit
775+ ///
776+ /// The algorithm in `libgit2` works by going through parents and keeping a linked list of blame
777+ /// suspects. It can be visualized as follows:
778+ //
779+ // <---------------------------------------->
780+ // <---------------><----------------------->
781+ // <---><----------><----------------------->
782+ // <---><----------><-------><-----><------->
783+ // <---><---><-----><-------><-----><------->
784+ // <---><---><-----><-------><-----><-><-><->
764785pub fn blame_file < E > (
765786 odb : impl gix_object:: Find + gix_object:: FindHeader ,
766787 traverse : impl IntoIterator < Item = Result < gix_traverse:: commit:: Info , E > > ,
@@ -770,22 +791,6 @@ pub fn blame_file<E>(
770791 file_path : & BStr ,
771792) -> Result < Vec < BlameEntry > , E > {
772793 // TODO
773- // At a high level, what we want to do is the following:
774- //
775- // - get the commit that belongs to a commit id
776- // - walk through parents
777- // - for each parent, do a diff and mark lines that don’t have a suspect (this is the term
778- // used in `libgit2`) yet, but that have been changed in this commit
779- //
780- // The algorithm in `libgit2` works by going through parents and keeping a linked list of blame
781- // suspects. It can be visualized as follows:
782- //
783- // <---------------------------------------->
784- // <---------------><----------------------->
785- // <---><----------><----------------------->
786- // <---><----------><-------><-----><------->
787- // <---><---><-----><-------><-----><------->
788- // <---><---><-----><-------><-----><-><-><->
789794
790795 // Needed for `to_str`.
791796 use gix_object:: bstr:: ByteSlice ;
0 commit comments