@@ -7,22 +7,23 @@ use gix_object::{
77 bstr:: { BStr , BString } ,
88 FindExt ,
99} ;
10+ use gix_traverse:: commit:: find;
11+ use smallvec:: SmallVec ;
1012use std:: num:: NonZeroU32 ;
1113use std:: ops:: Range ;
1214
1315/// Produce a list of consecutive [`BlameEntry`] instances to indicate in which commits the ranges of the file
14- /// at `traverse[0] :<file_path>` originated in.
16+ /// at `suspect :<file_path>` originated in.
1517///
1618/// ## Paramters
1719///
1820/// * `odb`
1921/// - Access to database objects, also for used for diffing.
2022/// - Should have an object cache for good diff performance.
21- /// * `traverse`
22- /// - The list of commits from the most recent to prior ones, following all parents sorted
23- /// by time.
24- /// - It's paramount that older commits are returned after newer ones.
25- /// - The first commit returned here is the first eligible commit to be responsible for parts of `file_path`.
23+ /// * `suspect`
24+ /// - The first commit to be responsible for parts of `file_path`.
25+ /// * `cache`
26+ /// - Optionally, the commitgraph cache.
2627/// * `file_path`
2728/// - A *slash-separated* worktree-relative path to the file to blame.
2829/// * `range`
@@ -60,20 +61,14 @@ use std::ops::Range;
6061// <---><----------><-------><-----><------->
6162// <---><---><-----><-------><-----><------->
6263// <---><---><-----><-------><-----><-><-><->
63- pub fn file < E > (
64+ pub fn file (
6465 odb : impl gix_object:: Find + gix_object:: FindHeader ,
65- traverse : impl IntoIterator < Item = Result < gix_traverse:: commit:: Info , E > > ,
66+ suspect : ObjectId ,
67+ cache : Option < gix_commitgraph:: Graph > ,
6668 resource_cache : & mut gix_diff:: blob:: Platform ,
6769 file_path : & BStr ,
6870 range : Option < Range < u32 > > ,
69- ) -> Result < Outcome , Error >
70- where
71- E : Into < Box < dyn std:: error:: Error + Send + Sync + ' static > > ,
72- {
73- let mut traverse = traverse. into_iter ( ) . peekable ( ) ;
74- let Some ( Ok ( suspect) ) = traverse. peek ( ) . map ( |res| res. as_ref ( ) . map ( |item| item. id ) ) else {
75- return Err ( Error :: EmptyTraversal ) ;
76- } ;
71+ ) -> Result < Outcome , Error > {
7772 let _span = gix_trace:: coarse!( "gix_blame::file()" , ?file_path, ?suspect) ;
7873
7974 let mut stats = Statistics :: default ( ) ;
@@ -103,25 +98,43 @@ where
10398 suspects: [ ( suspect, range_in_blamed_file) ] . into( ) ,
10499 } ] ;
105100
101+ let mut buf = Vec :: new ( ) ;
102+ let commit = find ( cache. as_ref ( ) , & odb, & suspect, & mut buf) ?;
103+
104+ let mut queue: gix_revwalk:: PriorityQueue < CommitTime , ObjectId > = gix_revwalk:: PriorityQueue :: new ( ) ;
105+
106+ let commit_time = commit_time ( commit) ;
107+ queue. insert ( commit_time, suspect) ;
108+
106109 let mut out = Vec :: new ( ) ;
107110 let mut diff_state = gix_diff:: tree:: State :: default ( ) ;
108111 let mut previous_entry: Option < ( ObjectId , ObjectId ) > = None ;
109- ' outer: while let Some ( item ) = traverse . next ( ) {
112+ ' outer: while let Some ( suspect ) = queue . pop_value ( ) {
110113 if hunks_to_blame. is_empty ( ) {
111114 break ;
112115 }
113- let commit = item. map_err ( |err| Error :: Traverse ( err. into ( ) ) ) ?;
114- let suspect = commit. id ;
116+
117+ let is_still_suspect = hunks_to_blame. iter ( ) . any ( |hunk| hunk. suspects . contains_key ( & suspect) ) ;
118+
119+ if !is_still_suspect {
120+ // There are no `UnblamedHunk`s associated with this `suspect`, so we can continue with
121+ // the next one.
122+ continue ' outer;
123+ }
124+
115125 stats. commits_traversed += 1 ;
116126
117- let parent_ids = commit. parent_ids ;
127+ let commit = find ( cache. as_ref ( ) , & odb, & suspect, & mut buf) ?;
128+
129+ let parent_ids: ParentIds = collect_parents ( commit, & odb, cache. as_ref ( ) ) ;
130+
118131 if parent_ids. is_empty ( ) {
119- if traverse . peek ( ) . is_none ( ) {
120- // I’m not entirely sure if this is correct yet. `suspect`, at this point, is the `id` of
121- // the last `item` that was yielded by `traverse `, so it makes sense to assign the
122- // remaining lines to it, even though we don’t explicitly check whether that is true
123- // here. We could perhaps use diff-tree-to-tree to compare `suspect`
124- // against an empty tree to validate this assumption.
132+ if queue . is_empty ( ) {
133+ // I’m not entirely sure if this is correct yet. `suspect`, at this point, is the
134+ // `id` of the last `item` that was yielded by `queue `, so it makes sense to assign
135+ // the remaining lines to it, even though we don’t explicitly check whether that is
136+ // true here. We could perhaps use diff-tree-to-tree to compare `suspect` against
137+ // an empty tree to validate this assumption.
125138 if unblamed_to_out_is_done ( & mut hunks_to_blame, & mut out, suspect) {
126139 break ' outer;
127140 }
@@ -143,7 +156,41 @@ where
143156 continue ;
144157 } ;
145158
146- for ( pid, parent_id) in parent_ids. iter ( ) . enumerate ( ) {
159+ // This block asserts that, for every `UnblamedHunk`, all lines in the *Blamed File* are
160+ // identical to the corresponding lines in the *Source File*.
161+ #[ cfg( debug_assertions) ]
162+ {
163+ let source_blob = odb. find_blob ( & entry_id, & mut buf) ?. data . to_vec ( ) ;
164+ let mut source_interner = gix_diff:: blob:: intern:: Interner :: new ( source_blob. len ( ) / 100 ) ;
165+ let source_lines_as_tokens: Vec < _ > = tokens_for_diffing ( & source_blob)
166+ . tokenize ( )
167+ . map ( |token| source_interner. intern ( token) )
168+ . collect ( ) ;
169+
170+ let mut blamed_interner = gix_diff:: blob:: intern:: Interner :: new ( blamed_file_blob. len ( ) / 100 ) ;
171+ let blamed_lines_as_tokens: Vec < _ > = tokens_for_diffing ( & blamed_file_blob)
172+ . tokenize ( )
173+ . map ( |token| blamed_interner. intern ( token) )
174+ . collect ( ) ;
175+
176+ for hunk in hunks_to_blame. iter ( ) {
177+ if let Some ( range_in_suspect) = hunk. suspects . get ( & suspect) {
178+ let range_in_blamed_file = hunk. range_in_blamed_file . clone ( ) ;
179+
180+ for ( blamed_line_number, source_line_number) in range_in_blamed_file. zip ( range_in_suspect. clone ( ) ) {
181+ let source_token = source_lines_as_tokens[ source_line_number as usize ] ;
182+ let blame_token = blamed_lines_as_tokens[ blamed_line_number as usize ] ;
183+
184+ let source_line = BString :: new ( source_interner[ source_token] . into ( ) ) ;
185+ let blamed_line = BString :: new ( blamed_interner[ blame_token] . into ( ) ) ;
186+
187+ assert_eq ! ( source_line, blamed_line) ;
188+ }
189+ }
190+ }
191+ }
192+
193+ for ( pid, ( parent_id, parent_commit_time) ) in parent_ids. iter ( ) . enumerate ( ) {
147194 if let Some ( parent_entry_id) =
148195 find_path_entry_in_commit ( & odb, parent_id, file_path, & mut buf, & mut buf2, & mut stats) ?
149196 {
@@ -153,17 +200,19 @@ where
153200 }
154201 if no_change_in_entry {
155202 pass_blame_from_to ( suspect, * parent_id, & mut hunks_to_blame) ;
203+ queue. insert ( * parent_commit_time, * parent_id) ;
156204 continue ' outer;
157205 }
158206 }
159207 }
160208
161209 let more_than_one_parent = parent_ids. len ( ) > 1 ;
162- for parent_id in parent_ids {
210+ for ( parent_id, parent_commit_time) in parent_ids {
211+ queue. insert ( parent_commit_time, parent_id) ;
163212 let changes_for_file_path = tree_diff_at_file_path (
164213 & odb,
165214 file_path,
166- commit . id ,
215+ suspect ,
167216 parent_id,
168217 & mut stats,
169218 & mut diff_state,
@@ -588,8 +637,82 @@ fn find_path_entry_in_commit(
588637 Ok ( res. map ( |e| e. oid ) )
589638}
590639
591- /// Return an iterator over tokens for use in diffing. These usually lines, but iit's important to unify them
592- /// so the later access shows the right thing.
640+ type CommitTime = i64 ;
641+
642+ fn commit_time ( commit : gix_traverse:: commit:: Either < ' _ , ' _ > ) -> CommitTime {
643+ match commit {
644+ gix_traverse:: commit:: Either :: CommitRefIter ( commit_ref_iter) => {
645+ let mut commit_time = 0 ;
646+ for token in commit_ref_iter {
647+ use gix_object:: commit:: ref_iter:: Token as T ;
648+ match token {
649+ Ok ( T :: Tree { .. } ) => continue ,
650+ Ok ( T :: Parent { .. } ) => continue ,
651+ Ok ( T :: Author { .. } ) => continue ,
652+ Ok ( T :: Committer { signature } ) => {
653+ commit_time = signature. time . seconds ;
654+ break ;
655+ }
656+ Ok ( _unused_token) => break ,
657+ Err ( _err) => todo ! ( ) ,
658+ }
659+ }
660+ commit_time
661+ }
662+ gix_traverse:: commit:: Either :: CachedCommit ( commit) => commit. committer_timestamp ( ) as i64 ,
663+ }
664+ }
665+
666+ type ParentIds = SmallVec < [ ( gix_hash:: ObjectId , i64 ) ; 2 ] > ;
667+
668+ fn collect_parents (
669+ commit : gix_traverse:: commit:: Either < ' _ , ' _ > ,
670+ odb : & impl gix_object:: Find ,
671+ cache : Option < & gix_commitgraph:: Graph > ,
672+ ) -> ParentIds {
673+ let mut parent_ids: ParentIds = Default :: default ( ) ;
674+
675+ match commit {
676+ gix_traverse:: commit:: Either :: CachedCommit ( commit) => {
677+ let cache = cache
678+ . as_ref ( )
679+ . expect ( "find returned a cached commit, so we expect cache to be present" ) ;
680+ for parent_id in commit. iter_parents ( ) {
681+ match parent_id {
682+ Ok ( pos) => {
683+ let parent = cache. commit_at ( pos) ;
684+
685+ parent_ids. push ( ( parent. id ( ) . to_owned ( ) , parent. committer_timestamp ( ) as i64 ) ) ;
686+ }
687+ Err ( _) => todo ! ( ) ,
688+ }
689+ }
690+ }
691+ gix_traverse:: commit:: Either :: CommitRefIter ( commit_ref_iter) => {
692+ for token in commit_ref_iter {
693+ match token {
694+ Ok ( gix_object:: commit:: ref_iter:: Token :: Tree { .. } ) => continue ,
695+ Ok ( gix_object:: commit:: ref_iter:: Token :: Parent { id } ) => {
696+ let mut buf = Vec :: new ( ) ;
697+ let parent = odb. find_commit_iter ( id. as_ref ( ) , & mut buf) . ok ( ) ;
698+ let parent_commit_time = parent
699+ . and_then ( |parent| parent. committer ( ) . ok ( ) . map ( |committer| committer. time . seconds ) )
700+ . unwrap_or_default ( ) ;
701+
702+ parent_ids. push ( ( id, parent_commit_time) ) ;
703+ }
704+ Ok ( _unused_token) => break ,
705+ Err ( _err) => todo ! ( ) ,
706+ }
707+ }
708+ }
709+ } ;
710+
711+ parent_ids
712+ }
713+
714+ /// Return an iterator over tokens for use in diffing. These are usually lines, but it's important
715+ /// to unify them so the later access shows the right thing.
593716pub ( crate ) fn tokens_for_diffing ( data : & [ u8 ] ) -> impl TokenSource < Token = & [ u8 ] > {
594717 gix_diff:: blob:: sources:: byte_lines_with_terminator ( data)
595718}
0 commit comments