@@ -7,22 +7,23 @@ use gix_object::{
77 bstr:: { BStr , BString } ,
88 FindExt ,
99} ;
10+ use gix_traverse:: commit:: find as find_commit;
11+ use smallvec:: SmallVec ;
1012use std:: num:: NonZeroU32 ;
1113use std:: ops:: Range ;
1214
1315/// Produce a list of consecutive [`BlameEntry`] instances to indicate in which commits the ranges of the file
14- /// at `traverse[0] :<file_path>` originated in.
16+ /// at `suspect :<file_path>` originated in.
1517///
1618/// ## Paramters
1719///
1820/// * `odb`
1921/// - Access to database objects, also for used for diffing.
2022/// - Should have an object cache for good diff performance.
21- /// * `traverse`
22- /// - The list of commits from the most recent to prior ones, following all parents sorted
23- /// by time.
24- /// - It's paramount that older commits are returned after newer ones.
25- /// - The first commit returned here is the first eligible commit to be responsible for parts of `file_path`.
23+ /// * `suspect`
24+ /// - The first commit to be responsible for parts of `file_path`.
25+ /// * `cache`
26+ /// - Optionally, the commitgraph cache.
2627/// * `file_path`
2728/// - A *slash-separated* worktree-relative path to the file to blame.
2829/// * `range`
@@ -60,20 +61,14 @@ use std::ops::Range;
6061// <---><----------><-------><-----><------->
6162// <---><---><-----><-------><-----><------->
6263// <---><---><-----><-------><-----><-><-><->
63- pub fn file < E > (
64+ pub fn file (
6465 odb : impl gix_object:: Find + gix_object:: FindHeader ,
65- traverse : impl IntoIterator < Item = Result < gix_traverse:: commit:: Info , E > > ,
66+ suspect : ObjectId ,
67+ cache : Option < gix_commitgraph:: Graph > ,
6668 resource_cache : & mut gix_diff:: blob:: Platform ,
6769 file_path : & BStr ,
6870 range : Option < Range < u32 > > ,
69- ) -> Result < Outcome , Error >
70- where
71- E : Into < Box < dyn std:: error:: Error + Send + Sync + ' static > > ,
72- {
73- let mut traverse = traverse. into_iter ( ) . peekable ( ) ;
74- let Some ( Ok ( suspect) ) = traverse. peek ( ) . map ( |res| res. as_ref ( ) . map ( |item| item. id ) ) else {
75- return Err ( Error :: EmptyTraversal ) ;
76- } ;
71+ ) -> Result < Outcome , Error > {
7772 let _span = gix_trace:: coarse!( "gix_blame::file()" , ?file_path, ?suspect) ;
7873
7974 let mut stats = Statistics :: default ( ) ;
8479 commit_id : suspect,
8580 } ) ?;
8681 let blamed_file_blob = odb. find_blob ( & blamed_file_entry_id, & mut buf) ?. data . to_vec ( ) ;
87- let num_lines_in_blamed = {
88- let mut interner = gix_diff:: blob:: intern:: Interner :: new ( blamed_file_blob. len ( ) / 100 ) ;
89- tokens_for_diffing ( & blamed_file_blob)
90- . tokenize ( )
91- . map ( |token| interner. intern ( token) )
92- . count ( )
93- } as u32 ;
82+ let num_lines_in_blamed = tokens_for_diffing ( & blamed_file_blob) . tokenize ( ) . count ( ) as u32 ;
9483
9584 // Binary or otherwise empty?
9685 if num_lines_in_blamed == 0 {
@@ -103,30 +92,40 @@ where
10392 suspects: [ ( suspect, range_in_blamed_file) ] . into( ) ,
10493 } ] ;
10594
95+ let ( mut buf, mut buf2) = ( Vec :: new ( ) , Vec :: new ( ) ) ;
96+ let commit = find_commit ( cache. as_ref ( ) , & odb, & suspect, & mut buf) ?;
97+ let mut queue: gix_revwalk:: PriorityQueue < CommitTime , ObjectId > = gix_revwalk:: PriorityQueue :: new ( ) ;
98+ queue. insert ( commit_time ( commit) ?, suspect) ;
99+
106100 let mut out = Vec :: new ( ) ;
107101 let mut diff_state = gix_diff:: tree:: State :: default ( ) ;
108102 let mut previous_entry: Option < ( ObjectId , ObjectId ) > = None ;
109- ' outer: while let Some ( item) = traverse. next ( ) {
103+ ' outer: while let Some ( suspect) = queue. pop_value ( ) {
104+ stats. commits_traversed += 1 ;
110105 if hunks_to_blame. is_empty ( ) {
111106 break ;
112107 }
113- let commit = item. map_err ( |err| Error :: Traverse ( err. into ( ) ) ) ?;
114- let suspect = commit. id ;
115- stats. commits_traversed += 1 ;
116108
117- let parent_ids = commit. parent_ids ;
109+ let is_still_suspect = hunks_to_blame. iter ( ) . any ( |hunk| hunk. suspects . contains_key ( & suspect) ) ;
110+ if !is_still_suspect {
111+ // There are no `UnblamedHunk`s associated with this `suspect`, so we can continue with
112+ // the next one.
113+ continue ' outer;
114+ }
115+
116+ let commit = find_commit ( cache. as_ref ( ) , & odb, & suspect, & mut buf) ?;
117+ let parent_ids: ParentIds = collect_parents ( commit, & odb, cache. as_ref ( ) , & mut buf2) ?;
118118 if parent_ids. is_empty ( ) {
119- if traverse . peek ( ) . is_none ( ) {
120- // I’m not entirely sure if this is correct yet. `suspect`, at this point, is the `id` of
121- // the last `item` that was yielded by `traverse `, so it makes sense to assign the
122- // remaining lines to it, even though we don’t explicitly check whether that is true
123- // here. We could perhaps use diff-tree-to-tree to compare `suspect`
124- // against an empty tree to validate this assumption.
119+ if queue . is_empty ( ) {
120+ // I’m not entirely sure if this is correct yet. `suspect`, at this point, is the
121+ // `id` of the last `item` that was yielded by `queue `, so it makes sense to assign
122+ // the remaining lines to it, even though we don’t explicitly check whether that is
123+ // true here. We could perhaps use diff-tree-to-tree to compare `suspect` against
124+ // an empty tree to validate this assumption.
125125 if unblamed_to_out_is_done ( & mut hunks_to_blame, & mut out, suspect) {
126126 break ' outer;
127127 }
128128 }
129-
130129 // There is more, keep looking.
131130 continue ;
132131 }
@@ -143,7 +142,41 @@ where
143142 continue ;
144143 } ;
145144
146- for ( pid, parent_id) in parent_ids. iter ( ) . enumerate ( ) {
145+ // This block asserts that, for every `UnblamedHunk`, all lines in the *Blamed File* are
146+ // identical to the corresponding lines in the *Source File*.
147+ #[ cfg( debug_assertions) ]
148+ {
149+ let source_blob = odb. find_blob ( & entry_id, & mut buf) ?. data . to_vec ( ) ;
150+ let mut source_interner = gix_diff:: blob:: intern:: Interner :: new ( source_blob. len ( ) / 100 ) ;
151+ let source_lines_as_tokens: Vec < _ > = tokens_for_diffing ( & source_blob)
152+ . tokenize ( )
153+ . map ( |token| source_interner. intern ( token) )
154+ . collect ( ) ;
155+
156+ let mut blamed_interner = gix_diff:: blob:: intern:: Interner :: new ( blamed_file_blob. len ( ) / 100 ) ;
157+ let blamed_lines_as_tokens: Vec < _ > = tokens_for_diffing ( & blamed_file_blob)
158+ . tokenize ( )
159+ . map ( |token| blamed_interner. intern ( token) )
160+ . collect ( ) ;
161+
162+ for hunk in hunks_to_blame. iter ( ) {
163+ if let Some ( range_in_suspect) = hunk. suspects . get ( & suspect) {
164+ let range_in_blamed_file = hunk. range_in_blamed_file . clone ( ) ;
165+
166+ for ( blamed_line_number, source_line_number) in range_in_blamed_file. zip ( range_in_suspect. clone ( ) ) {
167+ let source_token = source_lines_as_tokens[ source_line_number as usize ] ;
168+ let blame_token = blamed_lines_as_tokens[ blamed_line_number as usize ] ;
169+
170+ let source_line = BString :: new ( source_interner[ source_token] . into ( ) ) ;
171+ let blamed_line = BString :: new ( blamed_interner[ blame_token] . into ( ) ) ;
172+
173+ assert_eq ! ( source_line, blamed_line) ;
174+ }
175+ }
176+ }
177+ }
178+
179+ for ( pid, ( parent_id, parent_commit_time) ) in parent_ids. iter ( ) . enumerate ( ) {
147180 if let Some ( parent_entry_id) =
148181 find_path_entry_in_commit ( & odb, parent_id, file_path, & mut buf, & mut buf2, & mut stats) ?
149182 {
@@ -153,17 +186,19 @@ where
153186 }
154187 if no_change_in_entry {
155188 pass_blame_from_to ( suspect, * parent_id, & mut hunks_to_blame) ;
189+ queue. insert ( * parent_commit_time, * parent_id) ;
156190 continue ' outer;
157191 }
158192 }
159193 }
160194
161195 let more_than_one_parent = parent_ids. len ( ) > 1 ;
162- for parent_id in parent_ids {
196+ for ( parent_id, parent_commit_time) in parent_ids {
197+ queue. insert ( parent_commit_time, parent_id) ;
163198 let changes_for_file_path = tree_diff_at_file_path (
164199 & odb,
165200 file_path,
166- commit . id ,
201+ suspect ,
167202 parent_id,
168203 & mut stats,
169204 & mut diff_state,
@@ -588,8 +623,51 @@ fn find_path_entry_in_commit(
588623 Ok ( res. map ( |e| e. oid ) )
589624}
590625
591- /// Return an iterator over tokens for use in diffing. These usually lines, but iit's important to unify them
592- /// so the later access shows the right thing.
626+ type CommitTime = i64 ;
627+
628+ fn commit_time ( commit : gix_traverse:: commit:: Either < ' _ , ' _ > ) -> Result < CommitTime , gix_object:: decode:: Error > {
629+ match commit {
630+ gix_traverse:: commit:: Either :: CommitRefIter ( commit_ref_iter) => {
631+ commit_ref_iter. committer ( ) . map ( |c| c. time . seconds )
632+ }
633+ gix_traverse:: commit:: Either :: CachedCommit ( commit) => Ok ( commit. committer_timestamp ( ) as i64 ) ,
634+ }
635+ }
636+
637+ type ParentIds = SmallVec < [ ( gix_hash:: ObjectId , i64 ) ; 2 ] > ;
638+
639+ fn collect_parents (
640+ commit : gix_traverse:: commit:: Either < ' _ , ' _ > ,
641+ odb : & impl gix_object:: Find ,
642+ cache : Option < & gix_commitgraph:: Graph > ,
643+ buf : & mut Vec < u8 > ,
644+ ) -> Result < ParentIds , Error > {
645+ let mut parent_ids: ParentIds = Default :: default ( ) ;
646+ match commit {
647+ gix_traverse:: commit:: Either :: CachedCommit ( commit) => {
648+ let cache = cache
649+ . as_ref ( )
650+ . expect ( "find returned a cached commit, so we expect cache to be present" ) ;
651+ for parent_pos in commit. iter_parents ( ) {
652+ let parent = cache. commit_at ( parent_pos?) ;
653+ parent_ids. push ( ( parent. id ( ) . to_owned ( ) , parent. committer_timestamp ( ) as i64 ) ) ;
654+ }
655+ }
656+ gix_traverse:: commit:: Either :: CommitRefIter ( commit_ref_iter) => {
657+ for id in commit_ref_iter. parent_ids ( ) {
658+ let parent = odb. find_commit_iter ( id. as_ref ( ) , buf) . ok ( ) ;
659+ let parent_commit_time = parent
660+ . and_then ( |parent| parent. committer ( ) . ok ( ) . map ( |committer| committer. time . seconds ) )
661+ . unwrap_or_default ( ) ;
662+ parent_ids. push ( ( id, parent_commit_time) ) ;
663+ }
664+ }
665+ } ;
666+ Ok ( parent_ids)
667+ }
668+
669+ /// Return an iterator over tokens for use in diffing. These are usually lines, but it's important
670+ /// to unify them so the later access shows the right thing.
593671pub ( crate ) fn tokens_for_diffing ( data : & [ u8 ] ) -> impl TokenSource < Token = & [ u8 ] > {
594672 gix_diff:: blob:: sources:: byte_lines_with_terminator ( data)
595673}
0 commit comments