@@ -7,22 +7,23 @@ use gix_object::{
77    bstr:: { BStr ,  BString } , 
88    FindExt , 
99} ; 
10+ use  gix_traverse:: commit:: find; 
11+ use  smallvec:: SmallVec ; 
1012use  std:: num:: NonZeroU32 ; 
1113use  std:: ops:: Range ; 
1214
1315/// Produce a list of consecutive [`BlameEntry`] instances to indicate in which commits the ranges of the file 
14- /// at `traverse[0] :<file_path>` originated in. 
16+ /// at `suspect :<file_path>` originated in. 
1517/// 
1618/// ## Parameters 
1719/// 
1820/// * `odb` 
1921///    - Access to database objects, also used for diffing. 
2022///    - Should have an object cache for good diff performance. 
21- /// * `traverse` 
22- ///    - The list of commits from the most recent to prior ones, following all parents sorted 
23- ///      by time. 
24- ///    - It's paramount that older commits are returned after newer ones. 
25- ///    - The first commit returned here is the first eligible commit to be responsible for parts of `file_path`. 
23+ /// * `suspect` 
24+ ///    - The first commit to be responsible for parts of `file_path`. 
25+ /// * `cache` 
26+ ///    - Optionally, the commitgraph cache. 
2627/// * `file_path` 
2728///    - A *slash-separated* worktree-relative path to the file to blame. 
2829/// * `range` 
@@ -60,20 +61,14 @@ use std::ops::Range;
6061// <---><----------><-------><-----><-------> 
6162// <---><---><-----><-------><-----><-------> 
6263// <---><---><-----><-------><-----><-><-><-> 
63- pub  fn  file < E > ( 
64+ pub  fn  file ( 
6465    odb :  impl  gix_object:: Find  + gix_object:: FindHeader , 
65-     traverse :  impl  IntoIterator < Item  = Result < gix_traverse:: commit:: Info ,  E > > , 
66+     suspect :  ObjectId , 
67+     cache :  Option < gix_commitgraph:: Graph > , 
6668    resource_cache :  & mut  gix_diff:: blob:: Platform , 
6769    file_path :  & BStr , 
6870    range :  Option < Range < u32 > > , 
69- )  -> Result < Outcome ,  Error > 
70- where 
71-     E :  Into < Box < dyn  std:: error:: Error  + Send  + Sync  + ' static > > , 
72- { 
73-     let  mut  traverse = traverse. into_iter ( ) . peekable ( ) ; 
74-     let  Some ( Ok ( suspect) )  = traverse. peek ( ) . map ( |res| res. as_ref ( ) . map ( |item| item. id ) )  else  { 
75-         return  Err ( Error :: EmptyTraversal ) ; 
76-     } ; 
71+ )  -> Result < Outcome ,  Error >  { 
7772    let  _span = gix_trace:: coarse!( "gix_blame::file()" ,  ?file_path,  ?suspect) ; 
7873
7974    let  mut  stats = Statistics :: default ( ) ; 
@@ -103,25 +98,43 @@ where
10398        suspects:  [ ( suspect,  range_in_blamed_file) ] . into( ) , 
10499    } ] ; 
105100
101+     let  mut  buf = Vec :: new ( ) ; 
102+     let  commit = find ( cache. as_ref ( ) ,  & odb,  & suspect,  & mut  buf) ?; 
103+ 
104+     let  mut  queue:  gix_revwalk:: PriorityQueue < CommitTime ,  ObjectId >  = gix_revwalk:: PriorityQueue :: new ( ) ; 
105+ 
106+     let  commit_time = commit_time ( commit) ; 
107+     queue. insert ( commit_time,  suspect) ; 
108+ 
106109    let  mut  out = Vec :: new ( ) ; 
107110    let  mut  diff_state = gix_diff:: tree:: State :: default ( ) ; 
108111    let  mut  previous_entry:  Option < ( ObjectId ,  ObjectId ) >  = None ; 
109-     ' outer:  while  let  Some ( item )  = traverse . next ( )  { 
112+     ' outer:  while  let  Some ( suspect )  = queue . pop_value ( )  { 
110113        if  hunks_to_blame. is_empty ( )  { 
111114            break ; 
112115        } 
113-         let  commit = item. map_err ( |err| Error :: Traverse ( err. into ( ) ) ) ?; 
114-         let  suspect = commit. id ; 
116+ 
117+         let  is_still_suspect = hunks_to_blame. iter ( ) . any ( |hunk| hunk. suspects . contains_key ( & suspect) ) ; 
118+ 
119+         if  !is_still_suspect { 
120+             // There are no `UnblamedHunk`s associated with this `suspect`, so we can continue with 
121+             // the next one. 
122+             continue  ' outer; 
123+         } 
124+ 
115125        stats. commits_traversed  += 1 ; 
116126
117-         let  parent_ids = commit. parent_ids ; 
127+         let  commit = find ( cache. as_ref ( ) ,  & odb,  & suspect,  & mut  buf) ?; 
128+ 
129+         let  parent_ids:  ParentIds  = collect_parents ( commit,  & odb,  cache. as_ref ( ) ) ; 
130+ 
118131        if  parent_ids. is_empty ( )  { 
119-             if  traverse . peek ( ) . is_none ( )  { 
120-                 // I’m not entirely sure if this is correct yet. `suspect`, at this point, is the `id` of  
121-                 // the last `item` that was yielded by `traverse `, so it makes sense to assign the  
122-                 // remaining lines to it, even though we don’t explicitly check whether that is true  
123-                 // here. We could perhaps use diff-tree-to-tree to compare `suspect` 
124-                 // against  an empty tree to validate this assumption. 
132+             if  queue . is_empty ( )  { 
133+                 // I’m not entirely sure if this is correct yet. `suspect`, at this point, is the 
134+                 // `id` of  the last `item` that was yielded by `queue `, so it makes sense to assign 
135+                 // the  remaining lines to it, even though we don’t explicitly check whether that is 
136+                 // true  here. We could perhaps use diff-tree-to-tree to compare `suspect` against  
137+                 // an empty tree to validate this assumption. 
125138                if  unblamed_to_out_is_done ( & mut  hunks_to_blame,  & mut  out,  suspect)  { 
126139                    break  ' outer; 
127140                } 
@@ -143,7 +156,41 @@ where
143156            continue ; 
144157        } ; 
145158
146-         for  ( pid,  parent_id)  in  parent_ids. iter ( ) . enumerate ( )  { 
159+         // This block asserts that, for every `UnblamedHunk`, all lines in the *Blamed File* are 
160+         // identical to the corresponding lines in the *Source File*. 
161+         #[ cfg( debug_assertions) ]  
162+         { 
163+             let  source_blob = odb. find_blob ( & entry_id,  & mut  buf) ?. data . to_vec ( ) ; 
164+             let  mut  source_interner = gix_diff:: blob:: intern:: Interner :: new ( source_blob. len ( )  / 100 ) ; 
165+             let  source_lines_as_tokens:  Vec < _ >  = tokens_for_diffing ( & source_blob) 
166+                 . tokenize ( ) 
167+                 . map ( |token| source_interner. intern ( token) ) 
168+                 . collect ( ) ; 
169+ 
170+             let  mut  blamed_interner = gix_diff:: blob:: intern:: Interner :: new ( blamed_file_blob. len ( )  / 100 ) ; 
171+             let  blamed_lines_as_tokens:  Vec < _ >  = tokens_for_diffing ( & blamed_file_blob) 
172+                 . tokenize ( ) 
173+                 . map ( |token| blamed_interner. intern ( token) ) 
174+                 . collect ( ) ; 
175+ 
176+             for  hunk in  hunks_to_blame. iter ( )  { 
177+                 if  let  Some ( range_in_suspect)  = hunk. suspects . get ( & suspect)  { 
178+                     let  range_in_blamed_file = hunk. range_in_blamed_file . clone ( ) ; 
179+ 
180+                     for  ( blamed_line_number,  source_line_number)  in  range_in_blamed_file. zip ( range_in_suspect. clone ( ) )  { 
181+                         let  source_token = source_lines_as_tokens[ source_line_number as  usize ] ; 
182+                         let  blame_token = blamed_lines_as_tokens[ blamed_line_number as  usize ] ; 
183+ 
184+                         let  source_line = BString :: new ( source_interner[ source_token] . into ( ) ) ; 
185+                         let  blamed_line = BString :: new ( blamed_interner[ blame_token] . into ( ) ) ; 
186+ 
187+                         assert_eq ! ( source_line,  blamed_line) ; 
188+                     } 
189+                 } 
190+             } 
191+         } 
192+ 
193+         for  ( pid,  ( parent_id,  parent_commit_time) )  in  parent_ids. iter ( ) . enumerate ( )  { 
147194            if  let  Some ( parent_entry_id)  =
148195                find_path_entry_in_commit ( & odb,  parent_id,  file_path,  & mut  buf,  & mut  buf2,  & mut  stats) ?
149196            { 
@@ -153,17 +200,19 @@ where
153200                } 
154201                if  no_change_in_entry { 
155202                    pass_blame_from_to ( suspect,  * parent_id,  & mut  hunks_to_blame) ; 
203+                     queue. insert ( * parent_commit_time,  * parent_id) ; 
156204                    continue  ' outer; 
157205                } 
158206            } 
159207        } 
160208
161209        let  more_than_one_parent = parent_ids. len ( )  > 1 ; 
162-         for  parent_id in  parent_ids { 
210+         for  ( parent_id,  parent_commit_time)  in  parent_ids { 
211+             queue. insert ( parent_commit_time,  parent_id) ; 
163212            let  changes_for_file_path = tree_diff_at_file_path ( 
164213                & odb, 
165214                file_path, 
166-                 commit . id , 
215+                 suspect , 
167216                parent_id, 
168217                & mut  stats, 
169218                & mut  diff_state, 
@@ -588,8 +637,82 @@ fn find_path_entry_in_commit(
588637    Ok ( res. map ( |e| e. oid ) ) 
589638} 
590639
591- /// Return an iterator over tokens for use in diffing. These usually lines, but iit's important to unify them 
592- /// so the later access shows the right thing. 
640+ type  CommitTime  = i64 ; 
641+ 
642+ fn  commit_time ( commit :  gix_traverse:: commit:: Either < ' _ ,  ' _ > )  -> CommitTime  { 
643+     match  commit { 
644+         gix_traverse:: commit:: Either :: CommitRefIter ( commit_ref_iter)  => { 
645+             let  mut  commit_time = 0 ; 
646+             for  token in  commit_ref_iter { 
647+                 use  gix_object:: commit:: ref_iter:: Token  as  T ; 
648+                 match  token { 
649+                     Ok ( T :: Tree  {  .. } )  => continue , 
650+                     Ok ( T :: Parent  {  .. } )  => continue , 
651+                     Ok ( T :: Author  {  .. } )  => continue , 
652+                     Ok ( T :: Committer  {  signature } )  => { 
653+                         commit_time = signature. time . seconds ; 
654+                         break ; 
655+                     } 
656+                     Ok ( _unused_token)  => break , 
657+                     Err ( _err)  => todo ! ( ) , 
658+                 } 
659+             } 
660+             commit_time
661+         } 
662+         gix_traverse:: commit:: Either :: CachedCommit ( commit)  => commit. committer_timestamp ( )  as  i64 , 
663+     } 
664+ } 
665+ 
666+ type  ParentIds  = SmallVec < [ ( gix_hash:: ObjectId ,  i64 ) ;  2 ] > ; 
667+ 
668+ fn  collect_parents ( 
669+     commit :  gix_traverse:: commit:: Either < ' _ ,  ' _ > , 
670+     odb :  & impl  gix_object:: Find , 
671+     cache :  Option < & gix_commitgraph:: Graph > , 
672+ )  -> ParentIds  { 
673+     let  mut  parent_ids:  ParentIds  = Default :: default ( ) ; 
674+ 
675+     match  commit { 
676+         gix_traverse:: commit:: Either :: CachedCommit ( commit)  => { 
677+             let  cache = cache
678+                 . as_ref ( ) 
679+                 . expect ( "find returned a cached commit, so we expect cache to be present" ) ; 
680+             for  parent_id in  commit. iter_parents ( )  { 
681+                 match  parent_id { 
682+                     Ok ( pos)  => { 
683+                         let  parent = cache. commit_at ( pos) ; 
684+ 
685+                         parent_ids. push ( ( parent. id ( ) . to_owned ( ) ,  parent. committer_timestamp ( )  as  i64 ) ) ; 
686+                     } 
687+                     Err ( _)  => todo ! ( ) , 
688+                 } 
689+             } 
690+         } 
691+         gix_traverse:: commit:: Either :: CommitRefIter ( commit_ref_iter)  => { 
692+             for  token in  commit_ref_iter { 
693+                 match  token { 
694+                     Ok ( gix_object:: commit:: ref_iter:: Token :: Tree  {  .. } )  => continue , 
695+                     Ok ( gix_object:: commit:: ref_iter:: Token :: Parent  {  id } )  => { 
696+                         let  mut  buf = Vec :: new ( ) ; 
697+                         let  parent = odb. find_commit_iter ( id. as_ref ( ) ,  & mut  buf) . ok ( ) ; 
698+                         let  parent_commit_time = parent
699+                             . and_then ( |parent| parent. committer ( ) . ok ( ) . map ( |committer| committer. time . seconds ) ) 
700+                             . unwrap_or_default ( ) ; 
701+ 
702+                         parent_ids. push ( ( id,  parent_commit_time) ) ; 
703+                     } 
704+                     Ok ( _unused_token)  => break , 
705+                     Err ( _err)  => todo ! ( ) , 
706+                 } 
707+             } 
708+         } 
709+     } ; 
710+ 
711+     parent_ids
712+ } 
713+ 
714+ /// Return an iterator over tokens for use in diffing. These are usually lines, but it's important 
715+ /// to unify them so the later access shows the right thing. 
593716pub ( crate )  fn  tokens_for_diffing ( data :  & [ u8 ] )  -> impl  TokenSource < Token  = & [ u8 ] >  { 
594717    gix_diff:: blob:: sources:: byte_lines_with_terminator ( data) 
595718} 
0 commit comments