11use std:: io;
2+ use std:: sync:: atomic:: Ordering ;
23
34use anyhow:: bail;
45
@@ -50,6 +51,8 @@ pub mod statistics {
/// Options for the `statistics` function, which destructures this struct in its parameter list.
5051 pub struct Options {
/// NOTE(review): presumably selects how the statistics are rendered; its use is outside this view — confirm.
5152 pub format : OutputFormat ,
/// Thread limit handed to `gix::parallel::num_threads()` and the `gix::parallel` runners;
/// a resulting thread count above 1 selects the parallel code path.
5253 pub thread_limit : Option < usize > ,
54+ /// A debug flag: when set, all object ids are collected during counting and their headers are looked up a second time on a freshly re-opened repository, i.e. without preloaded indices. Only supported in threaded mode; the single-threaded path bails out with an error.
55+ pub extra_header_lookup : bool ,
5356 }
5457}
5558
@@ -59,7 +62,11 @@ pub fn statistics(
5962 mut progress : impl gix:: Progress ,
6063 out : impl io:: Write ,
6164 mut err : impl io:: Write ,
62- statistics:: Options { format, thread_limit } : statistics:: Options ,
65+ statistics:: Options {
66+ format,
67+ thread_limit,
68+ extra_header_lookup,
69+ } : statistics:: Options ,
6370) -> anyhow:: Result < ( ) > {
6471 use bytesize:: ByteSize ;
6572 use gix:: odb:: { find, HeaderExt } ;
@@ -76,6 +83,10 @@ pub fn statistics(
7683 #[ cfg_attr( feature = "serde" , derive( serde:: Serialize ) ) ]
7784 #[ derive( Default ) ]
7885 struct Statistics {
86+ /// All objects that were used to produce these statistics.
87+ /// Only `Some` if we are doing an extra round of header queries on a repository without loaded indices.
88+ #[ cfg_attr( feature = "serde" , serde( skip_serializing) ) ]
89+ ids : Option < Vec < gix:: ObjectId > > ,
7990 total_objects : usize ,
8091 loose_objects : usize ,
8192 packed_objects : usize ,
@@ -135,14 +146,17 @@ pub fn statistics(
135146 }
136147
137148 impl gix:: parallel:: Reduce for Reduce {
138- type Input = Result < Vec < gix:: odb:: find:: Header > , anyhow:: Error > ;
149+ type Input = Result < Vec < ( gix:: ObjectId , gix :: odb:: find:: Header ) > , anyhow:: Error > ;
139150 type FeedProduce = ( ) ;
140151 type Output = Statistics ;
141152 type Error = anyhow:: Error ;
142153
/// Fold one chunk of results into the running statistics.
///
/// `items` is the per-chunk result; if it is an `Err`, the error is returned through `?`
/// before any item of the chunk is consumed. Otherwise every header is passed to
/// `self.stats.consume()`, and its object id is appended to `self.stats.ids` when that is `Some`.
143154 fn feed ( & mut self , items : Self :: Input ) -> Result < Self :: FeedProduce , Self :: Error > {
144- for item in items? {
155+ for ( id , item) in items? {
145156 self . stats . consume ( item) ;
// Ids are only recorded when collection was enabled by initialising `ids` to `Some`.
157+ if let Some ( ids) = self . stats . ids . as_mut ( ) {
158+ ids. push ( id) ;
159+ }
146160 }
147161 Ok ( ( ) )
148162 }
@@ -154,9 +168,9 @@ pub fn statistics(
154168 }
155169
156170 let cancelled = || anyhow:: anyhow!( "Cancelled by user" ) ;
157- let object_ids = repo. objects . store_ref ( ) . iter ( ) ?. filter_map ( Result :: ok) ;
171+ let object_ids = repo. objects . iter ( ) ?. filter_map ( Result :: ok) ;
158172 let chunk_size = 1_000 ;
159- let stats = if gix:: parallel:: num_threads ( thread_limit) > 1 {
173+ let mut stats = if gix:: parallel:: num_threads ( thread_limit) > 1 {
160174 gix:: parallel:: in_parallel (
161175 gix:: interrupt:: Iter :: new (
162176 gix:: features:: iter:: Chunks {
@@ -166,19 +180,30 @@ pub fn statistics(
166180 cancelled,
167181 ) ,
168182 thread_limit,
169- move |_| ( repo. objects . clone ( ) . into_inner ( ) , counter) ,
183+ {
184+ let objects = repo. objects . clone ( ) ;
185+ move |_| ( objects. clone ( ) . into_inner ( ) , counter)
186+ } ,
170187 |ids, ( handle, counter) | {
171188 let ids = ids?;
172- counter. fetch_add ( ids. len ( ) , std :: sync :: atomic :: Ordering :: Relaxed ) ;
189+ counter. fetch_add ( ids. len ( ) , Ordering :: Relaxed ) ;
173190 let out = ids
174191 . into_iter ( )
175- . map ( |id| handle. header ( id) )
192+ . map ( |id| handle. header ( id) . map ( |hdr| ( id , hdr ) ) )
176193 . collect :: < Result < Vec < _ > , _ > > ( ) ?;
177194 Ok ( out)
178195 } ,
179- Reduce :: default ( ) ,
196+ Reduce {
197+ stats : Statistics {
198+ ids : extra_header_lookup. then ( Vec :: new) ,
199+ ..Default :: default ( )
200+ } ,
201+ } ,
180202 ) ?
181203 } else {
204+ if extra_header_lookup {
205+ bail ! ( "extra-header-lookup is only meaningful in threaded mode" ) ;
206+ }
182207 let mut stats = Statistics :: default ( ) ;
183208
184209 for ( count, id) in object_ids. enumerate ( ) {
@@ -193,6 +218,39 @@ pub fn statistics(
193218
194219 progress. show_throughput ( start) ;
195220
221+ if let Some ( mut ids) = stats. ids . take ( ) {
222+ // Critical to re-open the repo to ensure we don't have any ODB state and start fresh.
223+ let start = std:: time:: Instant :: now ( ) ;
224+ let repo = gix:: open_opts ( repo. git_dir ( ) , repo. open_options ( ) . to_owned ( ) ) ?;
225+ progress. set_name ( "re-counting" . into ( ) ) ;
226+ progress. init ( Some ( ids. len ( ) ) , gix:: progress:: count ( "objects" ) ) ;
227+ let counter = progress. counter ( ) ;
228+ counter. store ( 0 , Ordering :: Relaxed ) ;
229+ let errors = gix:: parallel:: in_parallel_with_slice (
230+ & mut ids,
231+ thread_limit,
232+ {
233+ let objects = repo. objects . clone ( ) ;
234+ move |_| ( objects. clone ( ) . into_inner ( ) , counter, false )
235+ } ,
236+ |id, ( odb, counter, has_error) , _threads_left, _stop_everything| -> anyhow:: Result < ( ) > {
237+ counter. fetch_add ( 1 , Ordering :: Relaxed ) ;
238+ if let Err ( _err) = odb. header ( id) {
239+ * has_error = true ;
240+ gix:: trace:: error!( err = ?_err, "Object that is known to be present wasn't found" ) ;
241+ }
242+ Ok ( ( ) )
243+ } ,
244+ || Some ( std:: time:: Duration :: from_millis ( 100 ) ) ,
245+ |( _, _, has_error) | has_error,
246+ ) ?;
247+
248+ progress. show_throughput ( start) ;
249+ if errors. contains ( & true ) {
250+ bail ! ( "At least one object couldn't be looked up even though it must exist" ) ;
251+ }
252+ }
253+
196254 #[ cfg( feature = "serde" ) ]
197255 {
198256 serde_json:: to_writer_pretty ( out, & stats) ?;
0 commit comments