@@ -6,9 +6,9 @@ mod google_cpu_profiler;
6
6
7
7
use std:: {
8
8
cell:: RefCell ,
9
+ collections:: BTreeMap ,
9
10
collections:: HashSet ,
10
11
io:: { stderr, Write } ,
11
- iter:: repeat,
12
12
mem,
13
13
sync:: {
14
14
atomic:: { AtomicBool , Ordering } ,
@@ -17,7 +17,6 @@ use std::{
17
17
time:: { Duration , Instant } ,
18
18
} ;
19
19
20
- use itertools:: Itertools ;
21
20
use once_cell:: sync:: Lazy ;
22
21
23
22
pub use crate :: memory_usage:: { Bytes , MemoryUsage } ;
@@ -216,7 +215,7 @@ impl Drop for Profiler {
216
215
// (otherwise we could print `0ms` despite user's `>0` filter when
217
216
// `duration` is just a few nanos).
218
217
if duration. as_millis ( ) > longer_than. as_millis ( ) {
219
- print ( 0 , & stack. messages , & mut stdout. lock ( ) , longer_than , None ) ;
218
+ print ( & stack. messages , longer_than , & mut stdout. lock ( ) ) ;
220
219
}
221
220
stack. messages . clear ( ) ;
222
221
}
@@ -227,59 +226,85 @@ impl Drop for Profiler {
227
226
}
228
227
}
229
228
230
- fn print (
231
- lvl : usize ,
232
- msgs : & [ Message ] ,
233
- out : & mut impl Write ,
234
- longer_than : Duration ,
235
- total : Option < Duration > ,
236
- ) {
229
+ fn print ( msgs : & [ Message ] , longer_than : Duration , out : & mut impl Write ) {
237
230
if msgs. is_empty ( ) {
238
231
return ;
239
232
}
240
- // The index of the first element that will be included in the slice when we recurse.
241
- let mut next_start = 0 ;
242
- let indent = repeat ( " " ) . take ( lvl) . collect :: < String > ( ) ;
243
- // We output hierarchy for long calls, but sum up all short calls
244
- let mut short = Vec :: new ( ) ;
233
+ let children_map = idx_to_children ( msgs) ;
234
+ let root_idx = msgs. len ( ) - 1 ;
235
+ print_for_idx ( root_idx, & children_map, msgs, longer_than, out) ;
236
+ }
237
+
238
+ fn print_for_idx (
239
+ current_idx : usize ,
240
+ children_map : & [ Vec < usize > ] ,
241
+ msgs : & [ Message ] ,
242
+ longer_than : Duration ,
243
+ out : & mut impl Write ,
244
+ ) {
245
+ let current = & msgs[ current_idx] ;
246
+ let current_indent = " " . repeat ( current. level ) ;
247
+ writeln ! ( out, "{}{:5}ms - {}" , current_indent, current. duration. as_millis( ) , current. message)
248
+ . expect ( "printing profiling info" ) ;
249
+
250
+ let longer_than_millis = longer_than. as_millis ( ) ;
251
+ let children_indices = & children_map[ current_idx] ;
245
252
let mut accounted_for = Duration :: default ( ) ;
246
- for ( i, & Message { level, duration, message : ref msg } ) in msgs. iter ( ) . enumerate ( ) {
247
- if level != lvl {
248
- continue ;
249
- }
250
- accounted_for += duration;
251
- if duration. as_millis ( ) > longer_than. as_millis ( ) {
252
- writeln ! ( out, "{}{:5}ms - {}" , indent, duration. as_millis( ) , msg)
253
- . expect ( "printing profiling info to stdout" ) ;
253
+ let mut short_children = BTreeMap :: new ( ) ; // Use `BTreeMap` to get deterministic output.
254
254
255
- print ( lvl + 1 , & msgs[ next_start..i] , out, longer_than, Some ( duration) ) ;
255
+ for child_idx in children_indices. iter ( ) {
256
+ let child = & msgs[ * child_idx] ;
257
+ if child. duration . as_millis ( ) > longer_than_millis {
258
+ print_for_idx ( * child_idx, children_map, msgs, longer_than, out) ;
256
259
} else {
257
- short. push ( ( msg, duration) )
260
+ let pair = short_children. entry ( & child. message ) . or_insert ( ( Duration :: default ( ) , 0 ) ) ;
261
+ pair. 0 += child. duration ;
262
+ pair. 1 += 1 ;
258
263
}
264
+ accounted_for += child. duration ;
265
+ }
259
266
260
- next_start = i + 1 ;
267
+ for ( child_msg, ( duration, count) ) in short_children. iter ( ) {
268
+ let millis = duration. as_millis ( ) ;
269
+ writeln ! ( out, " {}{:5}ms - {} ({} calls)" , current_indent, millis, child_msg, count)
270
+ . expect ( "printing profiling info" ) ;
261
271
}
262
- short. sort_by_key ( |( msg, _time) | * msg) ;
263
- for ( msg, entires) in short. iter ( ) . group_by ( |( msg, _time) | msg) . into_iter ( ) {
264
- let mut count = 0 ;
265
- let mut total_duration = Duration :: default ( ) ;
266
- entires. for_each ( |( _msg, time) | {
267
- count += 1 ;
268
- total_duration += * time;
269
- } ) ;
270
- writeln ! ( out, "{}{:5}ms - {} ({} calls)" , indent, total_duration. as_millis( ) , msg, count)
271
- . expect ( "printing profiling info to stdout" ) ;
272
+
273
+ let unaccounted_millis = ( current. duration - accounted_for) . as_millis ( ) ;
274
+ if !children_indices. is_empty ( )
275
+ && unaccounted_millis > 0
276
+ && unaccounted_millis > longer_than_millis
277
+ {
278
+ writeln ! ( out, " {}{:5}ms - ???" , current_indent, unaccounted_millis)
279
+ . expect ( "printing profiling info" ) ;
272
280
}
281
+ }
273
282
274
- if let Some ( total) = total {
275
- if let Some ( unaccounted) = total. checked_sub ( accounted_for) {
276
- let unaccounted_millis = unaccounted. as_millis ( ) ;
277
- if unaccounted_millis > longer_than. as_millis ( ) && unaccounted_millis > 0 {
278
- writeln ! ( out, "{}{:5}ms - ???" , indent, unaccounted_millis)
279
- . expect ( "printing profiling info to stdout" ) ;
280
- }
283
+ /// Returns a mapping from an index in the `msgs` to the vector with the indices of its children.
284
+ ///
285
+ /// This assumes that the entries in `msgs` are in the order of when the calls to `profile` finish.
286
+ /// In other words, a postorder of the call graph. In particular, the root is the last element of
287
+ /// `msgs`.
288
+ fn idx_to_children ( msgs : & [ Message ] ) -> Vec < Vec < usize > > {
289
+ // Initialize with the index of the root; `msgs` and `ancestors` should be never empty.
290
+ assert ! ( !msgs. is_empty( ) ) ;
291
+ let mut ancestors = vec ! [ msgs. len( ) - 1 ] ;
292
+ let mut result: Vec < Vec < usize > > = vec ! [ vec![ ] ; msgs. len( ) ] ;
293
+ for ( idx, msg) in msgs[ ..msgs. len ( ) - 1 ] . iter ( ) . enumerate ( ) . rev ( ) {
294
+ // We need to find the parent of the current message, i.e., the last ancestor that has a
295
+ // level lower than the current message.
296
+ while msgs[ * ancestors. last ( ) . unwrap ( ) ] . level >= msg. level {
297
+ ancestors. pop ( ) ;
281
298
}
299
+ result[ * ancestors. last ( ) . unwrap ( ) ] . push ( idx) ;
300
+ ancestors. push ( idx) ;
301
+ }
302
+ // Note that above we visited all children from the last to the first one. Let's reverse vectors
303
+ // to get the more natural order where the first element is the first child.
304
+ for vec in result. iter_mut ( ) {
305
+ vec. reverse ( ) ;
282
306
}
307
+ result
283
308
}
284
309
285
310
/// Prints backtrace to stderr, useful for debugging.
@@ -388,7 +413,7 @@ mod tests {
388
413
Message { level: 1 , duration: Duration :: from_nanos( 2 ) , message: "bar" . to_owned( ) } ,
389
414
Message { level: 0 , duration: Duration :: from_millis( 1 ) , message: "foo" . to_owned( ) } ,
390
415
] ;
391
- print ( 0 , & msgs, & mut result , Duration :: from_millis ( 0 ) , Some ( Duration :: from_millis ( 1 ) ) ) ;
416
+ print ( & msgs, Duration :: from_millis ( 0 ) , & mut result ) ;
392
417
// The calls to `bar` are so short that they'll be rounded to 0ms and should get collapsed
393
418
// when printing.
394
419
assert_eq ! (
@@ -404,7 +429,7 @@ mod tests {
404
429
Message { level: 1 , duration: Duration :: from_millis( 2 ) , message: "bar" . to_owned( ) } ,
405
430
Message { level: 0 , duration: Duration :: from_millis( 5 ) , message: "foo" . to_owned( ) } ,
406
431
] ;
407
- print ( 0 , & msgs, & mut result , Duration :: from_millis ( 0 ) , Some ( Duration :: from_millis ( 1 ) ) ) ;
432
+ print ( & msgs, Duration :: from_millis ( 0 ) , & mut result ) ;
408
433
assert_eq ! (
409
434
std:: str :: from_utf8( & result) . unwrap( ) . lines( ) . collect:: <Vec <_>>( ) ,
410
435
vec![
@@ -426,7 +451,7 @@ mod tests {
426
451
Message { level: 1 , duration: Duration :: from_millis( 4 ) , message: "bar" . to_owned( ) } ,
427
452
Message { level: 0 , duration: Duration :: from_millis( 9 ) , message: "foo" . to_owned( ) } ,
428
453
] ;
429
- print ( 0 , & msgs, & mut result , Duration :: from_millis ( 0 ) , None ) ;
454
+ print ( & msgs, Duration :: from_millis ( 0 ) , & mut result ) ;
430
455
assert_eq ! (
431
456
std:: str :: from_utf8( & result) . unwrap( ) . lines( ) . collect:: <Vec <_>>( ) ,
432
457
vec![
0 commit comments