@@ -6,9 +6,9 @@ mod google_cpu_profiler;
6
6
7
7
use std:: {
8
8
cell:: RefCell ,
9
+ collections:: BTreeMap ,
9
10
collections:: HashSet ,
10
11
io:: { stderr, Write } ,
11
- iter:: repeat,
12
12
mem,
13
13
sync:: {
14
14
atomic:: { AtomicBool , Ordering } ,
@@ -17,7 +17,6 @@ use std::{
17
17
time:: { Duration , Instant } ,
18
18
} ;
19
19
20
- use itertools:: Itertools ;
21
20
use once_cell:: sync:: Lazy ;
22
21
23
22
pub use crate :: memory_usage:: { Bytes , MemoryUsage } ;
@@ -201,7 +200,7 @@ impl Drop for Profiler {
201
200
// (otherwise we could print `0ms` despite user's `>0` filter when
202
201
// `duration` is just a few nanos).
203
202
if duration. as_millis ( ) > longer_than. as_millis ( ) {
204
- print ( 0 , & stack. messages , & mut stdout. lock ( ) , longer_than , None ) ;
203
+ print ( & stack. messages , longer_than , & mut stdout. lock ( ) ) ;
205
204
}
206
205
stack. messages . clear ( ) ;
207
206
}
@@ -212,59 +211,85 @@ impl Drop for Profiler {
212
211
}
213
212
}
214
213
215
- fn print (
216
- lvl : usize ,
217
- msgs : & [ Message ] ,
218
- out : & mut impl Write ,
219
- longer_than : Duration ,
220
- total : Option < Duration > ,
221
- ) {
214
+ fn print ( msgs : & [ Message ] , longer_than : Duration , out : & mut impl Write ) {
222
215
if msgs. is_empty ( ) {
223
216
return ;
224
217
}
225
- // The index of the first element that will be included in the slice when we recurse.
226
- let mut next_start = 0 ;
227
- let indent = repeat ( " " ) . take ( lvl) . collect :: < String > ( ) ;
228
- // We output hierarchy for long calls, but sum up all short calls
229
- let mut short = Vec :: new ( ) ;
218
+ let children_map = idx_to_children ( msgs) ;
219
+ let root_idx = msgs. len ( ) - 1 ;
220
+ print_for_idx ( root_idx, & children_map, msgs, longer_than, out) ;
221
+ }
222
+
223
+ fn print_for_idx (
224
+ current_idx : usize ,
225
+ children_map : & [ Vec < usize > ] ,
226
+ msgs : & [ Message ] ,
227
+ longer_than : Duration ,
228
+ out : & mut impl Write ,
229
+ ) {
230
+ let current = & msgs[ current_idx] ;
231
+ let current_indent = " " . repeat ( current. level ) ;
232
+ writeln ! ( out, "{}{:5}ms - {}" , current_indent, current. duration. as_millis( ) , current. message)
233
+ . expect ( "printing profiling info" ) ;
234
+
235
+ let longer_than_millis = longer_than. as_millis ( ) ;
236
+ let children_indices = & children_map[ current_idx] ;
230
237
let mut accounted_for = Duration :: default ( ) ;
231
- for ( i, & Message { level, duration, message : ref msg } ) in msgs. iter ( ) . enumerate ( ) {
232
- if level != lvl {
233
- continue ;
234
- }
235
- accounted_for += duration;
236
- if duration. as_millis ( ) > longer_than. as_millis ( ) {
237
- writeln ! ( out, "{}{:5}ms - {}" , indent, duration. as_millis( ) , msg)
238
- . expect ( "printing profiling info to stdout" ) ;
238
+ let mut short_children = BTreeMap :: new ( ) ; // Use `BTreeMap` to get deterministic output.
239
239
240
- print ( lvl + 1 , & msgs[ next_start..i] , out, longer_than, Some ( duration) ) ;
240
+ for child_idx in children_indices. iter ( ) {
241
+ let child = & msgs[ * child_idx] ;
242
+ if child. duration . as_millis ( ) > longer_than_millis {
243
+ print_for_idx ( * child_idx, children_map, msgs, longer_than, out) ;
241
244
} else {
242
- short. push ( ( msg, duration) )
245
+ let pair = short_children. entry ( & child. message ) . or_insert ( ( Duration :: default ( ) , 0 ) ) ;
246
+ pair. 0 += child. duration ;
247
+ pair. 1 += 1 ;
243
248
}
249
+ accounted_for += child. duration ;
250
+ }
244
251
245
- next_start = i + 1 ;
252
+ for ( child_msg, ( duration, count) ) in short_children. iter ( ) {
253
+ let millis = duration. as_millis ( ) ;
254
+ writeln ! ( out, " {}{:5}ms - {} ({} calls)" , current_indent, millis, child_msg, count)
255
+ . expect ( "printing profiling info" ) ;
246
256
}
247
- short. sort_by_key ( |( msg, _time) | * msg) ;
248
- for ( msg, entires) in short. iter ( ) . group_by ( |( msg, _time) | msg) . into_iter ( ) {
249
- let mut count = 0 ;
250
- let mut total_duration = Duration :: default ( ) ;
251
- entires. for_each ( |( _msg, time) | {
252
- count += 1 ;
253
- total_duration += * time;
254
- } ) ;
255
- writeln ! ( out, "{}{:5}ms - {} ({} calls)" , indent, total_duration. as_millis( ) , msg, count)
256
- . expect ( "printing profiling info to stdout" ) ;
257
+
258
+ let unaccounted_millis = ( current. duration - accounted_for) . as_millis ( ) ;
259
+ if !children_indices. is_empty ( )
260
+ && unaccounted_millis > 0
261
+ && unaccounted_millis > longer_than_millis
262
+ {
263
+ writeln ! ( out, " {}{:5}ms - ???" , current_indent, unaccounted_millis)
264
+ . expect ( "printing profiling info" ) ;
257
265
}
266
+ }
258
267
259
- if let Some ( total) = total {
260
- if let Some ( unaccounted) = total. checked_sub ( accounted_for) {
261
- let unaccounted_millis = unaccounted. as_millis ( ) ;
262
- if unaccounted_millis > longer_than. as_millis ( ) && unaccounted_millis > 0 {
263
- writeln ! ( out, "{}{:5}ms - ???" , indent, unaccounted_millis)
264
- . expect ( "printing profiling info to stdout" ) ;
265
- }
268
+ /// Returns a mapping from an index in the `msgs` to the vector with the indices of its children.
269
+ ///
270
+ /// This assumes that the entries in `msgs` are in the order of when the calls to `profile` finish.
271
+ /// In other words, a postorder of the call graph. In particular, the root is the last element of
272
+ /// `msgs`.
273
+ fn idx_to_children ( msgs : & [ Message ] ) -> Vec < Vec < usize > > {
274
+ // Initialize with the index of the root; `ancestors` should be never empty.
275
+ let mut ancestors = vec ! [ msgs. len( ) - 1 ] ;
276
+ let mut result: Vec < Vec < usize > > = vec ! [ ] ;
277
+ result. resize_with ( msgs. len ( ) , Default :: default) ;
278
+ for ( idx, msg) in msgs[ ..msgs. len ( ) - 1 ] . iter ( ) . enumerate ( ) . rev ( ) {
279
+ // We need to find the parent of the current message, i.e., the last ancestor that has a
280
+ // level lower than the current message.
281
+ while msgs[ * ancestors. last ( ) . unwrap ( ) ] . level >= msg. level {
282
+ ancestors. pop ( ) ;
266
283
}
284
+ result[ * ancestors. last ( ) . unwrap ( ) ] . push ( idx) ;
285
+ ancestors. push ( idx) ;
286
+ }
287
+ // Note that above we visited all children from the last to the first one. Let's reverse vectors
288
+ // to get the more natural order where the first element is the first child.
289
+ for vec in result. iter_mut ( ) {
290
+ vec. reverse ( ) ;
267
291
}
292
+ result
268
293
}
269
294
270
295
/// Prints backtrace to stderr, useful for debugging.
@@ -373,7 +398,7 @@ mod tests {
373
398
Message { level: 1 , duration: Duration :: from_nanos( 2 ) , message: "bar" . to_owned( ) } ,
374
399
Message { level: 0 , duration: Duration :: from_millis( 1 ) , message: "foo" . to_owned( ) } ,
375
400
] ;
376
- print ( 0 , & msgs, & mut result , Duration :: from_millis ( 0 ) , Some ( Duration :: from_millis ( 1 ) ) ) ;
401
+ print ( & msgs, Duration :: from_millis ( 0 ) , & mut result ) ;
377
402
// The calls to `bar` are so short that they'll be rounded to 0ms and should get collapsed
378
403
// when printing.
379
404
assert_eq ! (
@@ -389,7 +414,7 @@ mod tests {
389
414
Message { level: 1 , duration: Duration :: from_millis( 2 ) , message: "bar" . to_owned( ) } ,
390
415
Message { level: 0 , duration: Duration :: from_millis( 5 ) , message: "foo" . to_owned( ) } ,
391
416
] ;
392
- print ( 0 , & msgs, & mut result , Duration :: from_millis ( 0 ) , Some ( Duration :: from_millis ( 1 ) ) ) ;
417
+ print ( & msgs, Duration :: from_millis ( 0 ) , & mut result ) ;
393
418
assert_eq ! (
394
419
std:: str :: from_utf8( & result) . unwrap( ) . lines( ) . collect:: <Vec <_>>( ) ,
395
420
vec![
@@ -411,7 +436,7 @@ mod tests {
411
436
Message { level: 1 , duration: Duration :: from_millis( 4 ) , message: "bar" . to_owned( ) } ,
412
437
Message { level: 0 , duration: Duration :: from_millis( 9 ) , message: "foo" . to_owned( ) } ,
413
438
] ;
414
- print ( 0 , & msgs, & mut result , Duration :: from_millis ( 0 ) , None ) ;
439
+ print ( & msgs, Duration :: from_millis ( 0 ) , & mut result ) ;
415
440
assert_eq ! (
416
441
std:: str :: from_utf8( & result) . unwrap( ) . lines( ) . collect:: <Vec <_>>( ) ,
417
442
vec![
0 commit comments