Skip to content

Commit 5dcd9fd

Browse files
bors[bot]michalt
andauthored
Merge #2895
2895: Rewrite ra_prof's profile printing r=michalt a=michalt This changes the way we print things to first construct a mapping from events to the children and uses that mapping to actually print things. It should not change the actual output that we produce. The new approach two benefits: * It avoids a potential quadratic behavior of the previous approach. For instance, for a vector of N elements: ``` [Message{level: (N - 1)}, ..., Message{level: 1}, Message{level: 0}] ``` we would first do a linear scan to find entry with level 0, then another scan to find one with level 1, etc. * It makes it much easier to improve the output in the future, because we now pre-compute the children for each entry and can easily take that into account when printing. Signed-off-by: Michal Terepeta <[email protected]> Co-authored-by: Michal Terepeta <[email protected]>
2 parents d92a15c + 644c383 commit 5dcd9fd

File tree

1 file changed

+71
-46
lines changed

1 file changed

+71
-46
lines changed

crates/ra_prof/src/lib.rs

Lines changed: 71 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,9 @@ mod google_cpu_profiler;
66

77
use std::{
88
cell::RefCell,
9+
collections::BTreeMap,
910
collections::HashSet,
1011
io::{stderr, Write},
11-
iter::repeat,
1212
mem,
1313
sync::{
1414
atomic::{AtomicBool, Ordering},
@@ -17,7 +17,6 @@ use std::{
1717
time::{Duration, Instant},
1818
};
1919

20-
use itertools::Itertools;
2120
use once_cell::sync::Lazy;
2221

2322
pub use crate::memory_usage::{Bytes, MemoryUsage};
@@ -216,7 +215,7 @@ impl Drop for Profiler {
216215
// (otherwise we could print `0ms` despite user's `>0` filter when
217216
// `duration` is just a few nanos).
218217
if duration.as_millis() > longer_than.as_millis() {
219-
print(0, &stack.messages, &mut stdout.lock(), longer_than, None);
218+
print(&stack.messages, longer_than, &mut stdout.lock());
220219
}
221220
stack.messages.clear();
222221
}
@@ -227,59 +226,85 @@ impl Drop for Profiler {
227226
}
228227
}
229228

230-
fn print(
231-
lvl: usize,
232-
msgs: &[Message],
233-
out: &mut impl Write,
234-
longer_than: Duration,
235-
total: Option<Duration>,
236-
) {
229+
fn print(msgs: &[Message], longer_than: Duration, out: &mut impl Write) {
237230
if msgs.is_empty() {
238231
return;
239232
}
240-
// The index of the first element that will be included in the slice when we recurse.
241-
let mut next_start = 0;
242-
let indent = repeat(" ").take(lvl).collect::<String>();
243-
// We output hierarchy for long calls, but sum up all short calls
244-
let mut short = Vec::new();
233+
let children_map = idx_to_children(msgs);
234+
let root_idx = msgs.len() - 1;
235+
print_for_idx(root_idx, &children_map, msgs, longer_than, out);
236+
}
237+
238+
fn print_for_idx(
239+
current_idx: usize,
240+
children_map: &[Vec<usize>],
241+
msgs: &[Message],
242+
longer_than: Duration,
243+
out: &mut impl Write,
244+
) {
245+
let current = &msgs[current_idx];
246+
let current_indent = " ".repeat(current.level);
247+
writeln!(out, "{}{:5}ms - {}", current_indent, current.duration.as_millis(), current.message)
248+
.expect("printing profiling info");
249+
250+
let longer_than_millis = longer_than.as_millis();
251+
let children_indices = &children_map[current_idx];
245252
let mut accounted_for = Duration::default();
246-
for (i, &Message { level, duration, message: ref msg }) in msgs.iter().enumerate() {
247-
if level != lvl {
248-
continue;
249-
}
250-
accounted_for += duration;
251-
if duration.as_millis() > longer_than.as_millis() {
252-
writeln!(out, "{}{:5}ms - {}", indent, duration.as_millis(), msg)
253-
.expect("printing profiling info to stdout");
253+
let mut short_children = BTreeMap::new(); // Use `BTreeMap` to get deterministic output.
254254

255-
print(lvl + 1, &msgs[next_start..i], out, longer_than, Some(duration));
255+
for child_idx in children_indices.iter() {
256+
let child = &msgs[*child_idx];
257+
if child.duration.as_millis() > longer_than_millis {
258+
print_for_idx(*child_idx, children_map, msgs, longer_than, out);
256259
} else {
257-
short.push((msg, duration))
260+
let pair = short_children.entry(&child.message).or_insert((Duration::default(), 0));
261+
pair.0 += child.duration;
262+
pair.1 += 1;
258263
}
264+
accounted_for += child.duration;
265+
}
259266

260-
next_start = i + 1;
267+
for (child_msg, (duration, count)) in short_children.iter() {
268+
let millis = duration.as_millis();
269+
writeln!(out, " {}{:5}ms - {} ({} calls)", current_indent, millis, child_msg, count)
270+
.expect("printing profiling info");
261271
}
262-
short.sort_by_key(|(msg, _time)| *msg);
263-
for (msg, entires) in short.iter().group_by(|(msg, _time)| msg).into_iter() {
264-
let mut count = 0;
265-
let mut total_duration = Duration::default();
266-
entires.for_each(|(_msg, time)| {
267-
count += 1;
268-
total_duration += *time;
269-
});
270-
writeln!(out, "{}{:5}ms - {} ({} calls)", indent, total_duration.as_millis(), msg, count)
271-
.expect("printing profiling info to stdout");
272+
273+
let unaccounted_millis = (current.duration - accounted_for).as_millis();
274+
if !children_indices.is_empty()
275+
&& unaccounted_millis > 0
276+
&& unaccounted_millis > longer_than_millis
277+
{
278+
writeln!(out, " {}{:5}ms - ???", current_indent, unaccounted_millis)
279+
.expect("printing profiling info");
272280
}
281+
}
273282

274-
if let Some(total) = total {
275-
if let Some(unaccounted) = total.checked_sub(accounted_for) {
276-
let unaccounted_millis = unaccounted.as_millis();
277-
if unaccounted_millis > longer_than.as_millis() && unaccounted_millis > 0 {
278-
writeln!(out, "{}{:5}ms - ???", indent, unaccounted_millis)
279-
.expect("printing profiling info to stdout");
280-
}
283+
/// Returns a mapping from an index in the `msgs` to the vector with the indices of its children.
284+
///
285+
/// This assumes that the entries in `msgs` are in the order of when the calls to `profile` finish.
286+
/// In other words, a postorder of the call graph. In particular, the root is the last element of
287+
/// `msgs`.
288+
fn idx_to_children(msgs: &[Message]) -> Vec<Vec<usize>> {
289+
// Initialize with the index of the root; `msgs` and `ancestors` should be never empty.
290+
assert!(!msgs.is_empty());
291+
let mut ancestors = vec![msgs.len() - 1];
292+
let mut result: Vec<Vec<usize>> = vec![vec![]; msgs.len()];
293+
for (idx, msg) in msgs[..msgs.len() - 1].iter().enumerate().rev() {
294+
// We need to find the parent of the current message, i.e., the last ancestor that has a
295+
// level lower than the current message.
296+
while msgs[*ancestors.last().unwrap()].level >= msg.level {
297+
ancestors.pop();
281298
}
299+
result[*ancestors.last().unwrap()].push(idx);
300+
ancestors.push(idx);
301+
}
302+
// Note that above we visited all children from the last to the first one. Let's reverse vectors
303+
// to get the more natural order where the first element is the first child.
304+
for vec in result.iter_mut() {
305+
vec.reverse();
282306
}
307+
result
283308
}
284309

285310
/// Prints backtrace to stderr, useful for debugging.
@@ -388,7 +413,7 @@ mod tests {
388413
Message { level: 1, duration: Duration::from_nanos(2), message: "bar".to_owned() },
389414
Message { level: 0, duration: Duration::from_millis(1), message: "foo".to_owned() },
390415
];
391-
print(0, &msgs, &mut result, Duration::from_millis(0), Some(Duration::from_millis(1)));
416+
print(&msgs, Duration::from_millis(0), &mut result);
392417
// The calls to `bar` are so short that they'll be rounded to 0ms and should get collapsed
393418
// when printing.
394419
assert_eq!(
@@ -404,7 +429,7 @@ mod tests {
404429
Message { level: 1, duration: Duration::from_millis(2), message: "bar".to_owned() },
405430
Message { level: 0, duration: Duration::from_millis(5), message: "foo".to_owned() },
406431
];
407-
print(0, &msgs, &mut result, Duration::from_millis(0), Some(Duration::from_millis(1)));
432+
print(&msgs, Duration::from_millis(0), &mut result);
408433
assert_eq!(
409434
std::str::from_utf8(&result).unwrap().lines().collect::<Vec<_>>(),
410435
vec![
@@ -426,7 +451,7 @@ mod tests {
426451
Message { level: 1, duration: Duration::from_millis(4), message: "bar".to_owned() },
427452
Message { level: 0, duration: Duration::from_millis(9), message: "foo".to_owned() },
428453
];
429-
print(0, &msgs, &mut result, Duration::from_millis(0), None);
454+
print(&msgs, Duration::from_millis(0), &mut result);
430455
assert_eq!(
431456
std::str::from_utf8(&result).unwrap().lines().collect::<Vec<_>>(),
432457
vec![

0 commit comments

Comments
 (0)