Skip to content

Commit 39cbb6b

Browse files
committed
Rewrite ra_prof's profile printing
This changes the way we print things to first construct a mapping from events to the children and uses that mapping to actually print things. It should not change the actual output that we produce. The new approach two benefits: * It avoids a potential quadratic behavior of the previous approach. For instance, for a vector of N elements: ``` [Message{level: (N - 1)}, ..., Message{level: 1}, Message{level: 0}] ``` we would first do a linear scan to find entry with level 0, then another scan to find one with level 1, etc. * It makes it much easier to improve the output in the future, because we now pre-compute the children for each entry and can easily take that into account when printing. Signed-off-by: Michal Terepeta <[email protected]>
1 parent 9b1465a commit 39cbb6b

File tree

1 file changed

+71
-46
lines changed

1 file changed

+71
-46
lines changed

crates/ra_prof/src/lib.rs

Lines changed: 71 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,9 @@ mod google_cpu_profiler;
66

77
use std::{
88
cell::RefCell,
9+
collections::BTreeMap,
910
collections::HashSet,
1011
io::{stderr, Write},
11-
iter::repeat,
1212
mem,
1313
sync::{
1414
atomic::{AtomicBool, Ordering},
@@ -17,7 +17,6 @@ use std::{
1717
time::{Duration, Instant},
1818
};
1919

20-
use itertools::Itertools;
2120
use once_cell::sync::Lazy;
2221

2322
pub use crate::memory_usage::{Bytes, MemoryUsage};
@@ -201,7 +200,7 @@ impl Drop for Profiler {
201200
// (otherwise we could print `0ms` despite user's `>0` filter when
202201
// `duration` is just a few nanos).
203202
if duration.as_millis() > longer_than.as_millis() {
204-
print(0, &stack.messages, &mut stdout.lock(), longer_than, None);
203+
print(&stack.messages, longer_than, &mut stdout.lock());
205204
}
206205
stack.messages.clear();
207206
}
@@ -212,59 +211,85 @@ impl Drop for Profiler {
212211
}
213212
}
214213

215-
fn print(
216-
lvl: usize,
217-
msgs: &[Message],
218-
out: &mut impl Write,
219-
longer_than: Duration,
220-
total: Option<Duration>,
221-
) {
214+
fn print(msgs: &[Message], longer_than: Duration, out: &mut impl Write) {
222215
if msgs.is_empty() {
223216
return;
224217
}
225-
// The index of the first element that will be included in the slice when we recurse.
226-
let mut next_start = 0;
227-
let indent = repeat(" ").take(lvl).collect::<String>();
228-
// We output hierarchy for long calls, but sum up all short calls
229-
let mut short = Vec::new();
218+
let children_map = idx_to_children(msgs);
219+
let root_idx = msgs.len() - 1;
220+
print_for_idx(root_idx, &children_map, msgs, longer_than, out);
221+
}
222+
223+
fn print_for_idx(
224+
current_idx: usize,
225+
children_map: &[Vec<usize>],
226+
msgs: &[Message],
227+
longer_than: Duration,
228+
out: &mut impl Write,
229+
) {
230+
let current = &msgs[current_idx];
231+
let current_indent = " ".repeat(current.level);
232+
writeln!(out, "{}{:5}ms - {}", current_indent, current.duration.as_millis(), current.message)
233+
.expect("printing profiling info");
234+
235+
let longer_than_millis = longer_than.as_millis();
236+
let children_indices = &children_map[current_idx];
230237
let mut accounted_for = Duration::default();
231-
for (i, &Message { level, duration, message: ref msg }) in msgs.iter().enumerate() {
232-
if level != lvl {
233-
continue;
234-
}
235-
accounted_for += duration;
236-
if duration.as_millis() > longer_than.as_millis() {
237-
writeln!(out, "{}{:5}ms - {}", indent, duration.as_millis(), msg)
238-
.expect("printing profiling info to stdout");
238+
let mut short_children = BTreeMap::new(); // Use `BTreeMap` to get deterministic output.
239239

240-
print(lvl + 1, &msgs[next_start..i], out, longer_than, Some(duration));
240+
for child_idx in children_indices.iter() {
241+
let child = &msgs[*child_idx];
242+
if child.duration.as_millis() > longer_than_millis {
243+
print_for_idx(*child_idx, children_map, msgs, longer_than, out);
241244
} else {
242-
short.push((msg, duration))
245+
let pair = short_children.entry(&child.message).or_insert((Duration::default(), 0));
246+
pair.0 += child.duration;
247+
pair.1 += 1;
243248
}
249+
accounted_for += child.duration;
250+
}
244251

245-
next_start = i + 1;
252+
for (child_msg, (duration, count)) in short_children.iter() {
253+
let millis = duration.as_millis();
254+
writeln!(out, " {}{:5}ms - {} ({} calls)", current_indent, millis, child_msg, count)
255+
.expect("printing profiling info");
246256
}
247-
short.sort_by_key(|(msg, _time)| *msg);
248-
for (msg, entires) in short.iter().group_by(|(msg, _time)| msg).into_iter() {
249-
let mut count = 0;
250-
let mut total_duration = Duration::default();
251-
entires.for_each(|(_msg, time)| {
252-
count += 1;
253-
total_duration += *time;
254-
});
255-
writeln!(out, "{}{:5}ms - {} ({} calls)", indent, total_duration.as_millis(), msg, count)
256-
.expect("printing profiling info to stdout");
257+
258+
let unaccounted_millis = (current.duration - accounted_for).as_millis();
259+
if !children_indices.is_empty()
260+
&& unaccounted_millis > 0
261+
&& unaccounted_millis > longer_than_millis
262+
{
263+
writeln!(out, " {}{:5}ms - ???", current_indent, unaccounted_millis)
264+
.expect("printing profiling info");
257265
}
266+
}
258267

259-
if let Some(total) = total {
260-
if let Some(unaccounted) = total.checked_sub(accounted_for) {
261-
let unaccounted_millis = unaccounted.as_millis();
262-
if unaccounted_millis > longer_than.as_millis() && unaccounted_millis > 0 {
263-
writeln!(out, "{}{:5}ms - ???", indent, unaccounted_millis)
264-
.expect("printing profiling info to stdout");
265-
}
268+
/// Returns a mapping from an index in the `msgs` to the vector with the indices of its children.
269+
///
270+
/// This assumes that the entries in `msgs` are in the order of when the calls to `profile` finish.
271+
/// In other words, a postorder of the call graph. In particular, the root is the last element of
272+
/// `msgs`.
273+
fn idx_to_children(msgs: &[Message]) -> Vec<Vec<usize>> {
274+
// Initialize with the index of the root; `ancestors` should be never empty.
275+
let mut ancestors = vec![msgs.len() - 1];
276+
let mut result: Vec<Vec<usize>> = vec![];
277+
result.resize_with(msgs.len(), Default::default);
278+
for (idx, msg) in msgs[..msgs.len() - 1].iter().enumerate().rev() {
279+
// We need to find the parent of the current message, i.e., the last ancestor that has a
280+
// level lower than the current message.
281+
while msgs[*ancestors.last().unwrap()].level >= msg.level {
282+
ancestors.pop();
266283
}
284+
result[*ancestors.last().unwrap()].push(idx);
285+
ancestors.push(idx);
286+
}
287+
// Note that above we visited all children from the last to the first one. Let's reverse vectors
288+
// to get the more natural order where the first element is the first child.
289+
for vec in result.iter_mut() {
290+
vec.reverse();
267291
}
292+
result
268293
}
269294

270295
/// Prints backtrace to stderr, useful for debugging.
@@ -373,7 +398,7 @@ mod tests {
373398
Message { level: 1, duration: Duration::from_nanos(2), message: "bar".to_owned() },
374399
Message { level: 0, duration: Duration::from_millis(1), message: "foo".to_owned() },
375400
];
376-
print(0, &msgs, &mut result, Duration::from_millis(0), Some(Duration::from_millis(1)));
401+
print(&msgs, Duration::from_millis(0), &mut result);
377402
// The calls to `bar` are so short that they'll be rounded to 0ms and should get collapsed
378403
// when printing.
379404
assert_eq!(
@@ -389,7 +414,7 @@ mod tests {
389414
Message { level: 1, duration: Duration::from_millis(2), message: "bar".to_owned() },
390415
Message { level: 0, duration: Duration::from_millis(5), message: "foo".to_owned() },
391416
];
392-
print(0, &msgs, &mut result, Duration::from_millis(0), Some(Duration::from_millis(1)));
417+
print(&msgs, Duration::from_millis(0), &mut result);
393418
assert_eq!(
394419
std::str::from_utf8(&result).unwrap().lines().collect::<Vec<_>>(),
395420
vec![
@@ -411,7 +436,7 @@ mod tests {
411436
Message { level: 1, duration: Duration::from_millis(4), message: "bar".to_owned() },
412437
Message { level: 0, duration: Duration::from_millis(9), message: "foo".to_owned() },
413438
];
414-
print(0, &msgs, &mut result, Duration::from_millis(0), None);
439+
print(&msgs, Duration::from_millis(0), &mut result);
415440
assert_eq!(
416441
std::str::from_utf8(&result).unwrap().lines().collect::<Vec<_>>(),
417442
vec![

0 commit comments

Comments
 (0)