Skip to content

Commit 0ca7856

Browse files
committed
part-bench: allow fine grained thread counts
It's about time
1 parent 1df4e6a commit 0ca7856

File tree

2 files changed

+89
-16
lines changed

2 files changed

+89
-16
lines changed

tools/doc/mesh-part.1.scd

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,10 +61,27 @@ Options specific to *mesh-part*:
6161

6262
Options specific to *part-bench*:
6363

64-
*-e, --efficiency*
64+
*-e, --efficiency* [threads]
6565
Measure strong scaling by running the algorithm with different amounts of
6666
threads.
6767

68+
By default, part-bench starts at 1 thread, then doubles the thread count
69+
until it exceeds the number of available hardware threads.
70+
71+
You can specify arbitrary thread counts in the following manner:
72+
73+
```
74+
threads = range *( , range )
75+
range = VALUE / ( FROM : TO ) / ( FROM : TO : STEP )
76+
```
77+
78+
For example, the following invocation will run the algorithms for 1 thread,
79+
then 2, 6, 10, ... to 62, then 64, 72, 80, ... to 248, and finally 256.
80+
81+
part-bench -e 1,2:64:4,64:256:8,256
82+
83+
Ranges are half-open: FROM is included and TO is excluded.
84+
6885
*-b, --baseline* <name>
6986
*-s, --save-baseline* <name>
7087
Compare against a named baseline. If *--save-baseline* is specified, the

tools/src/bin/part-bench.rs

Lines changed: 71 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,46 @@ use mesh_io::Mesh;
1313
use std::env;
1414
use std::fs;
1515
use std::io;
16+
use std::iter;
17+
use std::ops::Range;
1618
use std::thread::sleep;
1719
use std::time::Duration;
1820

1921
const USAGE: &str = "Usage: part-bench [options]";
2022

23+
/// A half-open range of thread counts together with the stride used to walk
/// through it.
struct RangeWithStep {
    /// Thread counts to visit; the end bound itself is excluded.
    range: Range<usize>,
    /// Distance between two consecutive thread counts.
    step_by: usize,
}
27+
28+
fn parse_ranges(s: &str) -> Result<Vec<RangeWithStep>> {
29+
s.split(',')
30+
.map(|range_str| {
31+
let components: Vec<&str> = range_str.split(':').collect();
32+
let (range, step_by) = match *components {
33+
[] => anyhow::bail!("missing range definition"),
34+
[value] => {
35+
let value = value.parse()?;
36+
(value..value + 1, 1)
37+
}
38+
[from, to] => (from.parse()?..to.parse()?, 1),
39+
[from, to, step_by] => (from.parse()?..to.parse()?, step_by.parse()?),
40+
_ => anyhow::bail!("excessive arguments"),
41+
};
42+
Ok(RangeWithStep { range, step_by })
43+
})
44+
.collect()
45+
}
46+
47+
impl IntoIterator for RangeWithStep {
48+
type Item = usize;
49+
type IntoIter = std::iter::StepBy<Range<usize>>;
50+
51+
fn into_iter(self) -> Self::IntoIter {
52+
self.range.step_by(self.step_by)
53+
}
54+
}
55+
2156
fn criterion_options(options: &mut getopts::Options) {
2257
// TODO use Criterion::configure_with_args when it respects POSIX's "--"
2358
// TODO more options if needed
@@ -94,6 +129,40 @@ fn build_pool(thread_count: usize) -> rayon::ThreadPool {
94129
.unwrap()
95130
}
96131

132+
fn measure_efficiency(
133+
c: &mut Criterion,
134+
benchmark_name: String,
135+
thread_counts: Option<String>,
136+
mut benchmark: impl FnMut() + Send,
137+
) -> Result<()> {
138+
let mut g = c.benchmark_group(benchmark_name);
139+
140+
let thread_counts: Box<dyn Iterator<Item = usize>> = match thread_counts {
141+
Some(s) => {
142+
let ranges = parse_ranges(&s).context("failed to parse the value of -e")?;
143+
Box::new(ranges.into_iter().flatten())
144+
}
145+
None => {
146+
let max_threads = rayon::current_num_threads();
147+
let it = iter::successors(Some(1), move |t| (t * 2 <= max_threads).then(|| t * 2));
148+
Box::new(it)
149+
}
150+
};
151+
let mut thread_counts = thread_counts.peekable();
152+
153+
while let Some(thread_count) = thread_counts.next() {
154+
let pool = build_pool(thread_count);
155+
let benchmark_name = format!("threads={thread_count}");
156+
g.bench_function(&benchmark_name, |b| pool.install(|| b.iter(&mut benchmark)));
157+
if thread_counts.peek().is_some() {
158+
println!("Waiting 4s for CPUs to cool down...");
159+
sleep(Duration::from_secs(4));
160+
}
161+
}
162+
163+
Ok(())
164+
}
165+
97166
fn main_d<const D: usize>(
98167
matches: getopts::Matches,
99168
edge_weights: coupe_tools::EdgeWeightDistribution,
@@ -163,20 +232,7 @@ where
163232
}
164233
};
165234
if matches.opt_present("e") {
166-
let max_threads = rayon::current_num_threads();
167-
let mut g = c.benchmark_group(benchmark_name);
168-
let mut thread_count = 1;
169-
loop {
170-
let pool = build_pool(thread_count);
171-
let benchmark_name = format!("threads={thread_count}");
172-
g.bench_function(&benchmark_name, |b| pool.install(|| b.iter(&mut benchmark)));
173-
thread_count *= 2;
174-
if max_threads < thread_count {
175-
break;
176-
}
177-
println!("Waiting 4s for CPUs to cool down...");
178-
sleep(Duration::from_secs(4));
179-
}
235+
measure_efficiency(&mut c, benchmark_name, matches.opt_str("e"), benchmark)?;
180236
} else {
181237
c.bench_function(&benchmark_name, |b| b.iter(&mut benchmark));
182238
}
@@ -196,7 +252,7 @@ fn main() -> Result<()> {
196252
"name of the algorithm to run, see ALGORITHMS",
197253
"NAME",
198254
);
199-
options.optflag("e", "efficiency", "Benchmark efficiency");
255+
options.optflagopt("e", "efficiency", "Benchmark efficiency", "THREADS");
200256
options.optopt(
201257
"E",
202258
"edge-weights",

0 commit comments

Comments
 (0)