Skip to content

Commit 91c7d76

Browse files
committed
Shard by consecutive slices by default
Add --sharding={round-robin,slice} to control this. Fixes #546
1 parent cdba6d1 commit 91c7d76

File tree

6 files changed

+185
-26
lines changed

6 files changed

+185
-26
lines changed

NEWS.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,10 @@
22

33
## Unreleased
44

5+
- New: `--sharding` option to control how mutants are distributed across multiple machines, with choices of `slice` or `round-robin`.
6+
7+
- Changed: The default sharding strategy is now `slice`; previously it was `round-robin`. Sliced sharding gives each worker better locality of reference due to testing changes to related packages, but may make the runtime more uneven between workers if some packages are slower to test than others.
8+
59
- Changed: Tree copying now attempts to use reflinks (copy-on-write) for faster copying on supported filesystems (Btrfs, XFS, APFS), with automatic fallback to regular copying.
610

711
- Book: Recommend using the `-Zunstable-options --fail-fast` argument to test targets to speed up mutation testing, on recent nightly toolchains.

book/src/shards.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,14 @@ Note that the number of shards is set to match the `/8` in the `--shard` argumen
3333

3434
[Sharding works with `--baseline=skip`](baseline.md), to avoid the cost of running the baseline on every shard. But, if you do this, then you must ensure that the tests suite is passing in the baseline, for example by checking it in a previous CI step.
3535

36+
## Sharding algorithm
37+
38+
The `--sharding` command line and config option controls the algorithm by which mutants are distributed across shards.
39+
40+
* `slice` (the default): The first `n / k` mutants are assigned to shard 0, and so on. Because each shard successively builds related versions of the code, incremental builds may be faster, particularly in trees with many packages.
41+
42+
* `round-robin`: Mutant `i` is assigned to shard `i % n`. This distributes the mutants evenly across shards and is likely to cause shards to finish at similar times.
43+
3644
## Performance of sharding
3745

3846
Each mutant does some constant upfront work:

src/main.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -560,7 +560,7 @@ fn main() -> Result<()> {
560560
};
561561
}
562562
if let Some(shard) = &args.shard {
563-
mutants = shard.select(mutants);
563+
mutants = options.sharding().shard(*shard, mutants);
564564
}
565565
if args.list {
566566
print!("{}", list_mutants(&mutants, &options));

src/options.rs

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ use crate::annotation::ResolvedAnnotation;
2828
use crate::config::Config;
2929
use crate::glob::build_glob_set;
3030
use crate::mutant::Mutant;
31+
use crate::shard::Sharding;
3132
use crate::{Args, BaselineStrategy, Phase, Result, ValueEnum};
3233

3334
/// Options for mutation testing, based on both command-line arguments and the config file.
@@ -180,16 +181,22 @@ pub struct Common {
180181
/// Tool used to run test suites: cargo or nextest.
181182
#[arg(long, help_heading = "Execution")]
182183
pub test_tool: Option<TestTool>,
184+
185+
/// Sharding method to use when running with --shards.
186+
#[arg(long, help_heading = "Execution", requires = "shard")]
187+
pub sharding: Option<Sharding>,
183188
}
184189

185190
impl Common {
186191
/// Merge two `MoreOptions`, taking values from self first.
187192
///
188193
/// Scalar values are taken from self if present, otherwise from other.
194+
#[allow(clippy::trivially_copy_pass_by_ref)] // will be bigger later
189195
pub fn merge(&self, other: &Common) -> Common {
190196
Common {
191197
emit_diffs: self.emit_diffs.or(other.emit_diffs),
192198
test_tool: self.test_tool.or(other.test_tool),
199+
sharding: self.sharding.or(other.sharding),
193200
}
194201
}
195202
}
@@ -473,6 +480,10 @@ impl Options {
473480
pub fn test_tool(&self) -> TestTool {
474481
self.common.test_tool.unwrap_or_default()
475482
}
483+
484+
pub fn sharding(&self) -> Sharding {
485+
self.common.sharding.unwrap_or_default()
486+
}
476487
}
477488

478489
/// If the first slices is non-empty, return that, otherwise the second.
@@ -506,6 +517,8 @@ mod test {
506517
assert!(options.common.test_tool.is_none());
507518
assert_eq!(options.test_tool(), TestTool::Cargo);
508519
assert!(!options.cap_lints);
520+
assert!(options.common.sharding.is_none());
521+
assert_eq!(options.sharding(), Sharding::Slice);
509522
}
510523

511524
#[test]
@@ -1132,4 +1145,37 @@ mod test {
11321145
let options = Options::new(&args, &config).unwrap();
11331146
assert!(!options.copy_vcs);
11341147
}
1148+
1149+
#[test]
1150+
fn sharding() {
1151+
let args = Args::try_parse_from(["mutants", "--sharding=slice", "--shard=0/10"]).unwrap();
1152+
let config = Config::default();
1153+
let options = Options::new(&args, &config).unwrap();
1154+
assert_eq!(options.sharding(), Sharding::Slice);
1155+
1156+
let args = Args::parse_from(["mutants", "--sharding=round-robin", "--shard=0/10"]);
1157+
let config = Config::default();
1158+
let options = Options::new(&args, &config).unwrap();
1159+
assert_eq!(options.sharding(), Sharding::RoundRobin);
1160+
1161+
let args = Args::parse_from(["mutants"]);
1162+
let config = Config::from_str(r#"sharding = "slice""#).unwrap();
1163+
let options = Options::new(&args, &config).unwrap();
1164+
assert_eq!(options.sharding(), Sharding::Slice);
1165+
1166+
let args = Args::parse_from(["mutants"]);
1167+
let config = Config::from_str(r#"sharding = "round-robin""#).unwrap();
1168+
dbg!(&config);
1169+
let options = Options::new(&args, &config).unwrap();
1170+
assert_eq!(options.sharding(), Sharding::RoundRobin);
1171+
1172+
let args = Args::parse_from(["mutants"]);
1173+
let config = Config::from_str("").unwrap();
1174+
let options = Options::new(&args, &config).unwrap();
1175+
assert_eq!(
1176+
options.sharding(),
1177+
Sharding::Slice,
1178+
"Default sharding should be slice"
1179+
);
1180+
}
11351181
}

src/shard.rs

Lines changed: 70 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
1-
// Copyright 2023 Martin Pool
2-
31
//! Sharding parameters.
42
53
use std::str::FromStr;
64

75
use anyhow::{anyhow, ensure, Context, Error};
6+
use clap::ValueEnum;
7+
use schemars::JsonSchema;
8+
use serde::Deserialize;
89

910
/// Select mutants for a particular shard of the total list.
1011
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
@@ -15,17 +16,6 @@ pub struct Shard {
1516
pub n: usize,
1617
}
1718

18-
impl Shard {
19-
/// Select the mutants that should be run for this shard.
20-
pub fn select<M, I: IntoIterator<Item = M>>(&self, mutants: I) -> Vec<M> {
21-
mutants
22-
.into_iter()
23-
.enumerate()
24-
.filter_map(|(i, m)| if i % self.n == self.k { Some(m) } else { None })
25-
.collect()
26-
}
27-
}
28-
2919
impl FromStr for Shard {
3020
type Err = Error;
3121

@@ -38,6 +28,57 @@ impl FromStr for Shard {
3828
}
3929
}
4030

31+
/// Method for sharding.
32+
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Deserialize, ValueEnum, JsonSchema)]
33+
#[serde(rename_all = "kebab-case")] // consistent with Clap default
34+
pub enum Sharding {
35+
/// Run mutant `i` on shard `i % k`.
36+
///
37+
/// This was the default up to cargo-mutants 25.3.1.
38+
///
39+
/// This distributes mutants more evenly and will likely generate more equal completion
40+
/// times, but it has less locality of reference within each shard's cache and so may
41+
/// cause more build time.
42+
RoundRobin,
43+
44+
/// Run consecutive ranges of mutants on each shard: the first `n/k` on shard 0, etc. (default)
45+
///
46+
/// This makes the incremental change between each build likely to be smaller and
47+
/// so may reduce build time, but it may also lead to more unbalanced shards.
48+
#[default]
49+
Slice,
50+
}
51+
52+
impl Sharding {
53+
/// Select the mutants that should be run for this shard.
54+
pub fn shard<M>(self, shard: Shard, mut mutants: Vec<M>) -> Vec<M> {
55+
match self {
56+
Sharding::RoundRobin => mutants
57+
.into_iter()
58+
.enumerate()
59+
.filter_map(|(i, m)| {
60+
if i % shard.n == shard.k {
61+
Some(m)
62+
} else {
63+
None
64+
}
65+
})
66+
.collect(),
67+
Sharding::Slice => {
68+
let total = mutants.len();
69+
let chunk_size = total.div_ceil(shard.n);
70+
let start = shard.k * chunk_size;
71+
let end = ((shard.k + 1) * chunk_size).min(total);
72+
if start >= total {
73+
Vec::new()
74+
} else {
75+
mutants.drain(start..end).collect()
76+
}
77+
}
78+
}
79+
}
80+
}
81+
4182
#[cfg(test)]
4283
mod tests {
4384
use super::*;
@@ -74,10 +115,21 @@ mod tests {
74115
}
75116

76117
#[test]
77-
fn shard_select() {
78-
assert_eq!(
79-
Shard::from_str("1/4").unwrap().select(0..10).as_slice(),
80-
&[1, 5, 9]
81-
);
118+
fn shard_round_robin() {
119+
// This test works on ints instead of real mutants just for ease of testing.
120+
let fake_mutants: Vec<usize> = (0..10).collect();
121+
for (k, expect) in [
122+
(0, [0, 4, 8].as_slice()),
123+
(1, &[1, 5, 9]),
124+
(2, &[2, 6]),
125+
(3, &[3, 7]),
126+
] {
127+
assert_eq!(
128+
Sharding::RoundRobin
129+
.shard(Shard { k, n: 4 }, fake_mutants.clone())
130+
.as_slice(),
131+
expect
132+
);
133+
}
82134
}
83135
}

tests/shard.rs

Lines changed: 56 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -28,12 +28,16 @@ fn shard_divides_all_mutants() {
2828
.collect_vec();
2929

3030
let n_shards = 5;
31-
let shard_lists = (0..n_shards)
31+
let rr_shard_lists = (0..n_shards)
3232
.map(|k| {
3333
String::from_utf8(
3434
run()
3535
.args(common_args)
36-
.args(["--shard", &format!("{k}/{n_shards}")])
36+
.args([
37+
"--shard",
38+
&format!("{k}/{n_shards}"),
39+
"--sharding=round-robin",
40+
])
3741
.assert()
3842
.success()
3943
.get_output()
@@ -53,28 +57,73 @@ fn shard_divides_all_mutants() {
5357
// If we had a bug where we shuffled before sharding, then the shards would
5458
// see inconsistent lists and this test would fail in at least some attempts.
5559
assert_eq!(
56-
shard_lists.iter().flatten().sorted().collect_vec(),
60+
rr_shard_lists.iter().flatten().sorted().collect_vec(),
5761
full_list.iter().sorted().collect_vec()
5862
);
5963

6064
// The shards should also be approximately the same size.
61-
let shard_lens = shard_lists.iter().map(|l| l.len()).collect_vec();
65+
let shard_lens = rr_shard_lists.iter().map(|l| l.len()).collect_vec();
6266
assert!(shard_lens.iter().max().unwrap() - shard_lens.iter().min().unwrap() <= 1);
6367

6468
// And the shards are disjoint
6569
for i in 0..n_shards {
6670
for j in 0..n_shards {
6771
if i != j {
6872
assert!(
69-
shard_lists[i].iter().all(|m| !shard_lists[j].contains(m)),
73+
rr_shard_lists[i]
74+
.iter()
75+
.all(|m| !rr_shard_lists[j].contains(m)),
7076
"shard {} contains {}",
7177
j,
72-
shard_lists[j]
78+
rr_shard_lists[j]
7379
.iter()
74-
.filter(|m| shard_lists[i].contains(m))
80+
.filter(|m| rr_shard_lists[j].contains(m))
7581
.join(", ")
7682
);
7783
}
7884
}
7985
}
86+
87+
// If you reassemble the round-robin shards in order, you get the original order back.
88+
//
89+
// To do so: cycle around the list of shards, taking one from each shard in order, until
90+
// we get to the end of any list.
91+
let mut reassembled = Vec::new();
92+
let mut rr_iters = rr_shard_lists
93+
.clone()
94+
.into_iter()
95+
.map(|l| l.into_iter())
96+
.collect_vec();
97+
let mut i = 0;
98+
let mut limit = 0;
99+
for name in rr_iters[i].by_ref() {
100+
reassembled.push(name);
101+
i = (i + 1) % n_shards;
102+
limit += 1;
103+
assert!(limit < full_list.len() * 2, "too many iterations");
104+
}
105+
106+
// Check with slice sharding, the new default
107+
let slice_shard_lists = (0..n_shards)
108+
.map(|k| {
109+
String::from_utf8(
110+
run()
111+
.args(common_args)
112+
.args([&format!("--shard={k}/{n_shards}")]) // "--sharding=slice"
113+
.assert()
114+
.success()
115+
.get_output()
116+
.stdout
117+
.clone(),
118+
)
119+
.unwrap()
120+
.lines()
121+
.map(ToOwned::to_owned)
122+
.collect_vec()
123+
})
124+
.collect_vec();
125+
126+
// These can just be concatenated
127+
let slice_reassembled = slice_shard_lists.into_iter().flatten().collect_vec();
128+
assert_eq!(slice_reassembled, full_list);
80129
}

0 commit comments

Comments
 (0)