Skip to content

Commit 87a6287

Browse files
committed
Improve AFL prefix filter fuzzer with clustered keys and ping-pong
Use custom `Arbitrary` impls for keys and prefixes instead of raw `Vec<u8>`:
- `ClusteredKey`: first byte from 0..8, length 1..=9, so keys naturally group into a small number of prefix buckets and filter lookups frequently hit real data.
- `ClusteredPrefix`: 1..=3 bytes from the same 0..8 alphabet, so prefixes are likely to match actual key prefixes.
- Key lengths of 1..=9 combined with extractors up to length 4 mean AFL naturally explores both in-domain and out-of-domain keys.

Also restores `PrefixPingPong` (bidirectional iterator stepping), which exercises iterator state-machine bugs that forward-only scans miss.
1 parent f26d827 commit 87a6287

1 file changed

Lines changed: 149 additions & 29 deletions

File tree

fuzz/prefix_filter/src/main.rs

Lines changed: 149 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -59,18 +59,85 @@ impl BpkChoice {
5959
}
6060
}
6161

62+
// ---------------------------------------------------------------------------
63+
// Clustered key/prefix types — small alphabet, bounded length
64+
// ---------------------------------------------------------------------------
65+
66+
/// A key with first byte drawn from a small alphabet (0..8) and bounded
67+
/// length (1..=9). This ensures keys cluster into a small number of prefix
68+
/// groups, so prefix scans and filter lookups frequently hit real data.
69+
#[derive(Debug, Clone)]
70+
struct ClusteredKey(Vec<u8>);
71+
72+
impl<'a> Arbitrary<'a> for ClusteredKey {
73+
fn arbitrary(u: &mut Unstructured<'a>) -> arbitrary::Result<Self> {
74+
let len: usize = u.int_in_range(1..=9)?;
75+
let first_byte: u8 = u.int_in_range(0..=7)?;
76+
let mut key = Vec::with_capacity(len);
77+
key.push(first_byte);
78+
for _ in 1..len {
79+
key.push(u8::arbitrary(u)?);
80+
}
81+
Ok(ClusteredKey(key))
82+
}
83+
}
84+
85+
/// A prefix with length 1..=3, each byte from the same small alphabet (0..8).
86+
/// Likely to match actual key prefixes since keys share the same first-byte space.
87+
#[derive(Debug, Clone)]
88+
struct ClusteredPrefix(Vec<u8>);
89+
90+
impl<'a> Arbitrary<'a> for ClusteredPrefix {
91+
fn arbitrary(u: &mut Unstructured<'a>) -> arbitrary::Result<Self> {
92+
let len: usize = u.int_in_range(1..=3)?;
93+
let mut prefix = Vec::with_capacity(len);
94+
for _ in 0..len {
95+
prefix.push(u.int_in_range(0..=7)?);
96+
}
97+
Ok(ClusteredPrefix(prefix))
98+
}
99+
}
100+
101+
// ---------------------------------------------------------------------------
102+
// Operations using clustered keys/prefixes
103+
// ---------------------------------------------------------------------------
104+
62105
#[derive(Arbitrary, Debug, Clone)]
63106
enum Op {
64-
Insert { key: Vec<u8>, value: Vec<u8> },
65-
Delete { key: Vec<u8> },
107+
Insert {
108+
key: ClusteredKey,
109+
value_len: u8,
110+
value_seed: u8,
111+
},
112+
Delete {
113+
key: ClusteredKey,
114+
},
66115
Flush,
67116
Compact,
68117
MajorCompact,
69-
Get { key: Vec<u8> },
70-
PrefixScan { prefix: Vec<u8> },
71-
PrefixScanRev { prefix: Vec<u8> },
72-
RangeScan { start: Vec<u8>, end: Vec<u8> },
73-
RangeScanRev { start: Vec<u8>, end: Vec<u8> },
118+
Get {
119+
key: ClusteredKey,
120+
},
121+
PrefixScan {
122+
prefix: ClusteredPrefix,
123+
},
124+
PrefixScanRev {
125+
prefix: ClusteredPrefix,
126+
},
127+
/// Bidirectional iterator stepping on a prefix scan.
128+
/// Each bool in `directions` controls: true = next_back, false = next.
129+
PrefixPingPong {
130+
prefix: ClusteredPrefix,
131+
directions: Vec<bool>,
132+
},
133+
RangeScan {
134+
start: ClusteredKey,
135+
end: ClusteredKey,
136+
},
137+
RangeScanRev {
138+
start: ClusteredKey,
139+
end: ClusteredKey,
140+
},
74141
Reopen,
75142
}
76143

@@ -110,6 +177,18 @@ fn open_tree(
110177
config.open().unwrap()
111178
}
112179

180+
/// Returns the two byte slices as a `(low, high)` pair.
///
/// Slices are compared lexicographically. When they compare equal the pair is
/// returned in argument order, so callers can detect an empty range with
/// `low == high`.
fn ordered_range<'a>(a: &'a [u8], b: &'a [u8]) -> (&'a [u8], &'a [u8]) {
    if a <= b {
        (a, b)
    } else {
        (b, a)
    }
}
187+
188+
/// Builds a deterministic value of `len` bytes from a one-byte `seed`.
///
/// Byte `i` of the result is `seed + i`, wrapping around at 255. A `len` of
/// zero yields an empty value.
fn make_value(len: u8, seed: u8) -> Vec<u8> {
    (0..len).map(|i| seed.wrapping_add(i)).collect()
}
191+
113192
// ---------------------------------------------------------------------------
114193
// Oracle test: tree with prefix extractor vs tree without
115194
// ---------------------------------------------------------------------------
@@ -136,11 +215,13 @@ fn run_oracle_test(extractor: SharedPrefixExtractor, bloom_bpk: f32, ops: &[Op])
136215

137216
for (i, op) in ops.iter().enumerate() {
138217
match op {
139-
Op::Insert { key, value } => {
140-
// Skip empty keys (lsm-tree requires non-empty keys)
141-
if key.is_empty() {
142-
continue;
143-
}
218+
Op::Insert {
219+
key,
220+
value_len,
221+
value_seed,
222+
} => {
223+
let key = &key.0;
224+
let value = make_value(*value_len, *value_seed);
144225
let s1 = seqno_with.next();
145226
let s2 = seqno_without.next();
146227
tree_with.insert(key.clone(), value.clone(), s1);
@@ -150,9 +231,7 @@ fn run_oracle_test(extractor: SharedPrefixExtractor, bloom_bpk: f32, ops: &[Op])
150231
}
151232

152233
Op::Delete { key } => {
153-
if key.is_empty() {
154-
continue;
155-
}
234+
let key = &key.0;
156235
let s1 = seqno_with.next();
157236
let s2 = seqno_without.next();
158237
tree_with.remove(key.as_slice(), s1);
@@ -181,9 +260,7 @@ fn run_oracle_test(extractor: SharedPrefixExtractor, bloom_bpk: f32, ops: &[Op])
181260
}
182261

183262
Op::Get { key } => {
184-
if key.is_empty() {
185-
continue;
186-
}
263+
let key = &key.0;
187264
let s1 = vis_with.get();
188265
let s2 = vis_without.get();
189266
let r1 = tree_with.get(key.as_slice(), s1).unwrap();
@@ -192,6 +269,7 @@ fn run_oracle_test(extractor: SharedPrefixExtractor, bloom_bpk: f32, ops: &[Op])
192269
}
193270

194271
Op::PrefixScan { prefix } => {
272+
let prefix = &prefix.0;
195273
let s1 = vis_with.get();
196274
let s2 = vis_without.get();
197275
let a = collect_kv(tree_with.prefix(prefix.clone(), s1, None));
@@ -200,15 +278,57 @@ fn run_oracle_test(extractor: SharedPrefixExtractor, bloom_bpk: f32, ops: &[Op])
200278
}
201279

202280
Op::PrefixScanRev { prefix } => {
281+
let prefix = &prefix.0;
203282
let s1 = vis_with.get();
204283
let s2 = vis_without.get();
205284
let a = collect_kv(tree_with.prefix(prefix.clone(), s1, None).rev());
206285
let b = collect_kv(tree_without.prefix(prefix.clone(), s2, None).rev());
207286
assert_eq!(a, b, "op {i}: reverse prefix scan mismatch for {prefix:?}");
208287
}
209288

289+
Op::PrefixPingPong { prefix, directions } => {
290+
let prefix = &prefix.0;
291+
let s1 = vis_with.get();
292+
let s2 = vis_without.get();
293+
let mut iter_with = tree_with.prefix(prefix.clone(), s1, None);
294+
let mut iter_without = tree_without.prefix(prefix.clone(), s2, None);
295+
296+
for (j, &go_back) in directions.iter().enumerate() {
297+
let item_with = if go_back {
298+
iter_with.next_back()
299+
} else {
300+
iter_with.next()
301+
};
302+
let item_without = if go_back {
303+
iter_without.next_back()
304+
} else {
305+
iter_without.next()
306+
};
307+
308+
let kv_with = item_with.and_then(|g| g.into_inner().ok());
309+
let kv_without = item_without.and_then(|g| g.into_inner().ok());
310+
311+
match (&kv_with, &kv_without) {
312+
(Some((k1, v1)), Some((k2, v2))) => {
313+
assert_eq!(
314+
(k1.as_ref(), v1.as_ref()),
315+
(k2.as_ref(), v2.as_ref()),
316+
"op {i} step {j}: ping-pong mismatch for prefix {prefix:?}, go_back={go_back}",
317+
);
318+
}
319+
(None, None) => {}
320+
_ => {
321+
panic!(
322+
"op {i} step {j}: ping-pong length mismatch for prefix {prefix:?}: \
323+
with={kv_with:?}, without={kv_without:?}",
324+
);
325+
}
326+
}
327+
}
328+
}
329+
210330
Op::RangeScan { start, end } => {
211-
let (lo, hi) = ordered_range(start, end);
331+
let (lo, hi) = ordered_range(&start.0, &end.0);
212332
if lo == hi {
213333
continue;
214334
}
@@ -220,7 +340,7 @@ fn run_oracle_test(extractor: SharedPrefixExtractor, bloom_bpk: f32, ops: &[Op])
220340
}
221341

222342
Op::RangeScanRev { start, end } => {
223-
let (lo, hi) = ordered_range(start, end);
343+
let (lo, hi) = ordered_range(&start.0, &end.0);
224344
if lo == hi {
225345
continue;
226346
}
@@ -251,14 +371,6 @@ fn run_oracle_test(extractor: SharedPrefixExtractor, bloom_bpk: f32, ops: &[Op])
251371
}
252372
}
253373

254-
fn ordered_range<'a>(a: &'a [u8], b: &'a [u8]) -> (&'a [u8], &'a [u8]) {
255-
if a <= b {
256-
(a, b)
257-
} else {
258-
(b, a)
259-
}
260-
}
261-
262374
// ---------------------------------------------------------------------------
263375
// AFL entry point
264376
// ---------------------------------------------------------------------------
@@ -270,10 +382,18 @@ fn main() {
270382
return;
271383
};
272384

273-
// Limit op count so each iteration stays fast for AFL
385+
// Limit op count so each iteration stays fast for AFL.
386+
// Also cap PrefixPingPong directions to avoid very slow iterations.
274387
if input.ops.is_empty() || input.ops.len() > 200 {
275388
return;
276389
}
390+
for op in &input.ops {
391+
if let Op::PrefixPingPong { directions, .. } = op {
392+
if directions.len() > 50 {
393+
return;
394+
}
395+
}
396+
}
277397

278398
run_oracle_test(
279399
input.extractor.into_extractor(),

0 commit comments

Comments
 (0)