Skip to content

Commit 70d77fb

Browse files
committed
Add support for defining custom mutators
This adds support for defining custom mutators, as described in https://github.com/google/fuzzing/blob/master/docs/structure-aware-fuzzing.md
1 parent b882dd6 commit 70d77fb

File tree

5 files changed

+294
-0
lines changed

5 files changed

+294
-0
lines changed

Cargo.toml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,4 +21,9 @@ arbitrary-derive = ["arbitrary/derive"]
2121
members = [
2222
"./example",
2323
"./example_arbitrary",
24+
"./example_mutator",
2425
]
26+
27+
[dev-dependencies]
28+
flate2 = "1.0.20"
29+
rand = "0.8.3"

ci/script.sh

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,3 +39,18 @@ RUST_LIBFUZZER_DEBUG_PATH=$(pwd)/debug_output \
3939
cat $(pwd)/debug_output
4040
grep -q Rgb $(pwd)/debug_output
4141
popd
42+
43+
pushd ./example_mutator
44+
cargo rustc \
45+
--release \
46+
-- \
47+
-Cpasses='sancov' \
48+
-Cllvm-args=-sanitizer-coverage-level=3 \
49+
-Cllvm-args=-sanitizer-coverage-trace-compares \
50+
-Cllvm-args=-sanitizer-coverage-inline-8bit-counters \
51+
-Cllvm-args=-sanitizer-coverage-stack-depth \
52+
-Cllvm-args=-sanitizer-coverage-trace-geps \
53+
-Cllvm-args=-sanitizer-coverage-prune-blocks=0 \
54+
-Zsanitizer=address
55+
(! $CARGO_TARGET_DIR/release/example_mutator -runs=10000000)
56+
popd

example_mutator/Cargo.toml

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
[package]
2+
name = "example_mutator"
3+
version = "0.1.0"
4+
authors = ["Nick Fitzgerald <[email protected]>"]
5+
edition = "2018"
6+
7+
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
8+
9+
[dependencies]
10+
flate2 = "1.0.20"
11+
libfuzzer-sys = { path = ".." }

example_mutator/src/main.rs

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
#![no_main]
2+
3+
use flate2::{read::GzDecoder, write::GzEncoder, Compression};
4+
use libfuzzer_sys::{fuzz_mutator, fuzz_target};
5+
use std::io::{Read, Write};
6+
7+
fuzz_target!(|data: &[u8]| {
    // The target "crashes" only when the input is decompressible AND the
    // decompressed payload begins with "boom"; anything that fails to
    // decompress is silently ignored.
    let hits_boom = decompress(data).map_or(false, |payload| payload.starts_with(b"boom"));
    if hits_boom {
        panic!();
    }
});
15+
16+
fuzz_mutator!(
    |data: &mut [u8], size: usize, max_size: usize, _seed: u32| {
        // Work on the decompressed payload rather than the raw bytes. When
        // the current input does not decompress, start from a dummy value.
        let mut plain = match decompress(&data[..size]) {
            Some(bytes) => bytes,
            None => b"hi".to_vec(),
        };

        // Grow the vec to its full capacity (zero-filled) so that the spare
        // room is available to `libFuzzer`'s default mutator for insertion
        // mutations, then let it mutate the first `original_len` bytes.
        let original_len = plain.len();
        let capacity = plain.capacity();
        plain.resize(capacity, 0);
        let mutated_len = libfuzzer_sys::fuzzer_mutate(&mut plain, original_len);

        // Compress the mutated payload again.
        let recompressed = compress(&plain[..mutated_len]);

        // Write as much of the recompressed payload as fits within
        // `max_size` back into `data`, and report how many bytes were written.
        let new_size = max_size.min(recompressed.len());
        data[..new_size].copy_from_slice(&recompressed[..new_size]);
        new_size
    }
);
38+
39+
fn decompress(data: &[u8]) -> Option<Vec<u8>> {
40+
let mut decoder = GzDecoder::new(data);
41+
let mut decompressed = Vec::new();
42+
if decoder.read_to_end(&mut decompressed).is_ok() {
43+
Some(decompressed)
44+
} else {
45+
None
46+
}
47+
}
48+
49+
fn compress(data: &[u8]) -> Vec<u8> {
50+
let mut encoder = GzEncoder::new(Vec::new(), Compression::default());
51+
encoder
52+
.write_all(data)
53+
.expect("writing into a vec is infallible");
54+
encoder.finish().expect("writing into a vec is infallible")
55+
}

src/lib.rs

Lines changed: 208 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@ extern "C" {
1717
// We do not actually cross the FFI bound here.
1818
#[allow(improper_ctypes)]
1919
fn rust_fuzzer_test_input(input: &[u8]);
20+
21+
fn LLVMFuzzerMutate(data: *mut u8, size: usize, max_size: usize) -> usize;
2022
}
2123

2224
#[doc(hidden)]
@@ -188,3 +190,209 @@ macro_rules! fuzz_target {
188190
}
189191
};
190192
}
193+
194+
/// Define a custom mutator.
195+
///
196+
/// This is optional, and libFuzzer will use its own, default mutation strategy
197+
/// if this is not provided.
198+
///
199+
/// You might consider using a custom mutator when your fuzz target is very
200+
/// particular about the shape of its input:
201+
///
202+
/// * You want to fuzz "deeper" than just the parser.
203+
/// * The input contains checksums that have to match the hash of some subset of
204+
/// the data or else the whole thing is invalid, and therefore mutating any of
205+
/// that subset means you need to recompute the checksums.
206+
/// * Small random changes to the input buffer make it invalid.
207+
///
208+
/// That is, a custom mutator is useful in similar situations where [a `T:
209+
/// Arbitrary` input type](macro.fuzz_target.html#arbitrary-input-types) is
210+
/// useful. Note that the two approaches are not mutually exclusive; you can use
211+
/// whichever is easier for your problem domain or both!
212+
///
213+
/// ## Implementation Contract
214+
///
215+
/// The original, unmodified input is given in `data[..size]`.
216+
///
217+
/// You must modify the data in place and return the new size.
218+
///
219+
/// The new size should not be greater than `max_size`. If this is not the case,
220+
/// then the `data` will be truncated to fit within `max_size`. Note that
221+
/// `max_size < size` is possible when shrinking test cases.
222+
///
223+
/// You must produce the same mutation given the same `seed`. Generally, when
224+
/// choosing what kind of mutation to make or where to mutate, you should start
225+
/// by creating a random number generator (RNG) that is seeded with the given
226+
/// `seed` and then consult the RNG whenever making a decision:
227+
///
228+
/// ```no_run
229+
/// #![no_main]
230+
///
231+
/// use rand::{rngs::StdRng, Rng, SeedableRng};
232+
///
233+
/// libfuzzer_sys::fuzz_mutator!(|data: &mut [u8], size: usize, max_size: usize, seed: u32| {
234+
/// let mut rng = StdRng::seed_from_u64(seed as u64);
235+
///
236+
/// # let first_mutation = |_, _, _, _| todo!();
237+
/// # let second_mutation = |_, _, _, _| todo!();
238+
/// # let third_mutation = |_, _, _, _| todo!();
239+
/// # let fourth_mutation = |_, _, _, _| todo!();
240+
/// // Choose which of our four supported kinds of mutations we want to make.
241+
/// match rng.gen_range(0..4) {
242+
/// 0 => first_mutation(rng, data, size, max_size),
243+
/// 1 => second_mutation(rng, data, size, max_size),
244+
/// 2 => third_mutation(rng, data, size, max_size),
245+
/// 3 => fourth_mutation(rng, data, size, max_size),
246+
/// _ => unreachable!()
247+
/// }
248+
/// });
249+
/// ```
250+
///
251+
/// ## Example: Compression
252+
///
253+
/// Consider a simple fuzz target that takes compressed data as input,
254+
/// decompresses it, and then asserts that the decompressed data doesn't begin
255+
/// with "boom". It is difficult for `libFuzzer` (or any other fuzzer) to crash
256+
/// this fuzz target because nearly all mutations it makes will invalidate the
257+
/// compression format. Therefore, we use a custom mutator that decompresses the
258+
/// raw input, mutates the decompressed data, and then recompresses it. This
259+
/// allows `libFuzzer` to quickly discover crashing inputs.
260+
///
261+
/// ```no_run
262+
/// #![no_main]
263+
///
264+
/// use flate2::{read::GzDecoder, write::GzEncoder, Compression};
265+
/// use libfuzzer_sys::{fuzz_mutator, fuzz_target};
266+
/// use std::io::{Read, Write};
267+
///
268+
/// fuzz_target!(|data: &[u8]| {
269+
/// // Decompress the input data and crash if it starts with "boom".
270+
/// if let Some(data) = decompress(data) {
271+
/// if data.starts_with(b"boom") {
272+
/// panic!();
273+
/// }
274+
/// }
275+
/// });
276+
///
277+
/// fuzz_mutator!(
278+
/// |data: &mut [u8], size: usize, max_size: usize, _seed: u32| {
279+
/// // Decompress the input data. If that fails, use a dummy value.
280+
/// let mut decompressed = decompress(&data[..size]).unwrap_or_else(|| b"hi".to_vec());
281+
///
282+
/// // Mutate the decompressed data with `libFuzzer`'s default mutator. Make
283+
/// // the `decompressed` vec's extra capacity available for insertion
284+
/// // mutations via `resize`.
285+
/// let len = decompressed.len();
286+
/// let cap = decompressed.capacity();
287+
/// decompressed.resize(cap, 0);
288+
/// let new_decompressed_size = libfuzzer_sys::fuzzer_mutate(&mut decompressed, len);
289+
///
290+
/// // Recompress the mutated data.
291+
/// let compressed = compress(&decompressed[..new_decompressed_size]);
292+
///
293+
/// // Copy the recompressed mutated data into `data` and return the new size.
294+
/// let new_size = std::cmp::min(max_size, compressed.len());
295+
/// data[..new_size].copy_from_slice(&compressed[..new_size]);
296+
/// new_size
297+
/// }
298+
/// );
299+
///
300+
/// fn decompress(compressed_data: &[u8]) -> Option<Vec<u8>> {
301+
/// let mut decoder = GzDecoder::new(compressed_data);
302+
/// let mut decompressed = Vec::new();
303+
/// if decoder.read_to_end(&mut decompressed).is_ok() {
304+
/// Some(decompressed)
305+
/// } else {
306+
/// None
307+
/// }
308+
/// }
309+
///
310+
/// fn compress(data: &[u8]) -> Vec<u8> {
311+
/// let mut encoder = GzEncoder::new(Vec::new(), Compression::default());
312+
/// encoder
313+
/// .write_all(data)
314+
/// .expect("writing into a vec is infallible");
315+
/// encoder.finish().expect("writing into a vec is infallible")
316+
/// }
317+
/// ```
318+
///
319+
/// This example is inspired by [a similar example from the official `libFuzzer`
320+
/// docs](https://github.com/google/fuzzing/blob/master/docs/structure-aware-fuzzing.md#example-compression).
321+
///
322+
/// ## More Example Ideas
323+
///
324+
/// * A PNG custom mutator that decodes a PNG, mutates the image, and then
325+
/// re-encodes the mutated image as a new PNG.
326+
///
327+
/// * A [`serde`](https://serde.rs/) custom mutator that deserializes your
328+
/// structure, mutates it, and then reserializes it.
329+
///
330+
/// * A Wasm binary custom mutator that inserts, replaces, and removes a
331+
/// bytecode instruction in a function's body.
332+
///
333+
/// * An HTTP request custom mutator that inserts, replaces, and removes a
334+
/// header from an HTTP request.
335+
#[macro_export]
macro_rules! fuzz_mutator {
    // Accepts a closure-like form:
    //
    //     |data: &mut [u8], size: usize, max_size: usize, seed: u32| { ... }
    //
    // The four identifiers are chosen by the caller and are bound inside the
    // body with exactly the types written in the pattern. The body must
    // evaluate to the new size (`usize`).
    (
        |
        $data:ident : &mut [u8] ,
        $size:ident : usize ,
        $max_size:ident : usize ,
        $seed:ident : u32 $(,)*
        |
        $body:block
    ) => {
        /// Auto-generated function.
        ///
        /// This is the `LLVMFuzzerCustomMutator` hook. It is invoked by
        /// `libFuzzer` through the C calling convention, so it is declared
        /// `extern "C"`; Rust's default ABI is unspecified and must not be
        /// relied upon for a symbol that foreign code calls.
        #[export_name = "LLVMFuzzerCustomMutator"]
        pub extern "C" fn rust_fuzzer_custom_mutator(
            $data: *mut u8,
            $size: usize,
            $max_size: usize,
            $seed: std::os::raw::c_uint,
        ) -> usize {
            // Depending on if we are growing or shrinking the test case, `size`
            // might be larger or smaller than `max_size`. The `data`'s capacity
            // is the maximum of the two.
            let len = std::cmp::max($max_size, $size);
            // SAFETY: relies on the caller (libFuzzer) handing us a non-null
            // pointer to a buffer that is valid for reads and writes of
            // `max(size, max_size)` bytes and is not aliased for the duration
            // of this call.
            let $data: &mut [u8] = unsafe { std::slice::from_raw_parts_mut($data, len) };

            // `unsigned int` is generally a `u32`, but not on all targets. Do
            // an infallible (and potentially lossy, but that's okay because it
            // preserves determinism) conversion.
            let $seed = $seed as u32;

            // Run the user's mutator, then truncate the size it reports if it
            // is larger than the max.
            let new_size = { $body };
            std::cmp::min(new_size, $max_size)
        }
    };
}
371+
372+
/// The default `libFuzzer` mutator.
373+
///
374+
/// You generally don't have to use this at all unless you're defining a
375+
/// custom mutator with [the `fuzz_mutator!` macro][crate::fuzz_mutator].
376+
///
377+
/// Mutates `data[..size]` in place and returns the new size of the mutated
378+
/// data.
379+
///
380+
/// # Example
381+
///
382+
/// ```no_run
383+
/// // Create some data in a buffer.
384+
/// let mut data = vec![0; 128];
385+
/// data[..5].copy_from_slice(b"hello");
386+
///
387+
/// // Ask `libFuzzer` to mutate the data.
388+
/// let new_size = libfuzzer_sys::fuzzer_mutate(&mut data, 5);
389+
///
390+
/// // Get the mutated data out of the buffer.
391+
/// let mutated_data = &data[..new_size];
392+
/// ```
393+
pub fn fuzzer_mutate(data: &mut [u8], size: usize) -> usize {
394+
assert!(size <= data.len());
395+
let new_size = unsafe { LLVMFuzzerMutate(data.as_mut_ptr(), size, data.len()) };
396+
assert!(new_size <= data.len());
397+
new_size
398+
}

0 commit comments

Comments
 (0)