Skip to content

Commit 4d037a0

Browse files
committed
Add fuzzer for utf8::validate.
1 parent 41f8bdb commit 4d037a0

File tree

5 files changed

+107
-1
lines changed

5 files changed

+107
-1
lines changed

.github/workflows/fuzz.yml

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
# Runs the `utf8_validate` fuzz target for a bounded amount of time on every
# push to, and pull request against, `master`.
name: Fuzzing

on:
  push:
    branches: [ master ]
  pull_request:
    branches: [ master ]

jobs:
  fuzzing:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v2
      - name: Install Rust
        uses: actions-rs/toolchain@v1
        with:
          # cargo-fuzz relies on nightly-only compiler flags.
          toolchain: nightly
          override: true
      - name: Install cargo-fuzz
        run: |
          cargo install cargo-fuzz
      # Arguments after `--` go to libFuzzer; `-max_total_time` is in seconds.
      - name: Run Fuzzing
        run: |
          cargo fuzz run utf8_validate -- -max_total_time=180

.gitignore

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
.*.swp
22
tags
33
target
4-
/Cargo.lock
4+
Cargo.lock

fuzz/.gitignore

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
target
2+
corpus
3+
artifacts
4+
coverage

fuzz/Cargo.toml

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
# Manifest for the cargo-fuzz harness of `bstr`; this package is never published.
[package]
name = "bstr-fuzz"
version = "0.0.0"
publish = false
edition = "2018"

[package.metadata]
cargo-fuzz = true

[dependencies]
libfuzzer-sys = "0.4"
# The crate under test, taken from the parent directory.
bstr = { path = ".." }

# One `[[bin]]` entry per fuzz target.
[[bin]]
name = "utf8_validate"
path = "fuzz_targets/utf8_validate.rs"
test = false
doc = false
bench = false

# An empty `[workspace]` table makes this package its own workspace root.
[workspace]

fuzz/fuzz_targets/utf8_validate.rs

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
//! This fuzzer attempts to test the functional correctness of the `bstr::utf8::validate` function.
2+
//! This coverage is desirable, because some `unsafe` blocks in the `bstr` crate depend on the
3+
//! guarantees made by `utf8::validate` - e.g. the soundness of `bstr::ByteSlice::to_str` depends
4+
//! on these guarantees.
5+
//!
6+
//! The `utf8::validate` function is in a non-public module, which means that we can't test it
7+
//! directly. Therefore we test via `bstr::ByteSlice::to_str` instead.
8+
//!
9+
//! We use the following [test oracle](https://en.wikipedia.org/wiki/Test_oracle) to validate
10+
//! results returned by `utf8::validate`:
11+
//!
12+
//! * A standard library implementation (`std::str::from_utf8` is analogous to
13+
//! `bstr::ByteSlice::to_str` and `run_utf8_validation` in `core/str/validations.rs` is analogous
14+
//! to `bstr::utf8::validate`).
15+
//! <https://github.com/BurntSushi/bstr/issues/25#issuecomment-543835601> explains
16+
//! why `bstr` doesn't reuse the standard library's implementation.
17+
//! * TODO: Consider also adding a manual, simple (and therefore hopefully "obviously correct")
18+
//! implementation as another test oracle.
19+
20+
#![no_main]
21+
22+
use bstr::ByteSlice;
23+
use libfuzzer_sys::fuzz_target;
24+
25+
fn validate(data: &[u8]) {
26+
let bstr_result = data.to_str();
27+
let std_result = std::str::from_utf8(data);
28+
29+
match bstr_result {
30+
Ok(bstr_str) => {
31+
let Ok(std_str) = std_result else {
32+
panic!("`bstr` succeeded but `std` failed");
33+
};
34+
assert_eq!(data.as_ptr(), bstr_str.as_ptr());
35+
assert_eq!(data.as_ptr(), std_str.as_ptr());
36+
assert_eq!(data.len(), bstr_str.len());
37+
assert_eq!(data.len(), std_str.len());
38+
}
39+
Err(bstr_err) => {
40+
let Err(std_err) = std_result else {
41+
panic!("`bstr` failed but `std` succeeded");
42+
};
43+
assert_eq!(bstr_err.error_len(), std_err.error_len());
44+
assert_eq!(bstr_err.valid_up_to(), std_err.valid_up_to());
45+
}
46+
}
47+
}
48+
49+
fuzz_target!(|data: &[u8]| {
    // `utf8::validate` calls into `ascii::first_non_ascii_byte`, which is sensitive to the
    // alignment of its input, so run the check on every suffix that starts at offset
    // 0 through 16 (capped at the input length).
    let last_offset = data.len().min(16);
    (0..=last_offset).for_each(|start| validate(&data[start..]));
});

0 commit comments

Comments
 (0)