Skip to content

Commit b77183b

Browse files
authored
Add fuzzing code to CI (#111)
* Add fuzzing code to CI
* Fix the fuzz suite after upstream breaking changes
1 parent 7afc655 commit b77183b

File tree

8 files changed

+50
-18
lines changed

8 files changed

+50
-18
lines changed

.github/workflows/fuzz.yml

Lines changed: 19 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,19 @@
# This workflow doesn't do fuzzing, it just ensures that the fuzzing code can compile

name: Fuzz check
on:
  push:
    branches: [ main ]
  pull_request:
    branches: [ main ]

jobs:
  check:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      - uses: dtolnay/rust-toolchain@master
        with:
          toolchain: stable
      # NOTE: each `run:` step starts a fresh shell, so a standalone
      # `- run: cd fuzz` step does NOT persist into the next step and
      # `cargo check` would silently run against the repository root.
      # Use `working-directory` so the check actually targets the fuzz crate.
      - run: cargo check
        working-directory: fuzz

fuzz/Cargo.lock

Lines changed: 4 additions & 4 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

fuzz/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,7 @@ pretty_assertions = "1.0.0"
1414

1515
# thirdparty crates to fuzz against
1616
html5gum_old = { version = "=0.6.1", package = "html5gum" }
17-
html5ever = "0.27.0"
17+
html5ever = "0.29.0"
1818
swc_common = "3.0"
1919
swc_html_parser = "3.0"
2020
swc_html_ast = "3.0"

fuzz/src/testcase/html5ever.rs

Lines changed: 18 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,5 @@
1+
use std::cell::RefCell;
2+
13
use html5ever::buffer_queue::BufferQueue;
24
use html5ever::tendril::format_tendril;
35
use html5ever::tokenizer::{
@@ -8,11 +10,11 @@ use html5gum::{Emitter, Reader, Token};
810
use pretty_assertions::assert_eq;
911

1012
pub fn run_html5ever(s: &str) {
11-
let mut reference_tokenizer = html5ever::tokenizer::Tokenizer::new(
12-
TokenSink {
13+
let reference_tokenizer = html5ever::tokenizer::Tokenizer::new(
14+
TokenSink(RefCell::new(TokenSinkInner {
1315
testing_tokenizer: html5gum::Tokenizer::new(s),
1416
carried_over_token: None,
15-
},
17+
})),
1618
TokenizerOpts {
1719
// the html5gum tokenizer does not handle the BOM, and also not discarding a BOM is
1820
// what the test suite expects, see https://github.com/html5lib/html5lib-tests/issues/2
@@ -31,19 +33,30 @@ pub fn run_html5ever(s: &str) {
3133
reference_tokenizer.end();
3234
}
3335

34-
struct TokenSink<R: Reader, E: Emitter> {
36+
struct TokenSinkInner<R: Reader, E: Emitter> {
3537
testing_tokenizer: html5gum::Tokenizer<R, E>,
3638
carried_over_token: Option<Token>,
3739
}
3840

41+
struct TokenSink<R: Reader, E: Emitter>(RefCell<TokenSinkInner<R, E>>);
42+
3943
impl<R: Reader, E: Emitter<Token = Token>> html5ever::tokenizer::TokenSink for TokenSink<R, E> {
4044
type Handle = ();
4145

4246
fn process_token(
43-
&mut self,
47+
&self,
4448
reference_token: html5ever::tokenizer::Token,
4549
_line_number: u64,
4650
) -> TokenSinkResult<Self::Handle> {
51+
self.0.borrow_mut().process_token(reference_token)
52+
}
53+
}
54+
55+
impl<R: Reader, E: Emitter<Token = Token>> TokenSinkInner<R, E> {
56+
fn process_token(
57+
&mut self,
58+
reference_token: html5ever::tokenizer::Token,
59+
) -> TokenSinkResult<()> {
4760
if matches!(reference_token, Token2::ParseError(_)) {
4861
// TODO
4962
return TokenSinkResult::Continue;

fuzz/src/testcase/lolhtml.rs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -57,7 +57,7 @@ pub fn run_lolhtml(data: &[u8]) {
5757
}
5858

5959
let mut gum_tokens = Vec::new();
60-
for mut token in html5gum::Tokenizer::new(data).infallible() {
60+
for Ok(mut token) in html5gum::Tokenizer::new(data) {
6161
match token {
6262
Token::Error(_) => continue,
6363
Token::StartTag(ref mut s) => {

fuzz/src/testcase/mod.rs

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -10,8 +10,8 @@ pub fn run(s: &[u8]) {
1010

1111
// unconditionally run tokenizer against raw bytes, it should never crash. we rely on running
1212
// in debug mode such that this is not just simply optimized away
13-
let testing_tokenizer = html5gum::Tokenizer::new(s).infallible();
14-
for _ in testing_tokenizer {}
13+
let testing_tokenizer = html5gum::Tokenizer::new(s);
14+
for Ok(_) in testing_tokenizer {}
1515

1616
if env::var("FUZZ_OLD_HTML5GUM").unwrap() == "1" {
1717
if let Ok(data) = std::str::from_utf8(s) {

fuzz/src/testcase/old_html5gum.rs

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -5,11 +5,11 @@ use html5gum::{Doctype, EndTag, StartTag, Token};
55
use pretty_assertions::assert_eq;
66

77
pub fn run_old_html5gum(s: &str) {
8-
let reference_tokenizer = html5gum_old::Tokenizer::new(s).infallible();
9-
let testing_tokenizer = html5gum::Tokenizer::new(s).infallible();
8+
let reference_tokenizer = html5gum_old::Tokenizer::new(s);
9+
let testing_tokenizer = html5gum::Tokenizer::new(s);
1010

11-
let mut testing_tokens: Vec<_> = testing_tokenizer.collect();
12-
let mut reference_tokens: Vec<_> = reference_tokenizer.collect();
11+
let Ok(mut testing_tokens): Result<Vec<_>, _> = testing_tokenizer.collect();
12+
let Ok(mut reference_tokens): Result<Vec<_>, _> = reference_tokenizer.collect();
1313

1414
fn isnt_error(x: &html5gum::Token) -> bool {
1515
!matches!(*x, html5gum::Token::Error(_))

fuzz/src/testcase/swc.rs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -91,7 +91,7 @@ pub fn run_swc(s: &str) {
9191
}
9292

9393
let mut gum_tokens = vec![];
94-
for token in html5gum::Tokenizer::new(s).infallible() {
94+
for Ok(token) in html5gum::Tokenizer::new(s) {
9595
match token {
9696
html5gum::Token::Error(_) => {}
9797
token => gum_tokens.push(token),

0 commit comments

Comments (0)