
Commit 896a5e1

New PR with Devin's complete changes (#507)

Mostly generated by Devin; the serialization test was updated personally, and redundant fuzz tests were removed.

1 parent e0aab6f commit 896a5e1

File tree

8 files changed: +330 -12 lines changed

.evergreen/config.yml

Lines changed: 26 additions & 5 deletions
@@ -13,15 +13,18 @@ stepback: true
 command_type: system

 # Protect ourself against rogue test case, or curl gone wild, that runs forever
-# 12 minutes is the longest we'll ever run
-exec_timeout_secs: 3600 # 12 minutes is the longest we'll ever run
+# 60 minutes is the longest we'll ever run
+exec_timeout_secs: 3600 # 1 hour total for security-focused fuzzing

 # What to do when evergreen hits the timeout (`post:` tasks are run automatically)
 timeout:
   - command: shell.exec
     params:
       script: |
-        ls -la
+        echo "Fuzzing timed out. Collecting any available artifacts..."
+        if [ -d "src/fuzz/artifacts" ]; then
+          tar czf "${PROJECT_DIRECTORY}/crash-artifacts.tar.gz" src/fuzz/artifacts/
+        fi

 functions:
   "fetch source":
@@ -154,7 +157,25 @@ functions:
     - command: shell.exec
       params:
         script: |
-          # Nothing needs to be done here
+          # Archive crash artifacts if they exist and contain crashes
+          if [ -d "src/fuzz/artifacts" ] && [ "$(ls -A src/fuzz/artifacts)" ]; then
+            echo "Creating artifacts archive..."
+            tar czf "${PROJECT_DIRECTORY}/crash-artifacts.tar.gz" src/fuzz/artifacts/
+          else
+            echo "No crashes found in artifacts directory. Skipping archive creation."
+          fi
+    # Upload crash artifacts if they exist
+    - command: s3.put
+      params:
+        aws_key: ${aws_key}
+        aws_secret: ${aws_secret}
+        local_file: ${PROJECT_DIRECTORY}/crash-artifacts.tar.gz
+        remote_file: ${CURRENT_VERSION}/crash-artifacts.tar.gz
+        bucket: mciuploads
+        permissions: public-read
+        content_type: application/x-gzip
+        optional: true
+
 pre:
   - func: "fetch source"
   - func: "install dependencies"
@@ -259,4 +280,4 @@ buildvariants:
     run_on:
       - ubuntu1804-test
     tasks:
-      - name: "wasm-test"
+      - name: "wasm-test"

.evergreen/run-fuzzer.sh

Lines changed: 33 additions & 4 deletions
@@ -6,7 +6,36 @@ set -o errexit

 cd fuzz

-# each runs for a minute
-cargo +nightly fuzz run deserialize -- -rss_limit_mb=4096 -max_total_time=60
-cargo +nightly fuzz run raw_deserialize -- -rss_limit_mb=4096 -max_total_time=60
-cargo +nightly fuzz run iterate -- -rss_limit_mb=4096 -max_total_time=60
+# Create directories for crashes and corpus
+mkdir -p artifacts
+mkdir -p corpus
+
+# Generate initial corpus if directory is empty
+if [ -z "$(ls -A corpus)" ]; then
+    echo "Generating initial corpus..."
+    cargo run --bin generate_corpus
+fi
+
+# Function to run fuzzer and collect crashes
+run_fuzzer() {
+    target=$1
+    echo "Running fuzzer for $target"
+    # Run fuzzer and redirect crashes to artifacts directory
+    RUST_BACKTRACE=1 cargo +nightly fuzz run $target -- \
+        -rss_limit_mb=4096 \
+        -max_total_time=60 \
+        -artifact_prefix=artifacts/ \
+        -print_final_stats=1 \
+        corpus/
+}
+
+# Run existing targets
+run_fuzzer "deserialize"
+run_fuzzer "raw_deserialize"
+run_fuzzer "iterate"
+
+# Run new security-focused targets
+run_fuzzer "malformed_length"
+run_fuzzer "type_markers"
+run_fuzzer "string_handling"
+run_fuzzer "serialization"

fuzz/Cargo.toml

Lines changed: 28 additions & 2 deletions
@@ -1,19 +1,25 @@
-
 [package]
 name = "bson-fuzz"
 version = "0.0.1"
 authors = ["Automatically generated"]
 publish = false
+edition = "2021"

 [package.metadata]
 cargo-fuzz = true

 [dependencies.bson]
 path = ".."
+
 [dependencies.libfuzzer-sys]
 version = "0.4.0"

-# Prevent this from interfering with workspaces
+[dependencies.serde]
+version = "1.0"
+
+[dependencies.serde_json]
+version = "1.0"
+
 [workspace]
 members = ["."]

@@ -32,3 +38,23 @@ path = "fuzz_targets/raw_deserialize.rs"
 [[bin]]
 name = "raw_deserialize_utf8_lossy"
 path = "fuzz_targets/raw_deserialize_utf8_lossy.rs"
+
+[[bin]]
+name = "malformed_length"
+path = "fuzz_targets/malformed_length.rs"
+
+[[bin]]
+name = "type_markers"
+path = "fuzz_targets/type_markers.rs"
+
+[[bin]]
+name = "string_handling"
+path = "fuzz_targets/string_handling.rs"
+
+[[bin]]
+name = "serialization"
+path = "fuzz_targets/serialization.rs"
+
+[[bin]]
+name = "generate_corpus"
+path = "generate_corpus.rs"

fuzz/fuzz_targets/serialization.rs

Lines changed: 55 additions & 0 deletions
@@ -0,0 +1,55 @@
+#![no_main]
+use bson::{
+    raw::{RawDocument, RawDocumentBuf},
+    Bson,
+    Document,
+};
+use libfuzzer_sys::fuzz_target;
+
+fn compare_docs(doc1: &Document, doc2: &Document) -> bool {
+    if doc1.len() != doc2.len() {
+        return false;
+    }
+    for (key, value) in doc1 {
+        if !doc2.contains_key(key) {
+            return false;
+        }
+        if let Some(val2) = doc2.get(key) {
+            match (value, val2) {
+                (Bson::Double(d1), Bson::Double(d2)) => {
+                    if (!d1.is_nan() || !d2.is_nan()) && d1 != d2 {
+                        return false;
+                    }
+                }
+                (v1, v2) => {
+                    if v1 != v2 {
+                        return false;
+                    }
+                }
+            }
+        }
+    }
+    true
+}
+
+fuzz_target!(|input: &[u8]| {
+    if let Ok(rawdoc) = RawDocument::from_bytes(&input) {
+        if let Ok(doc) = Document::try_from(rawdoc) {
+            let out = RawDocumentBuf::try_from(&doc).unwrap();
+            let out_bytes = out.as_bytes();
+            if input != out_bytes {
+                let reserialized = RawDocument::from_bytes(&out_bytes).unwrap();
+                let reserialized_doc = Document::try_from(reserialized).unwrap();
+                // Ensure that the reserialized document is the same as the original
+                // document; the bytes can differ while still resulting in the same
+                // Document.
+                if !compare_docs(&doc, &reserialized_doc) {
+                    panic!(
+                        "Reserialized document is not the same as the original document: {:?} != \
+                         {:?}",
+                        doc, reserialized_doc
+                    );
+                }
+            }
+        }
+    }
+});
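The property this target enforces (Document to RawDocumentBuf and back must be lossless even when the two byte encodings differ) can also be checked as an ordinary unit test. A minimal sketch under the same bson API, with illustrative contents, not part of the commit:

#[cfg(test)]
mod round_trip {
    use bson::{doc, raw::RawDocument, Document};

    #[test]
    fn document_survives_raw_round_trip() {
        // Illustrative contents; NaN doubles would need the looser
        // compare_docs-style check used by the fuzz target above.
        let original = doc! { "i32": 1i32, "s": "hello", "nested": { "f": 2.5f64 } };
        let bytes = bson::to_vec(&original).expect("serialize");
        let rawdoc = RawDocument::from_bytes(&bytes).expect("parse");
        let decoded = Document::try_from(rawdoc).expect("deserialize");
        assert_eq!(original, decoded);
    }
}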

fuzz/fuzz_targets/string_handling.rs

Lines changed: 23 additions & 0 deletions
@@ -0,0 +1,23 @@
+#![no_main]
+#[macro_use]
+extern crate libfuzzer_sys;
+extern crate bson;
+use bson::{RawBsonRef, RawDocument};
+use std::convert::TryInto;
+
+fuzz_target!(|buf: &[u8]| {
+    if let Ok(doc) = RawDocument::from_bytes(buf) {
+        for elem in doc.iter_elements().flatten() {
+            // Convert to RawBsonRef and check string-related types
+            if let Ok(bson) = elem.try_into() {
+                match bson {
+                    RawBsonRef::String(s) => {
+                        let _ = s.len();
+                        let _ = s.chars().count();
+                    }
+                    _ => {}
+                }
+            }
+        }
+    }
+});
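For a sense of the inputs this target is meant to survive, the following hand-built buffer encodes a document whose single string element has a well-formed length prefix but invalid UTF-8 content (a hypothetical seed for illustration, not part of the commit):

// Hypothetical seed: string element with a correct length prefix but a
// payload that is not valid UTF-8; conversion should error, not panic.
let mut bytes = Vec::new();
bytes.extend_from_slice(&14i32.to_le_bytes()); // total document length
bytes.push(0x02);                              // type marker: string
bytes.extend_from_slice(b"k\0");               // element key "k"
bytes.extend_from_slice(&2i32.to_le_bytes());  // string length, incl. NUL
bytes.extend_from_slice(&[0xFF, 0x00]);        // invalid UTF-8 byte + NUL
bytes.push(0x00);                              // document terminator
assert_eq!(bytes.len(), 14);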

fuzz/fuzz_targets/type_markers.rs

Lines changed: 14 additions & 0 deletions
@@ -0,0 +1,14 @@
+#![no_main]
+#[macro_use]
+extern crate libfuzzer_sys;
+extern crate bson;
+use bson::{RawBsonRef, RawDocument};
+use std::convert::TryInto;
+
+fuzz_target!(|buf: &[u8]| {
+    if let Ok(doc) = RawDocument::from_bytes(buf) {
+        for elem in doc.iter_elements().flatten() {
+            let _: Result<RawBsonRef, _> = elem.try_into();
+        }
+    }
+});

fuzz/generate_corpus.rs

Lines changed: 143 additions & 0 deletions
@@ -0,0 +1,143 @@
+use bson::{doc, Bson, Decimal128};
+use std::{
+    fs,
+    io::{Error, ErrorKind},
+    path::Path,
+    str::FromStr,
+};
+
+fn main() -> std::io::Result<()> {
+    let corpus_dir = Path::new("fuzz/corpus");
+    fs::create_dir_all(corpus_dir)?;
+
+    // Generate edge cases for each fuzz target
+    generate_length_edge_cases(corpus_dir)?;
+    generate_type_marker_cases(corpus_dir)?;
+    generate_string_edge_cases(corpus_dir)?;
+    generate_serialization_cases(corpus_dir)?;
+    Ok(())
+}
+
+fn generate_length_edge_cases(dir: &Path) -> std::io::Result<()> {
+    let target_dir = dir.join("malformed_length");
+    fs::create_dir_all(&target_dir)?;
+
+    // Invalid length
+    fs::write(target_dir.join("invalid_len"), vec![4, 5])?;
+
+    // Minimal valid document
+    let min_doc = doc! {};
+    fs::write(
+        target_dir.join("min_doc"),
+        bson::to_vec(&min_doc).map_err(|e| Error::new(ErrorKind::Other, e.to_string()))?,
+    )?;
+
+    // Document with length near i32::MAX
+    let large_doc = doc! { "a": "b".repeat(i32::MAX as usize / 2) };
+    fs::write(
+        target_dir.join("large_doc"),
+        bson::to_vec(&large_doc).map_err(|e| Error::new(ErrorKind::Other, e.to_string()))?,
+    )?;
+
+    Ok(())
+}
+
+fn generate_type_marker_cases(dir: &Path) -> std::io::Result<()> {
+    let target_dir = dir.join("type_markers");
+    fs::create_dir_all(&target_dir)?;
+
+    // Document with all BSON types
+    let all_types = doc! {
+        "double": 1.0f64,
+        "double_nan": f64::NAN,
+        "double_infinity": f64::INFINITY,
+        "double_neg_infinity": f64::NEG_INFINITY,
+        "string": "test",
+        "document": doc! {},
+        "array": vec![1, 2, 3],
+        "binary": Bson::Binary(bson::Binary { subtype: bson::spec::BinarySubtype::Generic, bytes: vec![1, 2, 3] }),
+        "object_id": bson::oid::ObjectId::new(),
+        "bool": true,
+        "date": bson::DateTime::now(),
+        "null": Bson::Null,
+        "regex": Bson::RegularExpression(bson::Regex { pattern: "pattern".into(), options: "i".into() }),
+        "int32": 123i32,
+        "timestamp": bson::Timestamp { time: 12345, increment: 1 },
+        "int64": 123i64,
+        "decimal128_nan": Decimal128::from_str("NaN").unwrap(),
+        "decimal128_infinity": Decimal128::from_str("Infinity").unwrap(),
+        "decimal128_neg_infinity": Decimal128::from_str("-Infinity").unwrap(),
+        "min_key": Bson::MinKey,
+        "max_key": Bson::MaxKey,
+        "undefined": Bson::Undefined
+    };
+    fs::write(
+        target_dir.join("all_types"),
+        bson::to_vec(&all_types).map_err(|e| Error::new(ErrorKind::Other, e.to_string()))?,
+    )?;
+
+    Ok(())
+}
+
+fn generate_string_edge_cases(dir: &Path) -> std::io::Result<()> {
+    let target_dir = dir.join("string_handling");
+    fs::create_dir_all(&target_dir)?;
+
+    // UTF-8 edge cases
+    let utf8_cases = doc! {
+        "empty": "",
+        "null_bytes": "hello\0world",
+        "unicode": "🦀💻🔒",
+        "high_surrogate": "\u{10000}",
+        "invalid_continuation": Bson::Binary(bson::Binary {
+            subtype: bson::spec::BinarySubtype::Generic,
+            bytes: vec![0x80u8, 0x80u8, 0x80u8]
+        }),
+        "overlong": Bson::Binary(bson::Binary {
+            subtype: bson::spec::BinarySubtype::Generic,
+            bytes: vec![0xC0u8, 0x80u8]
+        })
+    };
+    fs::write(
+        target_dir.join("utf8_cases"),
+        bson::to_vec(&utf8_cases).map_err(|e| Error::new(ErrorKind::Other, e.to_string()))?,
+    )?;
+
+    Ok(())
+}
+
+fn generate_serialization_cases(dir: &Path) -> std::io::Result<()> {
+    let target_dir = dir.join("serialization");
+    fs::create_dir_all(&target_dir)?;
+
+    // Deeply nested document
+    let mut nested_doc = doc! {};
+    let mut current = &mut nested_doc;
+    for i in 0..100 {
+        let next_doc = doc! {};
+        current.insert(i.to_string(), next_doc);
+        current = current
+            .get_mut(&i.to_string())
+            .unwrap()
+            .as_document_mut()
+            .unwrap();
+    }
+    fs::write(
+        target_dir.join("nested_doc"),
+        bson::to_vec(&nested_doc).map_err(|e| Error::new(ErrorKind::Other, e.to_string()))?,
+    )?;
+
+    // Document with large binary data
+    let large_binary = doc! {
+        "binary": Bson::Binary(bson::Binary {
+            subtype: bson::spec::BinarySubtype::Generic,
+            bytes: vec![0xFF; 1024 * 1024] // 1MB of data
+        })
+    };
+    fs::write(
+        target_dir.join("large_binary"),
+        bson::to_vec(&large_binary).map_err(|e| Error::new(ErrorKind::Other, e.to_string()))?,
+    )?;
+
+    Ok(())
+}
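One more seed worth having for the malformed_length corpus, sketched below as a hypothetical helper in the same style (not in the commit): a structurally complete empty document whose length prefix is inflated after the fact, so it claims more bytes than exist.

// Hypothetical companion to generate_length_edge_cases, relying on the same
// std::fs and std::path::Path imports as the file above (not committed code).
fn write_inflated_length_case(target_dir: &Path) -> std::io::Result<()> {
    // A valid empty document: i32 length 5 (little-endian) plus terminator.
    let mut bytes = 5i32.to_le_bytes().to_vec();
    bytes.push(0x00);
    // Overwrite the prefix to claim 64 bytes while only 5 are present.
    bytes[..4].copy_from_slice(&64i32.to_le_bytes());
    fs::write(target_dir.join("inflated_len"), bytes)
}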
