diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 150e360..fcb37ef 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -61,7 +61,12 @@ jobs: - name: cd nostd && cargo +nightly clippy -- --deny=warnings run: cargo +nightly clippy -- --deny=warnings working-directory: nostd + - name: cd lib/fuzz && cargo +nightly clippy --lib --examples -- --deny=warnings + run: cargo +nightly clippy --lib --examples -- --deny=warnings + working-directory: lib/fuzz - name: cd lib/fuzz && cargo +nightly clippy -- --deny=warnings + env: + RUSTFLAGS: --cfg=fuzzing run: cargo +nightly clippy -- --deny=warnings working-directory: lib/fuzz - name: cd lib && cargo +nightly build @@ -103,7 +108,12 @@ jobs: - name: cd nostd && cargo +nightly build --release run: cargo +nightly build --release working-directory: nostd + - name: cd lib/fuzz && cargo +nightly build --lib --examples + run: cargo +nightly build --lib --examples + working-directory: lib/fuzz - name: cd lib/fuzz && cargo +nightly build + env: + RUSTFLAGS: --cfg=fuzzing run: cargo +nightly build working-directory: lib/fuzz - name: cd cmp && cargo +nightly build @@ -128,8 +138,8 @@ jobs: - name: cd nostd && cargo +nightly run --release --features=alloc run: cargo +nightly run --release --features=alloc working-directory: nostd - - name: cd lib/fuzz && cargo +nightly test - run: cargo +nightly test + - name: cd lib/fuzz && cargo +nightly test --lib + run: cargo +nightly test --lib working-directory: lib/fuzz - name: cd cmp && cargo +nightly test run: cargo +nightly test diff --git a/lib/fuzz/Cargo.toml b/lib/fuzz/Cargo.toml index 0e9cdae..d18b911 100644 --- a/lib/fuzz/Cargo.toml +++ b/lib/fuzz/Cargo.toml @@ -4,7 +4,6 @@ version = "0.0.0" authors = ["Automatically generated"] publish = false edition = "2021" -rust-version = "1.81" [package.metadata] cargo-fuzz = true @@ -13,20 +12,42 @@ cargo-fuzz = true data-encoding = { path = ".." } libfuzzer-sys = "0.4.3" +# Fuzz targets organization based on prefix: +# - fuzz_FOO: FOO holds for the fuzzing tools (property testing) +# - impl_FOO: FOO is correctly implemented (differential testing) +# - spec_FOO: FOO holds for the specification (property testing) + +[[bin]] +name = "fuzz_any_spec" +path = "fuzz_targets/fuzz_any_spec.rs" + +[[bin]] +name = "impl_encode" +path = "fuzz_targets/impl_encode.rs" + +[[bin]] +name = "impl_decode" +path = "fuzz_targets/impl_decode.rs" + +[[bin]] +name = "impl_new_encoder" +path = "fuzz_targets/impl_new_encoder.rs" + [[bin]] -name = "round_trip" -path = "fuzz_targets/round_trip.rs" -test = false -doc = false +name = "impl_encode_write_buffer" +path = "fuzz_targets/impl_encode_write_buffer.rs" [[bin]] -name = "encoder" -path = "fuzz_targets/encoder.rs" -test = false -doc = false +name = "spec_spec_base" +path = "fuzz_targets/spec_spec_base.rs" [[bin]] -name = "encode_write" -path = "fuzz_targets/encode_write.rs" -test = false -doc = false +name = "spec_encode_decode" +path = "fuzz_targets/spec_encode_decode.rs" + +[[bin]] +name = "spec_decode_encode" +path = "fuzz_targets/spec_decode_encode.rs" + +[lints.rust] +unexpected_cfgs = { level = "warn", check-cfg = ['cfg(fuzzing)'] } diff --git a/lib/fuzz/analyze.sh b/lib/fuzz/analyze.sh new file mode 100755 index 0000000..f4c22da --- /dev/null +++ b/lib/fuzz/analyze.sh @@ -0,0 +1,3 @@ +#!/bin/sh + +cargo run --manifest-path=fuzz/Cargo.toml --release --example=analyze -- "$@" diff --git a/lib/fuzz/compact.sh b/lib/fuzz/compact.sh new file mode 100755 index 0000000..1b88709 --- /dev/null +++ b/lib/fuzz/compact.sh @@ -0,0 +1,5 @@ +#!/bin/sh + +for target in $(cargo fuzz list); do + cargo fuzz cmin $target +done diff --git a/lib/fuzz/debug.sh b/lib/fuzz/debug.sh new file mode 100755 index 0000000..bcfd7a9 --- /dev/null +++ b/lib/fuzz/debug.sh @@ -0,0 +1,3 @@ +#!/bin/sh + +cargo run --manifest-path=fuzz/Cargo.toml --example=debug -- "$1" diff --git a/lib/fuzz/examples/analyze.rs b/lib/fuzz/examples/analyze.rs index a89819b..d9cbffd 100644 --- a/lib/fuzz/examples/analyze.rs +++ b/lib/fuzz/examples/analyze.rs @@ -1,145 +1,201 @@ -use std::collections::HashMap; -use std::ops::AddAssign; - -use data_encoding::BitOrder; -use data_encoding_fuzz::{decode_prefix, generate_specification}; - -#[derive(Clone, Copy, Hash, PartialEq, Eq)] -enum Key { - Bit, - Msb, - Ctb, - Pad, - HasIgnore, - Ignore, - HasWrap, - WrapWidth, - WrapLen, - HasTranslate, - Translate, - Canonical, - InputLen, - DecodeLen, -} +use std::collections::{BTreeMap, HashMap, HashSet}; -const ALL_KEYS: &'static [Key] = { - use Key::*; - &[ - Bit, - Msb, - Ctb, - Pad, - HasIgnore, - Ignore, - HasWrap, - WrapWidth, - WrapLen, - HasTranslate, - Translate, - Canonical, - InputLen, - DecodeLen, - ] -}; - -impl Key { - fn name(self) -> &'static str { - use Key::*; - match self { - Bit => "bit", - Msb => "msb", - Ctb => "ctb", - Pad => "pad", - HasIgnore => "has_ignore", - Ignore => "ignore", - HasWrap => "has_wrap", - WrapWidth => "wrap_width", - WrapLen => "wrap_len", - HasTranslate => "has_translate", - Translate => "translate", - Canonical => "canonical", - InputLen => "input_len", - DecodeLen => "decode_len", - } +use data_encoding_fuzz::cmd; + +fn main() { + let path = cmd::path(false); + let target = cmd::target(&path); + let mut stats = Stats::new(std::env::args().skip(2)); + for entry in std::fs::read_dir(path).unwrap() { + stats.merge(&cmd::execute(&target, &std::fs::read(entry.unwrap().path()).unwrap())); } + stats.print(); } -#[derive(Default, Clone)] -struct Stat(HashMap); - -impl Stat { - fn new(mut data: &[u8]) -> Stat { - let encoding = generate_specification(&mut data).encoding().unwrap(); - let spec = encoding.specification(); - let mut stat = HashMap::new(); - assert!(stat.insert(Key::Bit, spec.symbols.len().trailing_zeros() as usize).is_none()); - assert!(stat - .insert(Key::Msb, (spec.bit_order == BitOrder::MostSignificantFirst) as usize) - .is_none()); - assert!(stat.insert(Key::Ctb, spec.check_trailing_bits as usize).is_none()); - assert!(stat.insert(Key::Pad, spec.padding.is_some() as usize).is_none()); - assert!(stat.insert(Key::HasIgnore, !spec.ignore.is_empty() as usize).is_none()); - assert!(stat.insert(Key::Ignore, spec.ignore.len()).is_none()); - assert!(stat.insert(Key::HasWrap, (spec.wrap.width > 0) as usize).is_none()); - assert!(stat.insert(Key::WrapWidth, spec.wrap.width).is_none()); - assert!(stat.insert(Key::WrapLen, spec.wrap.separator.len()).is_none()); - assert!(stat.insert(Key::HasTranslate, !spec.translate.from.is_empty() as usize).is_none()); - assert!(stat.insert(Key::Translate, spec.translate.from.len()).is_none()); - assert!(stat.insert(Key::Canonical, encoding.is_canonical() as usize).is_none()); - assert!(stat.insert(Key::InputLen, data.len()).is_none()); - decode_prefix(&encoding, &mut data); - assert!(stat.insert(Key::DecodeLen, data.len()).is_none()); - Stat(stat) - } +struct Stats { + buckets: Vec<(String, Bucket)>, + filters: Vec<(String, Filter)>, + stats: HashMap>, HashMap<&'static str, Stat>>, } -impl Stat { - fn map(&self, mut f: impl FnMut(&T) -> U) -> Stat { - Stat(self.0.iter().map(|(&k, x)| (k, f(x))).collect()) - } +#[derive(Clone, Copy, Default)] +struct Stat { + sum: f64, + len: usize, } -impl AddAssign for Stat { - fn add_assign(&mut self, rhs: Stat) { - for (k, x) in rhs.0 { - *self.0.entry(k).or_default() += x; +#[derive(Clone, Copy)] +enum Bucket { + Lin(usize), + Exp(usize), +} + +#[derive(Clone, Copy)] +enum Filter { + Is(usize), + Eq(usize), + Lt(usize), + Gt(usize), +} + +impl Stats { + fn new(args: impl Iterator) -> Self { + let mut buckets = BTreeMap::new(); + let mut filters = Vec::new(); + for arg in args { + let Some((name, value)) = arg.split_once(['+', '*', '!', '=', '<', '>']) else { + panic!("{arg:?} does not contain an operator: + * ! = < >"); + }; + if !name.bytes().all(|x| b"abcdefghijklmnopqrstuvwxz_".contains(&x)) { + panic!("{name:?} is not a name"); + } + let Ok(value) = value.parse::() else { + panic!("{value:?} is not a value"); + }; + let op = match arg.as_bytes()[name.len()] { + b'+' => Ok(Bucket::Lin(value)), + b'*' => Ok(Bucket::Exp(value)), + b'!' => Err(Filter::Is(value)), + b'=' => Err(Filter::Eq(value)), + b'<' => Err(Filter::Lt(value)), + b'>' => Err(Filter::Gt(value)), + _ => unreachable!(), + }; + match op { + Ok(bucket) => { + if buckets.insert(name.to_string(), bucket).is_some() { + panic!("duplicate bucket for {name}"); + } + } + Err(filter) => filters.push((name.to_string(), filter)), + } + } + let buckets: Vec<_> = buckets.into_iter().collect(); + Stats { buckets, filters, stats: HashMap::new() } + } + + fn merge(&mut self, stat: &HashMap<&'static str, usize>) { + let Stats { buckets, filters, stats } = self; + let slot = buckets.iter().map(|(k, b)| stat.get(k.as_str()).map(|&v| b.slot(v))).collect(); + if !filters.iter().all(|x| x.1.contains(stat.get(x.0.as_str()).copied())) { + return; } + let stats = stats.entry(slot).or_default(); + for (&key, &value) in stat { + stats.entry(key).or_default().merge(value); + } + stats.entry(COUNT).or_default().merge(1); + } + + fn print(self) { + // Compute slot headers. + let slot_hdrs: Vec<_> = self.buckets.iter().map(|x| x.0.as_str()).collect(); + assert!(slot_hdrs.is_sorted()); + + // Compute stat headers. + let mut stat_hdrs = HashSet::new(); + for stats in self.stats.values() { + stat_hdrs.extend(stats.keys().copied()); + } + let mut stat_hdrs: Vec<_> = stat_hdrs.into_iter().collect(); + stat_hdrs.sort(); + let stat_hdrs = stat_hdrs; + + // Compute columns (a column is a slot and its stat). + let mut cols = Vec::new(); + for (k, v) in self.stats { + let mut t = Vec::new(); + for &x in &stat_hdrs { + t.push(v.get(x).copied()); + } + cols.push((k, t)); + } + cols.sort_by(|x, y| x.0.cmp(&y.0)); + + // Compute matrix. + let mut matrix = vec![Vec::new(); slot_hdrs.len() + stat_hdrs.len()]; + let n = slot_hdrs.len(); + for (i, h) in slot_hdrs.iter().enumerate() { + matrix[i].push(h.to_string()); + } + for (i, h) in stat_hdrs.iter().enumerate() { + matrix[n + i].push(h.to_string()); + } + for (slot, stat) in cols { + for (i, x) in slot.into_iter().enumerate() { + matrix[i].push(x.map_or("-".to_string(), |x| format!("{x}.."))); + } + for (i, x) in stat.into_iter().enumerate() { + let cell = match x { + Some(x) if stat_hdrs[i] == COUNT => x.len.to_string(), + Some(x) => format!("{:.2}", x.average()), + None => "-".to_string(), + }; + matrix[n + i].push(cell); + } + } + + // Print matrix. + print_matrix(matrix); } } -#[derive(Default, Clone)] -struct Stats { - sum: Stat, - count: usize, +impl Stat { + fn merge(&mut self, value: usize) { + self.sum += value as f64; + self.len += 1; + } + + fn average(self) -> f64 { + self.sum / self.len as f64 + } } -impl Stats { - fn add(&mut self, stat: &Stat) { - self.sum += stat.map(|&x| x as f64); - self.count += 1; +impl Bucket { + fn slot(self, mut value: usize) -> usize { + match self { + Bucket::Lin(delta) => value / delta * delta, + Bucket::Exp(base) => { + let mut slot = 0; + while 0 < value { + value /= base; + slot = if slot == 0 { 1 } else { slot * base }; + } + slot + } + } } } -impl std::fmt::Display for Stats { - fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { - writeln!(f, "count: {}", self.count)?; - for key in ALL_KEYS { - let sum = *self.sum.0.get(key).unwrap(); - writeln!(f, "{}: {:.2}", key.name(), sum / self.count as f64)?; +impl Filter { + fn contains(self, value: Option) -> bool { + match self { + Filter::Is(x) => value.is_some() as usize == x, + Filter::Eq(x) => value.is_some_and(|y| y == x), + Filter::Lt(x) => value.is_some_and(|y| y < x), + Filter::Gt(x) => value.is_some_and(|y| y > x), } - Ok(()) } } -fn main() { - let mut stats = vec![Stats::default(); 6]; - for entry in std::fs::read_dir(std::env::args().nth(1).unwrap()).unwrap() { - let entry = entry.unwrap(); - let stat = Stat::new(&std::fs::read(entry.path()).unwrap()); - let bit = *stat.0.get(&Key::Bit).unwrap(); - stats[bit - 1].add(&stat); +const COUNT: &str = "-- count --"; + +fn align(x: &str, n: usize) { + for _ in 0 .. n.saturating_sub(x.len()) { + print!(" "); } - for stats in &stats { - println!("{}", stats); + print!("{x}") +} + +fn print_matrix(mut m: Vec>) { + let Some(n) = m.iter().map(|r| r.len()).max() else { return }; + m.iter_mut().for_each(|x| x.resize(n, String::new())); + let w: Vec<_> = + (0 .. n).map(|i| m.iter().map(|x| x[i].len()).max().unwrap() + (i != 0) as usize).collect(); + for x in m { + for i in 0 .. n { + align(&x[i], w[i]); + } + println!(); } } diff --git a/lib/fuzz/examples/debug.rs b/lib/fuzz/examples/debug.rs index 4f2b101..02a27c2 100644 --- a/lib/fuzz/examples/debug.rs +++ b/lib/fuzz/examples/debug.rs @@ -1,13 +1,7 @@ -use std::io::Read; - -use data_encoding_fuzz::generate_specification; +use data_encoding_fuzz::cmd; fn main() { - let stdin = std::io::stdin(); - let mut input = Vec::new(); - stdin.lock().read_to_end(&mut input).unwrap(); - let mut data = &input[..]; - println!("{:#?}", generate_specification(&mut data)); - println!("spec = {:?}", &input[.. input.len() - data.len()]); - println!("data = {:?}", data); + let path = cmd::path(true); + let input = std::fs::read(&path).unwrap(); + cmd::execute(&cmd::target(&path), &input); } diff --git a/lib/fuzz/fuzz_targets/encode_write.rs b/lib/fuzz/fuzz_targets/encode_write.rs deleted file mode 100644 index 16d91d5..0000000 --- a/lib/fuzz/fuzz_targets/encode_write.rs +++ /dev/null @@ -1,15 +0,0 @@ -#![no_main] - -use data_encoding_fuzz::{generate_encoding, generate_usize}; -use libfuzzer_sys::fuzz_target; - -fuzz_target!(|data: &[u8]| { - let mut data = data; - let encoding = generate_encoding(&mut data); - let mut buffer = vec![0; generate_usize(&mut data, 510, 2050)]; - let input = data; - let mut output = String::new(); - encoding.encode_write_buffer(input, &mut output, &mut buffer).unwrap(); - let expected = encoding.encode(input); - assert_eq!(output, expected); -}); diff --git a/lib/fuzz/fuzz_targets/encoder.rs b/lib/fuzz/fuzz_targets/encoder.rs deleted file mode 100644 index e832680..0000000 --- a/lib/fuzz/fuzz_targets/encoder.rs +++ /dev/null @@ -1,21 +0,0 @@ -#![no_main] - -use data_encoding_fuzz::{generate_bytes, generate_encoding, generate_usize}; -use libfuzzer_sys::fuzz_target; - -fuzz_target!(|data: &[u8]| { - let mut data = data; - let encoding = generate_encoding(&mut data); - let mut output = String::new(); - let mut input = Vec::new(); - let mut encoder = encoding.new_encoder(&mut output); - while !data.is_empty() { - let len = generate_usize(&mut data, 0, 3 * 256 - 1); - let chunk = generate_bytes(&mut data, len); - input.extend_from_slice(chunk); - encoder.append(chunk); - } - encoder.finalize(); - let expected = encoding.encode(&input); - assert_eq!(output, expected); -}); diff --git a/lib/fuzz/fuzz_targets/fuzz_any_spec.rs b/lib/fuzz/fuzz_targets/fuzz_any_spec.rs new file mode 120000 index 0000000..ba589d7 --- /dev/null +++ b/lib/fuzz/fuzz_targets/fuzz_any_spec.rs @@ -0,0 +1 @@ +template.rs \ No newline at end of file diff --git a/lib/fuzz/fuzz_targets/impl_decode.rs b/lib/fuzz/fuzz_targets/impl_decode.rs new file mode 120000 index 0000000..ba589d7 --- /dev/null +++ b/lib/fuzz/fuzz_targets/impl_decode.rs @@ -0,0 +1 @@ +template.rs \ No newline at end of file diff --git a/lib/fuzz/fuzz_targets/impl_encode.rs b/lib/fuzz/fuzz_targets/impl_encode.rs new file mode 120000 index 0000000..ba589d7 --- /dev/null +++ b/lib/fuzz/fuzz_targets/impl_encode.rs @@ -0,0 +1 @@ +template.rs \ No newline at end of file diff --git a/lib/fuzz/fuzz_targets/impl_encode_write_buffer.rs b/lib/fuzz/fuzz_targets/impl_encode_write_buffer.rs new file mode 120000 index 0000000..ba589d7 --- /dev/null +++ b/lib/fuzz/fuzz_targets/impl_encode_write_buffer.rs @@ -0,0 +1 @@ +template.rs \ No newline at end of file diff --git a/lib/fuzz/fuzz_targets/impl_new_encoder.rs b/lib/fuzz/fuzz_targets/impl_new_encoder.rs new file mode 120000 index 0000000..ba589d7 --- /dev/null +++ b/lib/fuzz/fuzz_targets/impl_new_encoder.rs @@ -0,0 +1 @@ +template.rs \ No newline at end of file diff --git a/lib/fuzz/fuzz_targets/round_trip.rs b/lib/fuzz/fuzz_targets/round_trip.rs deleted file mode 100644 index 4c7e5b2..0000000 --- a/lib/fuzz/fuzz_targets/round_trip.rs +++ /dev/null @@ -1,15 +0,0 @@ -#![no_main] - -use data_encoding_fuzz::{decode_prefix, generate_encoding}; -use libfuzzer_sys::fuzz_target; - -fuzz_target!(|data: &[u8]| { - let mut data = data; - let e = generate_encoding(&mut data); - assert_eq!(e.specification().encoding().unwrap(), e); - assert_eq!(e.decode(e.encode(data).as_bytes()).unwrap(), data); - if e.is_canonical() { - let raw = decode_prefix(&e, &mut data); - assert_eq!(e.encode(&raw).as_bytes(), data); - } -}); diff --git a/lib/fuzz/fuzz_targets/spec_decode_encode.rs b/lib/fuzz/fuzz_targets/spec_decode_encode.rs new file mode 120000 index 0000000..ba589d7 --- /dev/null +++ b/lib/fuzz/fuzz_targets/spec_decode_encode.rs @@ -0,0 +1 @@ +template.rs \ No newline at end of file diff --git a/lib/fuzz/fuzz_targets/spec_encode_decode.rs b/lib/fuzz/fuzz_targets/spec_encode_decode.rs new file mode 120000 index 0000000..ba589d7 --- /dev/null +++ b/lib/fuzz/fuzz_targets/spec_encode_decode.rs @@ -0,0 +1 @@ +template.rs \ No newline at end of file diff --git a/lib/fuzz/fuzz_targets/spec_spec_base.rs b/lib/fuzz/fuzz_targets/spec_spec_base.rs new file mode 120000 index 0000000..ba589d7 --- /dev/null +++ b/lib/fuzz/fuzz_targets/spec_spec_base.rs @@ -0,0 +1 @@ +template.rs \ No newline at end of file diff --git a/lib/fuzz/fuzz_targets/template.rs b/lib/fuzz/fuzz_targets/template.rs new file mode 100644 index 0000000..df9dfdd --- /dev/null +++ b/lib/fuzz/fuzz_targets/template.rs @@ -0,0 +1,6 @@ +#![no_main] + +use data_encoding_fuzz::cmd; +use libfuzzer_sys::{fuzz_target, Corpus}; + +fuzz_target!(|data: &[u8]| -> Corpus { cmd::execute(env!("CARGO_BIN_NAME"), data) }); diff --git a/lib/fuzz/run.sh b/lib/fuzz/run.sh new file mode 100755 index 0000000..30ddc4f --- /dev/null +++ b/lib/fuzz/run.sh @@ -0,0 +1,8 @@ +#!/bin/sh + +N="$(cargo fuzz list | wc -l)" +i=1 +next() { cargo fuzz list | head -n$i | tail -n1; } +while cargo fuzz run "$(next)" -- -max_total_time=600; do + i=$(( i % N + 1 )) +done diff --git a/lib/fuzz/src/cmd.rs b/lib/fuzz/src/cmd.rs new file mode 100644 index 0000000..78cdba7 --- /dev/null +++ b/lib/fuzz/src/cmd.rs @@ -0,0 +1,156 @@ +#[cfg(not(fuzzing))] +use std::collections::HashMap; +#[cfg(not(fuzzing))] +use std::path::{Path, PathBuf}; +#[cfg(not(fuzzing))] +use std::sync::OnceLock; + +use data_encoding::{BitOrder, Encoding, Specification}; + +use crate::{gen, spec}; + +macro_rules! debug { + ($($arg:tt)*) => { + #[cfg(not(fuzzing))] + if *DEBUG.get().unwrap() { + println!($($arg)*); + } + }; +} + +#[cfg(not(fuzzing))] +pub fn path(debug: bool) -> PathBuf { + DEBUG.set(debug).unwrap(); + PathBuf::from(std::env::args_os().nth(1).unwrap()) +} + +#[cfg(not(fuzzing))] +pub fn target(path: &Path) -> String { + path.components().nth(2).unwrap().as_os_str().to_str().unwrap().to_owned() +} + +pub fn execute(target: &str, mut input: &[u8]) -> Output { + let mut output = BothOutput::default(); + match target { + "fuzz_any_spec" => { + let Some(spec) = gen::any_spec(&mut input) else { return output.reject() }; + let Ok(base) = spec.encoding() else { return output.reject() }; + let spec = base.specification(); + stat_spec(&mut output, &spec, &base); + let input = gen::rev_spec(&spec); + assert_eq!(gen::spec(&mut input.as_slice()).encoding().unwrap(), base); + } + "impl_encode" => { + let (spec, base) = gen_spec_base(&mut input, &mut output); + assert_eq!(base.encode(input), spec::encode(&spec, input)); + } + "impl_decode" => { + let (spec, base) = gen_spec_base(&mut input, &mut output); + let actual = base.decode(input); + output.insert("decode_ok", actual.is_ok() as usize); + assert_eq!(actual.ok(), spec::decode(&spec, input)); + } + "impl_encode_write_buffer" => { + let (_, base) = gen_spec_base(&mut input, &mut output); + let mut buffer = vec![0; gen::nat(&mut input, 510, 2050)]; + output.insert("buffer_len", buffer.len()); + let mut actual = String::new(); + base.encode_write_buffer(input, &mut actual, &mut buffer).unwrap(); + assert_eq!(actual, base.encode(input)); + } + "impl_new_encoder" => { + let (_, base) = gen_spec_base(&mut input, &mut output); + let mut actual = String::new(); + let mut full = Vec::new(); + let mut encoder = base.new_encoder(&mut actual); + let mut num_chunks = 0; + while !input.is_empty() { + let len = gen::nat(&mut input, 0, 3 * 256 - 1); + let chunk = gen::bytes(&mut input, len); + full.extend_from_slice(chunk); + encoder.append(chunk); + num_chunks += 1; + } + encoder.finalize(); + output.insert("full_len", full.len()); + output.insert("num_chunks", num_chunks); + assert_eq!(actual, base.encode(&full)); + } + "spec_decode_encode" => { + let (_, base) = gen_spec_base(&mut input, &mut output); + let true = base.is_canonical() else { return output.reject() }; + let Ok(tmp) = base.decode(input) else { return output.reject() }; + assert_eq!(base.encode(&tmp).as_bytes(), input); + } + "spec_encode_decode" => { + let (_, base) = gen_spec_base(&mut input, &mut output); + assert_eq!(base.decode(base.encode(input).as_bytes()).unwrap(), input); + } + "spec_spec_base" => { + let (_, base) = gen_spec_base(&mut input, &mut output); + assert_eq!(base.specification().encoding().unwrap(), base); + } + x => unimplemented!("{x:?}"), + } + output.0 +} + +fn gen_spec_base(input: &mut &[u8], output: &mut BothOutput) -> (Specification, Encoding) { + let base = gen::base(input); + let spec = base.specification(); + debug!("{spec:#?}"); + debug!("{input:?}"); + stat_spec(output, &spec, &base); + output.insert("input_len", input.len()); + (spec, base) +} + +fn stat_spec(output: &mut BothOutput, spec: &Specification, base: &Encoding) { + output.insert("bit", spec.symbols.len().trailing_zeros() as usize); + output.insert("msb", (spec.bit_order == BitOrder::MostSignificantFirst) as usize); + output.insert("ctb", spec.check_trailing_bits as usize); + output.insert("pad", spec.padding.is_some() as usize); + output.insert("ignore_len", spec.ignore.len()); + output.insert("wrap_col", spec.wrap.width); + output.insert("wrap_len", spec.wrap.separator.len()); + output.insert("translate_len", spec.translate.from.len()); + output.insert("is_canonical", base.is_canonical() as usize); +} + +#[cfg(fuzzing)] +type Output = libfuzzer_sys::Corpus; +#[cfg(not(fuzzing))] +type Output = HashMap<&'static str, usize>; + +struct BothOutput(Output); + +impl Default for BothOutput { + fn default() -> Self { + #[cfg(fuzzing)] + let output = libfuzzer_sys::Corpus::Keep; + #[cfg(not(fuzzing))] + let output = HashMap::default(); + BothOutput(output) + } +} + +impl BothOutput { + #[cfg(fuzzing)] + fn insert(&mut self, _: &'static str, _: usize) {} + #[cfg(not(fuzzing))] + fn insert(&mut self, key: &'static str, value: usize) { + assert!(self.0.insert(key, value).is_none()); + } + + #[cfg(fuzzing)] + fn reject(self) -> Output { + libfuzzer_sys::Corpus::Reject + } + #[cfg(not(fuzzing))] + fn reject(self) -> Output { + self.0 + } +} + +#[cfg(not(fuzzing))] +static DEBUG: OnceLock = OnceLock::new(); diff --git a/lib/fuzz/src/gen.rs b/lib/fuzz/src/gen.rs new file mode 100644 index 0000000..64d25cd --- /dev/null +++ b/lib/fuzz/src/gen.rs @@ -0,0 +1,228 @@ +use data_encoding::{Encoding, Specification}; + +pub fn base(data: &mut &[u8]) -> Encoding { + spec(data).encoding().unwrap() +} + +pub fn spec(data: &mut &[u8]) -> Specification { + let mut spec = Specification::new(); + let mut ascii = Ascii::new(); + let bit = generate(data, 1, 6); + for _ in 0 .. 1 << bit { + spec.symbols.push(ascii.next_free(data)); + } + if generate(data, 0, 1) == 1 { + spec.bit_order = data_encoding::BitOrder::LeastSignificantFirst; + } + if generate(data, 0, 1) == 1 { + spec.check_trailing_bits = false; + } + if 8 % bit != 0 && generate(data, 0, 1) == 1 { + spec.padding = Some(ascii.next_free(data)); + } + let ignore_translate_len = generate(data, 0, ascii.len_free()); + let ignore_len = generate(data, 0, ignore_translate_len); + let translate_len = ignore_translate_len - ignore_len; + for _ in 0 .. ignore_len { + spec.ignore.push(ascii.next_free(data)); + } + if !spec.ignore.is_empty() { + let dec = match bit { + 1 | 3 | 5 => 8, + 2 | 6 => 4, + 4 => 2, + _ => unreachable!(), + }; + spec.wrap.width = generate(data, 0, 255) as usize / dec * dec; + if spec.wrap.width > 0 { + for _ in 0 .. generate(data, 1, 255) { + spec.wrap.separator.push(Ascii::next(spec.ignore.as_bytes(), data)); + } + } + } + for _ in 0 .. translate_len { + spec.translate.to.push(ascii.next_used(data)); + } + for _ in 0 .. translate_len { + spec.translate.from.push(ascii.next_free(data)); + } + spec +} + +pub fn rev_spec(spec: &Specification) -> Vec { + assert!(spec.encoding().is_ok()); + let mut output = Vec::new(); + let mut ascii = Ascii::new(); + let bit = spec.symbols.len().trailing_zeros() as u8; + output.push(bit - 1); + for x in spec.symbols.bytes() { + output.push(ascii.rev_free(x)); + } + output.push((spec.bit_order == data_encoding::BitOrder::LeastSignificantFirst) as u8); + output.push(!spec.check_trailing_bits as u8); + if 8 % bit != 0 { + output.push(spec.padding.is_some() as u8); + if let Some(pad) = spec.padding { + output.push(ascii.rev_free(pad as u8)); + } + } + output.push((spec.ignore.len() + spec.translate.from.len()) as u8); + output.push(spec.ignore.len() as u8); + for x in spec.ignore.bytes() { + output.push(ascii.rev_free(x)); + } + if !spec.ignore.is_empty() { + output.push(spec.wrap.width as u8); + if 0 < spec.wrap.width { + output.push(spec.wrap.separator.len() as u8 - 1); + for x in spec.wrap.separator.bytes() { + output.push(Ascii::rev(spec.ignore.as_bytes(), x)); + } + } + } + for x in spec.translate.to.bytes() { + output.push(ascii.rev_used(x)); + } + for x in spec.translate.from.bytes() { + output.push(ascii.rev_free(x)); + } + output +} + +pub fn any_spec(data: &mut &[u8]) -> Option { + let symbols = string(data)?; + let bit_order = match flip(data) { + false => data_encoding::BitOrder::LeastSignificantFirst, + true => data_encoding::BitOrder::MostSignificantFirst, + }; + let check_trailing_bits = flip(data); + let padding = string(data)?.pop(); + let ignore = string(data)?; + let width = generate(data, 0, 255) as usize; + let separator = string(data)?; + let wrap = data_encoding::Wrap { width, separator }; + let from = string(data)?; + let to = string(data)?; + let translate = data_encoding::Translate { from, to }; + Some(Specification { + symbols, + bit_order, + check_trailing_bits, + padding, + ignore, + wrap, + translate, + }) +} + +pub fn bytes<'a>(data: &'_ mut &'a [u8], len: usize) -> &'a [u8] { + let len = std::cmp::min(len, data.len()); + let res = &data[.. len]; + *data = &data[len ..]; + res +} + +pub fn nat(data: &mut &[u8], min: usize, max: usize) -> usize { + let log = match (max - min).checked_ilog2() { + None => return min, + Some(x) => x, + }; + let mut res = 0; + for _ in 0 .. log / 8 + 1 { + res = (res << 8) | generate(data, 0, 255) as usize; + } + if usize::MIN < min || max < usize::MAX { + res = min + res % (max - min + 1); + } + res +} + +fn flip(data: &mut &[u8]) -> bool { + generate(data, 0, 1) == 1 +} + +fn string(data: &mut &[u8]) -> Option { + let len = generate(data, 0, 255) as usize; + String::from_utf8(bytes(data, len).to_vec()).ok() +} + +fn generate(data: &mut &[u8], min: u8, max: u8) -> u8 { + if data.is_empty() { + return min; + } + let mut res = data[0]; + if min > 0 || max < 255 { + res = min + data[0] % (max - min + 1); + } + *data = &data[1 ..]; + res +} + +struct Ascii { + free: Vec, + used: Vec, +} + +impl Ascii { + fn new() -> Ascii { + Ascii { free: (0 .. 128).collect(), used: Vec::with_capacity(128) } + } + + fn next_free(&mut self, data: &mut &[u8]) -> char { + let res = self.free.swap_remove(generate(data, 0, self.len_free() - 1) as usize); + self.used.push(res); + res as char + } + + fn rev_free(&mut self, x: u8) -> u8 { + let i = self.free.iter().position(|&y| x == y).unwrap(); + assert_eq!(self.free.swap_remove(i), x); + self.used.push(x); + i as u8 + } + + fn next_used(&self, data: &mut &[u8]) -> char { + Ascii::next(&self.used, data) + } + + fn rev_used(&self, x: u8) -> u8 { + Ascii::rev(&self.used, x) + } + + fn next(input: &[u8], data: &mut &[u8]) -> char { + input[generate(data, 0, input.len() as u8 - 1) as usize] as char + } + + fn rev(input: &[u8], x: u8) -> u8 { + input.iter().position(|&y| x == y).unwrap() as u8 + } + + fn len_free(&self) -> u8 { + self.free.len() as u8 + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn nat_ok() { + #[track_caller] + fn test(mut data: &[u8], min: usize, max: usize, expected: usize) { + assert_eq!(nat(&mut data, min, max), expected); + assert_eq!(data, &[]); + } + test(&[], 0, 0, 0); + test(&[], 0, 0xffff, 0); + test(&[0], 0, 0xffff, 0); + test(&[0x23], 0, 0xffff, 0x2300); + test(&[0x23, 0x58], 0, 0xffff, 0x2358); + test(&[0x23, 0x58], 0x10000, 0x1ffff, 0x12358); + test(&[0], 0, 1, 0); + test(&[1], 0, 1, 1); + test(&[2], 0, 1, 0); + test(&[128], 0, 255, 128); + test(&[1, 0], 0, 256, 256); + } +} diff --git a/lib/fuzz/src/lib.rs b/lib/fuzz/src/lib.rs index 7f87b67..283b6b5 100644 --- a/lib/fuzz/src/lib.rs +++ b/lib/fuzz/src/lib.rs @@ -1,153 +1,3 @@ -use data_encoding::{DecodePartial, Encoding, Specification}; - -pub fn generate_encoding(data: &mut &[u8]) -> Encoding { - generate_specification(data).encoding().unwrap() -} - -struct Ascii { - free: Vec, - used: Vec, -} - -impl Ascii { - fn new() -> Ascii { - Ascii { free: (0 .. 128).collect(), used: Vec::with_capacity(128) } - } - - fn next_free(&mut self, data: &mut &[u8]) -> char { - let res = self.free.swap_remove(generate(data, 0, self.len_free() - 1) as usize); - self.used.push(res); - res as char - } - - fn next_used(&self, data: &mut &[u8]) -> char { - Ascii::next(&self.used, data) - } - - fn next(input: &[u8], data: &mut &[u8]) -> char { - input[generate(data, 0, input.len() as u8 - 1) as usize] as char - } - - fn len_free(&self) -> u8 { - self.free.len() as u8 - } -} - -pub fn generate_specification(data: &mut &[u8]) -> Specification { - let mut spec = Specification::new(); - let mut ascii = Ascii::new(); - let bit = generate(data, 1, 6); - for _ in 0 .. 1 << bit { - spec.symbols.push(ascii.next_free(data)); - } - if generate(data, 0, 1) == 1 { - spec.bit_order = data_encoding::BitOrder::LeastSignificantFirst; - } - if generate(data, 0, 1) == 1 { - spec.check_trailing_bits = false; - } - if 8 % bit != 0 && generate(data, 0, 1) == 1 { - spec.padding = Some(ascii.next_free(data)); - } - let ignore_translate_len = generate(data, 0, ascii.len_free()); - let ignore_len = generate(data, 0, ignore_translate_len); - let translate_len = ignore_translate_len - ignore_len; - for _ in 0 .. ignore_len { - spec.ignore.push(ascii.next_free(data)); - } - if !spec.ignore.is_empty() { - let dec = match bit { - 1 | 3 | 5 => 8, - 2 | 6 => 4, - 4 => 2, - _ => panic!(), - }; - spec.wrap.width = generate(data, 0, 255) as usize / dec * dec; - if spec.wrap.width > 0 { - for _ in 0 .. generate(data, 1, 255) { - spec.wrap.separator.push(Ascii::next(spec.ignore.as_bytes(), data)); - } - } - } - for _ in 0 .. translate_len { - spec.translate.to.push(ascii.next_used(data)); - } - for _ in 0 .. translate_len { - spec.translate.from.push(ascii.next_free(data)); - } - spec -} - -pub fn generate_bytes<'a>(data: &'_ mut &'a [u8], len: usize) -> &'a [u8] { - let len = std::cmp::min(len, data.len()); - let res = &data[.. len]; - *data = &data[len ..]; - res -} - -pub fn generate_usize(data: &mut &[u8], min: usize, max: usize) -> usize { - let log = match (max - min).checked_ilog2() { - None => return min, - Some(x) => x, - }; - let mut res = 0; - for _ in 0 .. log / 8 + 1 { - res = (res << 8) | generate(data, 0, 255) as usize; - } - if usize::MIN < min || max < usize::MAX { - res = min + res % (max - min + 1); - } - res -} - -pub fn generate(data: &mut &[u8], min: u8, max: u8) -> u8 { - if data.is_empty() { - return min; - } - let mut res = data[0]; - if min > 0 || max < 255 { - res = min + data[0] % (max - min + 1); - } - *data = &data[1 ..]; - res -} - -pub fn decode_prefix(encoding: &Encoding, input: &mut &[u8]) -> Vec { - if let Err(e) = encoding.decode_len(input.len()) { - *input = &input[.. e.position]; - } - let mut output = vec![0; encoding.decode_len(input.len()).unwrap()]; - match encoding.decode_mut(input, &mut output) { - Ok(len) => output.truncate(len), - Err(DecodePartial { read, written, .. }) => { - *input = &input[.. read]; - output.truncate(written) - } - } - output -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn generate_usize_ok() { - #[track_caller] - fn test(mut data: &[u8], min: usize, max: usize, expected: usize) { - assert_eq!(generate_usize(&mut data, min, max), expected); - assert_eq!(data, &[]); - } - test(&[], 0, 0, 0); - test(&[], 0, 0xffff, 0); - test(&[0], 0, 0xffff, 0); - test(&[0x23], 0, 0xffff, 0x2300); - test(&[0x23, 0x58], 0, 0xffff, 0x2358); - test(&[0x23, 0x58], 0x10000, 0x1ffff, 0x12358); - test(&[0], 0, 1, 0); - test(&[1], 0, 1, 1); - test(&[2], 0, 1, 0); - test(&[128], 0, 255, 128); - test(&[1, 0], 0, 256, 256); - } -} +pub mod cmd; +pub mod gen; +pub mod spec; diff --git a/lib/fuzz/src/spec.rs b/lib/fuzz/src/spec.rs new file mode 100644 index 0000000..20cfda7 --- /dev/null +++ b/lib/fuzz/src/spec.rs @@ -0,0 +1,163 @@ +//! Reference implementation of the specification. + +use data_encoding::{BitOrder, Specification}; + +pub fn encode(spec: &Specification, input: &[u8]) -> String { + // Make sure the specification is valid. + assert!(spec.encoding().is_ok()); + // Define short variables. + let symbols = spec.symbols.as_bytes(); + let bit = symbols.len().trailing_zeros() as usize; + let msb = spec.bit_order == BitOrder::MostSignificantFirst; + // Convert from base256 to binary and from binary to baseX. + let mut output = bits_value(bit, msb, &value_bits(8, msb, input)); + // Convert from values to symbols. + output.iter_mut().for_each(|x| *x = symbols[*x as usize]); + // Pad to the next `dec(bit)` boundary, if needed. + if let Some(pad) = spec.padding { + while output.len() % dec(bit) != 0 { + output.push(pad as u8); + } + } + // Wrap every `width` bytes with `separator`, if needed. Including a possibly partial last row. + if spec.wrap.width != 0 { + for row in std::mem::take(&mut output).chunks(spec.wrap.width) { + output.extend_from_slice(row); + output.extend_from_slice(spec.wrap.separator.as_bytes()); + } + } + // Cast the symbols to a string. + String::from_utf8(output).unwrap() +} + +pub fn decode(spec: &Specification, input: &[u8]) -> Option> { + // Make sure the specification is valid. + assert!(spec.encoding().is_ok()); + // Define short variables. + let symbols = spec.symbols.as_bytes(); + let bit = symbols.len().trailing_zeros() as usize; + let xlate = &spec.translate; + // Make sure we also ignore the separators. + let mut ignore = spec.ignore.as_bytes().to_vec(); + ignore.extend_from_slice(spec.wrap.separator.as_bytes()); + // Translate and ignore bytes as needed. Only symbols and padding are left (for valid input). + let input: Vec = input + .iter() + .map(|&x| xlate.from.bytes().position(|y| y == x).map_or(x, |i| xlate.to.as_bytes()[i])) + .filter(|x| !ignore.contains(x)) + .collect(); + // Decode by blocks of `dec(bit)` bytes. Only the last one may be partial. + let mut output = Vec::new(); + for block in input.chunks(dec(bit)) { + output.extend_from_slice(&decode_block(spec, block)?); + } + Some(output) +} + +fn decode_block(spec: &Specification, mut input: &[u8]) -> Option> { + // Define short variables. + let bit = spec.symbols.len().trailing_zeros() as usize; + let msb = spec.bit_order == BitOrder::MostSignificantFirst; + // Remove padding, if needed. + if let Some(pad) = spec.padding { + // There are no partial blocks with padding. + if input.len() != dec(bit) { + return None; + } + // Repeatedly remove last byte, if padding. + while *input.last()? == pad as u8 { + input = &input[.. input.len() - 1]; + } + } + // Convert from symbols to values. + let input = input.iter().map(|&x| value_symbol(spec, x)).collect::>>()?; + // Convert from baseX to binary. + let mut bits = value_bits(bit, msb, &input); + // Check trailing bits (leading bits of the binary number that don't form a full byte). + let trail = bits.len() % 8; + if 0 < trail { + // The trailing bits should not contain a full symbol. + if bit <= trail { + return None; + } + // The trailing bits should be zero, if checked. + let trail = bits.split_off(bits.len() - trail); + if spec.check_trailing_bits && trail.iter().any(|x| *x) { + return None; + } + } + // A block cannot be composed of padding only. + if bits.is_empty() { + return None; + } + // Convert from binary to base256. + Some(bits_value(8, msb, &bits)) +} + +fn value_symbol(spec: &Specification, symbol: u8) -> Option { + // The value of a symbol is its position in the specification. + spec.symbols.bytes().position(|x| x == symbol).map(|x| x as u8) +} + +fn value_bits(bit: usize, msb: bool, input: &[u8]) -> Vec { + // Convert from binary to baseX. + let mut output = Vec::new(); + for &x in input { + for i in order(msb, bit) { + output.push(x & (1 << i) != 0); + } + } + output +} + +fn bits_value(bit: usize, msb: bool, input: &[bool]) -> Vec { + // Convert from baseX to binary. + let mut output = Vec::new(); + for bits in input.chunks(bit) { + output.push(order(msb, bit).zip(bits).map(|(i, &b)| (b as u8) << i).sum()); + } + output +} + +fn order(msb: bool, n: usize) -> Box> { + // Iterate from 0 to n - 1, or the opposite if most significant bit first. + if msb { + Box::new((0 .. n).rev()) + } else { + Box::new(0 .. n) + } +} + +fn enc(bit: usize) -> usize { + // Input block size for encoding, output block size for decoding. + match bit { + 1 | 2 | 4 => 1, + 3 | 6 => 3, + 5 => 5, + _ => unreachable!(), + } +} + +fn dec(bit: usize) -> usize { + // Input block size for decoding, output block size for encoding. + enc(bit) * 8 / bit +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn value_bits_ok() { + #[track_caller] + fn test(bit: usize, msb: bool, values: &[u8], bits: &[u8]) { + let bits: Vec<_> = bits.into_iter().map(|&x| x == 1).collect(); + assert_eq!(value_bits(bit, msb, values), bits); + assert_eq!(bits_value(bit, msb, &bits), values); + } + test(8, true, &[0xc5, 0x69], &[1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1]); + test(8, false, &[0xc5, 0x69], &[1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0]); + test(6, true, &[0x36, 0x2c], &[1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0]); + test(6, false, &[0x36, 0x2c], &[0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1]); + } +} diff --git a/lib/fuzz/test.sh b/lib/fuzz/test.sh new file mode 100755 index 0000000..8760344 --- /dev/null +++ b/lib/fuzz/test.sh @@ -0,0 +1,5 @@ +#!/bin/sh + +for target in $(cargo fuzz list); do + cargo fuzz run $target -- -runs=0 +done diff --git a/xtask/src/main.rs b/xtask/src/main.rs index fc3c0bf..7a40c79 100644 --- a/xtask/src/main.rs +++ b/xtask/src/main.rs @@ -119,6 +119,7 @@ impl Action { (Task::Format, _) => &["--", "--check"], (Task::Clippy, _) => &["--", "--deny=warnings"], (Task::Build, Dir::Nostd) => &["--release"], + (Task::Test, Dir::Fuzz) => &["--lib"], (Task::Miri, _) => &["test"], (Task::SemverChecks, _) => &["check-release"], (Task::Audit, _) => &["--deny=warnings"], @@ -149,6 +150,12 @@ impl Action { }; instructions *= &[&["--features=alloc"]]; } + if self.dir == Dir::Fuzz && matches!(self.task, Task::Clippy | Task::Build) { + let mut instruction = instructions.0[0].clone(); + instructions.0[0].args.splice(0 .. 0, ["--lib", "--examples"].map(|x| x.to_string())); + instruction.env.push(("RUSTFLAGS".to_string(), "--cfg=fuzzing".to_string())); + instructions += instruction; + } if self.dir == Dir::Bin && matches!(self.task, Task::Test | Task::Bench) { instructions = Instructions::default(); instructions += Instruction {