diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 72f3d12..eda0c5b 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -25,6 +25,13 @@ jobs: run: cargo test --verbose --all-features --all - name: Build Examples run: cargo build --examples --all-features --all + kani: + name: Kani Verification + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - run: rustup update + - uses: model-checking/kani-github-action@v1 clippy: name: Clippy runs-on: ubuntu-latest diff --git a/Cargo.toml b/Cargo.toml index 4f7b725..9059729 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -40,3 +40,9 @@ members = ["./fuzz"] [dev-dependencies] exhaustigen = "0.1.0" + +[target.'cfg(kani)'.dependencies] +paste = "1.0" + +[lints.rust] +unexpected_cfgs = { level = "warn", check-cfg = ['cfg(kani)'] } diff --git a/fuzz/fuzz_targets/int_in_range.rs b/fuzz/fuzz_targets/int_in_range.rs index 14c07f0..fdc391f 100755 --- a/fuzz/fuzz_targets/int_in_range.rs +++ b/fuzz/fuzz_targets/int_in_range.rs @@ -12,27 +12,27 @@ fn fuzz(data: &[u8]) -> Result<()> { let mut u = Unstructured::new(data); let choices = [ - assert_in_range::, - assert_in_range::, - assert_in_range::, - assert_in_range::, - assert_in_range::, - assert_in_range::, - assert_in_range::, - assert_in_range::, - assert_in_range::, - assert_in_range::, - assert_in_range::, - assert_in_range::, + assert_in_range::, + assert_in_range::, + assert_in_range::, + assert_in_range::, + assert_in_range::, + assert_in_range::, + assert_in_range::, + assert_in_range::, + assert_in_range::, + assert_in_range::, + assert_in_range::, + assert_in_range::, ]; let f = u.choose(&choices[..])?; f(&mut u) } -fn assert_in_range<'a, 'b, T>(u: &'a mut Unstructured<'b>) -> Result<()> +fn assert_in_range<'a, 'b, T, const BYTES: usize>(u: &'a mut Unstructured<'b>) -> Result<()> where - T: Arbitrary<'b> + Int + Display, + T: Arbitrary<'b> + Int + Display, { let range = RangeInclusive::::arbitrary(u)?; let start = 
*range.start(); diff --git a/src/back_note.md b/src/back_note.md new file mode 100644 index 0000000..4532caa --- /dev/null +++ b/src/back_note.md @@ -0,0 +1,2 @@ +Length information is generally encoded here. +See [UnstructuredBuilder::extend_from_dearbitrary_iter_rev_with_length] diff --git a/src/dearbitrary/error.rs b/src/dearbitrary/error.rs new file mode 100644 index 0000000..458472f --- /dev/null +++ b/src/dearbitrary/error.rs @@ -0,0 +1,41 @@ +use std::{error, fmt}; + +/// An enumeration of dearbitration errors +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[non_exhaustive] +pub enum Error { + /// The instance's size is unsupported by its corresponding [Arbitrary](crate::Arbitrary) type + TooLarge, + /// The instance's details are too specific to this platform to be represented by its corresponding [Arbitrary](crate::Arbitrary) type. + TooSpecific +} + +impl fmt::Display for Error { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Error::TooLarge => write!( + f, + "This type instance is too large to be losslessly reconstructed by Arbitrary after dearbitration." + ), + Error::TooSpecific => write!( + f, + "This type instance is too specific to the platform to be lossly reconstructed by Arbitrary after dearbitration on other platforms." + ), + } + } +} + +impl error::Error for Error {} + +/// A `Result` with the error type fixed as `arbitrary::DearbitraryError`. +/// +/// Either an `Ok(T)` or `Err(arbitrary::DearbitraryError)`. 
+pub type Result = std::result::Result; + +#[cfg(test)] +mod tests { + #[test] + fn can_use_custom_error_types_with_result() -> super::Result<(), String> { + Ok(()) + } +} diff --git a/src/dearbitrary/mod.rs b/src/dearbitrary/mod.rs new file mode 100644 index 0000000..7edf3bf --- /dev/null +++ b/src/dearbitrary/mod.rs @@ -0,0 +1,2 @@ +mod error; +pub use error::*; diff --git a/src/lib.rs b/src/lib.rs index 6ff98f3..86d1698 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -32,6 +32,10 @@ pub mod unstructured; #[doc(inline)] pub use unstructured::Unstructured; +mod dearbitrary; +pub use dearbitrary::{Error as DearbitraryError, Result as DearbitraryResult}; +pub use unstructured::UnstructuredBuilder; + pub mod size_hint; use core::array; @@ -294,8 +298,8 @@ pub trait Arbitrary<'a>: Sized { /// // parameter, like what is done here, so that you can't /// // accidentally use the wrong depth. /// size_hint::or( - /// ::size_hint(depth), - /// ::size_hint(depth), + /// L::size_hint(depth), + /// R::size_hint(depth), /// ) /// }) /// } @@ -310,6 +314,73 @@ pub trait Arbitrary<'a>: Sized { } } +/// Generate raw bytes from structured values. +/// +/// The `Dearbitrary` trait is the inverse of the [Arbitrary] trait, and it allows +/// you to generate bytes that can be later turned into the exact value +/// by the [Arbitrary] trait. +/// +/// However, this trait does not return bytes directly, as bytes can also be read from +/// the end of the stream to better assist fuzzers. +/// +/// Due to the end-writing capabilities, dearbitration happens in reverse. +pub trait Dearbitrary<'a>: Arbitrary<'a> { + /// Apply `Self` to an [UnstructuredBuilder]. + /// + /// **Note**: This must be built in reverse. + fn dearbitrary(&self, builder: &mut UnstructuredBuilder) -> DearbitraryResult<()>; +} + +/// Macro that automatically derives some kani proof. +/// Only works for non-generic types. +#[cfg(kani)] +macro_rules! kani_dearbitrary { + ($tt:tt : $ident:ident) => { + paste::paste! 
{ + #[kani::proof] + #[kani::unwind(10)] + fn [< prove_dearbitrary_ $ident >]() { + let instance: $tt = kani::any(); + let mut builder = UnstructuredBuilder::new(); + instance.dearbitrary(&mut builder).expect("Failure on dearbitration"); + let bytes = builder.collect(); + let mut unstructured = Unstructured::new(&bytes); + let new_instance = <$tt>::arbitrary(&mut unstructured).expect("Failure on rearbitration"); + assert_eq!(instance, new_instance); + } + } + }; +} + +/// Macro that automatically derives some kani proof. +/// Only works for non-generic types. +/// Ensures that there is one-to-one between the byte representation and the type. +#[cfg(kani)] +macro_rules! kani_dearbitrary_lossless { + ($tt:tt : $ident:ident) => { + paste::paste! { + #[kani::proof] + #[kani::unwind(10)] + fn [< prove_dearbitrary_ $ident >]() { + let instance: $tt = kani::any(); + let bytes = { + let mut builder = UnstructuredBuilder::new(); + instance.dearbitrary(&mut builder).expect("Failure on instance dearbitration"); + builder.collect() + }; + let new_bytes = { + let mut unstructured = Unstructured::new(&bytes); + let new_instance = <$tt>::arbitrary(&mut unstructured).expect("Failure on rearbitration"); + let mut builder = UnstructuredBuilder::new(); + new_instance.dearbitrary(&mut builder).expect("Failure on new_instance dearbitration"); + builder.collect() + }; + assert_eq!(bytes, new_bytes); + } + } + }; +} + impl<'a> Arbitrary<'a> for () { fn arbitrary(_: &mut Unstructured<'a>) -> Result { Ok(()) @@ -321,19 +392,37 @@ impl<'a> Arbitrary<'a> for () { } } +impl<'a> Dearbitrary<'a> for () { + fn dearbitrary(&self, _: &mut UnstructuredBuilder) -> DearbitraryResult<()> { + Ok(()) + } +} + +#[cfg(kani)] +kani_dearbitrary_lossless!(() : empty); + impl<'a> Arbitrary<'a> for bool { fn arbitrary(u: &mut Unstructured<'a>) -> Result { - Ok(>::arbitrary(u)? & 1 == 1) + Ok(u8::arbitrary(u)? 
& 1 == 1) } #[inline] fn size_hint(depth: usize) -> (usize, Option) { - >::size_hint(depth) + u8::size_hint(depth) } } -macro_rules! impl_arbitrary_for_integers { - ( $( $ty:ty; )* ) => { +impl<'a> Dearbitrary<'a> for bool { + fn dearbitrary(&self, builder: &mut UnstructuredBuilder) -> DearbitraryResult<()> { + if *self { u8::dearbitrary(&1, builder) } else { u8::dearbitrary(&0, builder) } + } +} + +#[cfg(kani)] +kani_dearbitrary!(bool : bool); + +macro_rules! impl_all_arbitrary_for_integers { + ( $( $ty:ty : $ident:ident; )* ) => { $( impl<'a> Arbitrary<'a> for $ty { fn arbitrary(u: &mut Unstructured<'a>) -> Result { @@ -349,24 +438,36 @@ macro_rules! impl_arbitrary_for_integers { } } + + impl<'a> Dearbitrary<'a> for $ty { + fn dearbitrary(&self, builder: &mut UnstructuredBuilder) -> DearbitraryResult<()> { + // be_bytes is in inverse order of le_bytes, and lets us take advantage + // of any platform optimizations + builder.extend_front_from_slice(&self.to_be_bytes()); + Ok(()) + } + } + + #[cfg(kani)] + kani_dearbitrary!($ty : $ident); )* } } -impl_arbitrary_for_integers! { - u8; - u16; - u32; - u64; - u128; - i8; - i16; - i32; - i64; - i128; +impl_all_arbitrary_for_integers! { + u8 : u8; + u16 : u16; + u32 : u32; + u64 : u64; + u128 : u128; + i8 : i8; + i16 : i16; + i32 : i32; + i64 : i64; + i128 : i128; } -// Note: We forward Arbitrary for i/usize to i/u64 in order to simplify corpus +// Note: We forward (De)arbitrary for i/usize to i/u64 in order to simplify corpus // compatibility between 32-bit and 64-bit builds. This introduces dead space in // 32-bit builds but keeps the input layout independent of the build platform. 
impl<'a> Arbitrary<'a> for usize { @@ -376,10 +477,19 @@ impl<'a> Arbitrary<'a> for usize { #[inline] fn size_hint(depth: usize) -> (usize, Option) { - ::size_hint(depth) + u64::size_hint(depth) + } +} + +impl<'a> Dearbitrary<'a> for usize { + fn dearbitrary(&self, builder: &mut UnstructuredBuilder) -> DearbitraryResult<()> { + u64::dearbitrary(&(*self as u64), builder) } } +#[cfg(kani)] +kani_dearbitrary_lossless!(usize : usize); + impl<'a> Arbitrary<'a> for isize { fn arbitrary(u: &mut Unstructured<'a>) -> Result { u.arbitrary::().map(|x| x as isize) @@ -387,28 +497,46 @@ impl<'a> Arbitrary<'a> for isize { #[inline] fn size_hint(depth: usize) -> (usize, Option) { - ::size_hint(depth) + i64::size_hint(depth) + } +} + +impl<'a> Dearbitrary<'a> for isize { + fn dearbitrary(&self, builder: &mut UnstructuredBuilder) -> DearbitraryResult<()> { + i64::dearbitrary(&(*self as i64), builder) } } -macro_rules! impl_arbitrary_for_floats { +#[cfg(kani)] +kani_dearbitrary_lossless!(isize : isize); + +macro_rules! impl_all_arbitrary_for_floats { ( $( $ty:ident : $unsigned:ty; )* ) => { $( impl<'a> Arbitrary<'a> for $ty { fn arbitrary(u: &mut Unstructured<'a>) -> Result { - Ok(Self::from_bits(<$unsigned as Arbitrary<'a>>::arbitrary(u)?)) + Ok(Self::from_bits(<$unsigned>::arbitrary(u)?)) } #[inline] fn size_hint(depth: usize) -> (usize, Option) { - <$unsigned as Arbitrary<'a>>::size_hint(depth) + <$unsigned>::size_hint(depth) } } + + impl<'a> Dearbitrary<'a> for $ty { + fn dearbitrary(&self, builder: &mut UnstructuredBuilder) -> DearbitraryResult<()> { + <$unsigned>::dearbitrary(&self.to_bits(), builder) + } + } + + #[cfg(kani)] + kani_dearbitrary_lossless!($ty : $ty); )* } } -impl_arbitrary_for_floats! { +impl_all_arbitrary_for_floats! { f32: u32; f64: u64; } @@ -420,7 +548,7 @@ impl<'a> Arbitrary<'a> for char { const CHAR_END: u32 = 0x11_0000; // The size of the surrogate blocks const SURROGATES_START: u32 = 0xD800; - let mut c = >::arbitrary(u)? 
% CHAR_END; + let mut c = u32::arbitrary(u)? % CHAR_END; if let Some(c) = char::from_u32(c) { Ok(c) } else { @@ -433,10 +561,22 @@ impl<'a> Arbitrary<'a> for char { #[inline] fn size_hint(depth: usize) -> (usize, Option) { - >::size_hint(depth) + u32::size_hint(depth) + } +} + +impl<'a> Dearbitrary<'a> for char { + fn dearbitrary(&self, builder: &mut UnstructuredBuilder) -> DearbitraryResult<()> { + (*self as u32).dearbitrary(builder) } } +#[cfg(kani)] +kani_dearbitrary_lossless!(char : char); + +// Note: We don't derive Dearbitrary for any atomics +// because of having to specify the precise order to retrieve +// the value from the atomic impl<'a> Arbitrary<'a> for AtomicBool { fn arbitrary(u: &mut Unstructured<'a>) -> Result { Arbitrary::arbitrary(u).map(Self::new) @@ -444,7 +584,7 @@ impl<'a> Arbitrary<'a> for AtomicBool { #[inline] fn size_hint(depth: usize) -> (usize, Option) { - >::size_hint(depth) + bool::size_hint(depth) } } @@ -455,7 +595,7 @@ impl<'a> Arbitrary<'a> for AtomicIsize { #[inline] fn size_hint(depth: usize) -> (usize, Option) { - >::size_hint(depth) + isize::size_hint(depth) } } @@ -466,7 +606,7 @@ impl<'a> Arbitrary<'a> for AtomicUsize { #[inline] fn size_hint(depth: usize) -> (usize, Option) { - >::size_hint(depth) + usize::size_hint(depth) } } @@ -493,6 +633,15 @@ macro_rules! 
impl_range { $size_hint_closure(depth) } } + + impl<'a, A> Dearbitrary<'a> for $range + where + A: Dearbitrary<'a> + Clone + PartialOrd, + { + fn dearbitrary(&self, builder: &mut UnstructuredBuilder) -> DearbitraryResult<()> { + $value_closure(self).dearbitrary(builder) + } + } }; } @@ -502,8 +651,8 @@ impl_range!( (A, A), bounded_range(|(a, b)| a..b), |depth| crate::size_hint::and( - ::size_hint(depth), - ::size_hint(depth) + A::size_hint(depth), + A::size_hint(depth) ) ); impl_range!( @@ -511,7 +660,7 @@ impl_range!( |r: &RangeFrom| r.start.clone(), A, unbounded_range(|a| a..), - |depth| ::size_hint(depth) + |depth| A::size_hint(depth) ); impl_range!( RangeInclusive, @@ -519,8 +668,8 @@ impl_range!( (A, A), bounded_range(|(a, b)| a..=b), |depth| crate::size_hint::and( - ::size_hint(depth), - ::size_hint(depth) + A::size_hint(depth), + A::size_hint(depth) ) ); impl_range!( @@ -528,14 +677,14 @@ impl_range!( |r: &RangeTo| r.end.clone(), A, unbounded_range(|b| ..b), - |depth| ::size_hint(depth) + |depth| A::size_hint(depth) ); impl_range!( RangeToInclusive, |r: &RangeToInclusive| r.end.clone(), A, unbounded_range(|b| ..=b), - |depth| ::size_hint(depth) + |depth| A::size_hint(depth) ); pub(crate) fn bounded_range(bounds: (I, I), cb: CB) -> R @@ -562,7 +711,7 @@ where impl<'a> Arbitrary<'a> for Duration { fn arbitrary(u: &mut Unstructured<'a>) -> Result { Ok(Self::new( - ::arbitrary(u)?, + u64::arbitrary(u)?, u.int_in_range(0..=999_999_999)?, )) } @@ -570,15 +719,25 @@ impl<'a> Arbitrary<'a> for Duration { #[inline] fn size_hint(depth: usize) -> (usize, Option) { crate::size_hint::and( - ::size_hint(depth), - ::size_hint(depth), + u64::size_hint(depth), + u32::size_hint(depth), ) } } +impl<'a> Dearbitrary<'a> for Duration { + fn dearbitrary(&self, builder: &mut UnstructuredBuilder) -> DearbitraryResult<()> { + self.subsec_nanos().dearbitrary(builder)?; + self.as_secs().dearbitrary(builder) + } +} + +#[cfg(kani)] +kani_dearbitrary!(Duration : duration); + impl<'a, A: 
Arbitrary<'a>> Arbitrary<'a> for Option { fn arbitrary(u: &mut Unstructured<'a>) -> Result { - Ok(if >::arbitrary(u)? { + Ok(if u.arbitrary()? { Some(Arbitrary::arbitrary(u)?) } else { None @@ -588,37 +747,66 @@ impl<'a, A: Arbitrary<'a>> Arbitrary<'a> for Option { #[inline] fn size_hint(depth: usize) -> (usize, Option) { crate::size_hint::and( - ::size_hint(depth), - crate::size_hint::or((0, Some(0)), ::size_hint(depth)), + bool::size_hint(depth), + crate::size_hint::or((0, Some(0)), A::size_hint(depth)), ) } } +impl<'a, A: Dearbitrary<'a>> Dearbitrary<'a> for Option { + fn dearbitrary(&self, builder: &mut UnstructuredBuilder) -> DearbitraryResult<()> { + match self { + Some(value) => { + value.dearbitrary(builder)?; + true.dearbitrary(builder) + } + None => { + false.dearbitrary(builder) + } + } + } +} + impl<'a, A: Arbitrary<'a>, B: Arbitrary<'a>> Arbitrary<'a> for std::result::Result { fn arbitrary(u: &mut Unstructured<'a>) -> Result { - Ok(if >::arbitrary(u)? { - Ok(::arbitrary(u)?) + Ok(if u.arbitrary()? { + Ok(A::arbitrary(u)?) } else { - Err(::arbitrary(u)?) + Err(B::arbitrary(u)?) }) } #[inline] fn size_hint(depth: usize) -> (usize, Option) { crate::size_hint::and( - ::size_hint(depth), + bool::size_hint(depth), crate::size_hint::or( - ::size_hint(depth), - ::size_hint(depth), + A::size_hint(depth), + B::size_hint(depth), ), ) } } +impl <'a, A: Dearbitrary<'a>, B: Dearbitrary<'a>> Dearbitrary<'a> for std::result::Result { + fn dearbitrary(&self, builder: &mut UnstructuredBuilder) -> DearbitraryResult<()> { + match self { + Result::Ok(value) => { + value.dearbitrary(builder)?; + true.dearbitrary(builder) + } + Result::Err(value) => { + value.dearbitrary(builder)?; + false.dearbitrary(builder) + } + } + } +} + macro_rules! 
arbitrary_tuple { () => {}; - ($last: ident $($xs: ident)*) => { - arbitrary_tuple!($($xs)*); + ($lidx: tt $last: ident $($idx: tt $xs: ident)*) => { + arbitrary_tuple!($($idx $xs)*); impl<'a, $($xs: Arbitrary<'a>,)* $last: Arbitrary<'a>> Arbitrary<'a> for ($($xs,)* $last,) { fn arbitrary(u: &mut Unstructured<'a>) -> Result { @@ -635,14 +823,22 @@ macro_rules! arbitrary_tuple { #[inline] fn size_hint(depth: usize) -> (usize, Option) { crate::size_hint::and_all(&[ - <$last as Arbitrary>::size_hint(depth), - $( <$xs as Arbitrary>::size_hint(depth) ),* + <$last>::size_hint(depth), + $( <$xs>::size_hint(depth) ),* ]) } } + + impl<'a, $($xs: Dearbitrary<'a>,)* $last: Dearbitrary<'a>> Dearbitrary<'a> for ($($xs,)* $last,) { + fn dearbitrary(&self, builder: &mut UnstructuredBuilder) -> DearbitraryResult<()> { + self.$lidx.dearbitrary(builder)?; + $( self.$idx.dearbitrary(builder)?; )* + Ok(()) + } + } }; } -arbitrary_tuple!(A B C D E F G H I J K L M N O P Q R S T U V W X Y Z); +arbitrary_tuple!(25 A 24 B 23 C 22 D 21 E 20 F 19 G 18 H 17 I 16 J 15 K 14 L 13 M 12 N 11 O 10 P 9 Q 8 R 7 S 6 T 5 U 4 V 3 W 2 X 1 Y 0 Z); // Helper to safely create arrays since the standard library doesn't // provide one yet. Shouldn't be necessary in the future. 
@@ -703,11 +899,21 @@ where #[inline] fn size_hint(d: usize) -> (usize, Option) { crate::size_hint::and_all(&array::from_fn::<_, N, _>(|_| { - ::size_hint(d) + T::size_hint(d) })) } } +impl<'a, T, const N: usize> Dearbitrary<'a> for [T; N] +where + T: Dearbitrary<'a> + Clone, +{ + fn dearbitrary(&self, builder: &mut UnstructuredBuilder) -> DearbitraryResult<()> { + builder.extend_from_dearbitrary_iter_rev(self.iter().cloned())?; + Ok(()) + } +} + impl<'a> Arbitrary<'a> for &'a [u8] { fn arbitrary(u: &mut Unstructured<'a>) -> Result { let len = u.arbitrary_len::()?; @@ -724,6 +930,12 @@ impl<'a> Arbitrary<'a> for &'a [u8] { } } +impl<'a> Dearbitrary<'a> for &'a [u8] { + fn dearbitrary(&self, builder: &mut UnstructuredBuilder) -> DearbitraryResult<()> { + builder.extend_from_dearbitrary_iter_rev_with_length(self.iter().cloned()) + } +} + impl<'a, A: Arbitrary<'a>> Arbitrary<'a> for Vec { fn arbitrary(u: &mut Unstructured<'a>) -> Result { u.arbitrary_iter()?.collect() @@ -739,6 +951,13 @@ impl<'a, A: Arbitrary<'a>> Arbitrary<'a> for Vec { } } +impl<'a, A: Dearbitrary<'a> + Clone> Dearbitrary<'a> for Vec { + fn dearbitrary(&self, builder: &mut UnstructuredBuilder) -> DearbitraryResult<()> { + builder.extend_from_dearbitrary_iter_rev(self.iter().cloned())?; + Ok(()) + } +} + impl<'a, K: Arbitrary<'a> + Ord, V: Arbitrary<'a>> Arbitrary<'a> for BTreeMap { fn arbitrary(u: &mut Unstructured<'a>) -> Result { u.arbitrary_iter()?.collect() @@ -754,6 +973,13 @@ impl<'a, K: Arbitrary<'a> + Ord, V: Arbitrary<'a>> Arbitrary<'a> for BTreeMap + Ord + Clone, V: Dearbitrary<'a> + Clone> Dearbitrary<'a> for BTreeMap { + fn dearbitrary(&self, builder: &mut UnstructuredBuilder) -> DearbitraryResult<()> { + builder.extend_from_dearbitrary_iter_rev(self.iter().map(|(k, v)| (k.clone(), v.clone())))?; + Ok(()) + } +} + impl<'a, A: Arbitrary<'a> + Ord> Arbitrary<'a> for BTreeSet { fn arbitrary(u: &mut Unstructured<'a>) -> Result { u.arbitrary_iter()?.collect() @@ -769,9 +995,16 @@ impl<'a, A: 
Arbitrary<'a> + Ord> Arbitrary<'a> for BTreeSet { } } +impl<'a, A: Dearbitrary<'a> + Ord + Clone> Dearbitrary<'a> for BTreeSet { + fn dearbitrary(&self, builder: &mut UnstructuredBuilder) -> DearbitraryResult<()> { + builder.extend_from_dearbitrary_iter_rev(self.iter().cloned())?; + Ok(()) + } +} + impl<'a, A: Arbitrary<'a>> Arbitrary<'a> for Bound { fn arbitrary(u: &mut Unstructured<'a>) -> Result { - match u.int_in_range::(0..=2)? { + match u.int_in_range::(0..=2)? { 0 => Ok(Bound::Included(A::arbitrary(u)?)), 1 => Ok(Bound::Excluded(A::arbitrary(u)?)), 2 => Ok(Bound::Unbounded), @@ -788,6 +1021,24 @@ impl<'a, A: Arbitrary<'a>> Arbitrary<'a> for Bound { } } +impl<'a, A: Dearbitrary<'a>> Dearbitrary<'a> for Bound { + fn dearbitrary(&self, builder: &mut UnstructuredBuilder) -> DearbitraryResult<()> { + match self { + Bound::Included(value) => { + value.dearbitrary(builder)?; + (0 as u8).dearbitrary(builder) + } + Bound::Excluded(value) => { + value.dearbitrary(builder)?; + (1 as u8).dearbitrary(builder) + }, + Bound::Unbounded => { + (2 as u8).dearbitrary(builder) + } + } + } +} + impl<'a, A: Arbitrary<'a> + Ord> Arbitrary<'a> for BinaryHeap { fn arbitrary(u: &mut Unstructured<'a>) -> Result { u.arbitrary_iter()?.collect() @@ -803,6 +1054,13 @@ impl<'a, A: Arbitrary<'a> + Ord> Arbitrary<'a> for BinaryHeap { } } +impl<'a, A: Dearbitrary<'a> + Ord + Clone> Dearbitrary<'a> for BinaryHeap { + fn dearbitrary(&self, builder: &mut UnstructuredBuilder) -> DearbitraryResult<()> { + builder.extend_from_dearbitrary_iter_rev(self.iter().cloned())?; + Ok(()) + } +} + impl<'a, K: Arbitrary<'a> + Eq + ::std::hash::Hash, V: Arbitrary<'a>, S: BuildHasher + Default> Arbitrary<'a> for HashMap { @@ -820,6 +1078,16 @@ impl<'a, K: Arbitrary<'a> + Eq + ::std::hash::Hash, V: Arbitrary<'a>, S: BuildHa } } +impl<'a, K: Dearbitrary<'a> + Eq + Clone + ::std::hash::Hash, V: Dearbitrary<'a> + Clone, S: BuildHasher + Default> + Dearbitrary<'a> for HashMap +{ + fn dearbitrary(&self, builder: &mut 
UnstructuredBuilder) -> DearbitraryResult<()> { + let mut pairs = self.iter().collect::>(); + pairs.reverse(); + builder.extend_from_dearbitrary_iter(pairs.iter().cloned().map(|(k, v)| (k.clone(), v.clone()))) + } +} + impl<'a, A: Arbitrary<'a> + Eq + ::std::hash::Hash, S: BuildHasher + Default> Arbitrary<'a> for HashSet { @@ -837,6 +1105,14 @@ impl<'a, A: Arbitrary<'a> + Eq + ::std::hash::Hash, S: BuildHasher + Default> Ar } } +impl<'a, A: Dearbitrary<'a> + Eq + Clone + ::std::hash::Hash, S: BuildHasher + Default> Dearbitrary<'a> + for HashSet +{ + fn dearbitrary(&self, builder: &mut UnstructuredBuilder) -> DearbitraryResult<()> { + builder.extend_from_dearbitrary_iter_rev(self.iter().cloned().collect::>().iter().cloned()) + } +} + impl<'a, A: Arbitrary<'a>> Arbitrary<'a> for LinkedList { fn arbitrary(u: &mut Unstructured<'a>) -> Result { u.arbitrary_iter()?.collect() @@ -852,7 +1128,13 @@ impl<'a, A: Arbitrary<'a>> Arbitrary<'a> for LinkedList { } } -impl<'a, A: Arbitrary<'a>> Arbitrary<'a> for VecDeque { +impl<'a, A: Dearbitrary<'a> + Clone> Dearbitrary<'a> for LinkedList { + fn dearbitrary(&self, builder: &mut UnstructuredBuilder) -> DearbitraryResult<()> { + builder.extend_from_dearbitrary_iter_rev(self.iter().cloned()) + } +} + +impl<'a, A: Arbitrary<'a>> Arbitrary<'a> for VecDeque { fn arbitrary(u: &mut Unstructured<'a>) -> Result { u.arbitrary_iter()?.collect() } @@ -867,6 +1149,12 @@ impl<'a, A: Arbitrary<'a>> Arbitrary<'a> for VecDeque { } } +impl<'a, A: Dearbitrary<'a> + Clone> Dearbitrary<'a> for VecDeque { + fn dearbitrary(&self, builder: &mut UnstructuredBuilder) -> DearbitraryResult<()> { + builder.extend_from_dearbitrary_iter_rev(self.iter().cloned()) + } +} + impl<'a, A> Arbitrary<'a> for Cow<'a, A> where A: ToOwned + ?Sized, @@ -879,11 +1167,21 @@ where #[inline] fn size_hint(depth: usize) -> (usize, Option) { crate::size_hint::recursion_guard(depth, |depth| { - <::Owned as Arbitrary>::size_hint(depth) + A::Owned::size_hint(depth) }) } } 
+impl<'a, A> Dearbitrary<'a> for Cow<'a, A> +where + A: ToOwned + ?Sized, + ::Owned: Dearbitrary<'a>, +{ + fn dearbitrary(&self, builder: &mut UnstructuredBuilder) -> DearbitraryResult<()> { + self.clone().into_owned().dearbitrary(builder) + } +} + fn arbitrary_str<'a>(u: &mut Unstructured<'a>, size: usize) -> Result<&'a str> { match str::from_utf8(u.peek_bytes(size).unwrap()) { Ok(s) => { @@ -919,24 +1217,36 @@ impl<'a> Arbitrary<'a> for &'a str { } } +impl<'a> Dearbitrary<'a> for &'a str { + fn dearbitrary(&self, builder: &mut UnstructuredBuilder) -> DearbitraryResult<()> { + builder.extend_from_dearbitrary_iter_rev_with_length(self.as_bytes().iter().copied()) + } +} + impl<'a> Arbitrary<'a> for String { fn arbitrary(u: &mut Unstructured<'a>) -> Result { - <&str as Arbitrary>::arbitrary(u).map(Into::into) + <&str>::arbitrary(u).map(Into::into) } fn arbitrary_take_rest(u: Unstructured<'a>) -> Result { - <&str as Arbitrary>::arbitrary_take_rest(u).map(Into::into) + <&str>::arbitrary_take_rest(u).map(Into::into) } #[inline] fn size_hint(depth: usize) -> (usize, Option) { - <&str as Arbitrary>::size_hint(depth) + <&str>::size_hint(depth) + } +} + +impl<'a> Dearbitrary<'a> for String { + fn dearbitrary(&self, builder: &mut UnstructuredBuilder) -> DearbitraryResult<()> { + self.as_str().dearbitrary(builder) } } impl<'a> Arbitrary<'a> for CString { fn arbitrary(u: &mut Unstructured<'a>) -> Result { - as Arbitrary>::arbitrary(u).map(|mut x| { + >::arbitrary(u).map(|mut x| { x.retain(|&c| c != 0); // SAFETY: all zero bytes have been removed unsafe { Self::from_vec_unchecked(x) } @@ -945,29 +1255,50 @@ impl<'a> Arbitrary<'a> for CString { #[inline] fn size_hint(depth: usize) -> (usize, Option) { - as Arbitrary>::size_hint(depth) + >::size_hint(depth) + } +} + +impl<'a> Dearbitrary<'a> for CString { + fn dearbitrary(&self, builder: &mut UnstructuredBuilder) -> DearbitraryResult<()> { + self.as_bytes().dearbitrary(builder) } } impl<'a> Arbitrary<'a> for OsString { fn 
arbitrary(u: &mut Unstructured<'a>) -> Result { - ::arbitrary(u).map(From::from) + String::arbitrary(u).map(From::from) } #[inline] fn size_hint(depth: usize) -> (usize, Option) { - ::size_hint(depth) + String::size_hint(depth) + } +} + +impl<'a> Dearbitrary<'a> for OsString { + fn dearbitrary(&self, builder: &mut UnstructuredBuilder) -> DearbitraryResult<()> { + self.clone().into_string() + .map_err(|_| DearbitraryError::TooSpecific) + .map(|string| string.dearbitrary(builder)) + .and_then(std::convert::identity) } } impl<'a> Arbitrary<'a> for PathBuf { fn arbitrary(u: &mut Unstructured<'a>) -> Result { - ::arbitrary(u).map(From::from) + OsString::arbitrary(u).map(From::from) } #[inline] fn size_hint(depth: usize) -> (usize, Option) { - ::size_hint(depth) + OsString::size_hint(depth) + } +} + +impl<'a> Dearbitrary<'a> for PathBuf { + fn dearbitrary(&self, builder: &mut UnstructuredBuilder) -> DearbitraryResult<()> { + self.clone().into_os_string().dearbitrary(builder) } } @@ -978,7 +1309,13 @@ impl<'a, A: Arbitrary<'a>> Arbitrary<'a> for Box { #[inline] fn size_hint(depth: usize) -> (usize, Option) { - crate::size_hint::recursion_guard(depth, ::size_hint) + crate::size_hint::recursion_guard(depth, A::size_hint) + } +} + +impl<'a, A: Dearbitrary<'a>> Dearbitrary<'a> for Box { + fn dearbitrary(&self, builder: &mut UnstructuredBuilder) -> DearbitraryResult<()> { + self.as_ref().dearbitrary(builder) } } @@ -997,14 +1334,26 @@ impl<'a, A: Arbitrary<'a>> Arbitrary<'a> for Box<[A]> { } } +impl<'a, A: Dearbitrary<'a> + Clone> Dearbitrary<'a> for Box<[A]> { + fn dearbitrary(&self, builder: &mut UnstructuredBuilder) -> DearbitraryResult<()> { + builder.extend_from_dearbitrary_iter_rev(self.iter().cloned()) + } +} + impl<'a> Arbitrary<'a> for Box { fn arbitrary(u: &mut Unstructured<'a>) -> Result { - ::arbitrary(u).map(|x| x.into_boxed_str()) + String::arbitrary(u).map(|x| x.into_boxed_str()) } #[inline] fn size_hint(depth: usize) -> (usize, Option) { - ::size_hint(depth) + 
String::size_hint(depth) + } +} + +impl<'a> Dearbitrary<'a> for Box { + fn dearbitrary(&self, builder: &mut UnstructuredBuilder) -> DearbitraryResult<()> { + self.as_ref().dearbitrary(builder) } } @@ -1028,7 +1377,13 @@ impl<'a, A: Arbitrary<'a>> Arbitrary<'a> for Arc { #[inline] fn size_hint(depth: usize) -> (usize, Option) { - crate::size_hint::recursion_guard(depth, ::size_hint) + crate::size_hint::recursion_guard(depth, A::size_hint) + } +} + +impl<'a, A: Dearbitrary<'a>> Dearbitrary<'a> for Arc { + fn dearbitrary(&self, builder: &mut UnstructuredBuilder) -> DearbitraryResult<()> { + self.as_ref().dearbitrary(builder) } } @@ -1047,14 +1402,27 @@ impl<'a, A: Arbitrary<'a>> Arbitrary<'a> for Arc<[A]> { } } +impl<'a, A: Dearbitrary<'a> + Clone> Dearbitrary<'a> for Arc<[A]> { + fn dearbitrary(&self, builder: &mut UnstructuredBuilder) -> DearbitraryResult<()> { + builder.extend_from_dearbitrary_iter_rev(self.iter().cloned())?; + Ok(()) + } +} + impl<'a> Arbitrary<'a> for Arc { fn arbitrary(u: &mut Unstructured<'a>) -> Result { - <&str as Arbitrary>::arbitrary(u).map(Into::into) + <&str>::arbitrary(u).map(Into::into) } #[inline] fn size_hint(depth: usize) -> (usize, Option) { - <&str as Arbitrary>::size_hint(depth) + <&str>::size_hint(depth) + } +} + +impl<'a> Dearbitrary<'a> for Arc { + fn dearbitrary(&self, builder: &mut UnstructuredBuilder) -> DearbitraryResult<()> { + self.as_ref().dearbitrary(builder) } } @@ -1065,7 +1433,13 @@ impl<'a, A: Arbitrary<'a>> Arbitrary<'a> for Rc { #[inline] fn size_hint(depth: usize) -> (usize, Option) { - crate::size_hint::recursion_guard(depth, ::size_hint) + crate::size_hint::recursion_guard(depth, A::size_hint) + } +} + +impl<'a, A: Dearbitrary<'a>> Dearbitrary<'a> for Rc { + fn dearbitrary(&self, builder: &mut UnstructuredBuilder) -> DearbitraryResult<()> { + self.as_ref().dearbitrary(builder) } } @@ -1084,17 +1458,30 @@ impl<'a, A: Arbitrary<'a>> Arbitrary<'a> for Rc<[A]> { } } +impl<'a, A: Dearbitrary<'a> + Clone> 
Dearbitrary<'a> for Rc<[A]> { + fn dearbitrary(&self, builder: &mut UnstructuredBuilder) -> DearbitraryResult<()> { + builder.extend_from_dearbitrary_iter_rev(self.iter().cloned()) + } +} + impl<'a> Arbitrary<'a> for Rc { fn arbitrary(u: &mut Unstructured<'a>) -> Result { - <&str as Arbitrary>::arbitrary(u).map(Into::into) + <&str>::arbitrary(u).map(Into::into) } #[inline] fn size_hint(depth: usize) -> (usize, Option) { - <&str as Arbitrary>::size_hint(depth) + <&str>::size_hint(depth) + } +} + +impl<'a> Dearbitrary<'a> for Rc { + fn dearbitrary(&self, builder: &mut UnstructuredBuilder) -> DearbitraryResult<()> { + self.as_ref().dearbitrary(builder) } } +// Note: Cell does not implement Dearbitrary impl<'a, A: Arbitrary<'a>> Arbitrary<'a> for Cell { fn arbitrary(u: &mut Unstructured<'a>) -> Result { Arbitrary::arbitrary(u).map(Self::new) @@ -1106,6 +1493,7 @@ impl<'a, A: Arbitrary<'a>> Arbitrary<'a> for Cell { } } +// Note: RefCell does not implement Dearbitrary impl<'a, A: Arbitrary<'a>> Arbitrary<'a> for RefCell { fn arbitrary(u: &mut Unstructured<'a>) -> Result { Arbitrary::arbitrary(u).map(Self::new) @@ -1117,6 +1505,7 @@ impl<'a, A: Arbitrary<'a>> Arbitrary<'a> for RefCell { } } +// Note: UnsafeCell does not implement Dearbitrary impl<'a, A: Arbitrary<'a>> Arbitrary<'a> for UnsafeCell { fn arbitrary(u: &mut Unstructured<'a>) -> Result { Arbitrary::arbitrary(u).map(Self::new) @@ -1128,6 +1517,7 @@ impl<'a, A: Arbitrary<'a>> Arbitrary<'a> for UnsafeCell { } } +// Note: Mutex does not implement Dearbitrary impl<'a, A: Arbitrary<'a>> Arbitrary<'a> for Mutex { fn arbitrary(u: &mut Unstructured<'a>) -> Result { Arbitrary::arbitrary(u).map(Self::new) @@ -1150,6 +1540,15 @@ impl<'a, A: Arbitrary<'a>> Arbitrary<'a> for iter::Empty { } } +impl<'a, A: Dearbitrary<'a>> Dearbitrary<'a> for iter::Empty { + fn dearbitrary(&self, _: &mut UnstructuredBuilder) -> DearbitraryResult<()> { + Ok(()) + } +} + +// #[cfg(kani)] +// kani_dearbitrary!(iter::Empty : empty); + impl<'a, A: 
?Sized> Arbitrary<'a> for ::std::marker::PhantomData { fn arbitrary(_: &mut Unstructured<'a>) -> Result { Ok(::std::marker::PhantomData) @@ -1161,6 +1560,15 @@ impl<'a, A: ?Sized> Arbitrary<'a> for ::std::marker::PhantomData { } } +impl<'a, A: ?Sized> Dearbitrary<'a> for ::std::marker::PhantomData { + fn dearbitrary(&self, _: &mut UnstructuredBuilder) -> DearbitraryResult<()> { + Ok(()) + } +} + +// #[cfg(kani)] +// kani_dearbitrary!(::std::marker::PhantomData : phantom_data); + impl<'a> Arbitrary<'a> for ::std::marker::PhantomPinned { fn arbitrary(_: &mut Unstructured<'a>) -> Result { Ok(::std::marker::PhantomPinned) @@ -1172,6 +1580,15 @@ impl<'a> Arbitrary<'a> for ::std::marker::PhantomPinned { } } +impl<'a> Dearbitrary<'a> for ::std::marker::PhantomPinned { + fn dearbitrary(&self, _: &mut UnstructuredBuilder) -> DearbitraryResult<()> { + Ok(()) + } +} + +// #[cfg(kani)] +// kani_dearbitrary!(::std::marker::PhantomPinned : phantom_pinned); + impl<'a, A: Arbitrary<'a>> Arbitrary<'a> for ::std::num::Wrapping { fn arbitrary(u: &mut Unstructured<'a>) -> Result { Arbitrary::arbitrary(u).map(::std::num::Wrapping) @@ -1184,10 +1601,10 @@ impl<'a, A: Arbitrary<'a>> Arbitrary<'a> for ::std::num::Wrapping { } macro_rules! implement_nonzero_int { - ($nonzero:ty, $int:ty) => { + ($nonzero:ty, $int:ty, $ident:ident) => { impl<'a> Arbitrary<'a> for $nonzero { fn arbitrary(u: &mut Unstructured<'a>) -> Result { - match Self::new(<$int as Arbitrary<'a>>::arbitrary(u)?) { + match Self::new(<$int>::arbitrary(u)?) { Some(n) => Ok(n), None => Err(Error::IncorrectFormat), } @@ -1198,21 +1615,30 @@ macro_rules! implement_nonzero_int { <$int as Arbitrary<'a>>::size_hint(depth) } } + + impl<'a> Dearbitrary<'a> for $nonzero { + fn dearbitrary(&self, builder: &mut UnstructuredBuilder) -> DearbitraryResult<()> { + self.get().dearbitrary(builder) + } + } + + #[cfg(kani)] + kani_dearbitrary!($nonzero : $ident); }; } -implement_nonzero_int! { NonZeroI8, i8 } -implement_nonzero_int! 
{ NonZeroI16, i16 } -implement_nonzero_int! { NonZeroI32, i32 } -implement_nonzero_int! { NonZeroI64, i64 } -implement_nonzero_int! { NonZeroI128, i128 } -implement_nonzero_int! { NonZeroIsize, isize } -implement_nonzero_int! { NonZeroU8, u8 } -implement_nonzero_int! { NonZeroU16, u16 } -implement_nonzero_int! { NonZeroU32, u32 } -implement_nonzero_int! { NonZeroU64, u64 } -implement_nonzero_int! { NonZeroU128, u128 } -implement_nonzero_int! { NonZeroUsize, usize } +implement_nonzero_int! { NonZeroI8, i8, nonzero_i8 } +implement_nonzero_int! { NonZeroI16, i16, nonzero_i16 } +implement_nonzero_int! { NonZeroI32, i32, nonzero_i32 } +implement_nonzero_int! { NonZeroI64, i64, nonzero_i64 } +implement_nonzero_int! { NonZeroI128, i128, nonzero_i128 } +implement_nonzero_int! { NonZeroIsize, isize, nonzero_isize } +implement_nonzero_int! { NonZeroU8, u8, nonzero_u8 } +implement_nonzero_int! { NonZeroU16, u16, nonzero_u16 } +implement_nonzero_int! { NonZeroU32, u32, nonzero_u32 } +implement_nonzero_int! { NonZeroU64, u64, nonzero_u64 } +implement_nonzero_int! { NonZeroU128, u128, nonzero_u128 } +implement_nonzero_int! 
{ NonZeroUsize, usize, nonzero_usize } impl<'a> Arbitrary<'a> for Ipv4Addr { fn arbitrary(u: &mut Unstructured<'a>) -> Result { @@ -1225,6 +1651,15 @@ impl<'a> Arbitrary<'a> for Ipv4Addr { } } +impl<'a> Dearbitrary<'a> for Ipv4Addr { + fn dearbitrary(&self, builder: &mut UnstructuredBuilder) -> DearbitraryResult<()> { + u32::dearbitrary(&self.to_bits(), builder) + } +} + +// #[cfg(kani)] +// kani_dearbitrary!(Ipv4Addr : ipv4_addr); + impl<'a> Arbitrary<'a> for Ipv6Addr { fn arbitrary(u: &mut Unstructured<'a>) -> Result { Ok(Ipv6Addr::from(u128::arbitrary(u)?)) @@ -1236,6 +1671,15 @@ impl<'a> Arbitrary<'a> for Ipv6Addr { } } +impl<'a> Dearbitrary<'a> for Ipv6Addr { + fn dearbitrary(&self, builder: &mut UnstructuredBuilder) -> DearbitraryResult<()> { + u128::dearbitrary(&self.to_bits(), builder) + } +} + +// #[cfg(kani)] +// kani_dearbitrary!(Ipv6Addr : ipv6_addr); + impl<'a> Arbitrary<'a> for IpAddr { fn arbitrary(u: &mut Unstructured<'a>) -> Result { if u.arbitrary()? { @@ -1253,6 +1697,24 @@ impl<'a> Arbitrary<'a> for IpAddr { } } +impl<'a> Dearbitrary<'a> for IpAddr { + fn dearbitrary(&self, builder: &mut UnstructuredBuilder) -> DearbitraryResult<()> { + match self { + IpAddr::V4(v4) => { + v4.dearbitrary(builder)?; + true.dearbitrary(builder) + } + IpAddr::V6(v6) => { + v6.dearbitrary(builder)?; + false.dearbitrary(builder) + } + } + } +} + +// #[cfg(kani)] +// kani_dearbitrary!(IpAddr : ip_addr); + impl<'a> Arbitrary<'a> for SocketAddrV4 { fn arbitrary(u: &mut Unstructured<'a>) -> Result { Ok(SocketAddrV4::new(u.arbitrary()?, u.arbitrary()?)) @@ -1263,6 +1725,15 @@ impl<'a> Arbitrary<'a> for SocketAddrV4 { size_hint::and(Ipv4Addr::size_hint(depth), u16::size_hint(depth)) } } +impl<'a> Dearbitrary<'a> for SocketAddrV4 { + fn dearbitrary(&self, builder: &mut UnstructuredBuilder) -> DearbitraryResult<()> { + self.port().dearbitrary(builder)?; + self.ip().dearbitrary(builder) + } +} + +// #[cfg(kani)] +// kani_dearbitrary!(SocketAddrV4 : socket_addr_v4); impl<'a> 
Arbitrary<'a> for SocketAddrV6 { fn arbitrary(u: &mut Unstructured<'a>) -> Result { @@ -1286,6 +1757,18 @@ impl<'a> Arbitrary<'a> for SocketAddrV6 { } } +impl<'a> Dearbitrary<'a> for SocketAddrV6 { + fn dearbitrary(&self, builder: &mut UnstructuredBuilder) -> DearbitraryResult<()> { + self.scope_id().dearbitrary(builder)?; + self.flowinfo().dearbitrary(builder)?; + self.port().dearbitrary(builder)?; + self.ip().dearbitrary(builder) + } +} + +// #[cfg(kani)] +// kani_dearbitrary!(SocketAddrV6 : socket_addr_v6); + impl<'a> Arbitrary<'a> for SocketAddr { fn arbitrary(u: &mut Unstructured<'a>) -> Result { if u.arbitrary()? { @@ -1306,6 +1789,24 @@ impl<'a> Arbitrary<'a> for SocketAddr { } } +impl<'a> Dearbitrary<'a> for SocketAddr { + fn dearbitrary(&self, builder: &mut UnstructuredBuilder) -> DearbitraryResult<()> { + match self { + SocketAddr::V4(v4) => { + v4.dearbitrary(builder)?; + true.dearbitrary(builder) + } + SocketAddr::V6(v6) => { + v6.dearbitrary(builder)?; + false.dearbitrary(builder) + } + } + } +} + +// #[cfg(kani)] +// kani_dearbitrary!(SocketAddr : socket_addr); + #[cfg(test)] mod test { use super::*; @@ -1619,7 +2120,7 @@ mod test { (7, Some(7)), <(bool, u16, i32) as Arbitrary<'_>>::size_hint(0) ); - assert_eq!((1, None), <(u8, Vec) as Arbitrary>::size_hint(0)); + assert_eq!((1, None), <(u8, Vec)>::size_hint(0)); } } diff --git a/src/reverse_note.md b/src/reverse_note.md new file mode 100644 index 0000000..7230648 --- /dev/null +++ b/src/reverse_note.md @@ -0,0 +1,4 @@ +**Note**: When the [UnstructuredBuilder] is finished, the `front` +is reversed to correspond with how the structure would actually look. +If this iterator is meant to mimic arbitrary behavior, either reverse it +before passing it in, or use the more generic [Self::extend_from_dearbitrary_iter_rev]. diff --git a/src/unstructured.rs b/src/unstructured.rs index 639a1fc..2780d2c 100644 --- a/src/unstructured.rs +++ b/src/unstructured.rs @@ -8,7 +8,7 @@ //! 
Wrappers around raw, unstructured bytes. -use crate::{Arbitrary, Error, Result}; +use crate::{Arbitrary, Dearbitrary, Error, Result, DearbitraryResult}; use std::marker::PhantomData; use std::ops::ControlFlow; use std::{mem, ops}; @@ -216,7 +216,7 @@ impl<'a> Unstructured<'a> { ElementType: Arbitrary<'a>, { let byte_size = self.arbitrary_byte_size()?; - let (lower, upper) = ::size_hint(0); + let (lower, upper) = ElementType::size_hint(0); let elem_size = upper.unwrap_or(lower * 2); let elem_size = std::cmp::max(1, elem_size); Ok(byte_size / elem_size) @@ -291,21 +291,21 @@ impl<'a> Unstructured<'a> { /// assert!(-5_000 <= x); /// assert!(x <= -1_000); /// ``` - pub fn int_in_range(&mut self, range: ops::RangeInclusive) -> Result + pub fn int_in_range(&mut self, range: ops::RangeInclusive) -> Result where - T: Int, + T: Int, { let (result, bytes_consumed) = Self::int_in_range_impl(range, self.data.iter().cloned())?; self.data = &self.data[bytes_consumed..]; Ok(result) } - fn int_in_range_impl( + fn int_in_range_impl( range: ops::RangeInclusive, - mut bytes: impl Iterator, + bytes: impl Iterator + ExactSizeIterator, ) -> Result<(T, usize)> where - T: Int, + T: Int, { let start = *range.start(); let end = *range.end(); @@ -313,7 +313,6 @@ impl<'a> Unstructured<'a> { start <= end, "`arbitrary::Unstructured::int_in_range` requires a non-empty range" ); - // When there is only one possible choice, don't waste any entropy from // the underlying data. if start == end { @@ -330,29 +329,22 @@ impl<'a> Unstructured<'a> { debug_assert_ne!(delta, T::Unsigned::ZERO); // Compute an arbitrary integer offset from the start of the range. We - // do this by consuming `size_of(T)` bytes from the input to create an - // arbitrary integer and then clamping that int into our range bounds - // with a modulo operation. 
- let mut arbitrary_int = T::Unsigned::ZERO; - let mut bytes_consumed: usize = 0; - - while (bytes_consumed < mem::size_of::()) - && (delta >> T::Unsigned::from_usize(bytes_consumed * 8)) > T::Unsigned::ZERO - { - let byte = match bytes.next() { - None => break, - Some(b) => b, - }; - bytes_consumed += 1; + // do this by consuming `min(size_of(T), bytes representing delta)` + // bytes from the input to create an arbitrary integer and then + // clamping that int into our range bounds with a modulo operation. - // Combine this byte into our arbitrary integer, but avoid - // overflowing the shift for `u8` and `i8`. - arbitrary_int = if mem::size_of::() == 1 { - T::Unsigned::from_u8(byte) - } else { - (arbitrary_int << 8) | T::Unsigned::from_u8(byte) - }; - } + let max_consumable_bytes = std::cmp::min( + BYTES, + delta.min_representable_bytes() as usize + ); + + let taken_bytes = bytes.take(max_consumable_bytes).collect::>(); + + let mut byte_slice = [0u8; BYTES]; + // never panics as taken_bytes.len() is always <= BYTES + byte_slice[0..taken_bytes.len()].copy_from_slice(&taken_bytes); + + let arbitrary_int = T::Unsigned::from_le_bytes(byte_slice); let offset = if delta == T::Unsigned::MAX { arbitrary_int @@ -369,7 +361,7 @@ impl<'a> Unstructured<'a> { debug_assert!(*range.start() <= result); debug_assert!(result <= *range.end()); - Ok((result, bytes_consumed)) + Ok((result, taken_bytes.len())) } /// Choose one of the given choices. @@ -516,9 +508,9 @@ impl<'a> Unstructured<'a> { /// # Ok(()) /// # } /// ``` - pub fn ratio(&mut self, numerator: T, denominator: T) -> Result + pub fn ratio(&mut self, numerator: T, denominator: T) -> Result where - T: Int, + T: Int, { assert!(T::ZERO < numerator); assert!(numerator <= denominator); @@ -745,6 +737,408 @@ impl<'a> Unstructured<'a> { } } +/// An intermediary struct for building an [Unstructured] +/// from [Dearbitrary]-implementing objects. 
+pub struct UnstructuredBuilder { + front: Vec, + back: Vec +} + +impl UnstructuredBuilder { + /// Constructs an empty [UnstructuredBuilder]. + /// + /// # Example + /// + /// ``` + /// use arbitrary::UnstructuredBuilder; + /// + /// let b = UnstructuredBuilder::new(); + /// ``` + pub fn new() -> Self { + UnstructuredBuilder { + front: vec![], + back: vec![] + } + } + + /// Constructs an [UnstructuredBuilder] with the given data. + /// + /// # Example + /// + /// ``` + /// use arbitrary::UnstructuredBuilder; + /// + /// let b = UnstructuredBuilder::new(); + /// ``` + pub fn new_with_vecs(front: Vec, back: Vec) -> Self { + Self { + front, + back + } + } + + /// Pushes a byte to the front of the unstructured value. + /// + /// # Example + /// + /// ``` + /// use arbitrary::UnstructuredBuilder; + /// + /// let mut b = UnstructuredBuilder::new(); + /// b.push_front(15u8); + /// b.push_front(7u8); + /// assert_eq!(b.len(), 2); + /// ``` + pub fn push_front(&mut self, value: u8) { + self.front.push(value); + } + + /// Extends the front by an iterator. + /// + /// Since [UnstructuredBuilder]'s `front` works in reverse, + /// this iterator should be reversed from when it is usually arbitrated. + /// + /// # Example + /// + /// ``` + /// use arbitrary::UnstructuredBuilder; + /// + /// let mut b = UnstructuredBuilder::new(); + /// b.extend_front(1u8..5u8); + /// assert_eq!(b.len(), 4); + /// ``` + #[doc = include_str!("reverse_note.md")] + pub fn extend_front>(&mut self, iter: T) { + self.front.extend(iter); + } + + /// Dearbitrates every value inside the passed iterator. + /// + /// Generally, you would want to use [UnstructuredBuilder::extend_from_dearbitrary_iter_rev] + /// to reverse the order of the passed iterator, unless this is already purposely in reverse + /// for optimization reasons. 
+ /// + /// # Example + /// + /// ``` + /// use arbitrary::UnstructuredBuilder; + /// + /// let mut b = UnstructuredBuilder::new(); + /// b.extend_from_dearbitrary_iter([1u8, 2u8].iter().copied()); + /// let bytes = b.collect(); + /// assert_eq!(bytes, vec![2, 1]); + /// ``` + #[doc = include_str!("reverse_note.md")] + pub fn extend_from_dearbitrary_iter< + 'a, + A: Dearbitrary<'a>, + T: IntoIterator + >(&mut self, iter: T) -> DearbitraryResult<()> { + for value in iter { + value.dearbitrary(self)?; + } + Ok(()) + } + + /// Dearbitrates every value inside the passed iterator in reverse. + /// + /// This is generally what you would want to do, as the front is later reversed + /// to mimic how this structure would usually be when not built backwards. + /// + /// # Example + /// + /// ``` + /// use arbitrary::UnstructuredBuilder; + /// + /// let mut b = UnstructuredBuilder::new(); + /// b.extend_from_dearbitrary_iter_rev([1u8, 2u8].iter().copied()); + /// let bytes = b.collect(); + /// assert_eq!(bytes, vec![1, 2]); + /// ``` + pub fn extend_from_dearbitrary_iter_rev< + 'a, + A: Dearbitrary<'a>, + T: IntoIterator> + >(&mut self, iter: T) -> DearbitraryResult<()> { + for value in iter.into_iter().rev() { + value.dearbitrary(self)?; + } + Ok(()) + } + + /// Extends the front from a slice. + /// + /// Since [UnstructuredBuilder]'s `front` works in reverse, + /// this slice should be reversed from when it is usually arbitrated. + /// + /// # Example + /// + /// ``` + /// use arbitrary::UnstructuredBuilder; + /// + /// let mut b = UnstructuredBuilder::new(); + /// b.extend_front_from_slice(&[1u8, 2u8]); + /// let bytes = b.collect(); + /// assert_eq!(bytes, vec![2, 1]); + /// ``` + #[doc = include_str!("reverse_note.md")] + pub fn extend_front_from_slice(&mut self, other: &[u8]) { + self.front.extend_from_slice(other); + } + + /// Pushes a byte to the back of the [UnstructuredBuilder]. 
+ /// + #[doc = include_str!("back_note.md")] + /// + /// # Example + /// + /// ``` + /// use arbitrary::UnstructuredBuilder; + /// + /// let mut b = UnstructuredBuilder::new(); + /// b.push_front(1); + /// b.push_back(2); + /// b.push_front(3); + /// b.push_back(4); + /// let bytes = b.collect(); + /// assert_eq!(bytes, vec![3, 1, 2, 4]); + /// ``` + pub fn push_back(&mut self, value: u8) { + self.back.push(value); + } + + /// Extends the back by some iterator. + /// + /// # Example + /// + /// ``` + /// use arbitrary::UnstructuredBuilder; + /// + /// let mut b = UnstructuredBuilder::new(); + /// b.extend_back(1u8..=3u8); + /// b.push_front(4); + /// let bytes = b.collect(); + /// assert_eq!(bytes, vec![4, 1, 2, 3]); + /// ``` + #[doc = include_str!("back_note.md")] + pub fn extend_back>(&mut self, iter: T) { + self.back.extend(iter); + } + + /// Extends the back by some slice of bytes. + /// + /// # Example + /// + /// ``` + /// use arbitrary::UnstructuredBuilder; + /// + /// let mut b = UnstructuredBuilder::new(); + /// b.extend_back_from_slice([1, 2, 3]); + /// b.push_front(4); + /// let bytes = b.collect(); + /// assert_eq!(bytes, vec![4, 1, 2, 3]); + /// ``` + #[doc = include_str!("back_note.md")] + pub fn extend_back_from_slice(&mut self, other: &[u8]) { + self.back.extend_from_slice(other); + } + + /// Adds an dearbitratable iterator to this `UnstructuredBuilder`, + /// along with its length information. + pub fn extend_from_dearbitrary_iter_rev_with_length<'a, A, T>( + &mut self, + iter: T + ) -> DearbitraryResult<()> + where + A: Dearbitrary<'a>, + T: DoubleEndedIterator + ExactSizeIterator + { + let iter_length = iter.len(); + + self.extend_from_dearbitrary_iter_rev(iter)?; + + // Determine the amount of bytes we need to pretend to encode the max amount of possible + // items in here; This mimics behavior found in `Unstructured::arbitrary_byte_size`. 
+ let byte_count = if self.len() as u64 <= std::u8::MAX as u64 + 1 { + 1 + } else if self.len() as u64 <= std::u16::MAX as u64 + 1 { + 2 + } else if self.len() as u64 <= std::u32::MAX as u64 + 1 { + 4 + } else { + 8 + }; + + let max_size = self.len() - byte_count; + + // Encode the "expected" count of bytes - not the real amount, to help re-arbitrary later on. + // This is the reverse of Unstructured::int_in_range_impl + let (lower, upper) = A::size_hint(0); + let elem_size = upper.unwrap_or(lower * 2); + let elem_size = std::cmp::max(1, elem_size); + let byte_size = iter_length * elem_size; + + self.back_bytes_from_constrained_int::(0..=max_size as u64, byte_size as u64); + + Ok(()) + } + + /// This is the logical inverse of Unstructured::int_in_range_impl. + /// + /// It converts a number and its constrained range to the smallest representable + /// slice of bytes in big endian order. + fn bytes_from_constrained_int( + range: ops::RangeInclusive, + int: T + ) -> Vec + where + T: Int + core::fmt::Debug, + { + let start = *range.start(); + let end = *range.end(); + assert!( + start <= end, + "`UnstructuredBuilder::bytes_from_constrained_int` requires a non-empty range" + ); + assert!( + range.contains(&int), + "`UnstructuredBuilder::bytes_from_constrained_int` requires {int:?} to be inside {range:?}." 
+ ); + + if start == end { + return vec![]; + } + + // int_in_range_impl works on unsigned numbers, which are then later converted to signed (if they + // originally were) + let start = start.to_unsigned(); + let end = end.to_unsigned(); + + let delta = end.wrapping_sub(start); + debug_assert_ne!(delta, T::Unsigned::ZERO); + + // start from the end - we need an unsigned number + let int = int.to_unsigned(); + // offset lossily equals arbitrary_int (offset = arbitrary_int % (delta + 1), remainder information is lost) + let arbitrary_int = int.wrapping_sub(start); + + arbitrary_int.to_le_bytes()[0..arbitrary_int.min_representable_bytes() as usize].to_vec() + } + + /// Extends the front by some integer clamped to a range. + /// + /// This is the inverse of [Unstructured::int_in_range]. + /// + /// # Example + /// + /// ``` + /// use arbitrary::UnstructuredBuilder; + /// + /// let mut builder = UnstructuredBuilder::new(); + /// builder.front_bytes_from_constrained_int(0..=u8::MAX, 5u8); + /// assert_eq!(builder.collect(), vec![5]); + /// + /// let mut builder = UnstructuredBuilder::new(); + /// builder.front_bytes_from_constrained_int(5..=20u8, 6u8); + /// assert_eq!(builder.collect(), vec![1]); + /// + /// let mut builder = UnstructuredBuilder::new(); + /// builder.front_bytes_from_constrained_int(1_000u32..=20_000u32, 2_500u32); + /// assert_eq!(builder.collect(), vec![5, 220]); + /// // + /// assert_eq!(u32::from_be_bytes([0u8, 0u8, 5u8, 220u8]), 1_500u32); + /// ``` + pub fn front_bytes_from_constrained_int( + &mut self, + range: ops::RangeInclusive, + int: T + ) + where + T: Int + core::fmt::Debug + { + let bytes = Self::bytes_from_constrained_int(range, int); + self.extend_front(bytes); + } + + /// Extends the back by some integer clamped to a range. + /// + /// This is the inverse of [Unstructured::int_in_range], + /// generally for encoding length information. + /// + /// See [UnstructuredBuilder::extend_from_dearbitrary_iter_rev_with_length]. 
+ /// + /// # Example + /// + /// ``` + /// use arbitrary::UnstructuredBuilder; + /// + /// let mut builder = UnstructuredBuilder::new(); + /// builder.back_bytes_from_constrained_int(0..=u8::MAX, 5u8); + /// assert_eq!(builder.collect(), vec![5]); + /// + /// let mut builder = UnstructuredBuilder::new(); + /// builder.back_bytes_from_constrained_int(5..=20u8, 6u8); + /// assert_eq!(builder.collect(), vec![1]); + /// + /// let mut builder = UnstructuredBuilder::new(); + /// builder.back_bytes_from_constrained_int(1_000u32..=20_000u32, 2_500u32); + /// assert_eq!(builder.collect(), vec![5, 220]); + /// // + /// assert_eq!(u32::from_be_bytes([0u8, 0u8, 5u8, 220u8]), 1_500u32); + /// ``` + pub fn back_bytes_from_constrained_int( + &mut self, + range: ops::RangeInclusive, + int: T + ) + where + T: Int + core::fmt::Debug + { + let mut bytes = Self::bytes_from_constrained_int(range, int); + bytes.reverse(); + self.extend_back(bytes); + } + + /// Gets the current number of bytes inside this `UnstructuredBuilder`. + /// + /// Length is the sum of both sides of this builder. + /// + /// # Example + /// + /// ``` + /// use arbitrary::UnstructuredBuilder; + /// + /// let mut b = UnstructuredBuilder::new(); + /// b.extend_back_from_slice(&[1, 2, 3]); + /// b.extend_front_from_slice(&[4, 5, 6]); + /// assert_eq!(b.len(), 6); + /// ``` + pub fn len(&self) -> usize { + self.front.len() + self.back.len() + } + + /// Collects and converts this instance into an owned `Vec`. + /// This can later be turned back into [Unstructured] or used + /// for corpus generation purposes. 
+ /// + /// # Example + /// + /// ``` + /// use arbitrary::UnstructuredBuilder; + /// + /// let mut b = UnstructuredBuilder::new(); + /// b.extend_back_from_slice(&[1, 2, 3]); + /// b.extend_front_from_slice(&[4, 5, 6]); + /// let bytes = b.collect(); + /// assert_eq!(bytes, vec![6, 5, 4, 1, 2, 3]); + /// ``` + pub fn collect(mut self) -> Vec { + self.front.reverse(); + self.front.extend(self.back); + self.front + } +} + /// Utility iterator produced by [`Unstructured::arbitrary_iter`] pub struct ArbitraryIter<'a, 'b, ElementType> { u: &'b mut Unstructured<'a>, @@ -797,7 +1191,10 @@ impl<'a, ElementType: Arbitrary<'a>> Iterator for ArbitraryTakeRestIter<'a, Elem /// * `isize` /// /// Don't implement this trait yourself. -pub trait Int: +/// +/// The `Bytes` generic defines how many bytes this number would take in the +/// worst possible case platform. `usize` +pub trait Int: Copy + std::fmt::Debug + PartialOrd @@ -809,7 +1206,7 @@ pub trait Int: + ops::BitOr { #[doc(hidden)] - type Unsigned: Int; + type Unsigned: Int; #[doc(hidden)] const ZERO: Self; @@ -840,12 +1237,21 @@ pub trait Int: #[doc(hidden)] fn from_unsigned(unsigned: Self::Unsigned) -> Self; + + #[doc(hidden)] + fn from_le_bytes(bytes: [u8; BYTES]) -> Self; + + #[doc(hidden)] + fn to_le_bytes(self) -> [u8; BYTES]; + + #[doc(hidden)] + fn min_representable_bytes(self) -> u32; } macro_rules! impl_int { - ( $( $ty:ty : $unsigned_ty: ty ; )* ) => { + ( $( $ty:ty : $unsigned_ty: ty : $bytes:expr ; )* ) => { $( - impl Int for $ty { + impl Int<$bytes> for $ty { type Unsigned = $unsigned_ty; const ZERO: Self = 0; @@ -881,24 +1287,38 @@ macro_rules! 
impl_int { fn from_unsigned(unsigned: $unsigned_ty) -> Self { unsigned as Self } + + fn from_le_bytes(bytes: [u8; $bytes]) -> Self { + <$ty>::from_le_bytes(bytes) + } + + fn to_le_bytes(self) -> [u8; $bytes] { + <$ty>::to_le_bytes(self) + } + + fn min_representable_bytes(self) -> u32 { + // 0 as a number still needs to be represented by one byte for fuzzing + if self == 0 { return 1 }; + (<$ty>::ilog2(self) + 1).div_ceil(8) + } } )* } } impl_int! { - u8: u8; - u16: u16; - u32: u32; - u64: u64; - u128: u128; - usize: usize; - i8: u8; - i16: u16; - i32: u32; - i64: u64; - i128: u128; - isize: usize; + u8: u8 : 1; + u16: u16 : 2; + u32: u32 : 4; + u64: u64 : 8; + u128: u128 : 16; + usize: usize : 8; + i8: u8 : 1; + i16: u16 : 2; + i32: u32 : 4; + i64: u64 : 8; + i128: u128 : 16; + isize: usize : 8; } #[cfg(test)] @@ -929,18 +1349,23 @@ mod tests { assert_eq!(choice, 42) } + #[test] + fn min_representable_bytes_in_bigger_size() { + assert_eq!(1, (u8::MAX as u16).min_representable_bytes()); + } + #[test] fn int_in_range_uses_minimal_amount_of_bytes() { let mut u = Unstructured::new(&[1, 2]); - assert_eq!(1, u.int_in_range::(0..=u8::MAX).unwrap()); + assert_eq!(1, u.int_in_range::(0..=u8::MAX).unwrap()); assert_eq!(u.len(), 1); let mut u = Unstructured::new(&[1, 2]); - assert_eq!(1, u.int_in_range::(0..=u8::MAX as u32).unwrap()); + assert_eq!(1, u.int_in_range::(0..=u8::MAX as u32).unwrap()); assert_eq!(u.len(), 1); let mut u = Unstructured::new(&[1]); - assert_eq!(1, u.int_in_range::(0..=u8::MAX as u32 + 1).unwrap()); + assert_eq!(1, u.int_in_range::(0..=u8::MAX as u32 + 1).unwrap()); assert!(u.is_empty()); } @@ -1048,4 +1473,32 @@ mod tests { assert!(covered, "narrow[{}] should have been generated", i); } } + } + +// #[cfg(kani)] +// mod kani_suite { +// use crate::UnstructuredBuilder; +// use crate::Unstructured; + +// macro_rules! 
generate_int_check { +// ($type:ty, $f:ident) => { +// #[kani::proof] +// #[kani::unwind(30)] +// fn $f() { +// let first_number: $type = kani::any(); +// let last_number: $type = kani::any(); +// kani::assume(last_number >= first_number); +// let int: $type = kani::any(); +// kani::assume(int >= first_number && int <= last_number); +// let range = first_number..=last_number; +// let bytes = UnstructuredBuilder::bytes_from_constrained_int(range.clone(), int); +// let generated_int = Unstructured::int_in_range_impl(range.clone(), bytes.iter().copied()).unwrap().0; +// assert_eq!(generated_int, int); +// } +// } +// } + +// generate_int_check!(u8, test_u8); +// generate_int_check!(u16, test_u16); +// }