improve symmetry between encoder and decoder (#22)

danieleades · web-flow · commit 49026f5a60d3 · 2022-03-23T17:28:20.000Z
* add 'decode_all' method

* make decoder constructor infallible
diff --git a/Cargo.toml b/Cargo.toml
@@ -1,7 +1,7 @@
 [package]
 name = "arithmetic-coding"
 description = "fast and flexible arithmetic coding library"
-version = "0.1.1"
+version = "0.2.0"
 edition = "2021"
 license = "MIT"
 keywords = ["compression", "encoding", "arithmetic-coding", "lossless"]
diff --git a/arithmetic-coding-core/src/bitstore.rs b/arithmetic-coding-core/src/bitstore.rs
@@ -14,6 +14,7 @@ pub trait BitStore:
     + AddAssign
     + PartialOrd
     + Copy
+    + std::fmt::Debug
 {
     /// the number of bits needed to represent this type
     const BITS: u32;
diff --git a/benches/common/mod.rs b/benches/common/mod.rs
@@ -22,9 +22,9 @@ where
     I: IntoIterator<Item = M::Symbol>,
 {
     let mut bitwriter = BitWriter::endian(Vec::new(), BigEndian);
-    let mut encoder = Encoder::<M>::new(model);
+    let mut encoder = Encoder::new(model, &mut bitwriter);
 
-    encoder.encode_all(input, &mut bitwriter).unwrap();
+    encoder.encode_all(input).unwrap();
     bitwriter.byte_align().unwrap();
 
     bitwriter.into_writer()
@@ -35,11 +35,6 @@ where
     M: Model,
 {
     let bitreader = BitReader::endian(buffer, BigEndian);
-    let mut decoder = Decoder::new(model, bitreader).unwrap();
-    let mut output = Vec::new();
-
-    while let Some(symbol) = decoder.decode_symbol().unwrap() {
-        output.push(symbol);
-    }
-    output
+    let mut decoder = Decoder::new(model, bitreader);
+    decoder.decode_all().map(Result::unwrap).collect()
 }
diff --git a/examples/common/mod.rs b/examples/common/mod.rs
@@ -26,9 +26,9 @@ where
     I: IntoIterator<Item = M::Symbol>,
 {
     let mut bitwriter = BitWriter::endian(Vec::new(), BigEndian);
-    let mut encoder = Encoder::<M>::new(model);
+    let mut encoder = Encoder::new(model, &mut bitwriter);
 
-    encoder.encode_all(input, &mut bitwriter).unwrap();
+    encoder.encode_all(input).unwrap();
     bitwriter.byte_align().unwrap();
 
     bitwriter.into_writer()
@@ -39,13 +39,8 @@ where
     M: Model,
 {
     let bitreader = BitReader::endian(buffer, BigEndian);
-    let mut decoder = Decoder::new(model, bitreader).unwrap();
-    let mut output = Vec::new();
-
-    while let Some(symbol) = decoder.decode_symbol().unwrap() {
-        output.push(symbol);
-    }
-    output
+    let mut decoder = Decoder::new(model, bitreader);
+    decoder.decode_all().map(Result::unwrap).collect()
 }
 
 #[allow(unused)]
diff --git a/examples/concatenated.rs b/examples/concatenated.rs
@@ -121,29 +121,29 @@ where
 {
     let mut bitwriter = BitWriter::endian(Vec::default(), BigEndian);
 
-    let mut encoder1 = Encoder::with_precision(model1, PRECISION);
-    encode(&mut encoder1, input1, &mut bitwriter);
+    let mut encoder1 = Encoder::with_precision(model1, &mut bitwriter, PRECISION);
+    encode(&mut encoder1, input1);
 
     let mut encoder2 = encoder1.chain(model2);
-    encode(&mut encoder2, input2, &mut bitwriter);
+    encode(&mut encoder2, input2);
 
-    encoder2.flush(&mut bitwriter).unwrap();
+    encoder2.flush().unwrap();
 
     bitwriter.byte_align().unwrap();
     bitwriter.into_writer()
 }
 
 /// Encode all symbols, followed by EOF. Doesn't flush the encoder (allowing
 /// more bits to be concatenated)
-fn encode<M, W>(encoder: &mut Encoder<M>, input: &[M::Symbol], bitwriter: &mut W)
+fn encode<M, W>(encoder: &mut Encoder<M, W>, input: &[M::Symbol])
 where
     M: Model,
     W: BitWrite,
 {
     for symbol in input {
-        encoder.encode(Some(symbol), bitwriter).unwrap();
+        encoder.encode(Some(symbol)).unwrap();
     }
-    encoder.encode(None, bitwriter).unwrap();
+    encoder.encode(None).unwrap();
 }
 
 /// Decode two sets of symbols, in sequence
@@ -154,12 +154,10 @@ where
 {
     let bitreader = BitReader::endian(buffer, BigEndian);
 
-    let mut decoder1 = Decoder::with_precision(model1, bitreader, PRECISION).unwrap();
-
+    let mut decoder1 = Decoder::with_precision(model1, bitreader, PRECISION);
     let output1 = decode(&mut decoder1);
 
     let mut decoder2 = decoder1.chain(model2);
-
     let output2 = decode(&mut decoder2);
 
     (output1, output2)
@@ -171,11 +169,5 @@ where
     M: Model,
     R: BitRead,
 {
-    let mut output = Vec::default();
-
-    while let Some(symbol) = decoder.decode_symbol().unwrap() {
-        output.push(symbol);
-    }
-
-    output
+    decoder.decode_all().map(Result::unwrap).collect()
 }
diff --git a/fuzz/fuzz_targets/fuzz_target_1.rs b/fuzz/fuzz_targets/fuzz_target_1.rs
@@ -5,7 +5,7 @@ use libfuzzer_sys::fuzz_target;
 mod round_trip;
 
 fuzz_target!(|data: &[u8]| {
-    let model = FenwickModel::with_symbols(256, 1 << 20);
+    let model = FenwickModel::builder(256, 1 << 20).build();
     let input: Vec<usize> = data.into_iter().copied().map(usize::from).collect();
 
     round_trip::round_trip(model, input);
diff --git a/fuzz/fuzz_targets/round_trip.rs b/fuzz/fuzz_targets/round_trip.rs
@@ -30,11 +30,7 @@ where
     M: Model,
 {
     let bitreader = BitReader::endian(buffer, BigEndian);
-    let mut decoder = Decoder::new(model, bitreader).expect("failed to initialise decoder");
-    let mut output = Vec::new();
+    let mut decoder = Decoder::new(model, bitreader).unwrap();
 
-    while let Some(symbol) = decoder.decode_symbol().expect("failed to encode symbol!") {
-        output.push(symbol);
-    }
-    output
+    decoder.decode_all().map(Result::unwrap).collect()
 }
diff --git a/src/decoder.rs b/src/decoder.rs
@@ -2,7 +2,7 @@ use std::io;
 
 use bitstream_io::BitRead;
 
-use crate::{BitStore, Error, Model};
+use crate::{BitStore, Model};
 
 // this algorithm is derived from this article - https://marknelson.us/posts/2014/10/19/data-compression-with-arithmetic-coding.html
 
@@ -22,6 +22,7 @@ where
     high: M::B,
     input: R,
     x: M::B,
+    uninitialised: bool,
 }
 
 trait BitReadExt {
@@ -49,10 +50,6 @@ where
     /// needed to represent the [`Model::denominator`]. 'precision' bits is
     /// equal to [`u32::BITS`] - [`Model::denominator`] bits.
     ///
-    /// # Errors
-    ///
-    /// This method can fail if the underlying [`BitRead`] cannot be read from.
-    ///
     /// # Panics
     ///
     /// The calculation of the number of bits used for 'precision' is subject to
@@ -64,7 +61,7 @@ where
     ///
     /// If these constraints cannot be satisfied this method will panic in debug
     /// builds
-    pub fn new(model: M, input: R) -> io::Result<Self> {
+    pub fn new(model: M, input: R) -> Self {
         let frequency_bits = model.max_denominator().log2() + 1;
         let precision = M::B::BITS - frequency_bits;
 
@@ -73,10 +70,6 @@ where
 
     /// Construct a new [`Decoder`] with a custom precision
     ///
-    /// # Errors
-    ///
-    /// This method can fail if the underlying [`BitRead`] cannot be read from.
-    ///
     /// # Panics
     ///
     /// The calculation of the number of bits used for 'precision' is subject to
@@ -88,7 +81,7 @@ where
     ///
     /// If these constraints cannot be satisfied this method will panic in debug
     /// builds
-    pub fn with_precision(model: M, input: R, precision: u32) -> io::Result<Self> {
+    pub fn with_precision(model: M, input: R, precision: u32) -> Self {
         let frequency_bits = model.max_denominator().log2() + 1;
         debug_assert!(
             (precision >= (frequency_bits + 2)),
@@ -103,17 +96,15 @@ where
         let high = M::B::ONE << precision;
         let x = M::B::ZERO;
 
-        let mut encoder = Self {
+        Self {
             model,
             precision,
             low,
             high,
             input,
             x,
-        };
-
-        encoder.fill()?;
-        Ok(encoder)
+            uninitialised: true,
+        }
     }
 
     fn fill(&mut self) -> io::Result<()> {
@@ -141,14 +132,26 @@ where
         self.half() + self.quarter()
     }
 
+    /// Return an iterator over the decoded symbols.
+    ///
+    /// The iterator yields decoded symbols (as [`io::Result`]s) until EOF is reached.
+    pub fn decode_all(&mut self) -> DecodeIter<M, R> {
+        DecodeIter { decoder: self }
+    }
+
     /// Read the next symbol from the stream of bits
     ///
     /// This method will return `Ok(None)` when EOF is reached.
     ///
     /// # Errors
     ///
     /// This method can fail if the underlying [`BitRead`] cannot be read from.
-    pub fn decode_symbol(&mut self) -> Result<Option<M::Symbol>, Error<M::ValueError>> {
+    pub fn decode(&mut self) -> io::Result<Option<M::Symbol>> {
+        if self.uninitialised {
+            self.fill()?;
+            self.uninitialised = false;
+        }
+
         let range = self.high - self.low + M::B::ONE;
         let denominator = self.model.denominator();
         debug_assert!(
@@ -161,7 +164,7 @@ where
         let p = self
             .model
             .probability(symbol.as_ref())
-            .map_err(Error::ValueError)?;
+            .expect("this should not be able to fail. Check the implementation of the model.");
 
         self.high = self.low + (range * p.end) / denominator - M::B::ONE;
         self.low += (range * p.start) / denominator;
@@ -224,6 +227,29 @@ where
             high: self.high,
             input: self.input,
             x: self.x,
+            uninitialised: self.uninitialised,
         }
     }
 }
+
+/// The iterator returned by the [`Decoder::decode_all`] method
+#[derive(Debug)]
+pub struct DecodeIter<'a, M, R>
+where
+    M: Model,
+    R: BitRead,
+{
+    decoder: &'a mut Decoder<M, R>,
+}
+
+impl<'a, M, R> Iterator for DecodeIter<'a, M, R>
+where
+    M: Model,
+    R: BitRead,
+{
+    type Item = io::Result<M::Symbol>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        self.decoder.decode().transpose()
+    }
+}
diff --git a/src/encoder.rs b/src/encoder.rs
diff --git a/tests/common/mod.rs b/tests/common/mod.rs
diff --git a/tests/concatenated.rs b/tests/concatenated.rs

Original file line number	Diff line number	Diff line change
`@@ -14,6 +14,7 @@ pub trait BitStore:`
`14`	`14`	`+ AddAssign`
`15`	`15`	`+ PartialOrd`
`16`	`16`	`+ Copy`
	`17`	`+ + std::fmt::Debug`
`17`	`18`	`{`
`18`	`19`	`/// the number of bits needed to represent this type`
`19`	`20`	`const BITS: u32;`
Original file line number	Diff line number	Diff line change
`@@ -30,11 +30,7 @@ where`
`30`	`30`	`M: Model,`
`31`	`31`	`{`
`32`	`32`	`let bitreader = BitReader::endian(buffer, BigEndian);`
`33`		`- let mut decoder = Decoder::new(model, bitreader).expect("failed to initialise decoder");`
`34`		`- let mut output = Vec::new();`
	`33`	`+ let mut decoder = Decoder::new(model, bitreader).unwrap();`
`35`	`34`
`36`		`- while let Some(symbol) = decoder.decode_symbol().expect("failed to encode symbol!") {`
`37`		`- output.push(symbol);`
`38`		`- }`
`39`		`- output`
	`35`	`+ decoder.decode_all().map(Result::unwrap).collect()`
`40`	`36`	`}`