refactor: de-duplicate 'state' code (#57)

danieleades · web-flow · commit ba16ad6fc61f · 2024-08-23T09:32:55.000+01:00
diff --git a/fenwick-model/src/lib.rs b/fenwick-model/src/lib.rs
@@ -62,12 +62,17 @@ impl Weights {
             return None;
         }
 
-        // invariant: low <= our answer < high
         // we seek the lowest number i such that prefix_sum(i) > prefix_sum
         let mut low = 0;
         let mut high = self.len();
-        debug_assert!(low < high);
-        debug_assert!(prefix_sum < self.prefix_sum(Some(high - 1)));
+        // Ensure the search range is valid (low < high)
+        debug_assert!(low < high, "Invalid search range");
+
+        // Verify that prefix_sum is within the valid range of cumulative weights
+        debug_assert!(
+            prefix_sum < self.prefix_sum(Some(high - 1)),
+            "'prefix_sum' is out of bounds"
+        );
         while low + 1 < high {
             let i = (low + high - 1) / 2;
             if self.prefix_sum(Some(i)) > prefix_sum {
@@ -110,7 +115,7 @@ mod tests {
     }
 
     #[test]
-    #[should_panic]
+    #[should_panic(expected = "index out of bounds: the len is 4 but the index is 4")]
     fn range_out_of_bounds() {
         let weights = Weights::new(3);
         weights.range(Some(3));
@@ -126,7 +131,7 @@ mod tests {
     }
 
     #[test]
-    #[should_panic]
+    #[should_panic(expected = "'prefix_sum' is out of bounds")]
     fn symbol_out_of_bounds() {
         let weights = Weights::new(3);
         weights.symbol(4);
diff --git a/fenwick-model/src/simple.rs b/fenwick-model/src/simple.rs
@@ -55,14 +55,10 @@ impl Model for FenwickModel {
         &self,
         symbol: Option<&Self::Symbol>,
     ) -> Result<std::ops::Range<Self::B>, Self::ValueError> {
-        if let Some(s) = symbol.copied() {
-            if s >= self.weights.len() {
-                Err(ValueError(s))
-            } else {
-                Ok(self.weights.range(Some(s)))
-            }
-        } else {
-            Ok(self.weights.range(None))
+        match symbol {
+            None => Ok(self.weights.range(None)),
+            Some(&s) if s < self.weights.len() => Ok(self.weights.range(Some(s))),
+            Some(&s) => Err(ValueError(s)),
         }
     }
 
diff --git a/src/common.rs b/src/common.rs
@@ -0,0 +1,45 @@
+use std::ops::Range;
+
+use arithmetic_coding_core::BitStore;
+
+#[derive(Debug)]
+pub struct State<B: BitStore> { // interval state shared by the decoder and encoder
+    pub precision: u32, // shift amount that fixes the initial `high` and the `half`/`quarter` marks
+    pub low: B, // lower bound of the current interval, initially `B::ZERO`
+    pub high: B, // upper bound of the current interval, initially `B::ONE << precision`
+}
+
+impl<B> State<B> // constructor, threshold helpers and interval narrowing
+where
+    B: BitStore,
+{
+    pub fn new(precision: u32) -> Self {
+        let low = B::ZERO;
+        let high = B::ONE << precision;
+        // NOTE(review): `precision` is not validated; `quarter` subtracts 2 from it - confirm >= 2.
+        Self {
+            precision,
+            low,
+            high,
+        }
+    }
+    /// Midpoint mark, `1 << (precision - 1)`; `normalise` compares `low` and `high` against it.
+    pub fn half(&self) -> B {
+        B::ONE << (self.precision - 1)
+    }
+    /// Quarter mark, `1 << (precision - 2)`; used by the second `normalise` loop and by `flush`.
+    pub fn quarter(&self) -> B {
+        B::ONE << (self.precision - 2)
+    }
+    /// Three-quarter mark, `half() + quarter()`; upper limit in the second `normalise` loop.
+    pub fn three_quarter(&self) -> B {
+        self.half() + self.quarter()
+    }
+    /// Narrows the interval to the `p.start/denominator .. p.end/denominator` share of its width.
+    pub fn scale(&mut self, p: Range<B>, denominator: B) {
+        let range = self.high - self.low + B::ONE;
+        // `high` is assigned first so that it is computed from the not-yet-updated `low`.
+        self.high = self.low + (range * p.end) / denominator - B::ONE;
+        self.low += (range * p.start) / denominator;
+    }
+}
diff --git a/src/decoder.rs b/src/decoder.rs
@@ -4,7 +4,7 @@ use std::{io, ops::Range};
 
 use bitstream_io::BitRead;
 
-use crate::{BitStore, Model};
+use crate::{common, BitStore, Model};
 
 // this algorithm is derived from this article - https://marknelson.us/posts/2014/10/19/data-compression-with-arithmetic-coding.html
 
@@ -185,9 +185,7 @@ where
     B: BitStore,
     R: BitRead,
 {
-    precision: u32,
-    low: B,
-    high: B,
+    state: common::State<B>,
     input: R,
     x: B,
     uninitialised: bool,
@@ -200,54 +198,41 @@ where
 {
     /// todo
     pub fn new(precision: u32, input: R) -> Self {
-        let low = B::ZERO;
-        let high = B::ONE << precision;
+        let state = common::State::new(precision);
         let x = B::ZERO;
 
         Self {
-            precision,
-            low,
-            high,
+            state,
             input,
             x,
             uninitialised: true,
         }
     }
 
-    fn half(&self) -> B {
-        B::ONE << (self.precision - 1)
-    }
-
-    fn quarter(&self) -> B {
-        B::ONE << (self.precision - 2)
-    }
-
-    fn three_quarter(&self) -> B {
-        self.half() + self.quarter()
-    }
-
     fn normalise(&mut self) -> io::Result<()> {
-        while self.high < self.half() || self.low >= self.half() {
-            if self.high < self.half() {
-                self.high <<= 1;
-                self.low <<= 1;
+        while self.state.high < self.state.half() || self.state.low >= self.state.half() {
+            if self.state.high < self.state.half() {
+                self.state.high <<= 1;
+                self.state.low <<= 1;
                 self.x <<= 1;
             } else {
                 // self.low >= self.half()
-                self.low = (self.low - self.half()) << 1;
-                self.high = (self.high - self.half()) << 1;
-                self.x = (self.x - self.half()) << 1;
+                self.state.low = (self.state.low - self.state.half()) << 1;
+                self.state.high = (self.state.high - self.state.half()) << 1;
+                self.x = (self.x - self.state.half()) << 1;
             }
 
             if self.input.next_bit()? == Some(true) {
                 self.x += B::ONE;
             }
         }
 
-        while self.low >= self.quarter() && self.high < (self.three_quarter()) {
-            self.low = (self.low - self.quarter()) << 1;
-            self.high = (self.high - self.quarter()) << 1;
-            self.x = (self.x - self.quarter()) << 1;
+        while self.state.low >= self.state.quarter()
+            && self.state.high < (self.state.three_quarter())
+        {
+            self.state.low = (self.state.low - self.state.quarter()) << 1;
+            self.state.high = (self.state.high - self.state.quarter()) << 1;
+            self.x = (self.x - self.state.quarter()) << 1;
 
             if self.input.next_bit()? == Some(true) {
                 self.x += B::ONE;
@@ -258,21 +243,17 @@ where
     }
 
     fn scale(&mut self, p: Range<B>, denominator: B) -> io::Result<()> {
-        let range = self.high - self.low + B::ONE;
-
-        self.high = self.low + (range * p.end) / denominator - B::ONE;
-        self.low += (range * p.start) / denominator;
-
+        self.state.scale(p, denominator);
         self.normalise()
     }
 
     fn value(&self, denominator: B) -> B {
-        let range = self.high - self.low + B::ONE;
-        ((self.x - self.low + B::ONE) * denominator - B::ONE) / range
+        let range = self.state.high - self.state.low + B::ONE;
+        ((self.x - self.state.low + B::ONE) * denominator - B::ONE) / range
     }
 
     fn fill(&mut self) -> io::Result<()> {
-        for _ in 0..self.precision {
+        for _ in 0..self.state.precision {
             self.x <<= 1;
             if self.input.next_bit()? == Some(true) {
                 self.x += B::ONE;
diff --git a/src/encoder.rs b/src/encoder.rs
@@ -4,7 +4,7 @@ use std::{io, ops::Range};
 
 use bitstream_io::BitWrite;
 
-use crate::{BitStore, Error, Model};
+use crate::{common, BitStore, Error, Model};
 
 // this algorithm is derived from this article - https://marknelson.us/posts/2014/10/19/data-compression-with-arithmetic-coding.html
 
@@ -176,9 +176,7 @@ where
     B: BitStore,
     W: BitWrite,
 {
-    precision: u32,
-    low: B,
-    high: B,
+    state: common::State<B>,
     pending: u32,
     output: &'a mut W,
 }
@@ -193,57 +191,40 @@ where
     /// Normally this would be done automatically using the [`Encoder::new`]
     /// method.
     pub fn new(precision: u32, output: &'a mut W) -> Self {
-        let low = B::ZERO;
-        let high = B::ONE << precision;
+        let state = common::State::new(precision);
         let pending = 0;
 
         Self {
-            precision,
-            low,
-            high,
+            state,
             pending,
             output,
         }
     }
 
-    fn three_quarter(&self) -> B {
-        self.half() + self.quarter()
-    }
-
-    fn half(&self) -> B {
-        B::ONE << (self.precision - 1)
-    }
-
-    fn quarter(&self) -> B {
-        B::ONE << (self.precision - 2)
-    }
-
     fn scale(&mut self, p: Range<B>, denominator: B) -> io::Result<()> {
-        let range = self.high - self.low + B::ONE;
-
-        self.high = self.low + (range * p.end) / denominator - B::ONE;
-        self.low += (range * p.start) / denominator;
-
+        self.state.scale(p, denominator);
         self.normalise()
     }
 
     fn normalise(&mut self) -> io::Result<()> {
-        while self.high < self.half() || self.low >= self.half() {
-            if self.high < self.half() {
+        while self.state.high < self.state.half() || self.state.low >= self.state.half() {
+            if self.state.high < self.state.half() {
                 self.emit(false)?;
-                self.high <<= 1;
-                self.low <<= 1;
+                self.state.high <<= 1;
+                self.state.low <<= 1;
             } else {
                 self.emit(true)?;
-                self.low = (self.low - self.half()) << 1;
-                self.high = (self.high - self.half()) << 1;
+                self.state.low = (self.state.low - self.state.half()) << 1;
+                self.state.high = (self.state.high - self.state.half()) << 1;
             }
         }
 
-        while self.low >= self.quarter() && self.high < (self.three_quarter()) {
+        while self.state.low >= self.state.quarter()
+            && self.state.high < (self.state.three_quarter())
+        {
             self.pending += 1;
-            self.low = (self.low - self.quarter()) << 1;
-            self.high = (self.high - self.quarter()) << 1;
+            self.state.low = (self.state.low - self.state.quarter()) << 1;
+            self.state.high = (self.state.high - self.state.quarter()) << 1;
         }
 
         Ok(())
@@ -258,14 +239,16 @@ where
         Ok(())
     }
 
-    /// Flush the internal buffer and write all remaining bits to the output
+    /// Flush the internal buffer and write all remaining bits to the output.
+    /// This method MUST be called when you finish writing symbols to ensure
+    /// they are fully written to the output.
     ///
     /// # Errors
     ///
     /// This method can fail if the output cannot be written to
     pub fn flush(&mut self) -> io::Result<()> {
         self.pending += 1;
-        if self.low <= self.quarter() {
+        if self.state.low <= self.state.quarter() {
             self.emit(false)?;
         } else {
             self.emit(true)?;
diff --git a/src/lib.rs b/src/lib.rs
@@ -4,6 +4,7 @@
 
 pub use arithmetic_coding_core::{fixed_length, max_length, one_shot, BitStore, Model};
 
+mod common;
 pub mod decoder;
 pub mod encoder;