From 03018140e5c53133814bd074054e7a5245cdd78e Mon Sep 17 00:00:00 2001
From: Remi Rampin
Date: Sun, 16 Aug 2020 14:58:16 -0400
Subject: [PATCH] Use ZPAQ algorithm as specified by zpaq206.pdf

---
 src/lib.rs | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/src/lib.rs b/src/lib.rs
index 4cce6e7..60b400a 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -473,7 +473,8 @@ impl ChunkerImpl for SizeLimited {
     }
 }
 
-const HM: Wrapping<u32> = Wrapping(123_456_791);
+const HM1: Wrapping<u32> = Wrapping(314_159_265);
+const HM2: Wrapping<u32> = Wrapping(271_828_182);
 
 pub struct ZPAQ {
     nbits: usize,
@@ -488,15 +489,15 @@ impl ZPAQ {
             nbits: 32 - nbits,
             c1: 0,
             o1: [0; 256],
-            h: HM,
+            h: Wrapping(0),
         }
     }
 
     pub fn update(&mut self, byte: u8) -> bool {
         if byte == self.o1[self.c1 as usize] {
-            self.h = self.h * HM + Wrapping(byte as u32 + 1);
+            self.h = HM1 * (self.h + Wrapping(byte as u32 + 1));
         } else {
-            self.h = self.h * HM * Wrapping(2) + Wrapping(byte as u32 + 1);
+            self.h = HM2 * (self.h + Wrapping(byte as u32 + 1));
         }
         self.o1[self.c1 as usize] = byte;
         self.c1 = byte;
@@ -521,7 +522,7 @@ impl ChunkerImpl for ZPAQ {
     fn reset(&mut self) {
         self.c1 = 0u8;
         self.o1.clone_from_slice(&[0u8; 256]);
-        self.h = HM;
+        self.h = Wrapping(0);
     }
 }
 
@@ -542,7 +543,7 @@ mod tests {
         let rollinghash = ZPAQ::new(3); // 8-bit chunk average
         let chunker = Chunker::new(rollinghash);
         let data = b"defghijklmnopqrstuvwxyz1234567890";
-        let expected = b"def|ghijk|lmno|pq|rstuvw|xyz123|4567890|";
+        let expected = b"de|fghijklmnopqr|stuvwxyz12345|67890|";
         (chunker, data, io::Cursor::new(data), expected)
     }
 
@@ -632,8 +633,7 @@ mod tests {
         assert_eq!(
             result,
             vec![
-                (0, 3), (3, 5), (8, 4), (12, 2),
-                (14, 6), (20, 6), (26, 7),
+                (0, 2), (2, 13), (15, 13), (28, 5),
             ]
         );
     }
@@ -654,8 +654,8 @@ mod tests {
         assert_eq!(
             result,
             vec![
-                (0, 3), (3, 5), (8, 4), (12, 2),
-                (14, 5), (19, 5), (24, 3), (27, 5), (32, 1),
+                (0, 2), (2, 5), (7, 5), (12, 2),
+                (14, 5), (19, 5), (24, 5), (29, 4),
             ]
         );
     }