diff --git a/Cargo.toml b/Cargo.toml index 196f7ad..ffb783e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,10 +8,8 @@ license = "Apache-2.0" repository = "https://github.com/mrcroxx/cmsketch-rs" homepage = "https://github.com/mrcroxx/cmsketch-rs" rust-version = "1.81.0" -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -paste = "1.0" [dev-dependencies] itertools = "0.14" diff --git a/src/atomic.rs b/src/atomic.rs index d6a70d5..8e108be 100644 --- a/src/atomic.rs +++ b/src/atomic.rs @@ -14,124 +14,6 @@ use std::sync::atomic::{AtomicU16, AtomicU32, AtomicU64, AtomicU8, AtomicUsize, Ordering}; -use paste::paste; - -macro_rules! cmsketch { - ($( {$type:ty, $atomic:ty, $suffix:ident}, )*) => { - paste! { - $( - #[derive(Debug)] - pub struct [] { - width: usize, - depth: usize, - - table: Box<[$atomic]>, - } - - impl [] { - /// 2 / w = eps; w = 2 / eps - /// 1 / 2^depth <= 1 - confidence; depth >= -log2(1 - confidence) - /// - /// estimate confidence => depth: - /// - /// 0.5 => 1 - /// 0.6 => 2 - /// 0.7 => 2 - /// 0.8 => 3 - /// 0.9 => 4 - /// 0.95 => 5 - /// 0.995 => 8 - pub fn new(eps: f64, confidence: f64) ->Self { - - let width = (2.0 / eps).ceil() as usize; - let depth = (- (1.0 - confidence).log2()).ceil() as usize; - debug_assert!(width > 0, "width: {width}"); - debug_assert!(depth > 0, "depth: {depth}"); - - let table = std::iter::repeat_with(|| $atomic::new(0)).take(width * depth).collect(); - - Self { - width, - depth, - table, - } - } - - pub fn inc(&self, hash: u64) { - self.inc_by(hash, 1); - } - - pub fn inc_by(&self, hash: u64, count: $type) { - for depth in 0..self.depth { - let index = self.index(depth, hash); - let _ = self.table[index].fetch_update(Ordering::Relaxed, Ordering::Relaxed, |x| { - if x <= $type::MAX - count { Some(x + count) } else { None } - }); - } - } - - pub fn dec(&self, hash: u64) { - self.dec_by(hash, 1); - } - - pub fn dec_by(&self, hash: u64, count: $type) { - for depth in 0..self.depth { - let index = self.index(depth, hash); - let _ = self.table[index].fetch_update(Ordering::Relaxed, Ordering::Relaxed, |x| { - if x >= count { Some(x - count) } else { None } - }); - } - } - - pub fn estimate(&self, hash: u64) -> $type { - unsafe { - (0..self.depth).map(|depth| self.table[self.index(depth, hash)].load(Ordering::Relaxed)).min().unwrap_unchecked() - } - } - - pub fn clear(&self) { - self.table.iter().for_each(|v| v.store(0, Ordering::Relaxed)); - } - - pub fn halve(&self) { - self.table.iter().for_each(|v| { - let _ = v.fetch_update(Ordering::Relaxed, Ordering::Relaxed, |x| Some(x >> 1)); - }); - } - - pub fn decay(&self, decay: f64) { - self.table.iter().for_each(|v| { - let _ = v.fetch_update(Ordering::Relaxed, Ordering::Relaxed, |x| Some((x as f64 * decay) as $type)); - }); - } - - pub fn width(&self) -> usize { - self.width - } - - pub fn depth(&self) -> usize { - self.depth - } - - pub fn capacity(&self) -> $type { - $type::MAX - } - - #[inline(always)] - fn index(&self, depth: usize, hash: u64) -> usize { - depth * self.width - + (combine_hashes(twang_mix64(depth as u64), hash) as usize % self.width) - } - - pub fn memory(&self) -> usize { - ($type::BITS as usize * self.depth * self.width + usize::BITS as usize * 3) / 8 - } - } - )* - } - }; -} - /// Reduce two 64-bit hashes into one. /// /// Ported from CacheLib, which uses the `Hash128to64` function from Google's city hash. @@ -159,221 +41,333 @@ fn twang_mix64(val: u64) -> u64 { val } -macro_rules! for_all_uint_types { - ($macro:ident) => { - $macro! { - {u8, AtomicU8, AtomicU8}, - {u16, AtomicU16, AtomicU16}, - {u32, AtomicU32, AtomicU32}, - {u64, AtomicU64, AtomicU64}, - {usize, AtomicUsize, AtomicUsize}, - } - }; -} +macro_rules! cmsketch { + ($( {$type:ty, $atomic:ty, $sketch:ident}, )*) => { + $( + #[derive(Debug)] + pub struct $sketch { + width: usize, + depth: usize, + + table: Box<[$atomic]>, + } -for_all_uint_types! { cmsketch } + impl $sketch { + /// 2 / w = eps; w = 2 / eps + /// 1 / 2^depth <= 1 - confidence; depth >= -log2(1 - confidence) + /// + /// estimate confidence => depth: + /// + /// 0.5 => 1 + /// 0.6 => 2 + /// 0.7 => 2 + /// 0.8 => 3 + /// 0.9 => 4 + /// 0.95 => 5 + /// 0.995 => 8 + pub fn new(eps: f64, confidence: f64) ->Self { + + let width = (2.0 / eps).ceil() as usize; + let depth = (- (1.0 - confidence).log2()).ceil() as usize; + debug_assert!(width > 0, "width: {width}"); + debug_assert!(depth > 0, "depth: {depth}"); + + let table = std::iter::repeat_with(|| <$atomic>::new(0)).take(width * depth).collect(); + + Self { + width, + depth, + table, + } + } -#[cfg(test)] -mod tests { - use itertools::Itertools; - use rand_mt::Mt64; + pub fn inc(&self, hash: u64) { + self.inc_by(hash, 1); + } - use super::*; + pub fn inc_by(&self, hash: u64, count: $type) { + for depth in 0..self.depth { + let index = self.index(depth, hash); + let _ = self.table[index].fetch_update(Ordering::Relaxed, Ordering::Relaxed, |x| { + if x <= <$type>::MAX - count { Some(x + count) } else { None } + }); + } + } - macro_rules! test_cmsketch { - ($( {$type:ty, $atomic:ty, $suffix:ident}, )*) => { - paste! { - $( - #[test] - fn []() { - let cms = []::new(0.01, 0.5); - assert_eq!(cms.width(), 200); - assert_eq!(cms.depth(), 1); + pub fn dec(&self, hash: u64) { + self.dec_by(hash, 1); + } - let cms = []::new(0.01, 0.6); - assert_eq!(cms.width(), 200); - assert_eq!(cms.depth(), 2); + pub fn dec_by(&self, hash: u64, count: $type) { + for depth in 0..self.depth { + let index = self.index(depth, hash); + let _ = self.table[index].fetch_update(Ordering::Relaxed, Ordering::Relaxed, |x| { + if x >= count { Some(x - count) } else { None } + }); + } + } - let cms = []::new(0.01, 0.7); - assert_eq!(cms.width(), 200); - assert_eq!(cms.depth(), 2); + pub fn estimate(&self, hash: u64) -> $type { + unsafe { + (0..self.depth).map(|depth| self.table[self.index(depth, hash)].load(Ordering::Relaxed)).min().unwrap_unchecked() + } + } - let cms = []::new(0.01, 0.8); - assert_eq!(cms.width(), 200); - assert_eq!(cms.depth(), 3); + pub fn clear(&self) { + self.table.iter().for_each(|v| v.store(0, Ordering::Relaxed)); + } - let cms = []::new(0.01, 0.9); - assert_eq!(cms.width(), 200); - assert_eq!(cms.depth(), 4); + pub fn halve(&self) { + self.table.iter().for_each(|v| { + let _ = v.fetch_update(Ordering::Relaxed, Ordering::Relaxed, |x| Some(x >> 1)); + }); + } - let cms = []::new(0.01, 0.95); - assert_eq!(cms.width(), 200); - assert_eq!(cms.depth(), 5); + pub fn decay(&self, decay: f64) { + self.table.iter().for_each(|v| { + let _ = v.fetch_update(Ordering::Relaxed, Ordering::Relaxed, |x| Some((x as f64 * decay) as $type)); + }); + } - let cms = []::new(0.01, 0.995); - assert_eq!(cms.width(), 200); - assert_eq!(cms.depth(), 8); - } + pub fn width(&self) -> usize { + self.width + } - #[test] - #[should_panic] - fn []() { - []::new(0.0, 0.0); - } + pub fn depth(&self) -> usize { + self.depth + } + + pub fn capacity(&self) -> $type { + <$type>::MAX + } - #[test] - fn []() { - let cms = []::new(0.01, 0.9); + #[inline(always)] + fn index(&self, depth: usize, hash: u64) -> usize { + depth * self.width + + (combine_hashes(twang_mix64(depth as u64), hash) as usize % self.width) + } - let mut rng = Mt64::new_unseeded(); - let keys = (0..100).map(|_| rng.next_u64()).collect_vec(); + pub fn memory(&self) -> usize { + (<$type>::BITS as usize * self.depth * self.width + usize::BITS as usize * 3) / 8 + } + } + )* + }; +} - for i in 0..100 { - for _ in 0..i { - cms.inc(keys[i]); - } - } +cmsketch! { + {u8, AtomicU8, CMSketchAtomicU8}, + {u16, AtomicU16, CMSketchAtomicU16}, + {u32, AtomicU32, CMSketchAtomicU32}, + {u64, AtomicU64, CMSketchAtomicU64}, + {usize, AtomicUsize, CMSketchAtomicUsize}, +} + +macro_rules! test_cmsketch { + ($( {$module:ident, $type:ty, $atomic:ty, $sketch:ident}, )*) => { + $( + #[cfg(test)] + mod $module { + use itertools::Itertools; + use rand_mt::Mt64; + + use super::*; + + #[test] + fn test_new() { + let cms = $sketch::new(0.01, 0.5); + assert_eq!(cms.width(), 200); + assert_eq!(cms.depth(), 1); + + let cms = $sketch::new(0.01, 0.6); + assert_eq!(cms.width(), 200); + assert_eq!(cms.depth(), 2); + + let cms = $sketch::new(0.01, 0.7); + assert_eq!(cms.width(), 200); + assert_eq!(cms.depth(), 2); + + let cms = $sketch::new(0.01, 0.8); + assert_eq!(cms.width(), 200); + assert_eq!(cms.depth(), 3); + + let cms = $sketch::new(0.01, 0.9); + assert_eq!(cms.width(), 200); + assert_eq!(cms.depth(), 4); + + let cms = $sketch::new(0.01, 0.95); + assert_eq!(cms.width(), 200); + assert_eq!(cms.depth(), 5); + + let cms = $sketch::new(0.01, 0.995); + assert_eq!(cms.width(), 200); + assert_eq!(cms.depth(), 8); + } + + #[test] + #[should_panic] + fn test_new_with_invalid_args() { + $sketch::new(0.0, 0.0); + } + + #[test] + fn test_inc() { + let cms = $sketch::new(0.01, 0.9); - for i in 0..100 { - assert!( - cms.estimate(keys[i]) >= std::cmp::min(i as $type, cms.capacity()), - "assert {} >= {} failed", - cms.estimate(keys[i]), std::cmp::min(i as $type, cms.capacity()) - ); + let mut rng = Mt64::new_unseeded(); + let keys = (0..100).map(|_| rng.next_u64()).collect_vec(); + + for i in 0..100 { + for _ in 0..i { + cms.inc(keys[i]); } } - #[test] - fn []() { - let cms = []::new(0.01, 0.9); + for i in 0..100 { + assert!( + cms.estimate(keys[i]) >= std::cmp::min(i as $type, cms.capacity()), + "assert {} >= {} failed", + cms.estimate(keys[i]), std::cmp::min(i as $type, cms.capacity()) + ); + } + } - let mut rng = Mt64::new_unseeded(); - let keys = (0..100).map(|_| rng.next_u64()).collect_vec(); + #[test] + fn test_dec() { + let cms = $sketch::new(0.01, 0.9); + let mut rng = Mt64::new_unseeded(); + let keys = (0..100).map(|_| rng.next_u64()).collect_vec(); - for i in 0..100 { - for _ in 0..i { - cms.inc(keys[i]); - } - } - for i in 0..100 { - for _ in 0..i { - cms.dec(keys[i]); - } + for i in 0..100 { + for _ in 0..i { + cms.inc(keys[i]); } + } - for i in 0..100 { - assert_eq!(cms.estimate(keys[i]), 0); + for i in 0..100 { + for _ in 0..i { + cms.dec(keys[i]); } } - #[test] - fn []() { - let cms = []::new(0.01, 0.9); + for i in 0..100 { + assert_eq!(cms.estimate(keys[i]), 0); + } + } - let mut rng = Mt64::new_unseeded(); - let keys = (0..100).map(|_| rng.next_u64()).collect_vec(); + #[test] + fn test_clear() { + let cms = $sketch::new(0.01, 0.9); - for i in 0..100 { - for _ in 0..i { - cms.inc(keys[i]); - } + let mut rng = Mt64::new_unseeded(); + let keys = (0..100).map(|_| rng.next_u64()).collect_vec(); + + for i in 0..100 { + for _ in 0..i { + cms.inc(keys[i]); } + } - cms.clear(); + cms.clear(); - for i in 0..100 { - assert_eq!(cms.estimate(keys[i]), 0); - } + for i in 0..100 { + assert_eq!(cms.estimate(keys[i]), 0); } + } - #[test] - fn []() { - let cms = []::new(0.01, 0.9); + #[test] + fn test_halve() { + let cms = $sketch::new(0.01, 0.9); - let mut rng = Mt64::new_unseeded(); - let keys = (0..1000).map(|_| rng.next_u64()).collect_vec(); + let mut rng = Mt64::new_unseeded(); + let keys = (0..1000).map(|_| rng.next_u64()).collect_vec(); - for i in 0..1000 { - for _ in 0..i { - cms.inc(keys[i]); - } + for i in 0..1000 { + for _ in 0..i { + cms.inc(keys[i]); } + } - for i in 0..1000 { - assert!( - cms.estimate(keys[i]) >= std::cmp::min(i as $type, cms.capacity()), - "assert {} >= {} failed", - cms.estimate(keys[i]), std::cmp::min(i as $type, cms.capacity()) - ); - } + for i in 0..1000 { + assert!( + cms.estimate(keys[i]) >= std::cmp::min(i as $type, cms.capacity()), + "assert {} >= {} failed", + cms.estimate(keys[i]), std::cmp::min(i as $type, cms.capacity()) + ); + } - cms.halve(); + cms.halve(); - for i in 0..1000 { - assert!( - cms.estimate(keys[i]) >= std::cmp::min(i as $type / 2, cms.capacity()), - "assert {} >= {} failed", - cms.estimate(keys[i]), std::cmp::min(i as $type / 2, cms.capacity()) - ); - } + for i in 0..1000 { + assert!( + cms.estimate(keys[i]) >= std::cmp::min(i as $type / 2, cms.capacity()), + "assert {} >= {} failed", + cms.estimate(keys[i]), std::cmp::min(i as $type / 2, cms.capacity()) + ); } + } - #[test] - fn []() { - let cms = []::new(0.01, 0.9); - let mut rng = Mt64::new_unseeded(); - let keys = (0..1000).map(|_| rng.next_u64()).collect_vec(); + #[test] + fn test_decay() { + let cms = $sketch::new(0.01, 0.9); + let mut rng = Mt64::new_unseeded(); + let keys = (0..1000).map(|_| rng.next_u64()).collect_vec(); - for i in 0..1000 { - for _ in 0..i { - cms.inc(keys[i]); - } + for i in 0..1000 { + for _ in 0..i { + cms.inc(keys[i]); } + } - for i in 0..1000 { - assert!( - cms.estimate(keys[i]) >= std::cmp::min(i as $type, cms.capacity()), - "assert {} >= {} failed", - cms.estimate(keys[i]), std::cmp::min(i as $type, cms.capacity()) - ); - } + for i in 0..1000 { + assert!( + cms.estimate(keys[i]) >= std::cmp::min(i as $type, cms.capacity()), + "assert {} >= {} failed", + cms.estimate(keys[i]), std::cmp::min(i as $type, cms.capacity()) + ); + } - const FACTOR: f64 = 0.5; - cms.decay(FACTOR); + const FACTOR: f64 = 0.5; + cms.decay(FACTOR); - for i in 0..1000 { - assert!(cms.estimate(keys[i]) >= (std::cmp::min(i as $type, cms.capacity()) as f64 * FACTOR).floor() as $type); - } + for i in 0..1000 { + assert!(cms.estimate(keys[i]) >= (std::cmp::min(i as $type, cms.capacity()) as f64 * FACTOR).floor() as $type); } + } - #[test] - fn []() { - let cms = []::new(0.01, 0.9); - let mut rng = Mt64::new_unseeded(); - let keys = (0..1000).map(|_| rng.next_u64()).collect_vec(); - let mut sum = 0; - - // Try inserting more keys than cms table width - for i in 0..1000 { - for _ in 0..i { - cms.inc(keys[i]); - } - sum += i; + #[test] + fn test_collisions() { + let cms = $sketch::new(0.01, 0.9); + let mut rng = Mt64::new_unseeded(); + let keys = (0..1000).map(|_| rng.next_u64()).collect_vec(); + let mut sum = 0; + + // Try inserting more keys than cms table width + for i in 0..1000 { + for _ in 0..i { + cms.inc(keys[i]); } + sum += i; + } - let error = sum as f64 * 0.01; - for i in 0..10 { - assert!(cms.estimate(keys[i]) >= i as $type); - assert!(i as f64 + error >= cms.estimate(keys[i]) as f64); - } + let error = sum as f64 * 0.01; + for i in 0..10 { + assert!(cms.estimate(keys[i]) >= i as $type); + assert!(i as f64 + error >= cms.estimate(keys[i]) as f64); } - )* + } } - } + )* } +} - for_all_uint_types! { test_cmsketch } +test_cmsketch! { + {tests_cmsketch_atomic_u8, u8, AtomicU8, CMSketchAtomicU8}, + {tests_cmsketch_atomic_u16, u16, AtomicU16, CMSketchAtomicU16}, + {tests_cmsketch_atomic_u32, u32, AtomicU32, CMSketchAtomicU32}, + {tests_cmsketch_atomic_u64, u64, AtomicU64, CMSketchAtomicU64}, + {tests_cmsketch_atomic_usize, usize, AtomicUsize, CMSketchAtomicUsize}, } diff --git a/src/base.rs b/src/base.rs index ec39634..b64b584 100644 --- a/src/base.rs +++ b/src/base.rs @@ -12,122 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -use paste::paste; - -macro_rules! cmsketch { - ($( {$type:ty, $suffix:ident}, )*) => { - paste! { - $( - #[derive(Debug)] - pub struct [] { - width: usize, - depth: usize, - - table: Box<[$type]>, - } - - impl [] { - /// 2 / w = eps; w = 2 / eps - /// 1 / 2^depth <= 1 - confidence; depth >= -log2(1 - confidence) - /// - /// estimate confidence => depth: - /// - /// 0.5 => 1 - /// 0.6 => 2 - /// 0.7 => 2 - /// 0.8 => 3 - /// 0.9 => 4 - /// 0.95 => 5 - /// 0.995 => 8 - pub fn new(eps: f64, confidence: f64) ->Self { - - let width = (2.0 / eps).ceil() as usize; - let depth = (- (1.0 - confidence).log2()).ceil() as usize; - let table = { - // Use `resize` instead of `vec![]` to avoid page faults caused by delayed allocation. - let mut data = Vec::with_capacity(width * depth); - data.resize(width * depth, 0); - data.into_boxed_slice() - }; - - debug_assert!(width > 0, "width: {width}"); - debug_assert!(depth > 0, "depth: {depth}"); - debug_assert_eq!(table.len(), width * depth); - - Self { - width, - depth, - table, - } - } - - pub fn inc(&mut self, hash: u64) { - self.inc_by(hash, 1); - } - - pub fn inc_by(&mut self, hash: u64, count: $type) { - for depth in 0..self.depth { - let index = self.index(depth, hash); - self.table[index] = self.table[index].saturating_add(count); - } - } - - pub fn dec(&mut self, hash: u64) { - self.dec_by(hash, 1); - } - - pub fn dec_by(&mut self, hash: u64, count: $type) { - for depth in 0..self.depth { - let index = self.index(depth, hash); - self.table[index] = self.table[index].saturating_sub(count); - } - } - - pub fn estimate(&self, hash: u64) -> $type { - unsafe { - (0..self.depth).map(|depth| self.table[self.index(depth, hash)]).min().unwrap_unchecked() - } - } - - pub fn clear(&mut self) { - self.table.iter_mut().for_each(|c| *c = 0); - } - - pub fn halve(&mut self) { - self.table.iter_mut().for_each(|c| *c >>= 1); - } - - pub fn decay(&mut self, decay: f64) { - self.table.iter_mut().for_each(|c| *c = (*c as f64 * decay) as $type); - } - - pub fn width(&self) -> usize { - self.width - } - - pub fn depth(&self) -> usize { - self.depth - } - - pub fn capacity(&self) -> $type { - $type::MAX - } - - #[inline(always)] - fn index(&self, depth: usize, hash: u64) -> usize { - depth * self.width - + (combine_hashes(twang_mix64(depth as u64), hash) as usize % self.width) - } - - pub fn memory(&self) -> usize { - ($type::BITS as usize * self.depth * self.width + usize::BITS as usize * 3) / 8 - } - } - )* - } - }; -} - /// Reduce two 64-bit hashes into one. /// /// Ported from CacheLib, which uses the `Hash128to64` function from Google's city hash. @@ -155,221 +39,331 @@ fn twang_mix64(val: u64) -> u64 { val } -macro_rules! for_all_uint_types { - ($macro:ident) => { - $macro! { - {u8, U8}, - {u16, U16}, - {u32, U32}, - {u64, U64}, - {usize, Usize}, - } - }; -} +macro_rules! cmsketch { + ($( {$type:ty, $sketch:ident}, )*) => { + $( + #[derive(Debug)] + pub struct $sketch { + width: usize, + depth: usize, + + table: Box<[$type]>, + } + + impl $sketch { + /// 2 / w = eps; w = 2 / eps + /// 1 / 2^depth <= 1 - confidence; depth >= -log2(1 - confidence) + /// + /// estimate confidence => depth: + /// + /// 0.5 => 1 + /// 0.6 => 2 + /// 0.7 => 2 + /// 0.8 => 3 + /// 0.9 => 4 + /// 0.95 => 5 + /// 0.995 => 8 + pub fn new(eps: f64, confidence: f64) ->Self { + + let width = (2.0 / eps).ceil() as usize; + let depth = (- (1.0 - confidence).log2()).ceil() as usize; + let table = { + // Use `resize` instead of `vec![]` to avoid page faults caused by delayed allocation. + let mut data = Vec::with_capacity(width * depth); + data.resize(width * depth, 0); + data.into_boxed_slice() + }; + + debug_assert!(width > 0, "width: {width}"); + debug_assert!(depth > 0, "depth: {depth}"); + debug_assert_eq!(table.len(), width * depth); + + Self { + width, + depth, + table, + } + } + + pub fn inc(&mut self, hash: u64) { + self.inc_by(hash, 1); + } + + pub fn inc_by(&mut self, hash: u64, count: $type) { + for depth in 0..self.depth { + let index = self.index(depth, hash); + self.table[index] = self.table[index].saturating_add(count); + } + } -for_all_uint_types! { cmsketch } + pub fn dec(&mut self, hash: u64) { + self.dec_by(hash, 1); + } -#[cfg(test)] -mod tests { - use itertools::Itertools; - use rand_mt::Mt64; + pub fn dec_by(&mut self, hash: u64, count: $type) { + for depth in 0..self.depth { + let index = self.index(depth, hash); + self.table[index] = self.table[index].saturating_sub(count); + } + } - use super::*; + pub fn estimate(&self, hash: u64) -> $type { + unsafe { + (0..self.depth).map(|depth| self.table[self.index(depth, hash)]).min().unwrap_unchecked() + } + } - macro_rules! test_cmsketch { - ($( {$type:ty, $suffix:ident}, )*) => { - paste! { - $( - #[test] - fn []() { - let cms = []::new(0.01, 0.5); - assert_eq!(cms.width(), 200); - assert_eq!(cms.depth(), 1); + pub fn clear(&mut self) { + self.table.iter_mut().for_each(|c| *c = 0); + } - let cms = []::new(0.01, 0.6); - assert_eq!(cms.width(), 200); - assert_eq!(cms.depth(), 2); + pub fn halve(&mut self) { + self.table.iter_mut().for_each(|c| *c >>= 1); + } - let cms = []::new(0.01, 0.7); - assert_eq!(cms.width(), 200); - assert_eq!(cms.depth(), 2); + pub fn decay(&mut self, decay: f64) { + self.table.iter_mut().for_each(|c| *c = (*c as f64 * decay) as $type); + } - let cms = []::new(0.01, 0.8); - assert_eq!(cms.width(), 200); - assert_eq!(cms.depth(), 3); + pub fn width(&self) -> usize { + self.width + } - let cms = []::new(0.01, 0.9); - assert_eq!(cms.width(), 200); - assert_eq!(cms.depth(), 4); + pub fn depth(&self) -> usize { + self.depth + } - let cms = []::new(0.01, 0.95); - assert_eq!(cms.width(), 200); - assert_eq!(cms.depth(), 5); + pub fn capacity(&self) -> $type { + <$type>::MAX + } - let cms = []::new(0.01, 0.995); - assert_eq!(cms.width(), 200); - assert_eq!(cms.depth(), 8); - } + #[inline(always)] + fn index(&self, depth: usize, hash: u64) -> usize { + depth * self.width + + (combine_hashes(twang_mix64(depth as u64), hash) as usize % self.width) + } - #[test] - #[should_panic] - fn []() { - []::new(0.0, 0.0); - } + pub fn memory(&self) -> usize { + (<$type>::BITS as usize * self.depth * self.width + usize::BITS as usize * 3) / 8 + } + } + )* + }; +} - #[test] - fn []() { - let mut cms = []::new(0.01, 0.9); +cmsketch! { + {u8, CMSketchU8}, + {u16, CMSketchU16}, + {u32, CMSketchU32}, + {u64, CMSketchU64}, + {usize, CMSketchUsize}, +} - let mut rng = Mt64::new_unseeded(); - let keys = (0..100).map(|_| rng.next_u64()).collect_vec(); +macro_rules! test_cmsketch { + ($( {$module:ident, $type:ty, $sketch:ident}, )*) => { + $( + #[cfg(test)] + mod $module { + use itertools::Itertools; + use rand_mt::Mt64; + + use super::*; + + #[test] + fn test_new() { + let cms = $sketch::new(0.01, 0.5); + assert_eq!(cms.width(), 200); + assert_eq!(cms.depth(), 1); + + let cms = $sketch::new(0.01, 0.6); + assert_eq!(cms.width(), 200); + assert_eq!(cms.depth(), 2); + + let cms = $sketch::new(0.01, 0.7); + assert_eq!(cms.width(), 200); + assert_eq!(cms.depth(), 2); + + let cms = $sketch::new(0.01, 0.8); + assert_eq!(cms.width(), 200); + assert_eq!(cms.depth(), 3); + + let cms = $sketch::new(0.01, 0.9); + assert_eq!(cms.width(), 200); + assert_eq!(cms.depth(), 4); + + let cms = $sketch::new(0.01, 0.95); + assert_eq!(cms.width(), 200); + assert_eq!(cms.depth(), 5); + + let cms = $sketch::new(0.01, 0.995); + assert_eq!(cms.width(), 200); + assert_eq!(cms.depth(), 8); + } - for i in 0..100 { - for _ in 0..i { - cms.inc(keys[i]); - } - } + #[test] + #[should_panic] + fn test_new_with_invalid_args() { + $sketch::new(0.0, 0.0); + } + + #[test] + fn test_inc() { + let mut cms = $sketch::new(0.01, 0.9); + + let mut rng = Mt64::new_unseeded(); + let keys = (0..100).map(|_| rng.next_u64()).collect_vec(); - for i in 0..100 { - assert!( - cms.estimate(keys[i]) >= std::cmp::min(i as $type, cms.capacity()), - "assert {} >= {} failed", - cms.estimate(keys[i]), std::cmp::min(i as $type, cms.capacity()) - ); + for i in 0..100 { + for _ in 0..i { + cms.inc(keys[i]); } } - #[test] - fn []() { - let mut cms = []::new(0.01, 0.9); + for i in 0..100 { + assert!( + cms.estimate(keys[i]) >= std::cmp::min(i as $type, cms.capacity()), + "assert {} >= {} failed", + cms.estimate(keys[i]), std::cmp::min(i as $type, cms.capacity()) + ); + } + } - let mut rng = Mt64::new_unseeded(); - let keys = (0..100).map(|_| rng.next_u64()).collect_vec(); + #[test] + fn test_dec() { + let mut cms = $sketch::new(0.01, 0.9); + let mut rng = Mt64::new_unseeded(); + let keys = (0..100).map(|_| rng.next_u64()).collect_vec(); - for i in 0..100 { - for _ in 0..i { - cms.inc(keys[i]); - } - } - for i in 0..100 { - for _ in 0..i { - cms.dec(keys[i]); - } + for i in 0..100 { + for _ in 0..i { + cms.inc(keys[i]); } + } - for i in 0..100 { - assert_eq!(cms.estimate(keys[i]), 0); + for i in 0..100 { + for _ in 0..i { + cms.dec(keys[i]); } } - #[test] - fn []() { - let mut cms = []::new(0.01, 0.9); + for i in 0..100 { + assert_eq!(cms.estimate(keys[i]), 0); + } + } + + #[test] + fn test_clear() { + let mut cms = $sketch::new(0.01, 0.9); - let mut rng = Mt64::new_unseeded(); - let keys = (0..100).map(|_| rng.next_u64()).collect_vec(); + let mut rng = Mt64::new_unseeded(); + let keys = (0..100).map(|_| rng.next_u64()).collect_vec(); - for i in 0..100 { - for _ in 0..i { - cms.inc(keys[i]); - } + for i in 0..100 { + for _ in 0..i { + cms.inc(keys[i]); } + } - cms.clear(); + cms.clear(); - for i in 0..100 { - assert_eq!(cms.estimate(keys[i]), 0); - } + for i in 0..100 { + assert_eq!(cms.estimate(keys[i]), 0); } + } - #[test] - fn []() { - let mut cms = []::new(0.01, 0.9); + #[test] + fn test_halve() { + let mut cms = $sketch::new(0.01, 0.9); - let mut rng = Mt64::new_unseeded(); - let keys = (0..1000).map(|_| rng.next_u64()).collect_vec(); + let mut rng = Mt64::new_unseeded(); + let keys = (0..1000).map(|_| rng.next_u64()).collect_vec(); - for i in 0..1000 { - for _ in 0..i { - cms.inc(keys[i]); - } + for i in 0..1000 { + for _ in 0..i { + cms.inc(keys[i]); } + } - for i in 0..1000 { - assert!( - cms.estimate(keys[i]) >= std::cmp::min(i as $type, cms.capacity()), - "assert {} >= {} failed", - cms.estimate(keys[i]), std::cmp::min(i as $type, cms.capacity()) - ); - } + for i in 0..1000 { + assert!( + cms.estimate(keys[i]) >= std::cmp::min(i as $type, cms.capacity()), + "assert {} >= {} failed", + cms.estimate(keys[i]), std::cmp::min(i as $type, cms.capacity()) + ); + } - cms.halve(); + cms.halve(); - for i in 0..1000 { - assert!( - cms.estimate(keys[i]) >= std::cmp::min(i as $type / 2, cms.capacity()), - "assert {} >= {} failed", - cms.estimate(keys[i]), std::cmp::min(i as $type / 2, cms.capacity()) - ); - } + for i in 0..1000 { + assert!( + cms.estimate(keys[i]) >= std::cmp::min(i as $type / 2, cms.capacity()), + "assert {} >= {} failed", + cms.estimate(keys[i]), std::cmp::min(i as $type / 2, cms.capacity()) + ); } + } - #[test] - fn []() { - let mut cms = []::new(0.01, 0.9); - let mut rng = Mt64::new_unseeded(); - let keys = (0..1000).map(|_| rng.next_u64()).collect_vec(); + #[test] + fn test_decay() { + let mut cms = $sketch::new(0.01, 0.9); + let mut rng = Mt64::new_unseeded(); + let keys = (0..1000).map(|_| rng.next_u64()).collect_vec(); - for i in 0..1000 { - for _ in 0..i { - cms.inc(keys[i]); - } + for i in 0..1000 { + for _ in 0..i { + cms.inc(keys[i]); } + } - for i in 0..1000 { - assert!( - cms.estimate(keys[i]) >= std::cmp::min(i as $type, cms.capacity()), - "assert {} >= {} failed", - cms.estimate(keys[i]), std::cmp::min(i as $type, cms.capacity()) - ); - } + for i in 0..1000 { + assert!( + cms.estimate(keys[i]) >= std::cmp::min(i as $type, cms.capacity()), + "assert {} >= {} failed", + cms.estimate(keys[i]), std::cmp::min(i as $type, cms.capacity()) + ); + } - const FACTOR: f64 = 0.5; - cms.decay(FACTOR); + const FACTOR: f64 = 0.5; + cms.decay(FACTOR); - for i in 0..1000 { - assert!(cms.estimate(keys[i]) >= (std::cmp::min(i as $type, cms.capacity()) as f64 * FACTOR).floor() as $type); - } + for i in 0..1000 { + assert!(cms.estimate(keys[i]) >= (std::cmp::min(i as $type, cms.capacity()) as f64 * FACTOR).floor() as $type); } + } - #[test] - fn []() { - let mut cms = []::new(0.01, 0.9); - let mut rng = Mt64::new_unseeded(); - let keys = (0..1000).map(|_| rng.next_u64()).collect_vec(); - let mut sum = 0; - - // Try inserting more keys than cms table width - for i in 0..1000 { - for _ in 0..i { - cms.inc(keys[i]); - } - sum += i; + #[test] + fn test_collisions() { + let mut cms = $sketch::new(0.01, 0.9); + let mut rng = Mt64::new_unseeded(); + let keys = (0..1000).map(|_| rng.next_u64()).collect_vec(); + let mut sum = 0; + + // Try inserting more keys than cms table width + for i in 0..1000 { + for _ in 0..i { + cms.inc(keys[i]); } + sum += i; + } - let error = sum as f64 * 0.01; - for i in 0..10 { - assert!(cms.estimate(keys[i]) >= i as $type); - assert!(i as f64 + error >= cms.estimate(keys[i]) as f64); - } + let error = sum as f64 * 0.01; + for i in 0..10 { + assert!(cms.estimate(keys[i]) >= i as $type); + assert!(i as f64 + error >= cms.estimate(keys[i]) as f64); } - )* + } } - } - } + )* + }; +} - for_all_uint_types! { test_cmsketch } +test_cmsketch! { + {tests_u8, u8, CMSketchU8}, + {tests_u16, u16, CMSketchU16}, + {tests_u32, u32, CMSketchU32}, + {tests_u64, u64, CMSketchU64}, + {tests_usize, usize, CMSketchUsize}, }