22//! Based on the paper: https://arxiv.org/abs/2004.08604
33
44use serde:: { Deserialize , Serialize } ;
5+ use std:: collections:: hash_map:: Entry ;
56use std:: collections:: HashMap ;
67
78#[ cfg( test) ]
89use ordered_float:: OrderedFloat ;
910#[ cfg( test) ]
1011use std:: collections:: HashSet ;
12+
1113#[ cfg( test) ]
1214extern crate quickcheck;
1315#[ cfg( test) ]
@@ -122,9 +124,9 @@ impl SketchHashMap {
122124 }
123125 }
124126
125- // Increment the count at a key, creating the entry if needed.
127+ /// Increment the count at a key, creating the entry if needed.
126128 fn increment ( & mut self , key : SketchHashKey ) {
127- self . entry ( key) . count += 1 ;
129+ self . entry_upsert ( key, 1 ) ;
128130 }
129131
130132 fn iter ( & self ) -> SketchHashIterator {
@@ -134,26 +136,72 @@ impl SketchHashMap {
134136 }
135137 }
136138
137- // Returns the entry for a given key.
138- // If the entry doesn't yet exist, this function will create it
139- // with 0 count and ensure the list of keys is correctly updated.
140- fn entry ( & mut self , key : SketchHashKey ) -> & mut SketchHashEntry {
141- let mut next = self . head ;
142- if !self . map . contains_key ( & key) {
143- if key < self . head {
144- self . head = key;
145- } else {
146- let mut prev = SketchHashKey :: Invalid ;
147- while key > next {
148- prev = next;
149- next = self . map [ & next] . next ;
150- }
151- self . map . get_mut ( & prev) . expect ( "Invalid key found" ) . next = key;
139+ /// Splits an entry if `key` is supposed to come right after it
140+ /// Returns the key *after* the one that was split.
141+ #[ inline]
142+ #[ must_use] // The caller should really do something with this information.
143+ fn entry_split ( & mut self , key : SketchHashKey ) -> SketchHashKey {
144+ debug_assert_ne ! (
145+ key,
146+ SketchHashKey :: Invalid ,
147+ "Invalid should never be used as a key into the SketchHashMap"
148+ ) ;
149+
150+ let next: SketchHashKey ;
151+
152+ // Special case, if we're actually in front of the Head,
153+ // we're not really splitting the linked list, but prepending.
154+ if key < self . head {
155+ next = self . head ;
156+ self . head = key;
157+ return next;
158+ }
159+
160+ // Unfortunately, we'll now have to walk the whole map in order
161+ // to find the location where we should be inserted
162+ // into the single-linked list
163+ for ( k, e) in self . map . iter_mut ( ) {
164+ if * k < key && e. next > key {
165+ next = e. next ;
166+ e. next = key;
167+ return next;
152168 }
153169 }
154- self . map
155- . entry ( key)
156- . or_insert ( SketchHashEntry { count : 0 , next } )
170+
171+ unreachable ! ( "Invalid key found" ) ;
172+ }
173+
174+ /// Upsert the given key/count into our map. This function
175+ /// ensures the Linked List is in good shape afterwards.
176+ #[ inline]
177+ fn entry_upsert ( & mut self , key : SketchHashKey , count : u64 ) {
178+ match self . map . entry ( key) {
179+ Entry :: Occupied ( mut o) => {
180+ o. get_mut ( ) . count += count;
181+ // Great, we don't have to update the Linked List
182+ return ;
183+ }
184+ Entry :: Vacant ( v) if self . head > key => {
185+ v. insert ( SketchHashEntry {
186+ count,
187+ next : self . head ,
188+ } ) ;
189+ self . head = key;
190+ // Great, we don't have to update the Linked List
191+ return ;
192+ }
193+ Entry :: Vacant ( _) => ( ) , // We need to release our &mut map here, as we need to update 2 entries
194+ } ;
195+
196+ // We've just inserted a new value, but need to ensure we fix the linked list again.
197+ let new_next = self . entry_split ( key) ;
198+ self . map . insert (
199+ key,
200+ SketchHashEntry {
201+ count,
202+ next : new_next,
203+ } ,
204+ ) ;
157205 }
158206
159207 fn len ( & self ) -> usize {
@@ -324,7 +372,8 @@ impl UDDSketch {
324372 for _ in 0 ..extra_compactions {
325373 key = key. compact_key ( ) ;
326374 }
327- self . buckets . entry ( key) . count += count;
375+
376+ self . buckets . entry_upsert ( key, count) ;
328377 }
329378
330379 while self . buckets . len ( ) > self . max_buckets as usize {
@@ -368,7 +417,7 @@ impl UDDSketch {
368417
369418 for entry in other. buckets . iter ( ) {
370419 let ( key, value) = entry;
371- self . buckets . entry ( key) . count += value;
420+ self . buckets . entry_upsert ( key, value) ;
372421 }
373422
374423 while self . buckets . len ( ) > self . max_buckets as usize {
@@ -767,20 +816,31 @@ mod tests {
767816 }
768817 }
769818
819+ #[ test]
820+ #[ should_panic]
821+ fn test_entry_invalid_hashmap_key ( ) {
822+ let mut map = SketchHashMap {
823+ map : HashMap :: new ( ) ,
824+ head : Invalid ,
825+ } ;
826+
827+ map. entry_upsert ( Invalid , 0 ) ;
828+ }
829+
770830 #[ test]
771831 fn test_entry_insertion_order ( ) {
772832 let mut map = SketchHashMap {
773833 map : HashMap :: new ( ) ,
774834 head : Invalid ,
775835 } ;
776836
777- map. entry ( SketchHashKey :: Negative ( i64:: MIN ) ) . count += 5 ;
778- map. entry ( SketchHashKey :: Negative ( 10 ) ) . count += 1 ;
779- map. entry ( SketchHashKey :: Positive ( i64:: MAX - 100 ) ) . count += 17 ;
780- map. entry ( SketchHashKey :: Zero ) . count += 7 ;
781- map. entry ( SketchHashKey :: Positive ( -10 ) ) . count += 11 ;
782- map. entry ( SketchHashKey :: Negative ( -10 ) ) . count += 3 ;
783- map. entry ( SketchHashKey :: Positive ( 10 ) ) . count += 13 ;
837+ map. entry_upsert ( SketchHashKey :: Negative ( i64:: MIN ) , 5 ) ;
838+ map. entry_upsert ( SketchHashKey :: Negative ( 10 ) , 1 ) ;
839+ map. entry_upsert ( SketchHashKey :: Positive ( i64:: MAX - 100 ) , 17 ) ;
840+ map. entry_upsert ( SketchHashKey :: Zero , 7 ) ;
841+ map. entry_upsert ( SketchHashKey :: Positive ( -10 ) , 11 ) ;
842+ map. entry_upsert ( SketchHashKey :: Negative ( -10 ) , 3 ) ;
843+ map. entry_upsert ( SketchHashKey :: Positive ( 10 ) , 13 ) ;
784844
785845 let keys: Vec < _ > = map. iter ( ) . collect :: < Vec < _ > > ( ) ;
786846 assert_eq ! (
@@ -798,12 +858,12 @@ mod tests {
798858
799859 // We add some things before the current head, insert some new ones,
800860 // add some to the end, and again inbetween some others
801- map. entry ( SketchHashKey :: Negative ( i64:: MAX ) ) . count += 3 ;
802- map. entry ( SketchHashKey :: Negative ( -10 ) ) . count += 23 ;
803- map. entry ( SketchHashKey :: Positive ( 10 ) ) . count += 123 ;
804- map. entry ( SketchHashKey :: Positive ( 9 ) ) . count += 29 ;
805- map. entry ( SketchHashKey :: Positive ( 11 ) ) . count += 31 ;
806- map. entry ( SketchHashKey :: Positive ( i64 :: MAX ) ) . count += 8 ;
861+ map. entry_upsert ( SketchHashKey :: Negative ( i64:: MAX ) , 3 ) ;
862+ map. entry_upsert ( SketchHashKey :: Negative ( -10 ) , 23 ) ;
863+ map. entry_upsert ( SketchHashKey :: Positive ( 9 ) , 29 ) ;
864+ map. entry_upsert ( SketchHashKey :: Positive ( i64 :: MAX ) , 8 ) ;
865+ map. entry_upsert ( SketchHashKey :: Positive ( 10 ) , 123 ) ;
866+ map. entry_upsert ( SketchHashKey :: Positive ( 11 ) , 31 ) ;
807867
808868 let keys: Vec < _ > = map. iter ( ) . collect :: < Vec < _ > > ( ) ;
809869 assert_eq ! (
0 commit comments