Skip to content

Commit 21cb95a

Browse files
Replace entry() with entry_upsert()
Profiling showed that this function is quite the hotspot. By changing the implementation slightly, instead of walking the tree using the Linked List, but iterate directly over the values, we improve the throughput of certain CPU bound queries. We've seen a reduction in time needed of > 50% for certain rollup queries. Due to the way entry() was called, and the way the Borrow Checker is unable to help us keep 2 mutable references into a map, we were doing double lookups into the backing HashMap pretty much always when this function was called. However, looking at the code, the only callers of this function only wanted to either increment by 1, or by a count. Therefore, make a function to actually support that usecase, which doesn't have this problem with the Borrow Checker, as it doesn't have to return a mutable reference: It actually does the work immediately
1 parent 4bdf9b1 commit 21cb95a

File tree

1 file changed

+95
-35
lines changed

1 file changed

+95
-35
lines changed

crates/udd-sketch/src/lib.rs

Lines changed: 95 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,14 @@
22
//! Based on the paper: https://arxiv.org/abs/2004.08604
33
44
use serde::{Deserialize, Serialize};
5+
use std::collections::hash_map::Entry;
56
use std::collections::HashMap;
67

78
#[cfg(test)]
89
use ordered_float::OrderedFloat;
910
#[cfg(test)]
1011
use std::collections::HashSet;
12+
1113
#[cfg(test)]
1214
extern crate quickcheck;
1315
#[cfg(test)]
@@ -122,9 +124,9 @@ impl SketchHashMap {
122124
}
123125
}
124126

125-
// Increment the count at a key, creating the entry if needed.
127+
/// Increment the count at a key, creating the entry if needed.
126128
fn increment(&mut self, key: SketchHashKey) {
127-
self.entry(key).count += 1;
129+
self.entry_upsert(key, 1);
128130
}
129131

130132
fn iter(&self) -> SketchHashIterator {
@@ -134,26 +136,72 @@ impl SketchHashMap {
134136
}
135137
}
136138

137-
// Returns the entry for a given key.
138-
// If the entry doesn't yet exist, this function will create it
139-
// with 0 count and ensure the list of keys is correctly updated.
140-
fn entry(&mut self, key: SketchHashKey) -> &mut SketchHashEntry {
141-
let mut next = self.head;
142-
if !self.map.contains_key(&key) {
143-
if key < self.head {
144-
self.head = key;
145-
} else {
146-
let mut prev = SketchHashKey::Invalid;
147-
while key > next {
148-
prev = next;
149-
next = self.map[&next].next;
150-
}
151-
self.map.get_mut(&prev).expect("Invalid key found").next = key;
139+
/// Splits an entry if `key` is supposed to come right after it
140+
/// Returns the key *after* the one that was split.
141+
#[inline]
142+
#[must_use] // The caller should really do something with this information.
143+
fn entry_split(&mut self, key: SketchHashKey) -> SketchHashKey {
144+
debug_assert_ne!(
145+
key,
146+
SketchHashKey::Invalid,
147+
"Invalid should never be used as a key into the SketchHashMap"
148+
);
149+
150+
let next: SketchHashKey;
151+
152+
// Special case, if we're actually in front of the Head,
153+
// we're not really splitting the linked list, but prepending.
154+
if key < self.head {
155+
next = self.head;
156+
self.head = key;
157+
return next;
158+
}
159+
160+
// Unfortunately, we'll now have to walk the whole map in order
161+
// to find the location where we should be inserted
162+
// into the single-linked list
163+
for (k, e) in self.map.iter_mut() {
164+
if *k < key && e.next > key {
165+
next = e.next;
166+
e.next = key;
167+
return next;
152168
}
153169
}
154-
self.map
155-
.entry(key)
156-
.or_insert(SketchHashEntry { count: 0, next })
170+
171+
unreachable!("Invalid key found");
172+
}
173+
174+
/// Upsert the given key/count into our map. This function
175+
/// ensures the Linked List is in good shape afterwards.
176+
#[inline]
177+
fn entry_upsert(&mut self, key: SketchHashKey, count: u64) {
178+
match self.map.entry(key) {
179+
Entry::Occupied(mut o) => {
180+
o.get_mut().count += count;
181+
// Great, we don't have to update the Linked List
182+
return;
183+
}
184+
Entry::Vacant(v) if self.head > key => {
185+
v.insert(SketchHashEntry {
186+
count,
187+
next: self.head,
188+
});
189+
self.head = key;
190+
// Great, we don't have to update the Linked List
191+
return;
192+
}
193+
Entry::Vacant(_) => (), // We need to release our &mut map here, as we need to update 2 entries
194+
};
195+
196+
// We've just inserted a new value, but need to ensure we fix the linked list again.
197+
let new_next = self.entry_split(key);
198+
self.map.insert(
199+
key,
200+
SketchHashEntry {
201+
count,
202+
next: new_next,
203+
},
204+
);
157205
}
158206

159207
fn len(&self) -> usize {
@@ -324,7 +372,8 @@ impl UDDSketch {
324372
for _ in 0..extra_compactions {
325373
key = key.compact_key();
326374
}
327-
self.buckets.entry(key).count += count;
375+
376+
self.buckets.entry_upsert(key, count);
328377
}
329378

330379
while self.buckets.len() > self.max_buckets as usize {
@@ -368,7 +417,7 @@ impl UDDSketch {
368417

369418
for entry in other.buckets.iter() {
370419
let (key, value) = entry;
371-
self.buckets.entry(key).count += value;
420+
self.buckets.entry_upsert(key, value);
372421
}
373422

374423
while self.buckets.len() > self.max_buckets as usize {
@@ -767,20 +816,31 @@ mod tests {
767816
}
768817
}
769818

819+
#[test]
820+
#[should_panic]
821+
fn test_entry_invalid_hashmap_key() {
822+
let mut map = SketchHashMap {
823+
map: HashMap::new(),
824+
head: Invalid,
825+
};
826+
827+
map.entry_upsert(Invalid, 0);
828+
}
829+
770830
#[test]
771831
fn test_entry_insertion_order() {
772832
let mut map = SketchHashMap {
773833
map: HashMap::new(),
774834
head: Invalid,
775835
};
776836

777-
map.entry(SketchHashKey::Negative(i64::MIN)).count += 5;
778-
map.entry(SketchHashKey::Negative(10)).count += 1;
779-
map.entry(SketchHashKey::Positive(i64::MAX - 100)).count += 17;
780-
map.entry(SketchHashKey::Zero).count += 7;
781-
map.entry(SketchHashKey::Positive(-10)).count += 11;
782-
map.entry(SketchHashKey::Negative(-10)).count += 3;
783-
map.entry(SketchHashKey::Positive(10)).count += 13;
837+
map.entry_upsert(SketchHashKey::Negative(i64::MIN), 5);
838+
map.entry_upsert(SketchHashKey::Negative(10), 1);
839+
map.entry_upsert(SketchHashKey::Positive(i64::MAX - 100), 17);
840+
map.entry_upsert(SketchHashKey::Zero, 7);
841+
map.entry_upsert(SketchHashKey::Positive(-10), 11);
842+
map.entry_upsert(SketchHashKey::Negative(-10), 3);
843+
map.entry_upsert(SketchHashKey::Positive(10), 13);
784844

785845
let keys: Vec<_> = map.iter().collect::<Vec<_>>();
786846
assert_eq!(
@@ -798,12 +858,12 @@ mod tests {
798858

799859
// We add some things before the current head, insert some new ones,
800860
// add some to the end, and again inbetween some others
801-
map.entry(SketchHashKey::Negative(i64::MAX)).count += 3;
802-
map.entry(SketchHashKey::Negative(-10)).count += 23;
803-
map.entry(SketchHashKey::Positive(10)).count += 123;
804-
map.entry(SketchHashKey::Positive(9)).count += 29;
805-
map.entry(SketchHashKey::Positive(11)).count += 31;
806-
map.entry(SketchHashKey::Positive(i64::MAX)).count += 8;
861+
map.entry_upsert(SketchHashKey::Negative(i64::MAX), 3);
862+
map.entry_upsert(SketchHashKey::Negative(-10), 23);
863+
map.entry_upsert(SketchHashKey::Positive(9), 29);
864+
map.entry_upsert(SketchHashKey::Positive(i64::MAX), 8);
865+
map.entry_upsert(SketchHashKey::Positive(10), 123);
866+
map.entry_upsert(SketchHashKey::Positive(11), 31);
807867

808868
let keys: Vec<_> = map.iter().collect::<Vec<_>>();
809869
assert_eq!(

0 commit comments

Comments
 (0)