Skip to content

Commit f7ce843

Browse files
committed
Expose the precomputed hash using a trait so that I can use it from rust-selectors.
This allows us to get rid of the extra hashing overhead every time we check the bloom filter.
1 parent 8b4a846 commit f7ce843

File tree

5 files changed

+41
-2
lines changed

5 files changed

+41
-2
lines changed

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ log-events = ["rustc-serialize"]
2828
unstable = []
2929

3030
[dependencies]
31+
precomputed-hash = "0.1"
3132
lazy_static = "0.2"
3233
serde = "0.9"
3334
phf_shared = "0.7.4"

src/atom.rs

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -177,6 +177,7 @@ pub struct PhfStrSet {
177177
pub key: u64,
178178
pub disps: &'static [(u32, u32)],
179179
pub atoms: &'static [&'static str],
180+
pub hashes: &'static [u32],
180181
}
181182

182183
pub struct EmptyStaticAtomSet;
@@ -189,6 +190,8 @@ impl StaticAtomSet for EmptyStaticAtomSet {
189190
key: 0,
190191
disps: &[(0, 0)],
191192
atoms: &[""],
193+
// "" SipHash'd, then folded to 32 bits by u64_hash_as_u32 (upper half xored into lower half).
194+
hashes: &[0x3ddddef3],
192195
};
193196
&SET
194197
}
@@ -219,14 +222,37 @@ impl<Static: StaticAtomSet> HeapSizeOf for Atom<Static> {
219222
}
220223
}
221224

225+
impl<Static: StaticAtomSet> ::precomputed_hash::PrecomputedHash for Atom<Static> {
226+
fn precomputed_hash(&self) -> u32 {
227+
self.get_hash()
228+
}
229+
}
230+
231+
fn u64_hash_as_u32(h: u64) -> u32 {
232+
// Folds the upper 32 bits into the lower 32 bits with xor. This may or may not be a great way to mix the hash...
233+
((h >> 32) ^ h) as u32
234+
}
235+
222236
impl<Static: StaticAtomSet> Atom<Static> {
223237
#[inline(always)]
224238
unsafe fn unpack(&self) -> UnpackedAtom {
225239
UnpackedAtom::from_packed(self.unsafe_data)
226240
}
227241

228242
pub fn get_hash(&self) -> u32 {
229-
((self.unsafe_data >> 32) ^ self.unsafe_data) as u32
243+
match unsafe { self.unpack() } {
244+
Static(index) => {
245+
let static_set = Static::get();
246+
static_set.hashes[index as usize]
247+
}
248+
Dynamic(entry) => {
249+
let entry = entry as *mut StringCacheEntry;
250+
u64_hash_as_u32(unsafe { (*entry).hash })
251+
}
252+
Inline(..) => {
253+
u64_hash_as_u32(self.unsafe_data)
254+
}
255+
}
230256
}
231257
}
232258

src/lib.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,9 @@
1919
#[cfg(test)] extern crate rand;
2020
#[macro_use] extern crate lazy_static;
2121
#[macro_use] extern crate debug_unreachable;
22-
extern crate serde;
2322
extern crate phf_shared;
23+
extern crate precomputed_hash;
24+
extern crate serde;
2425
extern crate string_cache_shared as shared;
2526

2627
pub use atom::{Atom, StaticAtomSet, PhfStrSet, EmptyStaticAtomSet, DefaultAtom};

string-cache-codegen/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,4 +15,5 @@ path = "lib.rs"
1515
[dependencies]
1616
string_cache_shared = {path = "../shared", version = "0.3"}
1717
phf_generator = "0.7.15"
18+
phf_shared = "0.7.4"
1819
quote = "0.3.9"

string-cache-codegen/lib.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,10 @@
77
// option. This file may not be copied, modified, or distributed
88
// except according to those terms.
99

10+
#![recursion_limit = "128"]
11+
1012
extern crate phf_generator;
13+
extern crate phf_shared;
1114
extern crate string_cache_shared as shared;
1215
#[macro_use] extern crate quote;
1316

@@ -93,6 +96,12 @@ impl AtomType {
9396
let empty_string_index = atoms.iter().position(|s| s.is_empty()).unwrap() as u32;
9497
let data = (0..atoms.len()).map(|i| quote::Hex(shared::pack_static(i as u32)));
9598

99+
let hashes: Vec<u32> =
100+
atoms.iter().map(|string| {
101+
let hash = phf_shared::hash(string, key);
102+
((hash >> 32) ^ hash) as u32
103+
}).collect();
104+
96105
let type_name = if let Some(position) = self.path.rfind("::") {
97106
&self.path[position + "::".len() ..]
98107
} else {
@@ -112,6 +121,7 @@ impl AtomType {
112121
key: #key,
113122
disps: &#disps,
114123
atoms: &#atoms,
124+
hashes: &#hashes
115125
};
116126
&SET
117127
}

0 commit comments

Comments
 (0)