Commit b5174ea

Move the global hash map to its own module
1 parent a578c80 commit b5174ea

File tree

3 files changed: +138 -145 lines changed

src/atom.rs
src/dynamic_set.rs
src/lib.rs

src/atom.rs

Lines changed: 27 additions & 145 deletions
```diff
@@ -9,11 +9,10 @@
 
 #![allow(non_upper_case_globals)]
 
+use crate::dynamic_set::{Entry, DYNAMIC_SET};
 use debug_unreachable::debug_unreachable;
-use lazy_static::lazy_static;
 use phf_shared;
 use serde::{Deserialize, Deserializer, Serialize, Serializer};
-
 use std::borrow::Cow;
 use std::cmp::Ordering::{self, Equal};
 use std::fmt;
@@ -24,120 +23,18 @@ use std::num::NonZeroU64;
 use std::ops;
 use std::slice;
 use std::str;
-use std::sync::atomic::AtomicIsize;
 use std::sync::atomic::Ordering::SeqCst;
-use std::sync::Mutex;
 
 use self::UnpackedAtom::{Dynamic, Inline, Static};
 
 const DYNAMIC_TAG: u8 = 0b_00;
 const INLINE_TAG: u8 = 0b_01; // len in upper nybble
 const STATIC_TAG: u8 = 0b_10;
 const TAG_MASK: u64 = 0b_11;
-const ENTRY_ALIGNMENT: usize = 4; // Multiples have TAG_MASK bits unset, available for tagging.
 
 const MAX_INLINE_LEN: usize = 7;
-
 const STATIC_SHIFT_BITS: usize = 32;
 
-const NB_BUCKETS: usize = 1 << 12; // 4096
-const BUCKET_MASK: u64 = (1 << 12) - 1;
-
-struct StringCache {
-    buckets: Box<[Option<Box<StringCacheEntry>>; NB_BUCKETS]>,
-}
-
-lazy_static! {
-    static ref STRING_CACHE: Mutex<StringCache> = Mutex::new(StringCache::new());
-}
-
-struct StringCacheEntry {
-    next_in_bucket: Option<Box<StringCacheEntry>>,
-    hash: u64,
-    ref_count: AtomicIsize,
-    string: Box<str>,
-}
-
-impl StringCacheEntry {
-    fn new(next: Option<Box<StringCacheEntry>>, hash: u64, string: String) -> StringCacheEntry {
-        StringCacheEntry {
-            next_in_bucket: next,
-            hash: hash,
-            ref_count: AtomicIsize::new(1),
-            string: string.into_boxed_str(),
-        }
-    }
-}
-
-impl StringCache {
-    fn new() -> StringCache {
-        type T = Option<Box<StringCacheEntry>>;
-        let _static_assert_size_eq = std::mem::transmute::<T, usize>;
-        let vec = std::mem::ManuallyDrop::new(vec![0_usize; NB_BUCKETS]);
-        StringCache {
-            buckets: unsafe { Box::from_raw(vec.as_ptr() as *mut [T; NB_BUCKETS]) },
-        }
-    }
-
-    fn add(&mut self, string: Cow<str>, hash: u64) -> *mut StringCacheEntry {
-        let bucket_index = (hash & BUCKET_MASK) as usize;
-        {
-            let mut ptr: Option<&mut Box<StringCacheEntry>> = self.buckets[bucket_index].as_mut();
-
-            while let Some(entry) = ptr.take() {
-                if entry.hash == hash && &*entry.string == &*string {
-                    if entry.ref_count.fetch_add(1, SeqCst) > 0 {
-                        return &mut **entry;
-                    }
-                    // Uh-oh. The pointer's reference count was zero, which means someone may try
-                    // to free it. (Naive attempts to defend against this, for example having the
-                    // destructor check to see whether the reference count is indeed zero, don't
-                    // work due to ABA.) Thus we need to temporarily add a duplicate string to the
-                    // list.
-                    entry.ref_count.fetch_sub(1, SeqCst);
-                    break;
-                }
-                ptr = entry.next_in_bucket.as_mut();
-            }
-        }
-        debug_assert!(mem::align_of::<StringCacheEntry>() >= ENTRY_ALIGNMENT);
-        let string = string.into_owned();
-        let mut entry = Box::new(StringCacheEntry::new(
-            self.buckets[bucket_index].take(),
-            hash,
-            string,
-        ));
-        let ptr: *mut StringCacheEntry = &mut *entry;
-        self.buckets[bucket_index] = Some(entry);
-
-        ptr
-    }
-
-    fn remove(&mut self, ptr: *mut StringCacheEntry) {
-        let bucket_index = {
-            let value: &StringCacheEntry = unsafe { &*ptr };
-            debug_assert!(value.ref_count.load(SeqCst) == 0);
-            (value.hash & BUCKET_MASK) as usize
-        };
-
-        let mut current: &mut Option<Box<StringCacheEntry>> = &mut self.buckets[bucket_index];
-
-        loop {
-            let entry_ptr: *mut StringCacheEntry = match current.as_mut() {
-                Some(entry) => &mut **entry,
-                None => break,
-            };
-            if entry_ptr == ptr {
-                mem::drop(mem::replace(current, unsafe {
-                    (*entry_ptr).next_in_bucket.take()
-                }));
-                break;
-            }
-            current = unsafe { &mut (*entry_ptr).next_in_bucket };
-        }
-    }
-}
-
 
 /// A static `PhfStrSet`
 ///
 /// This trait is implemented by static sets of interned strings generated using
```
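Note on the tagging constants kept in src/atom.rs: a packed atom stores a two-bit tag in the low bits of its `u64`. This only works because every dynamic entry address is a multiple of `ENTRY_ALIGNMENT` (4), so the `TAG_MASK` bits of such a pointer are guaranteed zero and free to carry the tag; the constant now lives in `dynamic_set.rs` next to `Entry`, together with a test that the alignment really is sufficient. A minimal, self-contained sketch of the scheme, assuming only a 4-byte-aligned pointee; this is illustrative, not the crate's actual packing code:

```rust
const TAG_MASK: u64 = 0b_11;
const DYNAMIC_TAG: u8 = 0b_00;

// Pack a 4-byte-aligned pointer and a 2-bit tag into a single u64.
fn pack(ptr: *const u8, tag: u8) -> u64 {
    let addr = ptr as u64;
    debug_assert_eq!(addr & TAG_MASK, 0); // alignment keeps the low bits free
    addr | (tag as u64)
}

// Recover the tag and the original pointer.
fn unpack(data: u64) -> (u8, *const u8) {
    ((data & TAG_MASK) as u8, (data & !TAG_MASK) as *const u8)
}

fn main() {
    let x = 0_u32; // u32 has alignment 4, mirroring ENTRY_ALIGNMENT
    let ptr = &x as *const u32 as *const u8;
    let packed = pack(ptr, DYNAMIC_TAG);
    let (tag, unpacked) = unpack(packed);
    assert_eq!(tag, DYNAMIC_TAG);
    assert_eq!(unpacked, ptr);
}
```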
```diff
@@ -324,7 +221,7 @@ impl<Static: StaticAtomSet> Atom<Static> {
                 static_set.hashes[index as usize]
             }
             Dynamic(entry) => {
-                let entry = entry as *mut StringCacheEntry;
+                let entry = entry as *mut Entry;
                 u64_hash_as_u32(unsafe { (*entry).hash })
             }
             Inline(..) => u64_hash_as_u32(self.unsafe_data.get()),
@@ -384,7 +281,7 @@ impl<'a, Static: StaticAtomSet> From<Cow<'a, str>> for Atom<Static> {
             Inline(len as u8, buf)
         } else {
             let hash = (hash.g as u64) << 32 | (hash.f1 as u64);
-            Dynamic(STRING_CACHE.lock().unwrap().add(string_to_add, hash) as *mut ())
+            Dynamic(DYNAMIC_SET.lock().unwrap().insert(string_to_add, hash) as *mut ())
         }
     };
 
@@ -412,7 +309,7 @@ impl<Static: StaticAtomSet> Clone for Atom<Static> {
         unsafe {
             match from_packed_dynamic(self.unsafe_data.get()) {
                 Some(entry) => {
-                    let entry = entry as *mut StringCacheEntry;
+                    let entry = entry as *mut Entry;
                     (*entry).ref_count.fetch_add(1, SeqCst);
                 }
                 None => (),
```
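In the `From<Cow<str>>` hunk above, the two 32-bit halves of the precomputed phf hash (`g` and `f1`) are packed into a single `u64`; `Set::insert` in `dynamic_set.rs` then derives the bucket index from the low 12 bits of that value. A small worked example of the arithmetic; the `Hashes` struct here is a hypothetical stand-in for the phf hash type, not this crate's definition:

```rust
const BUCKET_MASK: u64 = (1 << 12) - 1; // matches dynamic_set.rs

// Hypothetical stand-in for the (g, f1) halves produced by phf hashing.
struct Hashes {
    g: u32,
    f1: u32,
}

fn main() {
    let hash = Hashes { g: 0xDEAD_BEEF, f1: 0x1234_5678 };
    // Same packing as the diff: g in the high 32 bits, f1 in the low 32.
    let packed = (hash.g as u64) << 32 | (hash.f1 as u64);
    // The bucket index comes from the low 12 bits, i.e. from f1 only.
    let bucket_index = (packed & BUCKET_MASK) as usize;
    assert_eq!(bucket_index, 0x678);
}
```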
```diff
@@ -430,16 +327,16 @@ impl<Static> Drop for Atom<Static> {
     fn drop(&mut self) {
         // Out of line to guide inlining.
         fn drop_slow<Static>(this: &mut Atom<Static>) {
-            STRING_CACHE
+            DYNAMIC_SET
                 .lock()
                 .unwrap()
-                .remove(this.unsafe_data.get() as *mut StringCacheEntry);
+                .remove(this.unsafe_data.get() as *mut Entry);
         }
 
         unsafe {
             match from_packed_dynamic(self.unsafe_data.get()) {
                 Some(entry) => {
-                    let entry = entry as *mut StringCacheEntry;
+                    let entry = entry as *mut Entry;
                     if (*entry).ref_count.fetch_sub(1, SeqCst) == 1 {
                         drop_slow(self);
                     }
@@ -466,7 +363,7 @@ impl<Static: StaticAtomSet> ops::Deref for Atom<Static> {
                 .get(idx as usize)
                 .expect("bad static atom"),
             Dynamic(entry) => {
-                let entry = entry as *mut StringCacheEntry;
+                let entry = entry as *mut Entry;
                 &(*entry).string
             }
         }
@@ -723,41 +620,26 @@ unsafe fn inline_orig_bytes<'a>(data: &'a NonZeroU64) -> &'a [u8] {
 
 // Some minor tests of internal layout here. See ../integration-tests for much
 // more.
-#[cfg(test)]
-mod tests {
-    use super::{DefaultAtom, StringCacheEntry, ENTRY_ALIGNMENT};
+#[test]
+fn assert_sizes() {
     use std::mem;
-
-    #[test]
-    fn assert_sizes() {
-        use std::mem;
-        struct EmptyWithDrop;
-        impl Drop for EmptyWithDrop {
-            fn drop(&mut self) {}
-        }
-        let compiler_uses_inline_drop_flags = mem::size_of::<EmptyWithDrop>() > 0;
-
-        // Guard against accidental changes to the sizes of things.
-        assert_eq!(
-            mem::size_of::<DefaultAtom>(),
-            if compiler_uses_inline_drop_flags {
-                16
-            } else {
-                8
-            }
-        );
-        assert_eq!(
-            mem::size_of::<Option<DefaultAtom>>(),
-            mem::size_of::<DefaultAtom>(),
-        );
-        assert_eq!(
-            mem::size_of::<super::StringCacheEntry>(),
-            8 + 4 * mem::size_of::<usize>()
-        );
+    struct EmptyWithDrop;
+    impl Drop for EmptyWithDrop {
+        fn drop(&mut self) {}
     }
+    let compiler_uses_inline_drop_flags = mem::size_of::<EmptyWithDrop>() > 0;
 
-    #[test]
-    fn string_cache_entry_alignment_is_sufficient() {
-        assert!(mem::align_of::<StringCacheEntry>() >= ENTRY_ALIGNMENT);
-    }
+    // Guard against accidental changes to the sizes of things.
+    assert_eq!(
+        mem::size_of::<DefaultAtom>(),
+        if compiler_uses_inline_drop_flags {
+            16
+        } else {
+            8
+        }
+    );
+    assert_eq!(
+        mem::size_of::<Option<DefaultAtom>>(),
+        mem::size_of::<DefaultAtom>(),
+    );
 }
```
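The rewritten `assert_sizes` test checks, among other things, that `Option<DefaultAtom>` is no bigger than `DefaultAtom` itself. That holds because the packed representation is a `NonZeroU64` (visible in the `inline_orig_bytes` signature above), so the compiler can use the forbidden all-zero pattern as `None`. A standalone demonstration of that niche optimization, independent of this crate:

```rust
use std::mem::size_of;
use std::num::NonZeroU64;

fn main() {
    // NonZeroU64 forbids 0, so Option reuses it as the None encoding
    // and needs no extra discriminant.
    assert_eq!(size_of::<Option<NonZeroU64>>(), size_of::<NonZeroU64>());
    // A plain u64 has no forbidden value, so Option<u64> must grow.
    assert!(size_of::<Option<u64>>() > size_of::<u64>());
}
```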

src/dynamic_set.rs

Lines changed: 110 additions & 0 deletions
```diff
@@ -0,0 +1,110 @@
+// Copyright 2014 The Servo Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+use lazy_static::lazy_static;
+use std::borrow::Cow;
+use std::mem;
+use std::sync::atomic::AtomicIsize;
+use std::sync::atomic::Ordering::SeqCst;
+use std::sync::Mutex;
+
+const NB_BUCKETS: usize = 1 << 12; // 4096
+const BUCKET_MASK: u64 = (1 << 12) - 1;
+
+pub(crate) struct Set {
+    buckets: Box<[Option<Box<Entry>>; NB_BUCKETS]>,
+}
+
+pub(crate) struct Entry {
+    pub(crate) string: Box<str>,
+    pub(crate) hash: u64,
+    pub(crate) ref_count: AtomicIsize,
+    next_in_bucket: Option<Box<Entry>>,
+}
+
+// Addresses are multiples of this,
+// and therefore have TAG_MASK bits unset, available for tagging.
+pub(crate) const ENTRY_ALIGNMENT: usize = 4;
+
+#[test]
+fn entry_alignment_is_sufficient() {
+    assert!(mem::align_of::<Entry>() >= ENTRY_ALIGNMENT);
+}
+
+lazy_static! {
+    pub(crate) static ref DYNAMIC_SET: Mutex<Set> = Mutex::new({
+        type T = Option<Box<Entry>>;
+        let _static_assert_size_eq = std::mem::transmute::<T, usize>;
+        let vec = std::mem::ManuallyDrop::new(vec![0_usize; NB_BUCKETS]);
+        Set {
+            buckets: unsafe { Box::from_raw(vec.as_ptr() as *mut [T; NB_BUCKETS]) },
+        }
+    });
+}
+
+impl Set {
+    pub(crate) fn insert(&mut self, string: Cow<str>, hash: u64) -> *mut Entry {
+        let bucket_index = (hash & BUCKET_MASK) as usize;
+        {
+            let mut ptr: Option<&mut Box<Entry>> = self.buckets[bucket_index].as_mut();
+
+            while let Some(entry) = ptr.take() {
+                if entry.hash == hash && &*entry.string == &*string {
+                    if entry.ref_count.fetch_add(1, SeqCst) > 0 {
+                        return &mut **entry;
+                    }
+                    // Uh-oh. The pointer's reference count was zero, which means someone may try
+                    // to free it. (Naive attempts to defend against this, for example having the
+                    // destructor check to see whether the reference count is indeed zero, don't
+                    // work due to ABA.) Thus we need to temporarily add a duplicate string to the
+                    // list.
+                    entry.ref_count.fetch_sub(1, SeqCst);
+                    break;
+                }
+                ptr = entry.next_in_bucket.as_mut();
+            }
+        }
+        debug_assert!(mem::align_of::<Entry>() >= ENTRY_ALIGNMENT);
+        let string = string.into_owned();
+        let mut entry = Box::new(Entry {
+            next_in_bucket: self.buckets[bucket_index].take(),
+            hash,
+            ref_count: AtomicIsize::new(1),
+            string: string.into_boxed_str(),
+        });
+        let ptr: *mut Entry = &mut *entry;
+        self.buckets[bucket_index] = Some(entry);
+
+        ptr
+    }
+
+    pub(crate) fn remove(&mut self, ptr: *mut Entry) {
+        let bucket_index = {
+            let value: &Entry = unsafe { &*ptr };
+            debug_assert!(value.ref_count.load(SeqCst) == 0);
+            (value.hash & BUCKET_MASK) as usize
+        };
+
+        let mut current: &mut Option<Box<Entry>> = &mut self.buckets[bucket_index];
+
+        loop {
+            let entry_ptr: *mut Entry = match current.as_mut() {
+                Some(entry) => &mut **entry,
+                None => break,
+            };
+            if entry_ptr == ptr {
+                mem::drop(mem::replace(current, unsafe {
+                    (*entry_ptr).next_in_bucket.take()
+                }));
+                break;
+            }
+            current = unsafe { &mut (*entry_ptr).next_in_bucket };
+        }
+    }
+}
```
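Two details of the new module deserve a note. First, the `DYNAMIC_SET` initializer builds its 4096 buckets without constructing a large array on the stack: it allocates a zeroed `Vec<usize>` and adopts that allocation as the bucket array, relying on the null-pointer optimization that makes all-zero bits a valid `None` for `Option<Box<T>>` (the unused `transmute` binding is a compile-time check that the two types have the same size). A standalone sketch of the same trick under those assumptions; `zeroed_buckets` and the `String` payload are inventions of this example:

```rust
use std::mem::ManuallyDrop;

const N: usize = 1 << 12; // 4096, as in dynamic_set.rs

type Bucket = Option<Box<String>>; // concrete stand-in for Option<Box<Entry>>

// Heap-allocate N empty buckets without building a huge array on the stack.
fn zeroed_buckets() -> Box<[Bucket; N]> {
    // Compile-time size check, as in the diff: Bucket must be pointer-sized
    // for the zeroed-usize reinterpretation below to be sound.
    let _assert_same_size = std::mem::transmute::<Bucket, usize>;
    // Leak the Vec's buffer, then adopt it as a boxed fixed-size array.
    let vec = ManuallyDrop::new(vec![0_usize; N]);
    unsafe { Box::from_raw(vec.as_ptr() as *mut [Bucket; N]) }
}

fn main() {
    let buckets = zeroed_buckets();
    assert!(buckets.iter().all(|bucket| bucket.is_none()));
}
```

Second, the reference-counting protocol is unchanged by the move: `insert` either bumps the `ref_count` of a matching entry or chains a fresh one at the head of its bucket, and `remove` (called from `Atom`'s `Drop` once the count reaches zero) unlinks the entry from its chain; the in-code comment explains why a zero count observed during `insert` forces a temporary duplicate rather than a revival of the dying entry.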

src/lib.rs

Lines changed: 1 addition & 0 deletions
```diff
@@ -108,3 +108,4 @@
 pub use crate::atom::{Atom, DefaultAtom, EmptyStaticAtomSet, PhfStrSet, StaticAtomSet};
 
 mod atom;
+mod dynamic_set;
```
