Skip to content

Commit 1a92659

Browse files
committed
Remove UnpackedAtom, inline its code
1 parent e25b4c6 commit 1a92659

File tree

2 files changed

+78
-182
lines changed

2 files changed

+78
-182
lines changed

src/atom.rs

Lines changed: 74 additions & 179 deletions
Original file line numberDiff line numberDiff line change
@@ -25,12 +25,12 @@ use std::slice;
2525
use std::str;
2626
use std::sync::atomic::Ordering::SeqCst;
2727

28-
use self::UnpackedAtom::{Dynamic, Inline, Static};
29-
3028
const DYNAMIC_TAG: u8 = 0b_00;
3129
const INLINE_TAG: u8 = 0b_01; // len in upper nybble
3230
const STATIC_TAG: u8 = 0b_10;
3331
const TAG_MASK: u64 = 0b_11;
32+
const LEN_OFFSET: u64 = 4;
33+
const LEN_MASK: u64 = 0xF0;
3434

3535
const MAX_INLINE_LEN: usize = 7;
3636
const STATIC_SHIFT_BITS: usize = 32;
@@ -151,11 +151,6 @@ impl<'a, Static: StaticAtomSet> From<&'a Atom<Static>> for Atom<Static> {
151151
}
152152
}
153153

154-
fn u64_hash_as_u32(h: u64) -> u32 {
155-
// This may or may not be great...
156-
((h >> 32) ^ h) as u32
157-
}
158-
159154
// FIXME: bound removed from the struct definition before of this error for pack_static:
160155
// "error[E0723]: trait bounds other than `Sized` on const fn parameters are unstable"
161156
// https://github.com/rust-lang/rust/issues/57563
@@ -166,20 +161,19 @@ impl<Static> Atom<Static> {
166161
pub const fn pack_static(n: u32) -> Self {
167162
Self {
168163
unsafe_data: unsafe {
169-
// STATIC_TAG ensure this is non-zero
164+
// STATIC_TAG ensures this is non-zero
170165
NonZeroU64::new_unchecked((STATIC_TAG as u64) | ((n as u64) << STATIC_SHIFT_BITS))
171166
},
172167
phantom: PhantomData,
173168
}
174169
}
175-
}
176170

177-
impl<Static: StaticAtomSet> Atom<Static> {
178-
#[inline(always)]
179-
unsafe fn unpack(&self) -> UnpackedAtom {
180-
UnpackedAtom::from_packed(self.unsafe_data)
171+
fn tag(&self) -> u8 {
172+
(self.unsafe_data.get() & TAG_MASK) as u8
181173
}
174+
}
182175

176+
impl<Static: StaticAtomSet> Atom<Static> {
183177
/// Return the internal repersentation. For testing.
184178
#[doc(hidden)]
185179
pub fn unsafe_data(&self) -> u64 {
@@ -189,42 +183,39 @@ impl<Static: StaticAtomSet> Atom<Static> {
189183
/// Return true if this is a static Atom. For testing.
190184
#[doc(hidden)]
191185
pub fn is_static(&self) -> bool {
192-
match unsafe { self.unpack() } {
193-
Static(..) => true,
194-
_ => false,
195-
}
186+
self.tag() == STATIC_TAG
196187
}
197188

198189
/// Return true if this is a dynamic Atom. For testing.
199190
#[doc(hidden)]
200191
pub fn is_dynamic(&self) -> bool {
201-
match unsafe { self.unpack() } {
202-
Dynamic(..) => true,
203-
_ => false,
204-
}
192+
self.tag() == DYNAMIC_TAG
205193
}
206194

207195
/// Return true if this is an inline Atom. For testing.
208196
#[doc(hidden)]
209197
pub fn is_inline(&self) -> bool {
210-
match unsafe { self.unpack() } {
211-
Inline(..) => true,
212-
_ => false,
213-
}
198+
self.tag() == INLINE_TAG
199+
}
200+
201+
fn static_index(&self) -> u64 {
202+
self.unsafe_data.get() >> STATIC_SHIFT_BITS
214203
}
215204

216205
/// Get the hash of the string as it is stored in the set.
217206
pub fn get_hash(&self) -> u32 {
218-
match unsafe { self.unpack() } {
219-
Static(index) => {
220-
let static_set = Static::get();
221-
static_set.hashes[index as usize]
222-
}
223-
Dynamic(entry) => {
224-
let entry = entry as *mut Entry;
207+
match self.tag() {
208+
DYNAMIC_TAG => {
209+
let entry = self.unsafe_data.get() as *const Entry;
225210
unsafe { (*entry).hash }
226211
}
227-
Inline(..) => u64_hash_as_u32(self.unsafe_data.get()),
212+
STATIC_TAG => Static::get().hashes[self.static_index() as usize],
213+
INLINE_TAG => {
214+
let data = self.unsafe_data.get();
215+
// This may or may not be great...
216+
((data >> 32) ^ data) as u32
217+
}
218+
_ => unsafe { debug_unreachable!() },
228219
}
229220
}
230221
}
@@ -265,31 +256,40 @@ impl<Static: StaticAtomSet> PartialEq<String> for Atom<Static> {
265256
}
266257

267258
impl<'a, Static: StaticAtomSet> From<Cow<'a, str>> for Atom<Static> {
268-
#[inline]
269259
fn from(string_to_add: Cow<'a, str>) -> Self {
270260
let static_set = Static::get();
271261
let hash = phf_shared::hash(&*string_to_add, &static_set.key);
272262
let index = phf_shared::get_index(&hash, static_set.disps, static_set.atoms.len());
273263

274-
let unpacked = if static_set.atoms[index as usize] == string_to_add {
275-
Static(index)
264+
if static_set.atoms[index as usize] == string_to_add {
265+
Self::pack_static(index)
276266
} else {
277267
let len = string_to_add.len();
278268
if len <= MAX_INLINE_LEN {
279-
let mut buf: [u8; 7] = [0; 7];
280-
buf[..len].copy_from_slice(string_to_add.as_bytes());
281-
Inline(len as u8, buf)
269+
let mut data: u64 = (INLINE_TAG as u64) | ((len as u64) << LEN_OFFSET);
270+
{
271+
let dest = inline_atom_slice_mut(&mut data);
272+
dest[..len].copy_from_slice(string_to_add.as_bytes())
273+
}
274+
Atom {
275+
// INLINE_TAG ensures this is never zero
276+
unsafe_data: unsafe { NonZeroU64::new_unchecked(data) },
277+
phantom: PhantomData,
278+
}
282279
} else {
283-
Dynamic(
284-
DYNAMIC_SET
285-
.lock()
286-
.unwrap()
287-
.insert(string_to_add, hash.g) as *mut (),
288-
)
280+
let ptr: std::ptr::NonNull<Entry> = DYNAMIC_SET
281+
.lock()
282+
.unwrap()
283+
.insert(string_to_add, hash.g);
284+
let data = ptr.as_ptr() as u64;
285+
debug_assert!(0 == data & TAG_MASK);
286+
Atom {
287+
// The address of a ptr::NonNull is non-zero
288+
unsafe_data: unsafe { NonZeroU64::new_unchecked(data) },
289+
phantom: PhantomData,
290+
}
289291
}
290-
};
291-
292-
unsafe { unpacked.pack() }
292+
}
293293
}
294294
}
295295

@@ -310,44 +310,31 @@ impl<Static: StaticAtomSet> From<String> for Atom<Static> {
310310
impl<Static: StaticAtomSet> Clone for Atom<Static> {
311311
#[inline(always)]
312312
fn clone(&self) -> Self {
313-
unsafe {
314-
match from_packed_dynamic(self.unsafe_data.get()) {
315-
Some(entry) => {
316-
let entry = entry as *mut Entry;
317-
(*entry).ref_count.fetch_add(1, SeqCst);
318-
}
319-
None => (),
320-
}
321-
}
322-
Atom {
323-
unsafe_data: self.unsafe_data,
324-
phantom: PhantomData,
313+
if self.tag() == DYNAMIC_TAG {
314+
let entry = self.unsafe_data.get() as *const Entry;
315+
unsafe { &*entry }.ref_count.fetch_add(1, SeqCst);
325316
}
317+
Atom { ..*self }
326318
}
327319
}
328320

329321
impl<Static> Drop for Atom<Static> {
330322
#[inline]
331323
fn drop(&mut self) {
324+
if self.tag() == DYNAMIC_TAG {
325+
let entry = self.unsafe_data.get() as *const Entry;
326+
if unsafe { &*entry }.ref_count.fetch_sub(1, SeqCst) == 1 {
327+
drop_slow(self)
328+
}
329+
}
330+
332331
// Out of line to guide inlining.
333332
fn drop_slow<Static>(this: &mut Atom<Static>) {
334333
DYNAMIC_SET
335334
.lock()
336335
.unwrap()
337336
.remove(this.unsafe_data.get() as *mut Entry);
338337
}
339-
340-
unsafe {
341-
match from_packed_dynamic(self.unsafe_data.get()) {
342-
Some(entry) => {
343-
let entry = entry as *mut Entry;
344-
if (*entry).ref_count.fetch_sub(1, SeqCst) == 1 {
345-
drop_slow(self);
346-
}
347-
}
348-
_ => (),
349-
}
350-
}
351338
}
352339
}
353340

@@ -357,19 +344,18 @@ impl<Static: StaticAtomSet> ops::Deref for Atom<Static> {
357344
#[inline]
358345
fn deref(&self) -> &str {
359346
unsafe {
360-
match self.unpack() {
361-
Inline(..) => {
362-
let buf = inline_orig_bytes(&self.unsafe_data);
363-
str::from_utf8_unchecked(buf)
364-
}
365-
Static(idx) => Static::get()
366-
.atoms
367-
.get(idx as usize)
368-
.expect("bad static atom"),
369-
Dynamic(entry) => {
370-
let entry = entry as *mut Entry;
347+
match self.tag() {
348+
DYNAMIC_TAG => {
349+
let entry = self.unsafe_data.get() as *const Entry;
371350
&(*entry).string
372351
}
352+
INLINE_TAG => {
353+
let len = (self.unsafe_data() & LEN_MASK) >> LEN_OFFSET;
354+
let src = inline_atom_slice(&self.unsafe_data);
355+
str::from_utf8_unchecked(&src[..(len as usize)])
356+
}
357+
STATIC_TAG => Static::get().atoms[self.static_index() as usize],
358+
_ => debug_unreachable!(),
373359
}
374360
}
375361
}
@@ -386,10 +372,11 @@ impl<Static: StaticAtomSet> fmt::Debug for Atom<Static> {
386372
#[inline]
387373
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
388374
let ty_str = unsafe {
389-
match self.unpack() {
390-
Dynamic(..) => "dynamic",
391-
Inline(..) => "inline",
392-
Static(..) => "static",
375+
match self.tag() {
376+
DYNAMIC_TAG => "dynamic",
377+
INLINE_TAG => "inline",
378+
STATIC_TAG => "static",
379+
_ => debug_unreachable!(),
393380
}
394381
};
395382

@@ -502,20 +489,6 @@ impl<Static: StaticAtomSet> Atom<Static> {
502489
}
503490
}
504491

505-
// Atoms use a compact representation which fits this enum in a single u64.
506-
// Inlining avoids actually constructing the unpacked representation in memory.
507-
#[allow(missing_copy_implementations)]
508-
enum UnpackedAtom {
509-
/// Pointer to a dynamic table entry. Must be 16-byte aligned!
510-
Dynamic(*mut ()),
511-
512-
/// Length + bytes of string.
513-
Inline(u8, [u8; 7]),
514-
515-
/// Index in static interning table.
516-
Static(u32),
517-
}
518-
519492
#[inline(always)]
520493
fn inline_atom_slice(x: &NonZeroU64) -> &[u8] {
521494
unsafe {
@@ -544,84 +517,6 @@ fn inline_atom_slice_mut(x: &mut u64) -> &mut [u8] {
544517
}
545518
}
546519

547-
impl UnpackedAtom {
548-
/// Pack a key, fitting it into a u64 with flags and data. See `string_cache_shared` for
549-
/// hints for the layout.
550-
#[inline(always)]
551-
unsafe fn pack<Static: StaticAtomSet>(self) -> Atom<Static> {
552-
match self {
553-
Static(n) => Atom::pack_static(n),
554-
Dynamic(p) => {
555-
let data = p as u64;
556-
debug_assert!(0 == data & TAG_MASK);
557-
Atom {
558-
// Callers are responsible for calling this with a valid, non-null pointer
559-
unsafe_data: NonZeroU64::new_unchecked(data),
560-
phantom: PhantomData,
561-
}
562-
}
563-
Inline(len, buf) => {
564-
debug_assert!((len as usize) <= MAX_INLINE_LEN);
565-
let mut data: u64 = (INLINE_TAG as u64) | ((len as u64) << 4);
566-
{
567-
let dest = inline_atom_slice_mut(&mut data);
568-
dest.copy_from_slice(&buf)
569-
}
570-
Atom {
571-
// INLINE_TAG ensures this is never zero
572-
unsafe_data: NonZeroU64::new_unchecked(data),
573-
phantom: PhantomData,
574-
}
575-
}
576-
}
577-
}
578-
579-
/// Unpack a key, extracting information from a single u64 into useable structs.
580-
#[inline(always)]
581-
unsafe fn from_packed(data: NonZeroU64) -> UnpackedAtom {
582-
debug_assert!(DYNAMIC_TAG == 0); // Dynamic is untagged
583-
584-
match (data.get() & TAG_MASK) as u8 {
585-
DYNAMIC_TAG => Dynamic(data.get() as *mut ()),
586-
STATIC_TAG => Static((data.get() >> STATIC_SHIFT_BITS) as u32),
587-
INLINE_TAG => {
588-
let len = ((data.get() & 0xf0) >> 4) as usize;
589-
debug_assert!(len <= MAX_INLINE_LEN);
590-
let mut buf: [u8; 7] = [0; 7];
591-
let src = inline_atom_slice(&data);
592-
buf.copy_from_slice(src);
593-
Inline(len as u8, buf)
594-
}
595-
_ => debug_unreachable!(),
596-
}
597-
}
598-
}
599-
600-
/// Used for a fast path in Clone and Drop.
601-
#[inline(always)]
602-
unsafe fn from_packed_dynamic(data: u64) -> Option<*mut ()> {
603-
if (DYNAMIC_TAG as u64) == (data & TAG_MASK) {
604-
Some(data as *mut ())
605-
} else {
606-
None
607-
}
608-
}
609-
610-
/// For as_slice on inline atoms, we need a pointer into the original
611-
/// string contents.
612-
///
613-
/// It's undefined behavior to call this on a non-inline atom!!
614-
#[inline(always)]
615-
unsafe fn inline_orig_bytes<'a>(data: &'a NonZeroU64) -> &'a [u8] {
616-
match UnpackedAtom::from_packed(*data) {
617-
Inline(len, _) => {
618-
let src = inline_atom_slice(&data);
619-
&src[..(len as usize)]
620-
}
621-
_ => debug_unreachable!(),
622-
}
623-
}
624-
625520
// Some minor tests of internal layout here. See ../integration-tests for much
626521
// more.
627522
#[test]

0 commit comments

Comments
 (0)