@@ -9,7 +9,6 @@ use std::{
99 ptr:: { self , NonNull } ,
1010} ;
1111
12- use rustc_hash:: FxHasher ;
1312use triomphe:: ThinArc ;
1413
1514use crate :: {
@@ -204,10 +203,143 @@ impl Storage for &'_ mut AtomStore {
204203}
205204
206205#[ inline( never) ]
207- fn calc_hash ( text : & str ) -> u64 {
208- let mut hasher = FxHasher :: default ( ) ;
209- text. hash ( & mut hasher) ;
210- hasher. finish ( )
206+ pub ( crate ) const fn calc_hash ( text : & str ) -> u64 {
207+ hash_bytes ( text. as_bytes ( ) )
208+ }
209+
// The constants below carry no special meaning: they are consecutive
// hexadecimal digits of pi.

/// Initial value of the first hash lane (`s0`) in `hash_bytes`.
const SEED1: u64 = 0x243f_6a88_85a3_08d3;
/// Initial value of the second hash lane (`s1`) in `hash_bytes`.
const SEED2: u64 = 0x1319_8a2e_0370_7344;
/// XOR'ed into one multiplicand of the bulk loop in `hash_bytes` so that
/// all-zero input does not trivially collapse the state to zero.
const PREVENT_TRIVIAL_ZERO_COLLAPSE: u64 = 0xa409_3822_299f_31d0;
214+
/// Mixes two 64-bit words into one by multiplying them and folding the
/// product back down to 64 bits.
///
/// The implementation is selected by `target_pointer_width`:
///
/// * 64-bit targets fold the full 128-bit product (`low half ^ high half`).
/// * 32-bit targets combine two 32x32 -> 64 cross products instead, because a
///   full 64x64 -> 128 multiplication is prohibitively expensive there.
///
/// The two variants produce different values for the same inputs.
#[inline]
const fn multiply_mix(x: u64, y: u64) -> u64 {
    #[cfg(target_pointer_width = "64")]
    {
        // Full u64 x u64 -> u128 product. This is a single `mul` instruction
        // on x86-64, and one `mul` plus one `mulhi` on ARM64.
        let product = (x as u128) * (y as u128);

        // The middle bits of the full product fluctuate the most with small
        // changes in the input: those are the top bits of the low half and
        // the bottom bits of the high half. XOR'ing the two halves therefore
        // makes the entire output fluctuate with small input changes.
        //
        // Unfortunately both 2^64 + 1 and 2^64 - 1 have small prime factors,
        // otherwise combining with + or - could result in a really strong
        // hash, as:
        //     x * y = 2^64 * hi + lo = (-1) * hi + lo = lo - hi,   (mod 2^64 + 1)
        //     x * y = 2^64 * hi + lo =    1 * hi + lo = lo + hi,   (mod 2^64 - 1)
        // Multiplicative hashing is universal in a field (like mod p).
        (product as u64) ^ ((product >> 64) as u64)
    }

    #[cfg(target_pointer_width = "32")]
    {
        // Split both operands into 32-bit halves, kept in u64 so the partial
        // products below cannot overflow.
        const LOW_HALF: u64 = 0xffff_ffff;
        let x_lo = x & LOW_HALF;
        let x_hi = x >> 32;
        let y_lo = y & LOW_HALF;
        let y_hi = y >> 32;

        // Cross products: the low half of one operand times the high half of
        // the other.
        let lo_hi = x_lo * y_hi;
        let hi_lo = x_hi * y_lo;

        // Swap the halves of one cross product (a 32-bit rotation) so the
        // upper bits of one product combine with the lower bits of the other.
        lo_hi ^ ((hi_lo >> 32) | (hi_lo << 32))
    }
}
257+
// Const-compatible helper: decodes the eight bytes `bytes[offset..offset + 8]`
// as a little-endian u64. Panics through slice indexing if fewer than eight
// bytes are available starting at `offset`.
const fn read_u64_le(bytes: &[u8], offset: usize) -> u64 {
    u64::from_le_bytes([
        bytes[offset],
        bytes[offset + 1],
        bytes[offset + 2],
        bytes[offset + 3],
        bytes[offset + 4],
        bytes[offset + 5],
        bytes[offset + 6],
        bytes[offset + 7],
    ])
}
270+
// Const-compatible helper: decodes the four bytes `bytes[offset..offset + 4]`
// as a little-endian u32. Panics through slice indexing if fewer than four
// bytes are available starting at `offset`.
const fn read_u32_le(bytes: &[u8], offset: usize) -> u32 {
    u32::from_le_bytes([
        bytes[offset],
        bytes[offset + 1],
        bytes[offset + 2],
        bytes[offset + 3],
    ])
}
279+
280+ /// Copied from `hash_bytes` of `rustc-hash`.
281+ ///
282+ /// See: https://github.com/rust-lang/rustc-hash/blob/dc5c33f1283de2da64d8d7a06401d91aded03ad4/src/lib.rs#L252-L297
283+ ///
284+ /// ---
285+ ///
286+ /// A wyhash-inspired non-collision-resistant hash for strings/slices designed
287+ /// by Orson Peters, with a focus on small strings and small codesize.
288+ ///
289+ /// The 64-bit version of this hash passes the SMHasher3 test suite on the full
290+ /// 64-bit output, that is, f(hash_bytes(b) ^ f(seed)) for some good avalanching
291+ /// permutation f() passed all tests with zero failures. When using the 32-bit
292+ /// version of multiply_mix this hash has a few non-catastrophic failures where
293+ /// there are a handful more collisions than an optimal hash would give.
294+ ///
295+ /// We don't bother avalanching here as we'll feed this hash into a
296+ /// multiplication after which we take the high bits, which avalanches for us.
297+ #[ inline]
298+ #[ doc( hidden) ]
299+ const fn hash_bytes ( bytes : & [ u8 ] ) -> u64 {
300+ let len = bytes. len ( ) ;
301+ let mut s0 = SEED1 ;
302+ let mut s1 = SEED2 ;
303+
304+ if len <= 16 {
305+ // XOR the input into s0, s1.
306+ if len >= 8 {
307+ s0 ^= read_u64_le ( bytes, 0 ) ;
308+ s1 ^= read_u64_le ( bytes, len - 8 ) ;
309+ } else if len >= 4 {
310+ s0 ^= read_u32_le ( bytes, 0 ) as u64 ;
311+ s1 ^= read_u32_le ( bytes, len - 4 ) as u64 ;
312+ } else if len > 0 {
313+ let lo = bytes[ 0 ] ;
314+ let mid = bytes[ len / 2 ] ;
315+ let hi = bytes[ len - 1 ] ;
316+ s0 ^= lo as u64 ;
317+ s1 ^= ( ( hi as u64 ) << 8 ) | mid as u64 ;
318+ }
319+ } else {
320+ // Handle bulk (can partially overlap with suffix).
321+ let mut off = 0 ;
322+ while off < len - 16 {
323+ let x = read_u64_le ( bytes, off) ;
324+ let y = read_u64_le ( bytes, off + 8 ) ;
325+
326+ // Replace s1 with a mix of s0, x, and y, and s0 with s1.
327+ // This ensures the compiler can unroll this loop into two
328+ // independent streams, one operating on s0, the other on s1.
329+ //
330+ // Since zeroes are a common input we prevent an immediate trivial
331+ // collapse of the hash function by XOR'ing a constant with y.
332+ let t = multiply_mix ( s0 ^ x, PREVENT_TRIVIAL_ZERO_COLLAPSE ^ y) ;
333+ s0 = s1;
334+ s1 = t;
335+ off += 16 ;
336+ }
337+
338+ s0 ^= read_u64_le ( bytes, len - 16 ) ;
339+ s1 ^= read_u64_le ( bytes, len - 8 ) ;
340+ }
341+
342+ multiply_mix ( s0, s1) ^ ( len as u64 )
211343}
212344
213345type BuildEntryHasher = BuildHasherDefault < EntryHasher > ;
@@ -253,7 +385,11 @@ impl Hasher for EntryHasher {
253385
254386#[ cfg( test) ]
255387mod tests {
256- use crate :: { dynamic:: GLOBAL_DATA , global_atom_store_gc, Atom } ;
388+ use std:: hash:: { Hash , Hasher } ;
389+
390+ use rustc_hash:: FxHasher ;
391+
392+ use crate :: { atom, dynamic:: GLOBAL_DATA , global_atom_store_gc, Atom } ;
257393
258394 fn expect_size ( expected : usize ) {
259395 // This is a helper function to count the number of bytes in the global store.
@@ -327,4 +463,50 @@ mod tests {
327463 global_atom_store_gc ( ) ;
328464 expect_size ( 0 ) ;
329465 }
466+
// Ensure that the hash value is the same as the one generated by FxHasher.
//
// This is important for the `Borrow<str>` implementation to be correct: an
// atom and the `str` it borrows as must hash identically.
//
// Note that if we enable the `nightly` feature of `rustc-hash`, we need to
// remove `state.write_u8(0xff);` from the hash implementation of `Atom`.
// NOTE(review): this note originally named `RcStr`, which looks like a
// copy-paste leftover — confirm it refers to `Atom`.
#[test]
fn test_hash() {
    const LONG_STRING: &str = "A very long long long string that would not be inlined";

    // Check the copied `hash_bytes` directly against `FxHasher`. Hashing a
    // `str` writes its bytes followed by a `0xff` terminator, and `FxHasher`
    // compresses the bytes to a single `u64` before mixing them in. Replaying
    // those two writes with our own `hash_bytes` must therefore give the same
    // result. If this fails while the `Atom` assertions below are the only
    // other failures, the copied hash function has diverged from upstream.
    {
        let mut hasher = FxHasher::default();
        hasher.write_u64(super::hash_bytes(LONG_STRING.as_bytes()));
        hasher.write_u8(0xff);
        assert_eq!(hasher.finish(), fxhash(LONG_STRING));
    }

    // A dynamically created atom, a static atom and the plain `&str` must all
    // hash the same, both on their own and inside a larger hashed value.
    let interned = Atom::from(LONG_STRING);
    assert_eq!(fxhash(interned.clone()), fxhash(LONG_STRING));
    assert_eq!(fxhash(interned.clone()), fxhash(atom!(LONG_STRING)));
    assert_eq!(fxhash((1, interned, 1)), fxhash((1, LONG_STRING, 1)));
}
491+
492+ fn fxhash < T : Hash > ( value : T ) -> u64 {
493+ let mut hasher = FxHasher :: default ( ) ;
494+ value. hash ( & mut hasher) ;
495+ hasher. finish ( )
496+ }
497+
/// Atoms created with `atom!` must not be counted by the global store, while
/// atoms created with `Atom::new` from the same text are.
#[test]
fn static_items_are_not_in_the_store() {
    const VALUE: &str = "hello a long string that cannot be inline";

    // Creating the atom through the macro leaves the store empty.
    expect_size(0);
    let static_atom = atom!(VALUE);
    expect_size(0);

    // Creating it through `Atom::new` adds one entry.
    let stored_atom = Atom::new(VALUE);
    expect_size(1);

    // Dropping the stored atom does not shrink the store by itself...
    drop(stored_atom);
    expect_size(1);

    // ...the entry is only gone after an explicit garbage collection.
    global_atom_store_gc();
    drop(static_atom);
    expect_size(0);
}
330512}
0 commit comments