# This file is a part of Julia. License is MIT: https://julialang.org/license
22
3- # # hashing a single value ##
# Default seed used by the one-argument `hash(x)`; the literal chosen has the
# same width as the platform `UInt`.
const HASH_SEED = UInt == UInt64 ? 0xbdd89aa982704029 : 0xeabe9406
# Fixed 64-bit constants handed to `hash_bytes` when hashing byte buffers.
const HASH_SECRET = (
    0x2d358dccaa6c78a5,
    0x8bb84b93962eacc9,
    0x4b33a62ed433d4a3,
)
49
510"""
611 hash(x[, h::UInt])::UInt
@@ -17,75 +22,52 @@ The hash value may change when a new Julia process is started.
1722
1823```jldoctest; filter = r"0x[0-9a-f]{16}"
1924julia> a = hash(10)
20- 0x95ea2955abd45275
25+ 0x759d18cc5346a65f
2126
2227julia> hash(10, a) # only use the output of another hash function as the second argument
23- 0xd42bad54a8575b16
28+ 0x03158cd61b1b0bd1
2429```
2530
2631See also: [`objectid`](@ref), [`Dict`](@ref), [`Set`](@ref).
2732"""
hash(data::Any) = hash(data, HASH_SEED)
# A weak reference hashes like the value it currently refers to.
hash(w::WeakRef, h::UInt) = hash(w.value, h)
3035
# Types can't be deleted, so marking as total allows the compiler to look up the hash
hash(T::Type, h::UInt) =
    hash((@assume_effects :total ccall(:jl_type_hash, UInt, (Any,), T)), h)
# Fallback for arbitrary objects: hash the object's identity together with `h`.
hash(@nospecialize(data), h::UInt) = hash(objectid(data), h)
3340
34- # # hashing general objects ##
35-
36- hash (@nospecialize (x), h:: UInt ) = hash_uint (3 h - objectid (x))
37-
38- hash (x:: Symbol ) = objectid (x)
39-
40- # # core data hashing functions ##
41-
42- function hash_64_64 (n:: UInt64 )
43- a:: UInt64 = n
44- a = ~ a + a << 21
45- a = a ⊻ a >> 24
46- a = a + a << 3 + a << 8
47- a = a ⊻ a >> 14
48- a = a + a << 2 + a << 4
49- a = a ⊻ a >> 28
50- a = a + a << 31
51- return a
# Full 64×64 → 128-bit product of `a` and `b`, returned as `(high, low)` 64-bit halves.
function mul_parts(a::UInt64, b::UInt64)
    p = widemul(a, b)
    return (p >> 64) % UInt64, p % UInt64
end
53-
54- function hash_64_32 (n:: UInt64 )
55- a:: UInt64 = n
56- a = ~ a + a << 18
57- a = a ⊻ a >> 31
58- a = a * 21
59- a = a ⊻ a >> 11
60- a = a + a << 6
61- a = a ⊻ a >> 22
62- return a % UInt32
# Mix two words by XOR-ing the high and low halves of their 128-bit product.
function hash_mix(a::UInt64, b::UInt64)
    hi, lo = mul_parts(a, b)
    return hi ⊻ lo
end
46+
# faster-but-weaker than hash_mix intended for small keys
# Combines the value `x` with the running hash `h` as `3h - x` (wrapping arithmetic).
hash_mix_linear(x::UInt64, h::UInt) = 3h - x
# Scramble a 64-bit word with a xor-shift / multiply / xor-shift sequence.
function hash_finalizer(x::UInt64)
    x ⊻= (x >> 32)
    x *= 0x63652a4cd374b267
    x ⊻= (x >> 33)
    return x
end
6455
65- function hash_32_32 (n:: UInt32 )
66- a:: UInt32 = n
67- a = a + 0x7ed55d16 + a << 12
68- a = a ⊻ 0xc761c23c ⊻ a >> 19
69- a = a + 0x165667b1 + a << 5
70- a = a + 0xd3a2646c ⊻ a << 9
71- a = a + 0xfd7046c5 + a << 3
72- a = a ⊻ 0xb55a4f09 ⊻ a >> 16
73- return a
74- end
# Width-specific entry points, all built on `hash_finalizer`:
# 64 → 64 bits, 64 → 32 bits (truncating), and 32 → 32 bits (widen, then truncate).
hash_64_64(data::UInt64) = hash_finalizer(data)
hash_64_32(data::UInt64) = hash_64_64(data) % UInt32
hash_32_32(data::UInt32) = hash_64_32(UInt64(data))
7559
# Select the word-size-appropriate variants: `hash_uint64` takes a `UInt64`,
# `hash_uint` takes a platform `UInt`.
if UInt === UInt64
    const hash_uint64 = hash_64_64
    const hash_uint = hash_64_64
else
    const hash_uint64 = hash_64_32
    const hash_uint = hash_32_32
end
8367
84- # # efficient value-based hashing of integers ##
85-
86- hash (x:: Int64 , h:: UInt ) = hash_uint64 (bitcast (UInt64, x)) - 3 h
87- hash (x:: UInt64 , h:: UInt ) = hash_uint64 (x) - 3 h
88- hash (x:: Union{Bool,Int8,UInt8,Int16,UInt16,Int32,UInt32} , h:: UInt ) = hash (Int64 (x), h)
# Fixed-width integers: everything funnels into the `UInt64` method, which mixes
# the value with `h` linearly and then finalizes.
hash(x::UInt64, h::UInt) = hash_uint64(hash_mix_linear(x, h))
hash(x::Int64, h::UInt) = hash(bitcast(UInt64, x), h)
hash(x::Union{Bool, Int8, UInt8, Int16, UInt16, Int32, UInt32}, h::UInt) = hash(Int64(x), h)
8971
9072function hash_integer (n:: Integer , h:: UInt )
9173 h ⊻= hash_uint ((n % UInt) ⊻ h)
10082
## efficient value-based hashing of floats ##
10284
103- const hx_NaN = hash_uint64 (reinterpret (UInt64, NaN ))
# Precomputed hash of the bit pattern of the `NaN` literal; the float methods
# XOR it with `h` for any NaN input.
const hx_NaN = hash(reinterpret(UInt64, NaN))
10486function hash (x:: Float64 , h:: UInt )
10587 # see comments on trunc and hash(Real, UInt)
10688 if typemin (Int64) <= x < typemax (Int64)
@@ -116,7 +98,7 @@ function hash(x::Float64, h::UInt)
11698 elseif isnan (x)
11799 return hx_NaN ⊻ h # NaN does not have a stable bit pattern
118100 end
119- return hash_uint64 (bitcast (UInt64, x)) - 3 h
101+ return hash (bitcast (UInt64, x), h)
120102end
121103
# A `Float32` hashes as the `Float64` it converts to.
hash(x::Float32, h::UInt) = hash(Float64(x), h)
@@ -131,7 +113,7 @@ function hash(x::Float16, h::UInt)
131113 elseif isnan (x)
132114 return hx_NaN ⊻ h # NaN does not have a stable bit pattern
133115 end
134- return hash_uint64 (bitcast (UInt64, Float64 (x))) - 3 h
116+ return hash (bitcast (UInt64, Float64 (x)), h)
135117end
136118
## generic hashing for rational values ##
@@ -180,21 +162,100 @@ end
180162
181163
## symbol & expression hashing ##
183-
# Each node type XORs its own salt into `h` before hashing its contents, so an
# `Expr` and a `QuoteNode` do not start from the same seed.
if UInt === UInt64
    hash(x::Expr, h::UInt) = hash(x.args, hash(x.head, h ⊻ 0x83c7900696d26dc6))
    hash(x::QuoteNode, h::UInt) = hash(x.value, h ⊻ 0x2c97bf8b3de87020)
else
    # 32-bit salts must differ too; the `Expr` salt is the low half of the
    # 64-bit `Expr` salt above.
    hash(x::Expr, h::UInt) = hash(x.args, hash(x.head, h ⊻ 0x96d26dc6))
    hash(x::QuoteNode, h::UInt) = hash(x.value, h ⊻ 0x469d72af)
end
191172
192- # # hashing strings ##
# One-argument hash of a `Symbol` is simply its `objectid`.
hash(x::Symbol) = objectid(x)
193174
194- const memhash = UInt === UInt64 ? :memhash_seed : :memhash32_seed
195- const memhash_seed = UInt === UInt64 ? 0x71e729fd56419c81 : 0x56419c81
196175
197- @assume_effects :total function hash (s:: String , h:: UInt )
198- h += memhash_seed
199- ccall (memhash, UInt, (Ptr{UInt8}, Csize_t, UInt32), s, sizeof (s), h % UInt32) + h
# Read a `T` starting at the `i`-th byte (1-based) of `ptr`.
# NOTE(review): despite the name, no byte swap is applied, so the value is read
# in the platform's native byte order.
load_le(::Type{T}, ptr::Ptr{UInt8}, i) where {T <: Union{UInt32, UInt64}} =
    unsafe_load(convert(Ptr{T}, ptr + i - 1))
178+
# Pack a 1- to 3-byte buffer into one word: the first byte goes to bits 56-63,
# the byte at index `div(n, 2) + 1` to bits 32-39, and the last byte to bits 0-7.
# `n` must be at least 1, since the first and `n`-th bytes are read unconditionally.
function read_small(ptr::Ptr{UInt8}, n::Int)
    first_byte = UInt64(unsafe_load(ptr))
    middle_byte = UInt64(unsafe_load(ptr, div(n, 2) + 1))
    last_byte = UInt64(unsafe_load(ptr, n))
    return (first_byte << 56) | (middle_byte << 32) | last_byte
end
184+
# Hash the `n` bytes starting at `ptr`, combining them with `seed` and the three
# `secret` constants, and return a `UInt64`.
# Adapted with gratitude from [rapidhash](https://github.com/Nicoshev/rapidhash)
#
# Inputs of at most 16 bytes are read straight into the two lanes `a` and `b`.
# Longer inputs are first folded into `seed` (48-byte stripes, then up to two
# 16-byte steps), after which the final 16 bytes of the buffer become `a` and `b`.
# `ptr` is read with raw loads, so it must point to at least `n` readable bytes.
@assume_effects :terminates_globally function hash_bytes(
        ptr::Ptr{UInt8},
        n::Int,
        seed::UInt64,
        secret::NTuple{3, UInt64}
    )
    buflen = UInt64(n)
    seed = seed ⊻ (hash_mix(seed ⊻ secret[1], secret[2]) ⊻ buflen)

    a = zero(UInt64)
    b = zero(UInt64)

    if buflen ≤ 16
        if buflen ≥ 4
            # first and last four bytes of the buffer
            a = (UInt64(load_le(UInt32, ptr, 1)) << 32) |
                UInt64(load_le(UInt32, ptr, n - 3))

            # delta is 0 for lengths 4-7 and 4 for lengths 8-16
            delta = (buflen & 24) >>> (buflen >>> 3)
            b = (UInt64(load_le(UInt32, ptr, delta + 1)) << 32) |
                UInt64(load_le(UInt32, ptr, n - 3 - delta))
        elseif buflen > 0
            a = read_small(ptr, n)
        end
    else
        pos = 1
        i = buflen
        # The stripe loop below only exits once fewer than 48 bytes remain, so
        # this guard is entered at most once.
        if i ≥ 48
            see1 = seed
            see2 = seed
            while i ≥ 48
                seed = hash_mix(
                    load_le(UInt64, ptr, pos) ⊻ secret[1],
                    load_le(UInt64, ptr, pos + 8) ⊻ seed
                )
                see1 = hash_mix(
                    load_le(UInt64, ptr, pos + 16) ⊻ secret[2],
                    load_le(UInt64, ptr, pos + 24) ⊻ see1
                )
                see2 = hash_mix(
                    load_le(UInt64, ptr, pos + 32) ⊻ secret[3],
                    load_le(UInt64, ptr, pos + 40) ⊻ see2
                )
                pos += 48
                i -= 48
            end
            # merge the three independent lanes back into one state
            seed = seed ⊻ see1 ⊻ see2
        end
        if i > 16
            seed = hash_mix(
                load_le(UInt64, ptr, pos) ⊻ secret[3],
                load_le(UInt64, ptr, pos + 8) ⊻ seed ⊻ secret[2]
            )
            if i > 32
                seed = hash_mix(
                    load_le(UInt64, ptr, pos + 16) ⊻ secret[3],
                    load_le(UInt64, ptr, pos + 24) ⊻ seed
                )
            end
        end

        # the last 16 bytes of the buffer, whatever was consumed above
        a = load_le(UInt64, ptr, n - 15)
        b = load_le(UInt64, ptr, n - 7)
    end

    a = a ⊻ secret[2]
    b = b ⊻ seed
    # `mul_parts` returns (high, low): `b` takes the high half, `a` the low half
    b, a = mul_parts(a, b)
    return hash_mix(a ⊻ secret[1] ⊻ buflen, b ⊻ secret[2])
end
255+
# Hash the bytes of a `String` with `hash_bytes`, seeded by `h`. `GC.@preserve`
# keeps `data` rooted while its raw pointer is being read.
@assume_effects :total function hash(data::String, h::UInt)
    return GC.@preserve data begin
        hash_bytes(pointer(data), sizeof(data), UInt64(h), HASH_SECRET) % UInt
    end
end
258+
# no longer used in Base, but a lot of packages access these internals
# `memhash` is the symbol of the word-size-appropriate C routine; `memhash_seed`
# is the matching seed constant.
const memhash = UInt === UInt64 ? :memhash_seed : :memhash32_seed
const memhash_seed = UInt === UInt64 ? 0x71e729fd56419c81 : 0x56419c81
0 commit comments