1
1
# This file is a part of Julia. License is MIT: https://julialang.org/license
2
2
3
- # # hashing a single value ##
3
# Seed used by the one-argument `hash`; the literal is picked so that its type
# matches the platform's `UInt`.
const HASH_SEED = UInt === UInt64 ? 0xbdd89aa982704029 : 0xeabe9406

# Three 64-bit constants handed to `hash_bytes` as its `secret` argument.
const HASH_SECRET = (
    0x2d358dccaa6c78a5,
    0x8bb84b93962eacc9,
    0x4b33a62ed433d4a3,
)
4
9
5
10
"""
6
11
hash(x[, h::UInt])::UInt
@@ -17,75 +22,52 @@ The hash value may change when a new Julia process is started.
17
22
18
23
```jldoctest; filter = r"0x[0-9a-f]{16}"
19
24
julia> a = hash(10)
20
- 0x95ea2955abd45275
25
+ 0x759d18cc5346a65f
21
26
22
27
julia> hash(10, a) # only use the output of another hash function as the second argument
23
- 0xd42bad54a8575b16
28
+ 0x03158cd61b1b0bd1
24
29
```
25
30
26
31
See also: [`objectid`](@ref), [`Dict`](@ref), [`Set`](@ref).
27
32
"""
28
function hash(data::Any)
    # One-argument form: run the two-argument hash starting from the default seed.
    return hash(data, HASH_SEED)
end
29
34
# A weak reference hashes exactly like the value it currently refers to.
function hash(w::WeakRef, h::UInt)
    return hash(w.value, h)
end
30
35
31
36
# Types can't be deleted, so marking as total allows the compiler to look up the hash
function hash(T::Type, h::UInt)
    # Fetch the runtime's hash for the type, then fold it into `h` like any other UInt.
    type_hash = @assume_effects :total ccall(:jl_type_hash, UInt, (Any,), T)
    return hash(type_hash, h)
end
39
# Generic fallback: hash the object's `objectid` together with `h`.
function hash(@nospecialize(data), h::UInt)
    return hash(objectid(data), h)
end
33
40
34
- # # hashing general objects ##
35
-
36
- hash (@nospecialize (x), h:: UInt ) = hash_uint (3 h - objectid (x))
37
-
38
- hash (x:: Symbol ) = objectid (x)
39
-
40
- # # core data hashing functions ##
41
-
42
- function hash_64_64 (n:: UInt64 )
43
- a:: UInt64 = n
44
- a = ~ a + a << 21
45
- a = a ⊻ a >> 24
46
- a = a + a << 3 + a << 8
47
- a = a ⊻ a >> 14
48
- a = a + a << 2 + a << 4
49
- a = a ⊻ a >> 28
50
- a = a + a << 31
51
- return a
41
# Full 128-bit product of `a` and `b`, returned as `(high 64 bits, low 64 bits)`.
function mul_parts(a::UInt64, b::UInt64)
    product = widemul(a, b)
    hi = (product >> 64) % UInt64
    lo = product % UInt64
    return hi, lo
end
53
-
54
- function hash_64_32 (n:: UInt64 )
55
- a:: UInt64 = n
56
- a = ~ a + a << 18
57
- a = a ⊻ a >> 31
58
- a = a * 21
59
- a = a ⊻ a >> 11
60
- a = a + a << 6
61
- a = a ⊻ a >> 22
62
- return a % UInt32
45
# Multiply `a` and `b` to 128 bits and XOR the two 64-bit halves together.
function hash_mix(a::UInt64, b::UInt64)
    hi, lo = mul_parts(a, b)
    return hi ⊻ lo
end
46
+
47
# faster-but-weaker than hash_mix intended for small keys
function hash_mix_linear(x::UInt64, h::UInt)
    return 3 * h - x
end
49
# Scramble a 64-bit word: xor-shift by 32, wrapping multiply by a fixed constant,
# then xor-shift by 33.
function hash_finalizer(x::UInt64)
    folded = x ⊻ (x >> 32)
    scaled = folded * 0x63652a4cd374b267
    return scaled ⊻ (scaled >> 33)
end
64
55
65
- function hash_32_32 (n:: UInt32 )
66
- a:: UInt32 = n
67
- a = a + 0x7ed55d16 + a << 12
68
- a = a ⊻ 0xc761c23c ⊻ a >> 19
69
- a = a + 0x165667b1 + a << 5
70
- a = a + 0xd3a2646c ⊻ a << 9
71
- a = a + 0xfd7046c5 + a << 3
72
- a = a ⊻ 0xb55a4f09 ⊻ a >> 16
73
- return a
74
- end
56
# Width-specific entry points; each one ends up in `hash_finalizer`.

# 64-bit input, 64-bit output.
function hash_64_64(data::UInt64)
    return hash_finalizer(data)
end

# 64-bit input, truncated to the low 32 bits of the 64-bit result.
function hash_64_32(data::UInt64)
    return hash_64_64(data) % UInt32
end

# 32-bit input, widened to 64 bits and then reduced back to 32 bits.
function hash_32_32(data::UInt32)
    return hash_64_32(UInt64(data))
end
75
59
76
60
# Aliases for the variants whose result is `UInt`-sized on this platform:
# `hash_uint64` takes a `UInt64`, `hash_uint` takes a `UInt`.
const hash_uint64 = UInt === UInt64 ? hash_64_64 : hash_64_32
const hash_uint = UInt === UInt64 ? hash_64_64 : hash_32_32
83
67
84
- # # efficient value-based hashing of integers ##
85
-
86
- hash (x:: Int64 , h:: UInt ) = hash_uint64 (bitcast (UInt64, x)) - 3 h
87
- hash (x:: UInt64 , h:: UInt ) = hash_uint64 (x) - 3 h
88
- hash (x:: Union{Bool,Int8,UInt8,Int16,UInt16,Int32,UInt32} , h:: UInt ) = hash (Int64 (x), h)
68
# `UInt64` is the base case: combine with `h` linearly, then finalize.
function hash(x::UInt64, h::UInt)
    return hash_uint64(hash_mix_linear(x, h))
end

# `Int64` reuses the `UInt64` method on its raw bits.
function hash(x::Int64, h::UInt)
    return hash(bitcast(UInt64, x), h)
end

# Narrower integer types (and `Bool`) are widened to `Int64` first.
function hash(x::Union{Bool, Int8, UInt8, Int16, UInt16, Int32, UInt32}, h::UInt)
    return hash(Int64(x), h)
end
89
71
90
72
function hash_integer (n:: Integer , h:: UInt )
91
73
h ⊻= hash_uint ((n % UInt) ⊻ h)
100
82
101
83
## efficient value-based hashing of floats ##
102
84
103
# Hash of the bit pattern of the literal `NaN`; the float methods XOR this with `h`
# for every NaN input.
const hx_NaN = hash(reinterpret(UInt64, NaN))
104
86
function hash (x:: Float64 , h:: UInt )
105
87
# see comments on trunc and hash(Real, UInt)
106
88
if typemin (Int64) <= x < typemax (Int64)
@@ -116,7 +98,7 @@ function hash(x::Float64, h::UInt)
116
98
elseif isnan (x)
117
99
return hx_NaN ⊻ h # NaN does not have a stable bit pattern
118
100
end
119
- return hash_uint64 (bitcast (UInt64, x)) - 3 h
101
+ return hash (bitcast (UInt64, x), h)
120
102
end
121
103
122
104
# `Float32` values are widened to `Float64` and hashed through that method.
function hash(x::Float32, h::UInt)
    return hash(Float64(x), h)
end
@@ -131,7 +113,7 @@ function hash(x::Float16, h::UInt)
131
113
elseif isnan (x)
132
114
return hx_NaN ⊻ h # NaN does not have a stable bit pattern
133
115
end
134
- return hash_uint64 (bitcast (UInt64, Float64 (x))) - 3 h
116
+ return hash (bitcast (UInt64, Float64 (x)), h)
135
117
end
136
118
137
119
## generic hashing for rational values ##
@@ -180,21 +162,100 @@ end
180
162
181
163
182
164
## symbol & expression hashing ##
183
-
184
165
# `Expr` and `QuoteNode` each XOR their own salt into `h` before hashing their
# contents. The 32-bit salts are the low 32 bits of the 64-bit ones for `Expr`
# (`…96d26dc6`) and a separate constant for `QuoteNode`; they must stay distinct
# from each other, just as the 64-bit pair is.
if UInt === UInt64
    hash(x::Expr, h::UInt) = hash(x.args, hash(x.head, h ⊻ 0x83c7900696d26dc6))
    hash(x::QuoteNode, h::UInt) = hash(x.value, h ⊻ 0x2c97bf8b3de87020)
else
    hash(x::Expr, h::UInt) = hash(x.args, hash(x.head, h ⊻ 0x96d26dc6))
    hash(x::QuoteNode, h::UInt) = hash(x.value, h ⊻ 0x469d72af)
end
191
172
192
- # # hashing strings ##
173
# With no explicit `h`, a `Symbol` hashes to its `objectid`.
function hash(x::Symbol)
    return objectid(x)
end
193
174
194
- const memhash = UInt === UInt64 ? :memhash_seed : :memhash32_seed
195
- const memhash_seed = UInt === UInt64 ? 0x71e729fd56419c81 : 0x56419c81
196
175
197
- @assume_effects :total function hash (s:: String , h:: UInt )
198
- h += memhash_seed
199
- ccall (memhash, UInt, (Ptr{UInt8}, Csize_t, UInt32), s, sizeof (s), h % UInt32) + h
176
# Load a `T` (`UInt32` or `UInt64`) whose first byte is the `i`-th byte (1-based) of
# `ptr`, interpreting the bytes as little-endian. `ltoh` leaves the value untouched on
# little-endian hosts and byte-swaps on big-endian ones, so the result no longer
# depends on the host byte order.
# The caller must guarantee that `sizeof(T)` bytes starting there are readable.
load_le(::Type{T}, ptr::Ptr{UInt8}, i) where {T <: Union{UInt32, UInt64}} =
    ltoh(unsafe_load(convert(Ptr{T}, ptr + i - 1)))
178
+
179
# Pack the first, middle (index `div(n, 2) + 1`) and last (index `n`) bytes at `ptr`
# into one `UInt64`, at bit offsets 56, 32 and 0 respectively.
# `hash_bytes` calls this only for inputs of 1 to 3 bytes.
function read_small(ptr::Ptr{UInt8}, n::Int)
    first_byte = UInt64(unsafe_load(ptr))
    middle_byte = UInt64(unsafe_load(ptr, div(n, 2) + 1))
    last_byte = UInt64(unsafe_load(ptr, n))
    return (first_byte << 56) | (middle_byte << 32) | last_byte
end
184
+
185
# Hash the `n` bytes starting at `ptr`, mixing in `seed` and the three `secret` words.
# Inputs of at most 16 bytes are handled with a few overlapping loads; longer inputs
# are consumed in 48-byte blocks over three accumulators, then a tail of up to 32
# bytes, and finally the last 16 bytes of the buffer.
# The caller must keep the memory behind `ptr` alive and readable for `n` bytes.
@assume_effects :terminates_globally function hash_bytes(
        ptr::Ptr{UInt8},
        n::Int,
        seed::UInt64,
        secret::NTuple{3, UInt64}
    )
    # Adapted with gratitude from [rapidhash](https://github.com/Nicoshev/rapidhash)
    buflen = UInt64(n)
    seed ⊻= hash_mix(seed ⊻ secret[1], secret[2]) ⊻ buflen

    a = zero(UInt64)
    b = zero(UInt64)

    if buflen > 16
        pos = 1
        remaining = buflen
        if remaining ≥ 48
            # Three independent lanes, each fed 16 bytes per 48-byte block.
            lane1 = seed
            lane2 = seed
            while remaining ≥ 48
                seed = hash_mix(
                    load_le(UInt64, ptr, pos) ⊻ secret[1],
                    load_le(UInt64, ptr, pos + 8) ⊻ seed
                )
                lane1 = hash_mix(
                    load_le(UInt64, ptr, pos + 16) ⊻ secret[2],
                    load_le(UInt64, ptr, pos + 24) ⊻ lane1
                )
                lane2 = hash_mix(
                    load_le(UInt64, ptr, pos + 32) ⊻ secret[3],
                    load_le(UInt64, ptr, pos + 40) ⊻ lane2
                )
                pos += 48
                remaining -= 48
            end
            seed = seed ⊻ lane1 ⊻ lane2
        end
        if remaining > 16
            seed = hash_mix(
                load_le(UInt64, ptr, pos) ⊻ secret[3],
                load_le(UInt64, ptr, pos + 8) ⊻ seed ⊻ secret[2]
            )
            if remaining > 32
                seed = hash_mix(
                    load_le(UInt64, ptr, pos + 16) ⊻ secret[3],
                    load_le(UInt64, ptr, pos + 24) ⊻ seed
                )
            end
        end

        # The final 16 bytes of the buffer always feed `a` and `b`.
        a = load_le(UInt64, ptr, n - 15)
        b = load_le(UInt64, ptr, n - 7)
    elseif buflen ≥ 4
        # 4 to 16 bytes: 32-bit loads from the front and from the back of the buffer.
        a = (UInt64(load_le(UInt32, ptr, 1)) << 32) |
            UInt64(load_le(UInt32, ptr, n - 3))

        # `delta` is 0 for lengths 4-7 and 4 for lengths 8-16.
        delta = (buflen & 24) >>> (buflen >>> 3)
        b = (UInt64(load_le(UInt32, ptr, delta + 1)) << 32) |
            UInt64(load_le(UInt32, ptr, n - 3 - delta))
    elseif buflen > 0
        a = read_small(ptr, n)
    end

    # Final avalanche: the low half of the product becomes `a`, the high half `b`.
    hi, lo = mul_parts(a ⊻ secret[2], b ⊻ seed)
    return hash_mix(lo ⊻ secret[1] ⊻ buflen, hi ⊻ secret[2])
end
255
+
256
# Hash the bytes of a `String` with `hash_bytes`, seeded by `h` and keyed by
# `HASH_SECRET`. `GC.@preserve` keeps `data` rooted while its raw pointer is in use.
@assume_effects :total function hash(data::String, h::UInt)
    seed = UInt64(h)
    digest = GC.@preserve data hash_bytes(pointer(data), sizeof(data), seed, HASH_SECRET)
    return digest % UInt
end
258
+
259
# no longer used in Base, but a lot of packages access these internals
if UInt === UInt64
    const memhash = :memhash_seed
    const memhash_seed = 0x71e729fd56419c81
else
    const memhash = :memhash32_seed
    const memhash_seed = 0x56419c81
end
0 commit comments