-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmicro-hash.h
More file actions
373 lines (316 loc) · 9.14 KB
/
micro-hash.h
File metadata and controls
373 lines (316 loc) · 9.14 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
//////////////////////////////////////////////////////////////////////
// SPDX-License-Identifier: MIT
//
// micro-hash.h
// ============
//
// Quick and dirty hash functions in C99, with some benchmarks. Lots
// of functions for int32, int64 and bytes keys!
//
// THESE FUNCTIONS ARE NOT CRYPTOGRAPHICALLY SECURE. IF YOU NEED A
// CRYPTOGRAPHICALLY SECURE HASH FUNCTION THEN GO USE A KNOWN
// IMPLEMENTATION OF AN APPROVED HASH FUNCTION FROM NIST
//
// Author: Giovanni Santini
// Mail: giovanni.santini@proton.me
// License: MIT
//
//
// Documentation
// -------------
//
// This header-only library is a collection of fast hash functions
// gathered from all around the interweb. If you are looking for a
// quick hash function for your needs, be my guest.
//
// Here you will find:
// - micro_hash_int32_wang
// - micro_hash_int32_wang2
// - micro_hash_int32_rob
// - micro_hash_int64_wang
// - micro_hash_int6432_wang
// - micro_hash_bytes_curl
// - micro_hash_bytes_jenkins
// - micro_hash_str_stb
// - micro_hash_str_djb2
// - micro_hash_str_sdbm
//
// Check out the signatures to see the type that they accept and
// generate.
//
//
// Benchmarks
// ----------
//
// There are some benchmarks under the `tests/` directory, for now
// only the functions that work with integers are tested.
//
// If you run `make check`, you should get similar results:
//
// /----------------------------------------------------------
// | hash function | collisions | non-uniformity |
// | ----------------------- | ------------ | --------------- |
// | micro_hash_int32_rob | 0 | 39.740234375000 |
// | micro_hash_int32_wang2 | 0 | 39.412597656250 |
// | micro_hash_int6432_wang | 11580 | 39.166503906250 |
// | micro_hash_int64_wang | 0 | 38.696289062500 |
// | micro_hash_int32_wang | 0 | 39.614257812500 |
// \----------------------------------------------------------/
//
// `collisions` is the number of collisions found by generating
// ITERATIONS number of random values. `non-uniformity` is a measure
// of how much the hash values are not distributed uniformly, more
// specifically it is the mean of the difference between the the expected
// distribution (uniform) and the actual distribution of the hashes,
// with the hashes being mapped over a smaller space (2^PRECISION)
// compared to total hash space (for practical reasons). Lower is
// better.
//
//
// Usage
// -----
//
// Do this:
//
// #define MICRO_HASH_IMPLEMENTATION
//
// before you include this file in *one* C or C++ file to create the
// implementation.
//
// i.e. it should look like this:
//
// #include ...
// #include ...
// #include ...
// #define MICRO_HASH_IMPLEMENTATION
// #include "micro-hash.h"
//
// Then use whatever hash function you fancy most.
//
// Some more hash functions:
// - https://en.wikipedia.org/wiki/List_of_hash_functions
//
//
// Code
// ----
//
// The official git repository of micro-hash.h is hosted at:
//
// https://github.com/San7o/micro-hash.h
//
// This is part of a bigger collection of header-only C99 libraries
// called "micro-headers", contributions are welcome:
//
// https://github.com/San7o/micro-headers
//
#ifndef MICRO_HASH
#define MICRO_HASH
#define MICRO_HASH_MAJOR 0
#define MICRO_HASH_MINOR 1
#include <stddef.h>
#include <stdint.h>
#ifdef __cplusplus
extern "C" {
#endif
//
// Configuration
//
// Config: Prefix for all functions
// For function inlining, set this to `static inline` and then define
// the implementation in all the files
#ifndef MICRO_HASH_DEF
#define MICRO_HASH_DEF extern
#endif
//
// Function declarations
//
// Integer
// -------
//
// An integer hash function accepts an integer hash key, and returns
// an integer hash result with uniform distribution.
// This function uses a combination of bit shifts and integer
// multiplication to hash the input key.
// Credits: Thomas Wang
MICRO_HASH_DEF uint32_t micro_hash_int32_wang(uint32_t key);
// Credits: Thomas Wang
MICRO_HASH_DEF uint32_t micro_hash_int32_wang2(uint32_t key);
// Credits: Robert Jenkins
MICRO_HASH_DEF uint32_t micro_hash_int32_rob(uint32_t key);
// Credits: Thomas Wang
MICRO_HASH_DEF uint64_t micro_hash_int64_wang(uint64_t key);
// Credits: Thomas Wang
MICRO_HASH_DEF uint32_t micro_hash_int6432_wang(uint64_t key);
// Bytes
// -----
//
// Hash a sequence of bytes
// curl/lib/hash.c
MICRO_HASH_DEF size_t micro_hash_bytes_curl(void *key, size_t key_length);
// https://en.wikipedia.org/wiki/Jenkins_hash_function
MICRO_HASH_DEF uint32_t micro_hash_bytes_jenkins(uint8_t* key, size_t key_length);
// String
// ------
//
// Hash a string
// stb/stb_ds.h
MICRO_HASH_DEF size_t micro_hash_str_stb(char *str, size_t seed);
// djb2 algorithm
//
// This algorithm (k=33) was first reported by dan bernstein many
// years ago in comp.lang.c. another version of this algorithm (now
// favored by bernstein) uses xor: hash(i) = hash(i - 1) * 33 ^
// str[i]; the magic of number 33 (why it works better than many other
// constants, prime or not) has never been adequately explained.
MICRO_HASH_DEF unsigned long micro_hash_str_djb2(unsigned char *str);
// sdbm
//
// This algorithm was created for sdbm (a public-domain
// reimplementation of ndbm) database library. it was found to do well
// in scrambling bits, causing better distribution of the keys and
// fewer splits. it also happens to be a good general hashing function
// with good distribution. the actual function is hash(i) = hash(i -
// 1) * 65599 + str[i]; what is included below is the faster version
// used in gawk.
MICRO_HASH_DEF unsigned long micro_hash_str_sdbm(unsigned char *str);
//
// Implementation
//
#ifdef MICRO_HASH_IMPLEMENTATION
// Integer
MICRO_HASH_DEF uint32_t micro_hash_int32_wang(uint32_t a)
{
a = (a ^ 61) ^ (a >> 16);
a = a + (a << 3);
a = a ^ (a >> 4);
a = a * 0x27d4eb2d;
a = a ^ (a >> 15);
return a;
}
MICRO_HASH_DEF uint32_t micro_hash_int32_wang2(uint32_t key)
{
key = ~key + (key << 15); // key = (key << 15) - key - 1;
key = key ^ (key >> 12);
key = key + (key << 2);
key = key ^ (key >> 4);
key = key * 2057; // key = (key + (key << 3)) + (key << 11);
key = key ^ (key >> 16);
return key;
}
MICRO_HASH_DEF uint32_t micro_hash_int32_rob(uint32_t a)
{
a = (a+0x7ed55d16) + (a<<12);
a = (a^0xc761c23c) ^ (a>>19);
a = (a+0x165667b1) + (a<<5);
a = (a+0xd3a2646c) ^ (a<<9);
a = (a+0xfd7046c5) + (a<<3);
a = (a^0xb55a4f09) ^ (a>>16);
return a;
}
MICRO_HASH_DEF uint64_t micro_hash_int64_wang(uint64_t key)
{
key = (~key) + (key << 21); // key = (key << 21) - key - 1;
key = key ^ (key >> 24);
key = (key + (key << 3)) + (key << 8); // key * 265
key = key ^ (key >> 14);
key = (key + (key << 2)) + (key << 4); // key * 21
key = key ^ (key >> 28);
key = key + (key << 31);
return key;
}
MICRO_HASH_DEF uint32_t micro_hash_int6432_wang(uint64_t key)
{
key = (~key) + (key << 18); // key = (key << 18) - key - 1;
key = key ^ (key >> 31);
key = key * 21; // key = (key + (key << 2)) + (key << 4);
key = key ^ (key >> 11);
key = key + (key << 6);
key = key ^ (key >> 22);
return (int) key;
}
// Bytes
MICRO_HASH_DEF size_t micro_hash_bytes_curl(void *key, size_t key_length)
{
char *key_str = (char *) key;
char *end = key_str + key_length;
size_t h = 5381;
while(key_str < end) {
size_t j = (size_t)*key_str++;
h += h << 5;
h ^= j;
}
return h;
}
MICRO_HASH_DEF uint32_t micro_hash_bytes_jenkins(uint8_t* key, size_t length)
{
size_t i = 0;
uint32_t hash = 0;
while (i != length) {
hash += key[i++];
hash += hash << 10;
hash ^= hash >> 6;
}
hash += hash << 3;
hash ^= hash >> 11;
hash += hash << 15;
return hash;
}
// Strings
MICRO_HASH_DEF size_t micro_hash_str_stb(char *str, size_t seed)
{
#define SIZE_T_BITS ((sizeof (size_t)) * 8)
#define ROTATE_LEFT(val, n) (((val) << (n)) | ((val) >> (SIZE_T_BITS - (n))))
#define ROTATE_RIGHT(val, n) (((val) >> (n)) | ((val) << (SIZE_T_BITS - (n))))
size_t hash = seed;
while (*str)
hash = ROTATE_LEFT(hash, 9) + (unsigned char) *str++;
// Thomas Wang 64-to-32 bit mix function, hopefully also works in 32 bits
hash ^= seed;
hash = (~hash) + (hash << 18);
hash ^= hash ^ ROTATE_RIGHT(hash,31);
hash = hash * 21;
hash ^= hash ^ ROTATE_RIGHT(hash,11);
hash += (hash << 6);
hash ^= ROTATE_RIGHT(hash,22);
return hash+seed;
#undef ROTATE_LEFT
#undef ROTATE_RIGHT
#undef SIZE_T_BITS
}
MICRO_HASH_DEF unsigned long micro_hash_str_djb2(unsigned char *str)
{
unsigned long hash = 5381;
int c;
while ((c = *str++))
hash = ((hash << 5) + hash) + c; /* hash * 33 + c */
return hash;
}
MICRO_HASH_DEF unsigned long micro_hash_str_sdbm(unsigned char *str)
{
unsigned long hash = 0;
int c;
while ((c = *str++))
hash = c + (hash << 6) + (hash << 16) - hash;
return hash;
}
#endif // MICRO_HASH_IMPLEMENTATION
//
// Examples
//
#if 0
#define MICRO_HASH_IMPLEMENTATION
#include "micro-hash.h"
#include <stdio.h>
int main(void)
{
uint32_t key = 69;
uint32_t hash = micro_hash_int32_wang(key);
printf("Key: %d\n", key);
printf("Hash: %d\n", hash);
return 0;
}
#endif // 0
#ifdef __cplusplus
}
#endif // __cplusplus
#endif // MICRO_HASH