Skip to content

Commit 56ff9b9

Browse files
committed
refactor: optimize hash function and improve readability
1 parent 12a10e9 commit 56ff9b9

File tree

1 file changed

+53 -40 lines changed

1 file changed

+53 -40 lines changed

include/ankerl/unordered_dense.h

Lines changed: 53 additions & 40 deletions
Original file line number | Diff line number | Diff line change
@@ -191,33 +191,36 @@ inline void mum(std::uint64_t* a, std::uint64_t* b) {
191191
// This is a modified ChibiHash version 2: https://github.com/N-R-K/ChibiHash
192192
// hash results will change on different endian machines!
193193

194-
[[nodiscard]] auto constexpr rotl(std::uint64_t value, unsigned shift) -> std::uint64_t {
195-
const unsigned width = 64;
196-
const unsigned mask = width - 1;
197-
if ((shift &= mask) == 0) { // NOLINT(bugprone-assignment-in-if-condition)
198-
return value;
199-
}
200-
return (value << shift) | (value >> (width - shift));
194+
[[nodiscard]] auto constexpr rotl(std::uint64_t x, unsigned n) -> std::uint64_t {
195+
n &= 63U;
196+
return (x << n) | (x >> ((-n) & 63U));
201197
}
202198

203199
[[nodiscard]] inline auto r4(const std::uint8_t* p) -> std::uint64_t {
204-
return static_cast<std::uint64_t>(p[0]) << 0U | static_cast<std::uint64_t>(p[1]) << 8U |
205-
static_cast<std::uint64_t>(p[2]) << 16U | static_cast<std::uint64_t>(p[3]) << 24U;
200+
// return static_cast<std::uint64_t>(p[0]) << 0U | static_cast<std::uint64_t>(p[1]) << 8U |
201+
// static_cast<std::uint64_t>(p[2]) << 16U | static_cast<std::uint64_t>(p[3]) << 24U;
202+
std::uint32_t v{};
203+
std::memcpy(&v, p, 4);
204+
return v;
206205
}
207206

208207
// read functions. WARNING: we don't care about endianness, so results are different on big endian!
209208
[[nodiscard]] inline auto r8(const std::uint8_t* p) -> std::uint64_t {
210-
return r4(p) | (r4(p + 4) << 32U);
209+
// return r4(p) | (r4(p + 4) << 32U);
210+
std::uint64_t v{};
211+
std::memcpy(&v, p, 8U);
212+
return v;
211213
}
212214

213-
[[maybe_unused]] [[nodiscard]] inline auto hash(void const* key, std::size_t len) -> std::uint64_t {
214-
auto const* p = static_cast<std::uint8_t const*>(key);
215-
auto l = static_cast<std::ptrdiff_t>(len);
215+
[[maybe_unused]] [[nodiscard]] inline auto hash(void const* key, std::size_t l) -> std::uint64_t {
216+
static constexpr auto seed = UINT64_C(0xa0761d6478bd642f);
216217

217-
static constexpr std::uint64_t seed = UINT64_C(0xa0761d6478bd642f);
218-
static constexpr std::uint64_t k = UINT64_C(0x2B7E151628AED2A7); // digits of e
219-
static constexpr std::uint64_t seed2 = rotl(seed - k, 15) + rotl(seed - k, 47);
220-
std::array<std::uint64_t, 4> h = {seed, seed + k, seed2, seed2 + (k * k ^ k)};
218+
static constexpr auto k = UINT64_C(0x2B7E151628AED2A7); // digits of e
219+
static constexpr auto seed2 = rotl(seed - k, 15) + rotl(seed - k, 47);
220+
auto h0 = seed;
221+
auto h1 = seed + k;
222+
auto h2 = seed2;
223+
auto h3 = seed2 + ((k * k) ^ k);
221224

222225
// depending on your system unrolling might (or might not) make things
223226
// a tad bit faster on large strings. on my system, it actually makes
@@ -227,39 +230,49 @@ inline void mum(std::uint64_t* a, std::uint64_t* b) {
227230
// but depending on your needs, you may want to uncomment the pragma
228231
// below to unroll the loop.
229232
// # pragma GCC unroll 2
230-
for (; l >= 32; l -= 32) {
231-
for (unsigned i = 0; i < 4; ++i, p += 8) {
232-
auto stripe = r8(p);
233-
h[i] = (stripe + h[i]) * k;
234-
h[(i + 1) & 3U] += rotl(stripe, 27);
235-
}
236-
}
233+
auto const* p = static_cast<std::uint8_t const*>(key);
234+
235+
if (ANKERL_UNORDERED_DENSE_UNLIKELY(l >= 32)) {
236+
do {
237+
auto const stripe0 = r8(p);
238+
auto const stripe1 = r8(p + 8);
239+
auto const stripe2 = r8(p + 16);
240+
auto const stripe3 = r8(p + 24);
237241

238-
for (; l >= 8; l -= 8, p += 8) {
239-
h[0] ^= r4(p + 0);
240-
h[0] *= k;
241-
h[1] ^= r4(p + 4);
242-
h[1] *= k;
242+
h0 = (stripe0 + h0) * k + rotl(stripe3, 27);
243+
h1 = (stripe1 + h1 + rotl(stripe0, 27)) * k;
244+
h2 = (stripe2 + h2 + rotl(stripe1, 27)) * k;
245+
h3 = (stripe3 + h3 + rotl(stripe2, 27)) * k;
246+
247+
l -= 32;
248+
p += 32;
249+
} while (ANKERL_UNORDERED_DENSE_LIKELY(l >= 32));
250+
}
251+
while (ANKERL_UNORDERED_DENSE_LIKELY(l >= 8)) {
252+
h0 = (h0 ^ r4(p + 0)) * k;
253+
h1 = (h1 ^ r4(p + 4)) * k;
254+
l -= 8;
255+
p += 8;
243256
}
244257

245258
if (l >= 4) {
246-
h[2] ^= r4(p);
247-
h[3] ^= r4(p + l - 4);
259+
h2 ^= r4(p);
260+
h3 ^= r4(p + l - 4);
248261
} else if (l > 0) {
249-
h[2] ^= p[0];
250-
h[3] ^= p[l / 2] | (static_cast<std::uint64_t>(p[l - 1]) << 8U);
262+
h2 ^= p[0];
263+
h3 ^= p[l >> 1U] | (static_cast<std::uint64_t>(p[l - 1]) << 8U);
251264
}
252265

253-
h[0] += rotl(h[2] * k, 31) ^ (h[2] >> 31U);
254-
h[1] += rotl(h[3] * k, 31) ^ (h[3] >> 31U);
255-
h[0] *= k;
256-
h[0] ^= h[0] >> 31U;
257-
h[1] += h[0];
266+
h0 += rotl(h2 * k, 31) ^ (h2 >> 31U);
267+
h1 += rotl(h3 * k, 31) ^ (h3 >> 31U);
268+
h0 *= k;
269+
h0 ^= h0 >> 31U;
270+
h1 += h0;
258271

259-
auto x = static_cast<std::uint64_t>(len) * k;
272+
auto x = static_cast<std::uint64_t>(l) * k;
260273
x ^= rotl(x, 29);
261274
x += seed;
262-
x ^= h[1];
275+
x ^= h1;
263276

264277
x ^= rotl(x, 15) ^ rotl(x, 42);
265278
x *= k;

0 commit comments

Comments
 (0)