Skip to content

Commit db675aa

Browse files
committed
refactor: hash now uses a5hash, but it's still slower
1 parent 56ff9b9 commit db675aa

File tree

1 file changed

+87
-66
lines changed

1 file changed

+87
-66
lines changed

include/ankerl/unordered_dense.h

Lines changed: 87 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -182,6 +182,35 @@ inline void mum(std::uint64_t* a, std::uint64_t* b) {
182182
# endif
183183
}
184184

185+
// Computes the full 128-bit product of two unsigned 64-bit integers.
//
// u, v : the two factors (taken by value, so the outputs may safely point at
//        variables that were also used to build the arguments).
// rl   : receives the low 64 bits of u * v.
// rh   : receives the high 64 bits of u * v.
//
// Both output pointers are dereferenced unconditionally and must be valid.
inline void umul128(std::uint64_t const u, std::uint64_t const v, std::uint64_t* const rl, std::uint64_t* const rh) {
#    if defined(__SIZEOF_INT128__)
    // Compiler provides a native 128-bit integer type.
    __uint128_t r = u;
    r *= v;
    *rl = static_cast<std::uint64_t>(r);
    *rh = static_cast<std::uint64_t>(r >> 64U);
#    elif defined(_MSC_VER) && defined(_M_X64)
    // MSVC x64 intrinsic: returns the low half, stores the high half through rh.
    *rl = _umul128(u, v, rh);
#    else
    // Portable fallback: schoolbook multiplication on 32-bit halves.
    // The partial products deliberately do NOT reuse the names `rl`/`rh`, which
    // are the output pointers of this function.
    std::uint64_t const ha = u >> 32U;
    std::uint64_t const hb = v >> 32U;
    std::uint64_t const la = static_cast<std::uint32_t>(u);
    std::uint64_t const lb = static_cast<std::uint32_t>(v);

    std::uint64_t const prod_hh = ha * hb;
    std::uint64_t const prod_hl = ha * lb;
    std::uint64_t const prod_lh = hb * la;
    std::uint64_t const prod_ll = la * lb;

    // Add the low halves of the two cross products into the low word and
    // count the carries produced by each addition.
    std::uint64_t const t = prod_ll + (prod_hl << 32U);
    auto carry = static_cast<std::uint64_t>(t < prod_ll);
    std::uint64_t const lo = t + (prod_lh << 32U);
    carry += static_cast<std::uint64_t>(lo < t);

    // High word: high product, high halves of the cross products, and carries.
    std::uint64_t const hi = prod_hh + (prod_hl >> 32U) + (prod_lh >> 32U) + carry;

    *rl = lo;
    *rh = hi;
#    endif
}
213+
185214
// multiply and xor mix function, aka MUM
186215
[[nodiscard]] inline auto mix(std::uint64_t a, std::uint64_t b) -> std::uint64_t {
187216
mum(&a, &b);
@@ -196,6 +225,11 @@ inline void mum(std::uint64_t* a, std::uint64_t* b) {
196225
return (x << n) | (x >> ((-n) & 63U));
197226
}
198227

228+
// Reads a 1-, 2- or 3-byte buffer and packs it into the low 24 bits of the result.
//
// p : start of the buffer.
// k : number of readable bytes; must be 1, 2 or 3 (k == 0 would index p[k - 1]
//     out of bounds).
//
// The first byte lands in bits 16..23, the byte at index k / 2 in bits 8..15 and
// the last byte in bits 0..7, so for short inputs some bytes are used twice.
[[nodiscard]] inline auto r3(const std::uint8_t* p, std::size_t k) -> std::uint64_t {
    auto const first = static_cast<std::uint64_t>(p[0]);
    auto const middle = static_cast<std::uint64_t>(p[k >> 1U]);
    auto const last = static_cast<std::uint64_t>(p[k - 1]);
    return (first << 16U) | (middle << 8U) | last;
}
232+
199233
[[nodiscard]] inline auto r4(const std::uint8_t* p) -> std::uint64_t {
200234
// return static_cast<std::uint64_t>(p[0]) << 0U | static_cast<std::uint64_t>(p[1]) << 8U |
201235
// static_cast<std::uint64_t>(p[2]) << 16U | static_cast<std::uint64_t>(p[3]) << 24U;
@@ -213,72 +247,59 @@ inline void mum(std::uint64_t* a, std::uint64_t* b) {
213247
}
214248

215249
[[maybe_unused]] [[nodiscard]] inline auto hash(void const* key, std::size_t l) -> std::uint64_t {
216-
static constexpr auto seed = UINT64_C(0xa0761d6478bd642f);
217-
218-
static constexpr auto k = UINT64_C(0x2B7E151628AED2A7); // digits of e
219-
static constexpr auto seed2 = rotl(seed - k, 15) + rotl(seed - k, 47);
220-
auto h0 = seed;
221-
auto h1 = seed + k;
222-
auto h2 = seed2;
223-
auto h3 = seed2 + ((k * k) ^ k);
224-
225-
// depending on your system unrolling might (or might not) make things
226-
// a tad bit faster on large strings. on my system, it actually makes
227-
// things slower.
228-
// generally speaking, the cost of bigger code size is usually not
229-
// worth the trade-off since larger code-size will hinder inlinability
230-
// but depending on your needs, you may want to uncomment the pragma
231-
// below to unroll the loop.
232-
// # pragma GCC unroll 2
233-
auto const* p = static_cast<std::uint8_t const*>(key);
234-
235-
if (ANKERL_UNORDERED_DENSE_UNLIKELY(l >= 32)) {
236-
do {
237-
auto const stripe0 = r8(p);
238-
auto const stripe1 = r8(p + 8);
239-
auto const stripe2 = r8(p + 16);
240-
auto const stripe3 = r8(p + 24);
241-
242-
h0 = (stripe0 + h0) * k + rotl(stripe3, 27);
243-
h1 = (stripe1 + h1 + rotl(stripe0, 27)) * k;
244-
h2 = (stripe2 + h2 + rotl(stripe1, 27)) * k;
245-
h3 = (stripe3 + h3 + rotl(stripe2, 27)) * k;
246-
247-
l -= 32;
248-
p += 32;
249-
} while (ANKERL_UNORDERED_DENSE_LIKELY(l >= 32));
250-
}
251-
while (ANKERL_UNORDERED_DENSE_LIKELY(l >= 8)) {
252-
h0 = (h0 ^ r4(p + 0)) * k;
253-
h1 = (h1 ^ r4(p + 4)) * k;
254-
l -= 8;
255-
p += 8;
256-
}
257-
258-
if (l >= 4) {
259-
h2 ^= r4(p);
260-
h3 ^= r4(p + l - 4);
261-
} else if (l > 0) {
262-
h2 ^= p[0];
263-
h3 ^= p[l >> 1U] | (static_cast<std::uint64_t>(p[l - 1]) << 8U);
264-
}
265-
266-
h0 += rotl(h2 * k, 31) ^ (h2 >> 31U);
267-
h1 += rotl(h3 * k, 31) ^ (h3 >> 31U);
268-
h0 *= k;
269-
h0 ^= h0 >> 31U;
270-
h1 += h0;
271-
272-
auto x = static_cast<std::uint64_t>(l) * k;
273-
x ^= rotl(x, 29);
274-
x += seed;
275-
x ^= h1;
276-
277-
x ^= rotl(x, 15) ^ rotl(x, 42);
278-
x *= k;
279-
x ^= rotl(x, 13) ^ rotl(x, 31);
280-
281-
return x;
250+
static constexpr auto use_seed = UINT64_C(0);
251+
252+
// The seeds are initialized to mantissa bits of PI.
253+
auto seed1 = UINT64_C(0x243F6A8885A308D3) ^ l;
254+
auto seed2 = UINT64_C(0x452821E638D01377) ^ l;
255+
256+
auto val01 = UINT64_C(0xAAAAAAAAAAAAAAAA); ///< `10` bit-pairs.
257+
auto val10 = UINT64_C(0x5555555555555555); ///< `01` bit-pairs.
258+
umul128(seed2 ^ (use_seed & val10), seed1 ^ (use_seed & val01), &seed1, &seed2);
259+
260+
auto const* msg = static_cast<const uint8_t*>(key);
261+
262+
if (ANKERL_UNORDERED_DENSE_UNLIKELY(l > 16))
263+
ANKERL_UNORDERED_DENSE_UNLIKELY_ATTR {
264+
val01 ^= seed1;
265+
val10 ^= seed2;
266+
267+
do {
268+
umul128(r8(msg) ^ seed1, r8(msg + 8) ^ seed2, &seed1, &seed2);
269+
270+
l -= 16;
271+
msg += 16;
272+
273+
seed1 += val01;
274+
seed2 += val10;
275+
276+
} while (ANKERL_UNORDERED_DENSE_LIKELY(l > 16));
277+
}
278+
279+
std::uint64_t a = 0;
280+
std::uint64_t b = 0;
281+
if (ANKERL_UNORDERED_DENSE_LIKELY(l >= 4)) {
282+
const uint8_t* const msg4 = msg + l - 4;
283+
const size_t mo = l >> 3U;
284+
285+
a = r4(msg) << 32U | r4(msg4);
286+
b = r4(msg + (mo * 4)) << 32U | r4(msg4 - (mo * 4));
287+
} else {
288+
// a = r3(msg, l);
289+
if (l != 0) {
290+
a = msg[0];
291+
if (l != 1) {
292+
a |= static_cast<std::uint64_t>(msg[1]) << 8;
293+
if (l != 2) {
294+
a |= static_cast<std::uint64_t>(msg[2]) << 16;
295+
}
296+
}
297+
}
298+
}
299+
umul128(a ^ seed1, b ^ seed2, &seed1, &seed2);
300+
umul128(val01 ^ seed1, seed2, &a, &b);
301+
302+
return (a ^ b);
282303
}
283304

284305
[[nodiscard]] inline auto hash(std::uint64_t x) -> std::uint64_t {

0 commit comments

Comments
 (0)