Skip to content

Commit 3d0850c

Browse files
author
MarcoFalke
committed
Merge bitcoin/bitcoin#23994: Consolidate all uses of the fast range mapping technique in util/fastrange.h
efab28b Add FastRange32 function and use it throughout the codebase (Pieter Wuille)
96ecd6f scripted-diff: rename MapIntoRange to FastRange64 (Pieter Wuille)
c6d15c4 [moveonly] Move MapIntoRange() to separate util/fastrange.h (Pieter Wuille)

Pull request description:

  Several places in the codebase use the fast range mapping technique described in https://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/, some for 32-bit ranges, some for 64-bit ones. Move all of these to `util/fastrange.h`, and give them a consistent name.

ACKs for top commit:
  Sjors: ACK efab28b
  shaavan: reACK efab28b
  MarcoFalke: review ACK efab28b 🍸

Tree-SHA512: 3190a25ef21d17f0ab2afcd9b8d5a1813fdfac0d93996878017e84ff62eee412c823d6149ae8e92cfc3214458641e83ace4b022b4a0fe0679f78dbaee21c6227
2 parents 542e405 + efab28b commit 3d0850c

File tree

7 files changed

+71
-58
lines changed

7 files changed

+71
-58
lines changed

src/Makefile.am

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -240,6 +240,7 @@ BITCOIN_CORE_H = \
240240
util/check.h \
241241
util/epochguard.h \
242242
util/error.h \
243+
util/fastrange.h \
243244
util/fees.h \
244245
util/getuniquepath.h \
245246
util/golombrice.h \

src/blockfilter.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ uint64_t GCSFilter::HashToRange(const Element& element) const
2929
uint64_t hash = CSipHasher(m_params.m_siphash_k0, m_params.m_siphash_k1)
3030
.Write(element.data(), element.size())
3131
.Finalize();
32-
return MapIntoRange(hash, m_F);
32+
return FastRange64(hash, m_F);
3333
}
3434

3535
std::vector<uint64_t> GCSFilter::BuildHashedSet(const ElementSet& elements) const

src/common/bloom.cpp

Lines changed: 3 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#include <script/standard.h>
1212
#include <span.h>
1313
#include <streams.h>
14+
#include <util/fastrange.h>
1415

1516
#include <algorithm>
1617
#include <cmath>
@@ -191,14 +192,6 @@ static inline uint32_t RollingBloomHash(unsigned int nHashNum, uint32_t nTweak,
191192
return MurmurHash3(nHashNum * 0xFBA4C795 + nTweak, vDataToHash);
192193
}
193194

194-
195-
// A replacement for x % n. This assumes that x and n are 32bit integers, and x is a uniformly random distributed 32bit value
196-
// which should be the case for a good hash.
197-
// See https://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/
198-
static inline uint32_t FastMod(uint32_t x, size_t n) {
199-
return ((uint64_t)x * (uint64_t)n) >> 32;
200-
}
201-
202195
void CRollingBloomFilter::insert(Span<const unsigned char> vKey)
203196
{
204197
if (nEntriesThisGeneration == nEntriesPerGeneration) {
@@ -223,7 +216,7 @@ void CRollingBloomFilter::insert(Span<const unsigned char> vKey)
223216
uint32_t h = RollingBloomHash(n, nTweak, vKey);
224217
int bit = h & 0x3F;
225218
/* FastRange32 works with the upper bits of h, so it is safe to ignore that the lower bits of h are already used for bit. */
226-
uint32_t pos = FastMod(h, data.size());
219+
uint32_t pos = FastRange32(h, data.size());
227220
/* The lowest bit of pos is ignored, and set to zero for the first bit, and to one for the second. */
228221
data[pos & ~1] = (data[pos & ~1] & ~(((uint64_t)1) << bit)) | ((uint64_t)(nGeneration & 1)) << bit;
229222
data[pos | 1] = (data[pos | 1] & ~(((uint64_t)1) << bit)) | ((uint64_t)(nGeneration >> 1)) << bit;
@@ -235,7 +228,7 @@ bool CRollingBloomFilter::contains(Span<const unsigned char> vKey) const
235228
for (int n = 0; n < nHashFuncs; n++) {
236229
uint32_t h = RollingBloomHash(n, nTweak, vKey);
237230
int bit = h & 0x3F;
238-
uint32_t pos = FastMod(h, data.size());
231+
uint32_t pos = FastRange32(h, data.size());
239232
/* If the relevant bit is not set in either data[pos & ~1] or data[pos | 1], the filter does not contain vKey */
240233
if (!(((data[pos & ~1] | data[pos | 1]) >> bit) & 1)) {
241234
return false;

src/cuckoocache.h

Lines changed: 12 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
#ifndef BITCOIN_CUCKOOCACHE_H
66
#define BITCOIN_CUCKOOCACHE_H
77

8+
#include <util/fastrange.h>
9+
810
#include <algorithm> // std::find
911
#include <array>
1012
#include <atomic>
@@ -219,13 +221,8 @@ class cache
219221
* One option would be to implement the same trick the compiler uses and compute the
220222
* constants for exact division based on the size, as described in "{N}-bit Unsigned
221223
* Division via {N}-bit Multiply-Add" by Arch D. Robison in 2005. But that code is
222-
* somewhat complicated and the result is still slower than other options:
223-
*
224-
* Instead we treat the 32-bit random number as a Q32 fixed-point number in the range
225-
* [0, 1) and simply multiply it by the size. Then we just shift the result down by
226-
* 32-bits to get our bucket number. The result has non-uniformity the same as a
227-
* mod, but it is much faster to compute. More about this technique can be found at
228-
* https://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/ .
224+
* somewhat complicated and the result is still slower than an even simpler option:
225+
* see the FastRange32 function in util/fastrange.h.
229226
*
230227
* The resulting non-uniformity is also more equally distributed which would be
231228
* advantageous for something like linear probing, though it shouldn't matter
@@ -241,14 +238,14 @@ class cache
241238
*/
242239
inline std::array<uint32_t, 8> compute_hashes(const Element& e) const
243240
{
244-
return {{(uint32_t)(((uint64_t)hash_function.template operator()<0>(e) * (uint64_t)size) >> 32),
245-
(uint32_t)(((uint64_t)hash_function.template operator()<1>(e) * (uint64_t)size) >> 32),
246-
(uint32_t)(((uint64_t)hash_function.template operator()<2>(e) * (uint64_t)size) >> 32),
247-
(uint32_t)(((uint64_t)hash_function.template operator()<3>(e) * (uint64_t)size) >> 32),
248-
(uint32_t)(((uint64_t)hash_function.template operator()<4>(e) * (uint64_t)size) >> 32),
249-
(uint32_t)(((uint64_t)hash_function.template operator()<5>(e) * (uint64_t)size) >> 32),
250-
(uint32_t)(((uint64_t)hash_function.template operator()<6>(e) * (uint64_t)size) >> 32),
251-
(uint32_t)(((uint64_t)hash_function.template operator()<7>(e) * (uint64_t)size) >> 32)}};
241+
return {{FastRange32(hash_function.template operator()<0>(e), size),
242+
FastRange32(hash_function.template operator()<1>(e), size),
243+
FastRange32(hash_function.template operator()<2>(e), size),
244+
FastRange32(hash_function.template operator()<3>(e), size),
245+
FastRange32(hash_function.template operator()<4>(e), size),
246+
FastRange32(hash_function.template operator()<5>(e), size),
247+
FastRange32(hash_function.template operator()<6>(e), size),
248+
FastRange32(hash_function.template operator()<7>(e), size)}};
252249
}
253250

254251
/** invalid returns a special index that can never be inserted to

src/test/fuzz/golomb_rice.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ uint64_t HashToRange(const std::vector<uint8_t>& element, const uint64_t f)
2525
const uint64_t hash = CSipHasher(0x0706050403020100ULL, 0x0F0E0D0C0B0A0908ULL)
2626
.Write(element.data(), element.size())
2727
.Finalize();
28-
return MapIntoRange(hash, f);
28+
return FastRange64(hash, f);
2929
}
3030

3131
std::vector<uint64_t> BuildHashedSet(const std::unordered_set<std::vector<uint8_t>, ByteVectorHash>& elements, const uint64_t f)

src/util/fastrange.h

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
// Copyright (c) 2018-2020 The Bitcoin Core developers
2+
// Distributed under the MIT software license, see the accompanying
3+
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
4+
5+
#ifndef BITCOIN_UTIL_FASTRANGE_H
6+
#define BITCOIN_UTIL_FASTRANGE_H
7+
8+
#include <cstdint>
9+
10+
/* This file offers implementations of the fast range reduction technique described
11+
* in https://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/
12+
*
13+
* In short, they take an integer x and a range n, and return the upper bits of
14+
* (x * n). If x is uniformly distributed over its domain, the result is as close to
15+
* uniformly distributed over [0, n) as (x mod n) would be, but significantly faster to compute.
16+
*/
17+
18+
/** Fast range reduction with 32-bit input and 32-bit range. */
19+
static inline uint32_t FastRange32(uint32_t x, uint32_t n)
20+
{
21+
return (uint64_t{x} * n) >> 32;
22+
}
23+
24+
/** Fast range reduction with 64-bit input and 64-bit range. */
25+
static inline uint64_t FastRange64(uint64_t x, uint64_t n)
26+
{
27+
#ifdef __SIZEOF_INT128__
28+
return (static_cast<unsigned __int128>(x) * static_cast<unsigned __int128>(n)) >> 64;
29+
#else
30+
// To perform the calculation on 64-bit numbers without losing the
31+
// result to overflow, split the numbers into the most significant and
32+
// least significant 32 bits and perform multiplication piece-wise.
33+
//
34+
// See: https://stackoverflow.com/a/26855440
35+
const uint64_t x_hi = x >> 32;
36+
const uint64_t x_lo = x & 0xFFFFFFFF;
37+
const uint64_t n_hi = n >> 32;
38+
const uint64_t n_lo = n & 0xFFFFFFFF;
39+
40+
const uint64_t ac = x_hi * n_hi;
41+
const uint64_t ad = x_hi * n_lo;
42+
const uint64_t bc = x_lo * n_hi;
43+
const uint64_t bd = x_lo * n_lo;
44+
45+
const uint64_t mid34 = (bd >> 32) + (bc & 0xFFFFFFFF) + (ad & 0xFFFFFFFF);
46+
const uint64_t upper64 = ac + (bc >> 32) + (ad >> 32) + (mid34 >> 32);
47+
return upper64;
48+
#endif
49+
}
50+
51+
#endif // BITCOIN_UTIL_FASTRANGE_H

src/util/golombrice.h

Lines changed: 2 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
#ifndef BITCOIN_UTIL_GOLOMBRICE_H
66
#define BITCOIN_UTIL_GOLOMBRICE_H
77

8+
#include <util/fastrange.h>
9+
810
#include <streams.h>
911

1012
#include <cstdint>
@@ -40,35 +42,4 @@ uint64_t GolombRiceDecode(BitStreamReader<IStream>& bitreader, uint8_t P)
4042
return (q << P) + r;
4143
}
4244

43-
// Map a value x that is uniformly distributed in the range [0, 2^64) to a
44-
// value uniformly distributed in [0, n) by returning the upper 64 bits of
45-
// x * n.
46-
//
47-
// See: https://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/
48-
static inline uint64_t MapIntoRange(uint64_t x, uint64_t n)
49-
{
50-
#ifdef __SIZEOF_INT128__
51-
return (static_cast<unsigned __int128>(x) * static_cast<unsigned __int128>(n)) >> 64;
52-
#else
53-
// To perform the calculation on 64-bit numbers without losing the
54-
// result to overflow, split the numbers into the most significant and
55-
// least significant 32 bits and perform multiplication piece-wise.
56-
//
57-
// See: https://stackoverflow.com/a/26855440
58-
const uint64_t x_hi = x >> 32;
59-
const uint64_t x_lo = x & 0xFFFFFFFF;
60-
const uint64_t n_hi = n >> 32;
61-
const uint64_t n_lo = n & 0xFFFFFFFF;
62-
63-
const uint64_t ac = x_hi * n_hi;
64-
const uint64_t ad = x_hi * n_lo;
65-
const uint64_t bc = x_lo * n_hi;
66-
const uint64_t bd = x_lo * n_lo;
67-
68-
const uint64_t mid34 = (bd >> 32) + (bc & 0xFFFFFFFF) + (ad & 0xFFFFFFFF);
69-
const uint64_t upper64 = ac + (bc >> 32) + (ad >> 32) + (mid34 >> 32);
70-
return upper64;
71-
#endif
72-
}
73-
7445
#endif // BITCOIN_UTIL_GOLOMBRICE_H

0 commit comments

Comments
 (0)