Skip to content

Commit f17032f

Browse files
committed
Merge #7934: Improve rolling bloom filter performance and benchmark
1953c40 More efficient bitsliced rolling Bloom filter (Pieter Wuille)
aa62b68 Benchmark rolling bloom filter (Pieter Wuille)
2 parents fbd8478 + 1953c40 commit f17032f

File tree

5 files changed

+77
-27
lines changed

5 files changed

+77
-27
lines changed

src/Makefile.bench.include

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,8 @@ bench_bench_bitcoin_SOURCES = \
77
bench/bench_bitcoin.cpp \
88
bench/bench.cpp \
99
bench/bench.h \
10-
bench/Examples.cpp
10+
bench/Examples.cpp \
11+
bench/rollingbloom.cpp
1112

1213
# Preprocessor flags for bench_bitcoin; the trailing -I adds the bench build directory to the include path.
bench_bench_bitcoin_CPPFLAGS = $(AM_CPPFLAGS) $(BITCOIN_INCLUDES) $(EVENT_CFLAGS) $(EVENT_PTHREADS_CFLAGS) -I$(builddir)/bench/
1314
bench_bench_bitcoin_CXXFLAGS = $(AM_CXXFLAGS) $(PIE_FLAGS)

src/bench/rollingbloom.cpp

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
// Copyright (c) 2016 The Bitcoin Core developers
2+
// Distributed under the MIT software license, see the accompanying
3+
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
4+
5+
#include <iostream>
6+
7+
#include "bench.h"
8+
#include "bloom.h"
9+
#include "utiltime.h"
10+
11+
// Benchmark body for CRollingBloomFilter. Each iteration inserts one 32-byte
// key whose first four bytes encode a running counter, then looks up a second
// key built from the same counter with those four bytes in reverse order.
// Whenever countnow reaches nEntriesPerGeneration, that single insert is timed
// on its own and reported on stdout as a "RollingBloom-refresh" line.
// NOTE(review): presumably the timed insert is the one that makes the filter
// roll over to a new generation -- confirm against CRollingBloomFilter::insert.
static void RollingBloom(benchmark::State& state)
12+
{
13+
CRollingBloomFilter filter(120000, 0.000001);
14+
std::vector<unsigned char> data(32); // 32-byte key; only bytes 0..3 are ever rewritten
15+
uint32_t count = 0;
16+
uint32_t nEntriesPerGeneration = (120000 + 1) / 2; // == 60000; NOTE(review): presumably mirrors the filter's own per-generation limit -- confirm in bloom.cpp
17+
uint32_t countnow = 0; // inserts since the last timed insert
18+
uint64_t match = 0; // running sum of contains() results; never read after the loop
19+
while (state.KeepRunning()) {
20+
count++;
21+
data[0] = count; // bytes 0..3 = count, least-significant byte first (implicitly truncated to unsigned char)
22+
data[1] = count >> 8;
23+
data[2] = count >> 16;
24+
data[3] = count >> 24;
25+
if (countnow == nEntriesPerGeneration) {
26+
int64_t b = GetTimeMicros();
27+
filter.insert(data);
28+
int64_t e = GetTimeMicros();
29+
std::cout << "RollingBloom-refresh,1," << (e-b)*0.000001 << "," << (e-b)*0.000001 << "," << (e-b)*0.000001 << "\n"; // same elapsed value printed three times, scaled by 1e-6 (microseconds to seconds, going by GetTimeMicros' name)
30+
countnow = 0;
31+
} else {
32+
filter.insert(data);
33+
}
34+
countnow++;
35+
data[0] = count >> 24; // bytes 0..3 = count, most-significant byte first, so the lookup key generally differs from the one just inserted
36+
data[1] = count >> 16;
37+
data[2] = count >> 8;
38+
data[3] = count;
39+
match += filter.contains(data);
40+
}
41+
}
42+
43+
BENCHMARK(RollingBloom);

src/bloom.cpp

Lines changed: 27 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -234,14 +234,18 @@ CRollingBloomFilter::CRollingBloomFilter(unsigned int nElements, double fpRate)
234234
*/
235235
uint32_t nFilterBits = (uint32_t)ceil(-1.0 * nHashFuncs * nMaxElements / log(1.0 - exp(logFpRate / nHashFuncs)));
236236
data.clear();
237-
/* We store up to 16 'bits' per data element. */
238-
data.resize((nFilterBits + 15) / 16);
237+
/* For each data element we need to store 2 bits. If both bits are 0, the
238+
* bit is treated as unset. If the bits are (01), (10), or (11), the bit is
239+
* treated as set in generation 1, 2, or 3 respectively.
240+
* These bits are stored in separate integers: position P corresponds to bit
241+
* (P & 63) of the integers data[(P >> 6) * 2] and data[(P >> 6) * 2 + 1]. */
242+
data.resize(((nFilterBits + 63) / 64) << 1);
239243
reset();
240244
}
241245

242246
/* Similar to CBloomFilter::Hash */
243-
inline unsigned int CRollingBloomFilter::Hash(unsigned int nHashNum, const std::vector<unsigned char>& vDataToHash) const {
244-
return MurmurHash3(nHashNum * 0xFBA4C795 + nTweak, vDataToHash) % (data.size() * 16);
247+
/* Returns the MurmurHash3 value of vDataToHash, passing
 * nHashNum * 0xFBA4C795 + nTweak as MurmurHash3's first argument.
 * Unlike the removed member Hash(), no modulo by the table size is applied
 * here; insert() and contains() split the result themselves into a bit index
 * (h & 0x3F) and a word position ((h >> 6) % data.size()). */
static inline uint32_t RollingBloomHash(unsigned int nHashNum, uint32_t nTweak, const std::vector<unsigned char>& vDataToHash) {
248+
return MurmurHash3(nHashNum * 0xFBA4C795 + nTweak, vDataToHash);
245249
}
246250

247251
void CRollingBloomFilter::insert(const std::vector<unsigned char>& vKey)
@@ -252,18 +256,25 @@ void CRollingBloomFilter::insert(const std::vector<unsigned char>& vKey)
252256
if (nGeneration == 4) {
253257
nGeneration = 1;
254258
}
259+
uint64_t nGenerationMask1 = -(uint64_t)(nGeneration & 1);
260+
uint64_t nGenerationMask2 = -(uint64_t)(nGeneration >> 1);
255261
/* Wipe old entries that used this generation number. */
256-
for (uint32_t p = 0; p < data.size() * 16; p++) {
257-
if (get(p) == nGeneration) {
258-
put(p, 0);
259-
}
262+
for (uint32_t p = 0; p < data.size(); p += 2) {
263+
uint64_t p1 = data[p], p2 = data[p + 1];
264+
uint64_t mask = (p1 ^ nGenerationMask1) | (p2 ^ nGenerationMask2);
265+
data[p] = p1 & mask;
266+
data[p + 1] = p2 & mask;
260267
}
261268
}
262269
nEntriesThisGeneration++;
263270

264271
for (int n = 0; n < nHashFuncs; n++) {
265-
uint32_t h = Hash(n, vKey);
266-
put(h, nGeneration);
272+
uint32_t h = RollingBloomHash(n, nTweak, vKey);
273+
int bit = h & 0x3F;
274+
uint32_t pos = (h >> 6) % data.size();
275+
/* The lowest bit of pos is ignored, and set to zero for the first bit, and to one for the second. */
276+
data[pos & ~1] = (data[pos & ~1] & ~(((uint64_t)1) << bit)) | ((uint64_t)(nGeneration & 1)) << bit;
277+
data[pos | 1] = (data[pos | 1] & ~(((uint64_t)1) << bit)) | ((uint64_t)(nGeneration >> 1)) << bit;
267278
}
268279
}
269280

@@ -276,8 +287,11 @@ void CRollingBloomFilter::insert(const uint256& hash)
276287
bool CRollingBloomFilter::contains(const std::vector<unsigned char>& vKey) const
277288
{
278289
for (int n = 0; n < nHashFuncs; n++) {
279-
uint32_t h = Hash(n, vKey);
280-
if (get(h) == 0) {
290+
uint32_t h = RollingBloomHash(n, nTweak, vKey);
291+
int bit = h & 0x3F;
292+
uint32_t pos = (h >> 6) % data.size();
293+
/* If the relevant bit is not set in either data[pos & ~1] or data[pos | 1], the filter does not contain vKey */
294+
if (!(((data[pos & ~1] | data[pos | 1]) >> bit) & 1)) {
281295
return false;
282296
}
283297
}
@@ -295,7 +309,7 @@ void CRollingBloomFilter::reset()
295309
nTweak = GetRand(std::numeric_limits<unsigned int>::max());
296310
nEntriesThisGeneration = 0;
297311
nGeneration = 1;
298-
for (std::vector<uint32_t>::iterator it = data.begin(); it != data.end(); it++) {
312+
for (std::vector<uint64_t>::iterator it = data.begin(); it != data.end(); it++) {
299313
*it = 0;
300314
}
301315
}

src/bloom.h

Lines changed: 1 addition & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -135,20 +135,9 @@ class CRollingBloomFilter
135135
int nEntriesPerGeneration;
136136
int nEntriesThisGeneration;
137137
int nGeneration;
138-
std::vector<uint32_t> data;
138+
std::vector<uint64_t> data;
139139
unsigned int nTweak;
140140
int nHashFuncs;
141-
142-
unsigned int Hash(unsigned int nHashNum, const std::vector<unsigned char>& vDataToHash) const;
143-
144-
inline int get(uint32_t position) const {
145-
return (data[(position >> 4) % data.size()] >> (2 * (position & 0xF))) & 0x3;
146-
}
147-
148-
inline void put(uint32_t position, uint32_t val) {
149-
uint32_t& cell = data[(position >> 4) % data.size()];
150-
cell = (cell & ~(((uint32_t)3) << (2 * (position & 0xF)))) | (val << (2 * (position & 0xF)));
151-
}
152141
};
153142

154143
#endif // BITCOIN_BLOOM_H

src/test/bloom_tests.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -514,11 +514,14 @@ BOOST_AUTO_TEST_CASE(rolling_bloom)
514514
if (i >= 100)
515515
BOOST_CHECK(rb1.contains(data[i-100]));
516516
rb1.insert(data[i]);
517+
BOOST_CHECK(rb1.contains(data[i]));
517518
}
518519

519520
// Insert 999 more random entries:
520521
for (int i = 0; i < 999; i++) {
521-
rb1.insert(RandomData());
522+
std::vector<unsigned char> d = RandomData();
523+
rb1.insert(d);
524+
BOOST_CHECK(rb1.contains(d));
522525
}
523526
// Sanity check to make sure the filter isn't just filling up:
524527
nHits = 0;

0 commit comments

Comments
 (0)