Skip to content

Commit 575ca49

Browse files
committed
bloomfilter: use libdivide to compute the bit location
1 parent 120768e commit 575ca49

File tree

2 files changed

+30
-9
lines changed

2 files changed

+30
-9
lines changed

src/Interpreters/BloomFilter.cpp

Lines changed: 23 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,7 @@
66
#include <DataTypes/DataTypeArray.h>
77
#include <DataTypes/DataTypeNullable.h>
88
#include <DataTypes/DataTypeLowCardinality.h>
9+
#include <libdivide.h>
910

1011

1112
namespace DB
@@ -39,7 +40,8 @@ BloomFilter::BloomFilter(const BloomFilterParameters & params)
3940
}
4041

4142
BloomFilter::BloomFilter(size_t size_, size_t hashes_, size_t seed_)
42-
: size(size_), hashes(hashes_), seed(seed_), words((size + sizeof(UnderType) - 1) / sizeof(UnderType)), filter(words, 0)
43+
: size(size_), hashes(hashes_), seed(seed_), words((size + sizeof(UnderType) - 1) / sizeof(UnderType)),
44+
modulus(8 * size_), divider(modulus), filter(words, 0)
4345
{
4446
chassert(size != 0);
4547
chassert(hashes != 0);
@@ -49,6 +51,8 @@ void BloomFilter::resize(size_t size_)
4951
{
5052
size = size_;
5153
words = ((size + sizeof(UnderType) - 1) / sizeof(UnderType));
54+
modulus = 8 * size;
55+
divider = libdivide::divider<size_t, libdivide::BRANCHFREE>(modulus);
5256
filter.resize(words);
5357
}
5458

@@ -57,11 +61,16 @@ bool BloomFilter::find(const char * data, size_t len)
5761
size_t hash1 = CityHash_v1_0_2::CityHash64WithSeed(data, len, seed);
5862
size_t hash2 = CityHash_v1_0_2::CityHash64WithSeed(data, len, SEED_GEN_A * seed + SEED_GEN_B);
5963

64+
// acc = hash1 + hash2 * i
65+
size_t acc = hash1;
66+
6067
for (size_t i = 0; i < hashes; ++i)
6168
{
62-
size_t pos = (hash1 + i * hash2 + i * i) % (8 * size);
63-
if (!(filter[pos / (8 * sizeof(UnderType))] & (1ULL << (pos % (8 * sizeof(UnderType))))))
69+
// pos = (hash1 + hash2*i + i*i) % (8 * size)
70+
size_t pos = fastMod(acc + i*i);
71+
if (!(filter[pos / word_bits] & (1ULL << (pos % word_bits))))
6472
return false;
73+
acc += hash2;
6574
}
6675
return true;
6776
}
@@ -71,10 +80,15 @@ void BloomFilter::add(const char * data, size_t len)
7180
size_t hash1 = CityHash_v1_0_2::CityHash64WithSeed(data, len, seed);
7281
size_t hash2 = CityHash_v1_0_2::CityHash64WithSeed(data, len, SEED_GEN_A * seed + SEED_GEN_B);
7382

83+
// acc = hash1 + hash2 * i
84+
size_t acc = hash1;
85+
7486
for (size_t i = 0; i < hashes; ++i)
7587
{
76-
size_t pos = (hash1 + i * hash2 + i * i) % (8 * size);
77-
filter[pos / (8 * sizeof(UnderType))] |= (1ULL << (pos % (8 * sizeof(UnderType))));
88+
// pos = (hash1 + hash2*i + i*i) % (8 * size)
89+
size_t pos = fastMod(acc + i*i);
90+
filter[pos / word_bits] |= (1ULL << (pos % word_bits));
91+
acc += hash2;
7892
}
7993
}
8094

@@ -116,14 +130,14 @@ bool operator== (const BloomFilter & a, const BloomFilter & b)
116130

117131
/// Sets one bit of the filter, chosen from the pair (hash, hash_seed).
/// The pair is packed into a CityHash uint128 and folded to 64 bits with Hash128to64;
/// that value reduced modulo the filter's bit count (8 * size) is the bit position `pos`.
/// `pos / word_bits` indexes the word inside `filter`, `pos % word_bits` the bit inside that word.
/// In this diff the `-` rows are the previous `%`-based form and the `+` rows the fastMod-based
/// form of the same computation.
void BloomFilter::addHashWithSeed(const UInt64 & hash, const UInt64 & hash_seed)
118132
{
119-
size_t pos = CityHash_v1_0_2::Hash128to64(CityHash_v1_0_2::uint128(hash, hash_seed)) % (8 * size);
120-
filter[pos / (8 * sizeof(UnderType))] |= (1ULL << (pos % (8 * sizeof(UnderType))));
133+
size_t pos = fastMod(CityHash_v1_0_2::Hash128to64(CityHash_v1_0_2::uint128(hash, hash_seed)));
134+
filter[pos / word_bits] |= (1ULL << (pos % word_bits));
121135
}
122136

123137
/// Tests the single bit of the filter chosen from the pair (hash, hash_seed).
/// The bit position is derived exactly as in addHashWithSeed: Hash128to64 over uint128(hash, hash_seed),
/// reduced modulo the filter's bit count (8 * size).
/// Returns true if that bit is set in `filter`, false if it is clear.
/// In this diff the `-` rows are the previous `%`-based form and the `+` rows the fastMod-based
/// form of the same computation.
bool BloomFilter::findHashWithSeed(const UInt64 & hash, const UInt64 & hash_seed)
124138
{
125-
size_t pos = CityHash_v1_0_2::Hash128to64(CityHash_v1_0_2::uint128(hash, hash_seed)) % (8 * size);
126-
return bool(filter[pos / (8 * sizeof(UnderType))] & (1ULL << (pos % (8 * sizeof(UnderType)))));
139+
size_t pos = fastMod(CityHash_v1_0_2::Hash128to64(CityHash_v1_0_2::uint128(hash, hash_seed)));
140+
return bool(filter[pos / word_bits] & (1ULL << (pos % word_bits)));
127141
}
128142

129143
DataTypePtr BloomFilter::getPrimitiveType(const DataTypePtr & data_type)

src/Interpreters/BloomFilter.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
#include <base/types.h>
44
#include <Columns/IColumn_fwd.h>
55
#include <DataTypes/IDataType.h>
6+
#include <libdivide.h>
67

78
#include <vector>
89

@@ -57,12 +58,18 @@ class BloomFilter
5758
friend bool operator== (const BloomFilter & a, const BloomFilter & b);
5859
private:
5960

61+
/// Number of bits in one UnderType word; used to split a bit position into a word index and a bit offset.
static constexpr size_t word_bits = 8 * sizeof(UnderType);
62+
6063
size_t size;
6164
size_t hashes;
6265
size_t seed;
6366
size_t words;
67+
size_t modulus; /// 8 * size, cached for fast modulo.
68+
libdivide::divider<size_t, libdivide::BRANCHFREE> divider; /// Divider for fast modulo by modulus.
6469
Container filter;
6570

71+
/// Returns `value` reduced modulo `modulus`, computed as value - (value / divider) * modulus
/// so that the quotient comes from the precomputed libdivide `divider` instead of a `%` expression.
/// Only meaningful while `divider` was built from the current `modulus`; the constructor and
/// resize() set both together.
inline size_t fastMod(size_t value) const { return value - (value / divider) * modulus; }
72+
6673
public:
6774
static ColumnPtr getPrimitiveColumn(const ColumnPtr & column);
6875
static DataTypePtr getPrimitiveType(const DataTypePtr & data_type);

0 commit comments

Comments
 (0)