66#include < DataTypes/DataTypeArray.h>
77#include < DataTypes/DataTypeNullable.h>
88#include < DataTypes/DataTypeLowCardinality.h>
9+ #include < libdivide.h>
910
1011
1112namespace DB
@@ -39,7 +40,8 @@ BloomFilter::BloomFilter(const BloomFilterParameters & params)
3940}
4041
4142BloomFilter::BloomFilter (size_t size_, size_t hashes_, size_t seed_)
42- : size(size_), hashes(hashes_), seed(seed_), words((size + sizeof (UnderType) - 1 ) / sizeof (UnderType)), filter(words, 0 )
43+ : size(size_), hashes(hashes_), seed(seed_), words((size + sizeof (UnderType) - 1 ) / sizeof (UnderType)),
44+ modulus (8 * size_), divider(modulus), filter(words, 0 )
4345{
4446 chassert (size != 0 );
4547 chassert (hashes != 0 );
@@ -49,6 +51,8 @@ void BloomFilter::resize(size_t size_)
4951{
5052 size = size_;
5153 words = ((size + sizeof (UnderType) - 1 ) / sizeof (UnderType));
54+ modulus = 8 * size;
55+ divider = libdivide::divider<size_t , libdivide::BRANCHFREE>(modulus);
5256 filter.resize (words);
5357}
5458
@@ -57,11 +61,16 @@ bool BloomFilter::find(const char * data, size_t len)
5761 size_t hash1 = CityHash_v1_0_2::CityHash64WithSeed (data, len, seed);
5862 size_t hash2 = CityHash_v1_0_2::CityHash64WithSeed (data, len, SEED_GEN_A * seed + SEED_GEN_B);
5963
64+ // acc = hash1 + hash2 * i
65+ size_t acc = hash1;
66+
6067 for (size_t i = 0 ; i < hashes; ++i)
6168 {
62- size_t pos = (hash1 + i * hash2 + i * i) % (8 * size);
63- if (!(filter[pos / (8 * sizeof (UnderType))] & (1ULL << (pos % (8 * sizeof (UnderType))))))
69+ // pos = (hash1 + hash2*i + i*i) % (8 * size)
70+ size_t pos = fastMod (acc + i*i);
71+ if (!(filter[pos / word_bits] & (1ULL << (pos % word_bits))))
6472 return false ;
73+ acc += hash2;
6574 }
6675 return true ;
6776}
@@ -71,10 +80,15 @@ void BloomFilter::add(const char * data, size_t len)
7180 size_t hash1 = CityHash_v1_0_2::CityHash64WithSeed (data, len, seed);
7281 size_t hash2 = CityHash_v1_0_2::CityHash64WithSeed (data, len, SEED_GEN_A * seed + SEED_GEN_B);
7382
83+ // acc = hash1 + hash2 * i
84+ size_t acc = hash1;
85+
7486 for (size_t i = 0 ; i < hashes; ++i)
7587 {
76- size_t pos = (hash1 + i * hash2 + i * i) % (8 * size);
77- filter[pos / (8 * sizeof (UnderType))] |= (1ULL << (pos % (8 * sizeof (UnderType))));
88+ // pos = (hash1 + hash2*i + i*i) % (8 * size)
89+ size_t pos = fastMod (acc + i*i);
90+ filter[pos / word_bits] |= (1ULL << (pos % word_bits));
91+ acc += hash2;
7892 }
7993}
8094
@@ -116,14 +130,14 @@ bool operator== (const BloomFilter & a, const BloomFilter & b)
116130
117131void BloomFilter::addHashWithSeed (const UInt64 & hash, const UInt64 & hash_seed)
118132{
119- size_t pos = CityHash_v1_0_2::Hash128to64 (CityHash_v1_0_2::uint128 (hash, hash_seed)) % ( 8 * size );
120- filter[pos / ( 8 * sizeof (UnderType)) ] |= (1ULL << (pos % ( 8 * sizeof (UnderType)) ));
133+ size_t pos = fastMod ( CityHash_v1_0_2::Hash128to64 (CityHash_v1_0_2::uint128 (hash, hash_seed)));
134+ filter[pos / word_bits ] |= (1ULL << (pos % word_bits ));
121135}
122136
123137bool BloomFilter::findHashWithSeed (const UInt64 & hash, const UInt64 & hash_seed)
124138{
125- size_t pos = CityHash_v1_0_2::Hash128to64 (CityHash_v1_0_2::uint128 (hash, hash_seed)) % ( 8 * size );
126- return bool (filter[pos / ( 8 * sizeof (UnderType)) ] & (1ULL << (pos % ( 8 * sizeof (UnderType)) )));
139+ size_t pos = fastMod ( CityHash_v1_0_2::Hash128to64 (CityHash_v1_0_2::uint128 (hash, hash_seed)));
140+ return bool (filter[pos / word_bits ] & (1ULL << (pos % word_bits )));
127141}
128142
129143DataTypePtr BloomFilter::getPrimitiveType (const DataTypePtr & data_type)
0 commit comments