66#include < DataTypes/DataTypeArray.h>
77#include < DataTypes/DataTypeNullable.h>
88#include < DataTypes/DataTypeLowCardinality.h>
9+ #include < libdivide.h>
910
1011
1112namespace DB
@@ -39,7 +40,8 @@ BloomFilter::BloomFilter(const BloomFilterParameters & params)
3940}
4041
4142BloomFilter::BloomFilter (size_t size_, size_t hashes_, size_t seed_)
42- : size(size_), hashes(hashes_), seed(seed_), words((size + sizeof (UnderType) - 1 ) / sizeof (UnderType)), filter(words, 0 )
43+ : size(size_), hashes(hashes_), seed(seed_), words((size + sizeof (UnderType) - 1 ) / sizeof (UnderType)),
44+ modulus (8 * size_), divider(modulus), filter(words, 0 )
4345{
4446 chassert (size != 0 );
4547 chassert (hashes != 0 );
@@ -49,6 +51,8 @@ void BloomFilter::resize(size_t size_)
4951{
/// Re-derives every size-dependent member from the new size before resizing the underlying storage.
5052 size = size_;
/// Round up so that a trailing partial word is still counted.
5153 words = ((size + sizeof (UnderType) - 1 ) / sizeof (UnderType));
/// Value the bit positions are reduced by (8 * size); the divider below is rebuilt from it.
54+ modulus = 8 * size;
/// NOTE(review): if size_ is 0 then modulus is 0 and the divider is constructed over 0.
/// Nothing in this function guards against that - confirm callers never pass 0.
55+ divider = libdivide::divider<size_t , libdivide::BRANCHFREE>(modulus);
5256 filter.resize (words);
5357}
5458
@@ -57,11 +61,15 @@ bool BloomFilter::find(const char * data, size_t len)
5761 size_t hash1 = CityHash_v1_0_2::CityHash64WithSeed (data, len, seed);
5862 size_t hash2 = CityHash_v1_0_2::CityHash64WithSeed (data, len, SEED_GEN_A * seed + SEED_GEN_B);
5963
64+ size_t acc = hash1;
6065 for (size_t i = 0 ; i < hashes; ++i)
6166 {
62- size_t pos = (hash1 + i * hash2 + i * i) % (8 * size);
63- if (!(filter[pos / (8 * sizeof (UnderType))] & (1ULL << (pos % (8 * sizeof (UnderType))))))
67+ /// `acc` is advanced by hash2 on every iteration, so acc == hash1 + i * hash2 here and the position is:
68+ /// pos = (hash1 + hash2 * i + i * i) % (8 * size)
69+ size_t pos = fastMod (acc + i * i);
70+ if (!(filter[pos / word_bits] & (1ULL << (pos % word_bits))))
6471 return false ;
72+ acc += hash2;
6573 }
6674 return true ;
6775}
@@ -71,10 +79,14 @@ void BloomFilter::add(const char * data, size_t len)
7179 size_t hash1 = CityHash_v1_0_2::CityHash64WithSeed (data, len, seed);
7280 size_t hash2 = CityHash_v1_0_2::CityHash64WithSeed (data, len, SEED_GEN_A * seed + SEED_GEN_B);
7381
82+ size_t acc = hash1;
7483 for (size_t i = 0 ; i < hashes; ++i)
7584 {
76- size_t pos = (hash1 + i * hash2 + i * i) % (8 * size);
77- filter[pos / (8 * sizeof (UnderType))] |= (1ULL << (pos % (8 * sizeof (UnderType))));
85+ /// `acc` is advanced by hash2 on every iteration, so acc == hash1 + i * hash2 here and the position is:
86+ /// pos = (hash1 + hash2 * i + i * i) % (8 * size)
87+ size_t pos = fastMod (acc + i * i);
88+ filter[pos / word_bits] |= (1ULL << (pos % word_bits));
89+ acc += hash2;
7890 }
7991}
8092
@@ -116,14 +128,14 @@ bool operator== (const BloomFilter & a, const BloomFilter & b)
116128
/// Sets one bit of the filter, chosen from the pair (hash, hash_seed).
/// The pair is combined into a single value with CityHash's Hash128to64 and then reduced to a bit position `pos`;
/// `pos / <bits per word>` selects the word in `filter` and `pos % <bits per word>` selects the bit inside it.
/// NOTE(review): the removed lines reduce with `% (8 * size)` and index with `8 * sizeof (UnderType)`, while the
/// added lines use fastMod and word_bits - presumably equivalent; confirm against their definitions.
117129void BloomFilter::addHashWithSeed (const UInt64 & hash, const UInt64 & hash_seed)
118130{
119- size_t pos = CityHash_v1_0_2::Hash128to64 (CityHash_v1_0_2::uint128 (hash, hash_seed)) % ( 8 * size );
120- filter[pos / ( 8 * sizeof (UnderType)) ] |= (1ULL << (pos % ( 8 * sizeof (UnderType)) ));
131+ size_t pos = fastMod ( CityHash_v1_0_2::Hash128to64 (CityHash_v1_0_2::uint128 (hash, hash_seed)));
132+ filter[pos / word_bits ] |= (1ULL << (pos % word_bits ));
121133}
122134
/// Returns true if the bit selected by the pair (hash, hash_seed) is set in the filter, false otherwise.
/// The bit position is derived the same way as in addHashWithSeed: Hash128to64 over the pair, then a reduction
/// to `pos`, which is split into a word index and a bit offset within that word.
/// NOTE(review): the removed lines reduce with `% (8 * size)` and index with `8 * sizeof (UnderType)`, while the
/// added lines use fastMod and word_bits - presumably equivalent; confirm against their definitions.
123135bool BloomFilter::findHashWithSeed (const UInt64 & hash, const UInt64 & hash_seed)
124136{
125- size_t pos = CityHash_v1_0_2::Hash128to64 (CityHash_v1_0_2::uint128 (hash, hash_seed)) % ( 8 * size );
126- return bool (filter[pos / ( 8 * sizeof (UnderType)) ] & (1ULL << (pos % ( 8 * sizeof (UnderType)) )));
137+ size_t pos = fastMod ( CityHash_v1_0_2::Hash128to64 (CityHash_v1_0_2::uint128 (hash, hash_seed)));
138+ return bool (filter[pos / word_bits ] & (1ULL << (pos % word_bits )));
127139}
128140
129141DataTypePtr BloomFilter::getPrimitiveType (const DataTypePtr & data_type)
0 commit comments