Skip to content

Commit e211632

Browse files
authored
Merge branch 'customizations/24.8.14' into backport/24.8.14/58934
2 parents 0693d07 + 5bcf1e6 commit e211632

File tree

2 files changed

+33
-14
lines changed

2 files changed

+33
-14
lines changed

src/Interpreters/BloomFilter.cpp

Lines changed: 21 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
#include <DataTypes/DataTypeArray.h>
77
#include <DataTypes/DataTypeNullable.h>
88
#include <DataTypes/DataTypeLowCardinality.h>
9+
#include <libdivide.h>
910

1011

1112
namespace DB
@@ -39,7 +40,8 @@ BloomFilter::BloomFilter(const BloomFilterParameters & params)
3940
}
4041

4142
BloomFilter::BloomFilter(size_t size_, size_t hashes_, size_t seed_)
42-
: size(size_), hashes(hashes_), seed(seed_), words((size + sizeof(UnderType) - 1) / sizeof(UnderType)), filter(words, 0)
43+
: size(size_), hashes(hashes_), seed(seed_), words((size + sizeof(UnderType) - 1) / sizeof(UnderType)),
44+
modulus(8 * size_), divider(modulus), filter(words, 0)
4345
{
4446
chassert(size != 0);
4547
chassert(hashes != 0);
@@ -49,6 +51,8 @@ void BloomFilter::resize(size_t size_)
4951
{
5052
size = size_;
5153
words = ((size + sizeof(UnderType) - 1) / sizeof(UnderType));
54+
modulus = 8 * size;
55+
divider = libdivide::divider<size_t, libdivide::BRANCHFREE>(modulus);
5256
filter.resize(words);
5357
}
5458

@@ -57,11 +61,15 @@ bool BloomFilter::find(const char * data, size_t len)
5761
size_t hash1 = CityHash_v1_0_2::CityHash64WithSeed(data, len, seed);
5862
size_t hash2 = CityHash_v1_0_2::CityHash64WithSeed(data, len, SEED_GEN_A * seed + SEED_GEN_B);
5963

64+
size_t acc = hash1;
6065
for (size_t i = 0; i < hashes; ++i)
6166
{
62-
size_t pos = (hash1 + i * hash2 + i * i) % (8 * size);
63-
if (!(filter[pos / (8 * sizeof(UnderType))] & (1ULL << (pos % (8 * sizeof(UnderType))))))
67+
/// acc starts at hash1 and grows by hash2 after every iteration, so acc == hash1 + i * hash2 and the position is:
68+
/// pos = (hash1 + hash2 * i + i * i) % (8 * size)
69+
size_t pos = fastMod(acc + i * i);
70+
if (!(filter[pos / word_bits] & (1ULL << (pos % word_bits))))
6471
return false;
72+
acc += hash2;
6573
}
6674
return true;
6775
}
@@ -71,10 +79,14 @@ void BloomFilter::add(const char * data, size_t len)
7179
size_t hash1 = CityHash_v1_0_2::CityHash64WithSeed(data, len, seed);
7280
size_t hash2 = CityHash_v1_0_2::CityHash64WithSeed(data, len, SEED_GEN_A * seed + SEED_GEN_B);
7381

82+
size_t acc = hash1;
7483
for (size_t i = 0; i < hashes; ++i)
7584
{
76-
size_t pos = (hash1 + i * hash2 + i * i) % (8 * size);
77-
filter[pos / (8 * sizeof(UnderType))] |= (1ULL << (pos % (8 * sizeof(UnderType))));
85+
/// acc starts at hash1 and grows by hash2 after every iteration, so acc == hash1 + i * hash2 and the position is:
86+
/// pos = (hash1 + hash2 * i + i * i) % (8 * size)
87+
size_t pos = fastMod(acc + i * i);
88+
filter[pos / word_bits] |= (1ULL << (pos % word_bits));
89+
acc += hash2;
7890
}
7991
}
8092

@@ -111,14 +123,14 @@ bool operator== (const BloomFilter & a, const BloomFilter & b)
111123

112124
/// Sets one bit of the filter for an already-computed hash.
/// `hash` and `hash_seed` are packed into a 128-bit value and folded to 64 bits with Hash128to64;
/// the result is reduced modulo the number of bits in the filter (8 * size) to get the bit position.
/// The bit position is then split into a word index (pos / bits-per-word) and a bit offset inside that word.
/// In this hunk the "-" lines spell the modulo and the word width out inline; the "+" lines use fastMod and word_bits instead.
void BloomFilter::addHashWithSeed(const UInt64 & hash, const UInt64 & hash_seed)
113125
{
114-
size_t pos = CityHash_v1_0_2::Hash128to64(CityHash_v1_0_2::uint128(hash, hash_seed)) % (8 * size);
115-
filter[pos / (8 * sizeof(UnderType))] |= (1ULL << (pos % (8 * sizeof(UnderType))));
126+
size_t pos = fastMod(CityHash_v1_0_2::Hash128to64(CityHash_v1_0_2::uint128(hash, hash_seed)));
127+
filter[pos / word_bits] |= (1ULL << (pos % word_bits));
116128
}
117129

118130
/// Tests the single bit of the filter that corresponds to an already-computed hash.
/// The bit position is derived exactly as in addHashWithSeed: Hash128to64 over (hash, hash_seed),
/// reduced modulo the number of bits in the filter (8 * size).
/// Returns true if that bit is set, false otherwise.
/// In this hunk the "-" lines spell the modulo and the word width out inline; the "+" lines use fastMod and word_bits instead.
bool BloomFilter::findHashWithSeed(const UInt64 & hash, const UInt64 & hash_seed)
119131
{
120-
size_t pos = CityHash_v1_0_2::Hash128to64(CityHash_v1_0_2::uint128(hash, hash_seed)) % (8 * size);
121-
return bool(filter[pos / (8 * sizeof(UnderType))] & (1ULL << (pos % (8 * sizeof(UnderType)))));
132+
size_t pos = fastMod(CityHash_v1_0_2::Hash128to64(CityHash_v1_0_2::uint128(hash, hash_seed)));
133+
return bool(filter[pos / word_bits] & (1ULL << (pos % word_bits)));
122134
}
123135

124136
DataTypePtr BloomFilter::getPrimitiveType(const DataTypePtr & data_type)

src/Interpreters/BloomFilter.h

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,14 @@
11
#pragma once
22

3-
#include <vector>
43
#include <base/types.h>
5-
#include <Core/Field.h>
6-
#include <Common/PODArray.h>
7-
#include <Common/Allocator.h>
84
#include <Columns/IColumn.h>
9-
#include <Columns/ColumnVector.h>
105
#include <DataTypes/IDataType.h>
6+
#include <libdivide.h>
7+
8+
//#include <vector>
9+
//#include <Common/PODArray.h>
10+
//#include <Common/Allocator.h>
11+
//#include <Columns/ColumnVector.h>
1112

1213

1314
namespace DB
@@ -58,12 +59,18 @@ class BloomFilter
5859
friend bool operator== (const BloomFilter & a, const BloomFilter & b);
5960
private:
6061

62+
static constexpr size_t word_bits = 8 * sizeof(UnderType);
63+
6164
size_t size;
6265
size_t hashes;
6366
size_t seed;
6467
size_t words;
68+
size_t modulus; /// 8 * size, cached for fast modulo.
69+
libdivide::divider<size_t, libdivide::BRANCHFREE> divider; /// Divider for fast modulo by modulus.
6570
Container filter;
6671

72+
/// Returns the remainder of `value` divided by `modulus` (i.e. value % modulus).
/// The quotient is taken through the precomputed libdivide `divider`, and the remainder is
/// recovered as value - quotient * modulus.
/// NOTE(review): correct only while `divider` was built from the current `modulus`; confirm every
/// code path that changes `size` rebuilds both.
inline size_t fastMod(size_t value) const { return value - (value / divider) * modulus; }
73+
6774
public:
6875
static ColumnPtr getPrimitiveColumn(const ColumnPtr & column);
6976
static DataTypePtr getPrimitiveType(const DataTypePtr & data_type);

0 commit comments

Comments
 (0)