Skip to content

Commit 60a7c50

Browse files
committed
Speed up Bloom filter construction (a bit better)
1 parent c9a5876 commit 60a7c50

File tree

1 file changed

+16
-18
lines changed

1 file changed

+16
-18
lines changed

src/bloom.h

Lines changed: 16 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -95,41 +95,39 @@ Status BloomFilter<ItemType, bits_per_item, HashFamily, k>::Add(
9595
return Ok;
9696
}
9797

98-
#define BLOCK_SHIFT 18
99-
#define BLOCK_LEN (1 << BLOCK_SHIFT)
100-
101-
void applyBlock(uint64_t* tmp, int block, int len, uint64_t *data) {
102-
for (int i = 0; i < len; i += 2) {
103-
int index = tmp[(block << BLOCK_SHIFT) + i];
104-
uint64_t bits = tmp[(block << BLOCK_SHIFT) + i + 1];
105-
data[index] |= bits;
98+
const int blockShift = 15;
99+
const int blockLen = 1 << blockShift;
100+
101+
void applyBlock(uint32_t* tmp, int block, int len, uint64_t *data) {
102+
for (int i = 0; i < len; i++) {
103+
uint32_t index = tmp[(block << blockShift) + i];
104+
data[index >> 6] |= getBit(index);
106105
}
107106
}
108107

109108
template <typename ItemType, size_t bits_per_item,
110109
typename HashFamily, int k>
111110
Status BloomFilter<ItemType, bits_per_item, HashFamily, k>::AddAll(
112111
const vector<ItemType> keys, const size_t start, const size_t end) {
113-
int blocks = 1 + arrayLength / BLOCK_LEN;
114-
uint64_t* tmp = new uint64_t[blocks * BLOCK_LEN];
112+
int blocks = 1 + arrayLength / blockLen;
113+
uint32_t* tmp = new uint32_t[blocks * blockLen];
115114
int* tmpLen = new int[blocks]();
116115
for(size_t i = start; i < end; i++) {
117116
uint64_t key = keys[i];
118117
uint64_t hash = hasher(key);
119118
uint32_t a = (uint32_t) (hash >> 32);
120-
uint32_t bb = (uint32_t) hash;
119+
uint32_t b = (uint32_t) hash;
121120
for (int j = 0; j < k; j++) {
122121
int index = reduce(a, this->arrayLength);
123-
int block = index >> BLOCK_SHIFT;
122+
int block = index >> blockShift;
124123
int len = tmpLen[block];
125-
tmp[(block << BLOCK_SHIFT) + len] = index;
126-
tmp[(block << BLOCK_SHIFT) + len + 1] = getBit(a);
127-
tmpLen[block] = len + 2;
128-
if (len + 2 == BLOCK_LEN) {
129-
applyBlock(tmp, block, len + 2, data);
124+
tmp[(block << blockShift) + len] = (index << 6) + (a & 63);
125+
tmpLen[block] = len + 1;
126+
if (len + 1 == blockLen) {
127+
applyBlock(tmp, block, len + 1, data);
130128
tmpLen[block] = 0;
131129
}
132-
a += bb;
130+
a += b;
133131
}
134132
}
135133
for (int block = 0; block < blocks; block++) {

0 commit comments

Comments
 (0)