Skip to content

Commit e7ef113

Browse files
committed
Blocked Bloom filter: bulk insert (faster only in the 100m case, and only about 10%)
1 parent d902530 commit e7ef113

File tree

1 file changed

+47
-0
lines changed

1 file changed

+47
-0
lines changed

src/simd-block-fixed-fpp.h

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,10 @@ class SimdBlockFilterFixed {
5656
explicit SimdBlockFilterFixed(const int bits);
5757
~SimdBlockFilterFixed() noexcept;
5858
void Add(const uint64_t key) noexcept;
59+
60+
// Add the items data[start], ..., data[end - 1] to the filter in one batch.
61+
void AddAll(const vector<uint64_t> data, const size_t start, const size_t end);
62+
5963
bool Find(const uint64_t key) const noexcept;
6064
uint64_t SizeInBytes() const { return sizeof(Bucket) * bucketCount; }
6165

@@ -64,6 +68,8 @@ class SimdBlockFilterFixed {
6468
// with 1 single 1-bit set in each 32-bit lane.
6569
static __m256i MakeMask(const uint32_t hash) noexcept;
6670

71+
void ApplyBlock(uint64_t* tmp, int block, int len);
72+
6773
};
6874

6975
template<typename HashFamily>
@@ -123,6 +129,47 @@ SimdBlockFilterFixed<HashFamily>::Add(const uint64_t key) noexcept {
123129
_mm256_store_si256(bucket, _mm256_or_si256(*bucket, mask));
124130
}
125131

132+
// log2 of the scratch capacity of one block used by the bulk-insert path.
constexpr int blockShift = 14;
// Number of uint64_t scratch slots per block (1 << 14 = 16384). Every queued
// key occupies two consecutive slots: its hash and its bucket index.
constexpr int blockLen = 1 << blockShift;
134+
135+
template<typename HashFamily>
136+
void SimdBlockFilterFixed<HashFamily>::ApplyBlock(uint64_t* tmp, int block, int len) {
137+
for (int i = 0; i < len; i += 2) {
138+
uint64_t hash = tmp[(block << blockShift) + i];
139+
uint32_t bucket_idx = tmp[(block << blockShift) + i + 1];
140+
const __m256i mask = MakeMask(hash);
141+
__m256i* const bucket = &reinterpret_cast<__m256i*>(directory_)[bucket_idx];
142+
_mm256_store_si256(bucket, _mm256_or_si256(*bucket, mask));
143+
}
144+
}
145+
146+
template<typename HashFamily>
147+
void SimdBlockFilterFixed<HashFamily>::AddAll(
148+
const vector<uint64_t> keys, const size_t start, const size_t end) {
149+
int blocks = 1 + bucketCount / blockLen;
150+
uint64_t* tmp = new uint64_t[blocks * blockLen];
151+
int* tmpLen = new int[blocks]();
152+
for(size_t i = start; i < end; i++) {
153+
uint64_t key = keys[i];
154+
uint64_t hash = hasher_(key);
155+
uint32_t bucket_idx = reduce(rotl64(hash, 32), bucketCount);
156+
int block = bucket_idx >> blockShift;
157+
int len = tmpLen[block];
158+
tmp[(block << blockShift) + len] = hash;
159+
tmp[(block << blockShift) + len + 1] = bucket_idx;
160+
tmpLen[block] = len + 2;
161+
if (len + 2 == blockLen) {
162+
ApplyBlock(tmp, block, len + 1);
163+
tmpLen[block] = 0;
164+
}
165+
}
166+
for (int block = 0; block < blocks; block++) {
167+
ApplyBlock(tmp, block, tmpLen[block]);
168+
}
169+
delete[] tmp;
170+
delete[] tmpLen;
171+
}
172+
126173
template <typename HashFamily>
127174
[[gnu::always_inline]] inline bool
128175
SimdBlockFilterFixed<HashFamily>::Find(const uint64_t key) const noexcept {

0 commit comments

Comments
 (0)