Skip to content

Commit 19e7cb5

Browse files
Merge pull request #29 from TomerEven/master
Adding the Prefix Filter
2 parents 1d5cd72 + 3d59764 commit 19e7cb5

File tree

7 files changed

+1730
-10
lines changed

7 files changed

+1730
-10
lines changed

README.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ The filter implementations are in `src/<type>/`. Most implementations depend on
8686

8787
- [Binary Fuse Filters: Fast and Smaller Than Xor Filters](http://arxiv.org/abs/2201.01174), Journal of Experimental Algorithmics (to appear).
8888
- [Xor Filters: Faster and Smaller Than Bloom and Cuckoo Filters](https://arxiv.org/abs/1912.08258), Journal of Experimental Algorithmics 25 (1), 2020
89-
89+
- [Prefix Filter: Practically and Theoretically Better Than Bloom](https://arxiv.org/abs/2203.17139), PVLDB 15(7), 2022.
9090

9191
## Credit
9292

@@ -96,6 +96,7 @@ The Morton filter is from https://github.com/AMDComputeLibraries/morton_filter.
9696
The counting quotient filter (CQF) is from https://github.com/splatlab/cqf.
9797
The vector quotient filter is from https://github.com/splatlab/vqf.
9898
The ribbon filters are from https://github.com/pdillinger/fastfilter_cpp.
99+
The prefix filter is from https://github.com/TomerEven/Prefix-Filter.
99100

100101

101102
# Implementations of xor and binary fuse filters in other programming languages

benchmarks/bulk-insert-and-query.cc

Lines changed: 39 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -359,6 +359,12 @@ int main(int argc, char * argv[]) {
359359
// CQF + VQF
360360
{30,"CQF"},
361361
{31,"VQF"},
362+
// TwoChoicer
363+
{32,"TwoChoice"},
364+
// Prefix
365+
{35,"PF[TC]"},
366+
{36,"PF[CF-12-Flex]"},
367+
{37,"PF[BBF-Flex]"},
362368
#endif
363369
// Bloom
364370
{40, "Bloom8"}, {41, "Bloom12" }, {42, "Bloom16"},
@@ -732,11 +738,42 @@ int main(int argc, char * argv[]) {
732738
}
733739
a = 31;
734740
if (algorithmId == a || algorithmId < 0 || (algos.find(a) != algos.end())) {
735-
auto cf = FilterBenchmark<
741+
auto cf = FilterBenchmark<
736742
VQFilter<uint64_t, SimpleMixSplit>>(
737743
add_count, to_add, intersectionsize, mixed_sets, true, false);
738744
cout << setw(NAME_WIDTH) << names[a] << cf << endl;
739-
}
745+
}
746+
#endif
747+
#if __PF_AVX512__
748+
a = 32;
749+
if (algorithmId == a || algorithmId < 0 || (algos.find(a) != algos.end())) {
750+
auto cf = FilterBenchmark<
751+
TC_shortcut<SimpleMixSplit>>(
752+
add_count, to_add, intersectionsize, mixed_sets, false, false /* set to true to support deletions. */);
753+
cout << setw(NAME_WIDTH) << names[a] << cf << endl;
754+
}
755+
// Prefix ---------------------------------------------------------
756+
a = 35;
757+
if (algorithmId == a || algorithmId < 0 || (algos.find(a) != algos.end())) {
758+
auto cf = FilterBenchmark<
759+
Prefix_Filter<TC_shortcut<SimpleMixSplit>>>(
760+
add_count, to_add, intersectionsize, mixed_sets, false, false);
761+
cout << setw(NAME_WIDTH) << names[a] << cf << endl;
762+
}
763+
a = 36;
764+
if (algorithmId == a || algorithmId < 0 || (algos.find(a) != algos.end())) {
765+
auto cf = FilterBenchmark<
766+
Prefix_Filter<CuckooFilterStable<uint64_t, 12, SingleTable, SimpleMixSplit>>>(
767+
add_count, to_add, intersectionsize, mixed_sets, false, false);
768+
cout << setw(NAME_WIDTH) << names[a] << cf << endl;
769+
}
770+
a = 37;
771+
if (algorithmId == a || algorithmId < 0 || (algos.find(a) != algos.end())) {
772+
auto cf = FilterBenchmark<
773+
Prefix_Filter<SimdBlockFilterFixed<SimpleMixSplit>>>(
774+
add_count, to_add, intersectionsize, mixed_sets, false, false);
775+
cout << setw(NAME_WIDTH) << names[a] << cf << endl;
776+
}
740777
#endif
741778

742779
// Bloom ----------------------------------------------------------

benchmarks/filterapi.h

Lines changed: 244 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,11 @@
2727
#include "vqf_cpp.h"
2828
#include "simd-block.h"
2929
#endif
30+
// Feature gate for the AVX-512 based filters (TC_shortcut and Prefix_Filter) below.
// The four feature-test macros are combined with bitwise '&', so when this macro is
// used in an #if the result is non-zero only if every one of them is defined with a
// value sharing a set bit (compilers normally define each as 1). Identifiers that are
// still undefined when the #if is evaluated are treated as 0 by the preprocessor,
// which makes the whole expression 0.
#define __PF_AVX512__ (__AVX512BW__ & __AVX512VL__ & __AVX512CD__ & __AVX512DQ__)
31+
#if __PF_AVX512__
32+
#include "prefix/min_pd256.hpp"
33+
#include "tc-shortcut/tc-shortcut.hpp"
34+
#endif
3035
#include "simd-block-fixed-fpp.h"
3136
#include "ribbon_impl.h"
3237

@@ -206,6 +211,244 @@ struct FilterAPI<SimdBlockFilterFixed<HashFamily>> {
206211
return table->Find(key);
207212
}
208213
};
214+
215+
#endif
216+
#if __PF_AVX512__
217+
// FilterAPI specialization that adapts TC_shortcut<HashFamily> to the static
// ConstructFromAddCount / Add / AddAll / Remove / Contain interface used by the
// benchmark. Every operation forwards to a member function of the table.
template<typename HashFamily>
218+
struct FilterAPI<TC_shortcut<HashFamily>> {
219+
using Table = TC_shortcut<HashFamily>;
220+
221+
// Builds a table from the requested item count and a fixed second constructor
// argument of .935 (named `load` here; presumably the target load factor --
// confirm against the TC_shortcut constructor).
static Table ConstructFromAddCount(size_t add_count) {
222+
constexpr float load = .935;
223+
return Table(add_count, load);
224+
}
225+
// Inserts `key`. If table->insert() returns false, prints table->info() to
// stdout and throws std::logic_error carrying the table name.
static void Add(uint64_t key, Table *table) {
226+
if (!table->insert(key)) {
227+
std::cout << table->info() << std::endl;
228+
throw std::logic_error(table->get_name() + " is too small to hold all of the elements");
229+
}
230+
}
// Inserts keys[start] .. keys[end - 1] one at a time through Add(), so the first
// failed insertion throws and stops the loop.
231+
static void AddAll(const vector<uint64_t>& keys, const size_t start, const size_t end, Table* table) {
232+
for(size_t i = start; i < end; i++) { Add(keys[i],table); }
233+
}
234+
235+
// Non-throwing variant of Add(): returns true when insert() succeeds. On failure
// it prints the table's effective load and info() to stdout and returns false.
static bool Add_attempt(uint64_t key, Table *table) {
236+
if (!table->insert(key)) {
237+
std::cout << "load when failed: \t" << table->get_effective_load() << std::endl;
238+
std::cout << table->info() << std::endl;
239+
return false;
240+
}
241+
return true;
242+
}
243+
244+
245+
// Forwards to table->remove(key); any result of remove() is discarded.
static void Remove(uint64_t key, Table *table) {
246+
table->remove(key);
247+
}
248+
// Membership query: returns table->lookup(key) converted to bool.
CONTAIN_ATTRIBUTES static bool Contain(uint64_t key, const Table *table){
249+
return table->lookup(key);
250+
}
251+
};
252+
253+
254+
255+
// Generic sizing rule for the second-level ("spare") filter of Prefix_Filter.
// Returns (l1_items * overflowing_items_ratio) / loads[1], truncated toward zero
// when converted to size_t. loads[0] is not read. The template parameter is not
// used in the body; it only selects between this version and the explicit
// specializations declared for specific spare types.
template<typename Table>
256+
inline size_t get_l2_slots(size_t l1_items, const double overflowing_items_ratio, const float loads[2]) {
257+
const double expected_items_reaching_next_level = l1_items * overflowing_items_ratio;
258+
size_t slots_in_l2 = (expected_items_reaching_next_level / loads[1]);
259+
return slots_in_l2;
260+
}
261+
262+
// Spare sizing for a 12-bit CuckooFilterStable spare.
// Both overflowing_items_ratio and loads are ignored: the result is
// l1_items * (1.08 * 0.0586 / 0.94), truncated toward zero by the conversion to
// size_t. expected_items100 is declared but never used.
// NOTE(review): the constant names suggest 0.0586 is the expected overflow fraction
// at 95% first-level load and 1.08 a safety margin -- confirm with the authors.
template<>
263+
inline size_t get_l2_slots<cuckoofilter::CuckooFilterStable<u64, 12>>(size_t l1_items, const double overflowing_items_ratio, const float loads[2]) {
264+
constexpr auto expected_items100 = 0.07952;
265+
constexpr auto expected_items95 = 0.0586;
266+
constexpr auto spare_workload = 0.94;
267+
constexpr auto safety = 1.08;
268+
constexpr auto factor95 = safety * expected_items95 / spare_workload;
269+
const double expected_items_reaching_next_level = l1_items * factor95;
270+
return expected_items_reaching_next_level;
271+
}
272+
273+
// Spare sizing for a TC_shortcut spare.
// Both overflowing_items_ratio and loads are ignored: the result is
// ceil(l1_items * (1.08 * 0.0586 / 0.935)). The divisor 0.935 has the same value as
// the load constant used by FilterAPI<TC_shortcut>::ConstructFromAddCount.
// expected_items100 is declared but never used.
template<>
274+
inline size_t get_l2_slots<TC_shortcut<>>(size_t l1_items, const double overflowing_items_ratio, const float loads[2]) {
275+
constexpr auto expected_items100 = 0.07952;
276+
constexpr auto safety = 1.08;
277+
constexpr auto expected_items95 = 0.0586;
278+
constexpr auto spare_workload = 0.935;
279+
constexpr auto factor95 = safety * expected_items95 / spare_workload;
280+
const double expected_items_reaching_next_level = l1_items * factor95;
281+
size_t slots_in_l2 = std::ceil(expected_items_reaching_next_level);
282+
return slots_in_l2;
283+
}
284+
285+
286+
// Spare sizing for a SimdBlockFilter spare: the generic estimate
// (l1_items * overflowing_items_ratio) / loads[1], truncated to size_t, and then
// multiplied by 4.
// NOTE(review): the reason for the factor 4 is not stated in this file -- confirm.
template<>
287+
inline size_t get_l2_slots<SimdBlockFilter<>>(size_t l1_items, const double overflowing_items_ratio, const float loads[2]) {
288+
const double expected_items_reaching_next_level = l1_items * overflowing_items_ratio;
289+
size_t slots_in_l2 = (expected_items_reaching_next_level / loads[1]);
290+
return slots_in_l2 * 4;
291+
}
292+
293+
// Spare sizing for a SimdBlockFilterFixed spare: the generic estimate
// (l1_items * overflowing_items_ratio) / loads[1], truncated to size_t, and then
// multiplied by 2.
// NOTE(review): the reason for the factor 2 is not stated in this file -- confirm.
template<>
294+
inline size_t get_l2_slots<SimdBlockFilterFixed<>>(size_t l1_items, const double overflowing_items_ratio, const float loads[2]) {
295+
const double expected_items_reaching_next_level = l1_items * overflowing_items_ratio;
296+
size_t slots_in_l2 = (expected_items_reaching_next_level / loads[1]);
297+
return slots_in_l2 * 2;
298+
}
299+
300+
301+
// Two-level approximate-membership filter.
//
// Level 1 is an array of `number_of_pd` 32-byte __m256i entries ("PDs"), each
// manipulated through the min_pd:: helpers. Level 2 is a single filter of type
// `Table` (GenSpare) accessed through FilterAPI<Table>. An item is hashed to one PD;
// if that PD's header does not have bit 55 set the item is stored in the PD,
// otherwise min_pd::new_pd_swap_short() is called and the (quot, rem) pair it
// returns is inserted into the spare.
//
// NOTE(review): the HashFamily template parameter and the `Hasher` member are not
// used anywhere in this class; all hashing goes through `H0`.
// NOTE(review): the destructor frees pd_array but no copy/move operations are
// declared, so an actual copy of this object would free the same buffer twice.
// Returning a prvalue from ConstructFromAddCount is safe only where copy elision
// applies -- confirm the benchmark never copies a Prefix_Filter.
template<typename Table, typename HashFamily = hashing::TwoIndependentMultiplyShift>
302+
class Prefix_Filter {
303+
// max_items as passed to the constructor; not read again inside this class.
const size_t filter_max_capacity;
304+
// Number of 32-byte entries allocated in pd_array.
const size_t number_of_pd;
305+
// cap[0]: insertions that went into a PD; cap[1]: insertions forwarded to GenSpare.
size_t cap[2] = {0};
306+
307+
hashing::TwoIndependentMultiplyShift Hasher, H0;
308+
// First-level storage; allocated with posix_memalign, released with free().
__m256i *pd_array;
309+
// Second-level ("spare") filter.
Table GenSpare;
310+
311+
static double constexpr overflowing_items_ratio = 0.0586;// = expected_items95
312+
313+
public:
314+
// Sizes and allocates both levels.
//   max_items: number of items the filter is sized for.
//   loads:     loads[0] scales the first level: number_of_pd is
//              ceil(max_items / (min_pd::MAX_CAP0 * loads[0])). The whole array is
//              also handed to get_l2_slots<Table>(), which sizes the spare.
// Members are initialized in declaration order (Hasher and H0 before GenSpare),
// not in the order written in the initializer list below.
// If the aligned allocation fails, a message is printed and the process exits
// with status -3 (after assert(false) when assertions are enabled).
Prefix_Filter(size_t max_items, const float loads[2])
315+
: filter_max_capacity(max_items),
316+
number_of_pd(std::ceil(1.0 * max_items / (min_pd::MAX_CAP0 * loads[0]))),
317+
GenSpare(FilterAPI<Table>::ConstructFromAddCount(get_l2_slots<Table>(max_items, overflowing_items_ratio, loads))),
318+
Hasher(), H0() {
319+
320+
// 32-byte alignment, 32 bytes per PD.
int ok = posix_memalign((void **) &pd_array, 32, 32 * number_of_pd);
321+
if (ok != 0) {
322+
std::cout << "Space allocation failed!" << std::endl;
323+
assert(false);
324+
exit(-3);
325+
}
326+
327+
// Initial first 64-bit word of every PD: the value 32 in the low 6 bits with
// min_pd::QUOTS one-bits directly above them; the other three words are zero.
// NOTE(review): presumably this encodes an empty PD -- confirm in min_pd256.hpp.
constexpr uint64_t pd256_plus_init_header = (((INT64_C(1) << min_pd::QUOTS) - 1) << 6) | 32;
328+
for (size_t i = 0; i < number_of_pd; i++){
329+
pd_array[i] = __m256i{pd256_plus_init_header, 0, 0, 0};
330+
}
331+
332+
}
333+
334+
// Releases the first-level array; GenSpare is destroyed by its own destructor.
~Prefix_Filter() {
335+
free(pd_array);
336+
}
337+
338+
// Maps a 32-bit hash to the range [0, n) with a multiply and shift instead of '%'.
__attribute__((always_inline)) inline static constexpr uint32_t reduce32(uint32_t hash, uint32_t n) {
339+
// http://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/
340+
return (uint32_t) (((uint64_t) hash * n) >> 32);
341+
}
342+
343+
344+
// Maps a 16-bit hash to the range [0, 6400) with the same multiply-shift trick.
// 6400 == 25 * 256, so for the result qr, (qr >> 8) lies in [0, 24] and the low
// byte is a full 8-bit value. Callers pass a uint32_t, which is truncated to its
// low 16 bits by the parameter type.
__attribute__((always_inline)) inline static constexpr uint16_t fixed_reduce(uint16_t hash) {
345+
// http://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/
346+
return (uint16_t) (((uint32_t) hash * 6400) >> 16);
347+
}
348+
349+
350+
// Membership query. Hashes `item` with H0; the high 32 bits of the hash select
// the PD and the low bits yield qr = (quot << 8) | rem. When
// min_pd::cmp_qr1(qr, pd) is false the PD itself is searched with
// min_pd::find_core(); otherwise the spare is queried.
// NOTE(review): presumably cmp_qr1 reports that qr falls in the range the PD has
// already evicted to the spare -- confirm in min_pd256.hpp.
inline auto Find(const u64 &item) const -> bool {
351+
const u64 s = H0(item);
352+
uint32_t out1 = s >> 32u, out2 = s;
353+
const uint32_t pd_index = reduce32(out1, (uint32_t) number_of_pd);
354+
const uint16_t qr = fixed_reduce(out2);
355+
const int64_t quot = qr >> 8;
356+
const uint8_t rem = qr;
357+
// return min_pd::pd_find_25(quot, rem, &pd_array[pd_index]);
358+
// return (!min_pd::cmp_qr1(qr, &pd_array[pd_index])) ? min_pd::pd_find_25(quot, rem, &pd_array[pd_index])
359+
return (!min_pd::cmp_qr1(qr, &pd_array[pd_index])) ? min_pd::find_core(quot, rem, &pd_array[pd_index])
360+
: incSpare_lookup(pd_index, qr);
361+
}
362+
363+
// Queries the spare with the key (pd_index << 13) | qr. A qr produced by
// fixed_reduce() is below 6400 < 2^13, so it does not overlap the shifted index.
inline auto incSpare_lookup(size_t pd_index, u16 qr) const -> bool {
364+
const u64 data = (pd_index << 13u) | qr;
365+
return FilterAPI<Table>::Contain(data, &GenSpare);
366+
}
367+
368+
// Inserts into the spare the (quot, rem) pair carried by `a_info`, using the same
// key layout as incSpare_lookup(), and counts the insertion in cap[1]. Whatever
// FilterAPI<Table>::Add throws on failure propagates to the caller.
inline void incSpare_add(size_t pd_index, const min_pd::add_res &a_info) {
369+
cap[1]++;
370+
u16 qr = (((u16) a_info.quot) << 8u) | a_info.rem;
371+
const u64 data = (pd_index << 13u) | qr;
372+
return FilterAPI<Table>::Add(data, &GenSpare);
373+
}
374+
375+
// Inserts `item`. The target PD and (quot, rem) are derived exactly as in Find().
// Bit 55 of the PD's first 64-bit word decides the path: clear -> insert into the
// PD; set -> call min_pd::new_pd_swap_short() and push its result to the spare.
void Add(const u64 &item) {
376+
const u64 s = H0(item);
377+
constexpr u64 full_mask = (1ULL << 55);
378+
uint32_t out1 = s >> 32u, out2 = s;
379+
380+
const uint32_t pd_index = reduce32(out1, (uint32_t) number_of_pd);
381+
382+
auto pd = pd_array + pd_index;
383+
const uint64_t header = reinterpret_cast<const u64 *>(pd)[0];
384+
const bool not_full = !(header & full_mask);
385+
386+
const uint16_t qr = fixed_reduce(out2);
387+
const int64_t quot = qr >> 8;
388+
const uint8_t rem = qr;
389+
390+
if (not_full) {
391+
cap[0]++;
392+
assert(!min_pd::is_pd_full(pd));
393+
// Position inside the header (without its low 6 bits) reported by pd_select64
// for `quot`; +6 converts it back to a bit index in the full word.
size_t end = min_pd::pd_select64(header >> 6, quot);
394+
const size_t h_index = end + 6;
395+
// Keep bits [0, h_index) in place and shift the remaining bits up by one,
// which leaves a zero bit at position h_index.
const u64 mask = _bzhi_u64(-1, h_index);
396+
const u64 lo = header & mask;
397+
const u64 hi = ((header & ~mask) << 1u);// & h_mask;
398+
assert(!(lo & hi));
399+
const u64 h7 = lo | hi;
400+
// Only the first 7 bytes of the updated word are written back; the 8th byte
// of the PD is left untouched (assumes little-endian byte order -- TODO confirm).
memcpy(pd, &h7, 7);
401+
402+
const size_t body_index = end - quot;
403+
min_pd::body_add_case0_avx(body_index, rem, pd);
404+
assert(min_pd::find_core(quot, rem, pd));
405+
assert(Find(item));
406+
return;
407+
} else {
408+
auto add_res = min_pd::new_pd_swap_short(quot, rem, pd);
409+
incSpare_add(pd_index, add_res);
410+
assert(Find(item));
411+
}
412+
}
413+
414+
// Bytes used by the first-level array plus whatever GenSpare.SizeInBytes() reports.
size_t SizeInBytes() const{
415+
size_t l1 = sizeof(__m256i) * number_of_pd;
416+
size_t l2 = GenSpare.SizeInBytes();
417+
auto res = l1 + l2;
418+
return res;
419+
}
420+
421+
};
422+
423+
424+
// FilterAPI specialization that adapts Prefix_Filter<filterTable> to the static
// ConstructFromAddCount / Add / AddAll / Remove / Contain interface used by the
// benchmark. Removal is not supported.
template<typename filterTable>
425+
struct FilterAPI<Prefix_Filter<filterTable>> {
426+
using Table = Prefix_Filter<filterTable>;
427+
428+
// Builds a filter for `add_count` items, passing {.95, .95} as the two-element
// `loads` array of the Prefix_Filter constructor.
static Table ConstructFromAddCount(size_t add_count) {
429+
constexpr float loads[2] = {.95, .95};
430+
return Table(add_count, loads);
431+
}
432+
433+
// Forwards to table->Add(key).
static void Add(u64 key, Table *table) {
434+
table->Add(key);
435+
}
436+
437+
// Inserts keys[start] .. keys[end - 1] one at a time through Add().
static void AddAll(const vector<uint64_t>& keys, const size_t start, const size_t end, Table* table) {
438+
for(size_t i = start; i < end; i++) { Add(keys[i],table); }
439+
}
440+
441+
// Always throws std::runtime_error("Unsupported"); both arguments are ignored.
static void Remove(u64 key, Table *table) {
442+
throw std::runtime_error("Unsupported");
443+
}
444+
445+
// Membership query: returns table->Find(key).
CONTAIN_ATTRIBUTES static bool Contain(u64 key, const Table *table) {
446+
return table->Find(key);
447+
}
448+
449+
};
450+
451+
209452
#endif
210453

211454
#ifdef __SSE41__
@@ -969,4 +1212,4 @@ struct FilterAPI<SuccinctCountingBlockedBloomRankFilter<ItemType, bits_per_item,
9691212
};
9701213

9711214

972-
#endif
1215+
#endif

src/hashutil.h

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,9 @@
1212
namespace hashing {
1313
// See Martin Dietzfelbinger, "Universal hashing and k-wise independent random
1414
// variables via integer arithmetic without primes".
15-
/*
16-
class TwoIndependentMultiplyShift {
1715

16+
class TwoIndependentMultiplyShift {
1817
unsigned __int128 multiply_, add_;
19-
2018
public:
2119
TwoIndependentMultiplyShift() {
2220
::std::random_device random;
@@ -28,13 +26,12 @@ class TwoIndependentMultiplyShift {
2826
}
2927
}
3028
}
31-
3229
inline uint64_t operator()(uint64_t key) const {
3330
return (add_ + multiply_ * static_cast<decltype(multiply_)>(key)) >> 64;
3431
}
3532

3633
};
37-
*/
34+
3835

3936
class SimpleMixSplit {
4037

@@ -63,4 +60,4 @@ class SimpleMixSplit {
6360

6461
}
6562

66-
#endif // CUCKOO_FILTER_HASHUTIL_H_
63+
#endif // CUCKOO_FILTER_HASHUTIL_H_

0 commit comments

Comments
 (0)