Skip to content

Commit 7e244e0

Browse files
committed
Add the Prefix-Filter to the repository. A second filter, TwoChoicer, is also added; it is based on the power-of-two-choices paradigm.
1 parent 1d5cd72 commit 7e244e0

File tree

7 files changed

+1790
-38
lines changed

7 files changed

+1790
-38
lines changed

benchmarks/bulk-insert-and-query.cc

Lines changed: 57 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -359,6 +359,12 @@ int main(int argc, char * argv[]) {
359359
// CQF + VQF
360360
{30,"CQF"},
361361
{31,"VQF"},
362+
// TwoChoicer
363+
{32,"TwoChoice"},
364+
// Prefix
365+
{35,"PF[TC]"},
366+
{36,"PF[CF-12-Flex]"},
367+
{37,"PF[BBF-Flex]"},
362368
#endif
363369
// Bloom
364370
{40, "Bloom8"}, {41, "Bloom12" }, {42, "Bloom16"},
@@ -731,12 +737,60 @@ int main(int argc, char * argv[]) {
731737
cout << setw(NAME_WIDTH) << names[a] << cf << endl;
732738
}
733739
a = 31;
740+
// if (algorithmId == a || algorithmId < 0 || (algos.find(a) != algos.end())) {
741+
// auto cf = FilterBenchmark<
742+
// VQFilter<uint64_t, SimpleMixSplit>>(
743+
// add_count, to_add, intersectionsize, mixed_sets, true, false);
744+
// cout << setw(NAME_WIDTH) << names[a] << cf << endl;
745+
// }
746+
#endif
747+
#ifdef __PF_AVX512__
748+
a = 32;
734749
if (algorithmId == a || algorithmId < 0 || (algos.find(a) != algos.end())) {
735750
auto cf = FilterBenchmark<
736-
VQFilter<uint64_t, SimpleMixSplit>>(
737-
add_count, to_add, intersectionsize, mixed_sets, true, false);
751+
TC_shortcut<SimpleMixSplit>>(
752+
add_count, to_add, intersectionsize, mixed_sets, false, false /* set to true to support deletions. */);
738753
cout << setw(NAME_WIDTH) << names[a] << cf << endl;
739-
}
754+
}
755+
// Prefix ---------------------------------------------------------
756+
a = 35;
757+
if (algorithmId == a || algorithmId < 0 || (algos.find(a) != algos.end())) {
758+
auto cf = FilterBenchmark<
759+
Prefix_Filter<TC_shortcut<SimpleMixSplit>>>(
760+
add_count, to_add, intersectionsize, mixed_sets, false, false);
761+
cout << setw(NAME_WIDTH) << names[a] << cf << endl;
762+
}
763+
a = 36;
764+
if (algorithmId == a || algorithmId < 0 || (algos.find(a) != algos.end())) {
765+
auto cf = FilterBenchmark<
766+
Prefix_Filter<CuckooFilterStable<uint64_t, 12, SingleTable, SimpleMixSplit>>>(
767+
add_count, to_add, intersectionsize, mixed_sets, false, false);
768+
cout << setw(NAME_WIDTH) << names[a] << cf << endl;
769+
}
770+
a = 37;
771+
if (algorithmId == a || algorithmId < 0 || (algos.find(a) != algos.end())) {
772+
auto cf = FilterBenchmark<
773+
Prefix_Filter<SimdBlockFilterFixed<SimpleMixSplit>>>(
774+
add_count, to_add, intersectionsize, mixed_sets, false, false);
775+
cout << setw(NAME_WIDTH) << names[a] << cf << endl;
776+
}
777+
/*
778+
a = 38;
779+
if (algorithmId == a || algorithmId < 0 || (algos.find(a) != algos.end())) {
780+
using prefix_l2 = Prefix_Filter<TrivialFilter>;
781+
auto cf = FilterBenchmark<Prefix_Filter<prefix_l2>>(add_count, to_add, intersectionsize, mixed_sets, false, false);
782+
cout << setw(NAME_WIDTH) << names[a] << cf << endl;
783+
}
784+
a = 39;
785+
if (algorithmId == a || algorithmId < 0 || (algos.find(a) != algos.end())) {
786+
using prefix_l2 = Prefix_Filter<TrivialFilter>;
787+
using prefix_l3 = Prefix_Filter<prefix_l2>;
788+
using prefix_l4 = Prefix_Filter<prefix_l3>;
789+
790+
auto cf = FilterBenchmark<prefix_l4>(add_count, to_add, intersectionsize, mixed_sets, false, false);
791+
cout << setw(NAME_WIDTH) << names[a] << cf << endl;
792+
}
793+
*/
740794
#endif
741795

742796
// Bloom ----------------------------------------------------------

benchmarks/filterapi.h

Lines changed: 245 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,11 @@
2727
#include "vqf_cpp.h"
2828
#include "simd-block.h"
2929
#endif
30+
// Enable the Prefix-Filter / TwoChoicer code paths only when the compiler
// targets the AVX-512 subsets named below. The macro must stay *undefined*
// otherwise, because every user tests it with `#ifdef`; defining it to an
// expression (as was done before) made those guards unconditionally true.
#if defined(__AVX512CD__) && defined(__AVX512DQ__)
#define __PF_AVX512__ 1
#endif
31+
#ifdef __PF_AVX512__
32+
#include "prefix/min_pd256.hpp"
33+
#include "tcShortcut/TC-shortcut.hpp"
34+
#endif
3035
#include "simd-block-fixed-fpp.h"
3136
#include "ribbon_impl.h"
3237

@@ -206,6 +211,246 @@ struct FilterAPI<SimdBlockFilterFixed<HashFamily>> {
206211
return table->Find(key);
207212
}
208213
};
214+
215+
#endif
216+
#ifdef __PF_AVX512__
217+
template<typename HashFamily>
218+
struct FilterAPI<TC_shortcut<HashFamily>> {
219+
using Table = TC_shortcut<HashFamily>;
220+
221+
static Table ConstructFromAddCount(size_t add_count) {
222+
constexpr float load = .935;
223+
return Table(add_count, load);
224+
}
225+
static void Add(uint64_t key, Table *table) {
226+
if (!table->insert(key)) {
227+
std::cout << table->info() << std::endl;
228+
// std::cout << "max_load: \t" << 0.945 << std::endl;
229+
throw std::logic_error(table->get_name() + " is too small to hold all of the elements");
230+
}
231+
}
232+
static void AddAll(const vector<uint64_t>& keys, const size_t start, const size_t end, Table* table) {
233+
for(size_t i = start; i < end; i++) { Add(keys[i],table); }
234+
}
235+
236+
static bool Add_attempt(uint64_t key, Table *table) {
237+
if (!table->insert(key)) {
238+
std::cout << "load when failed: \t" << table->get_effective_load() << std::endl;
239+
std::cout << table->info() << std::endl;
240+
return false;
241+
}
242+
return true;
243+
}
244+
245+
246+
static void Remove(uint64_t key, Table *table) {
247+
table->remove(key);
248+
}
249+
CONTAIN_ATTRIBUTES static bool Contain(uint64_t key, const Table *table){
250+
return table->lookup(key);
251+
}
252+
};
253+
254+
255+
256+
/**
 * Default sizing rule for the second-level filter: the number of items
 * expected to overflow the first level, divided by the second level's
 * target load.
 *
 * @tparam Table                  second-level filter type (only used to select a specialization)
 * @param l1_items                number of items the first level is sized for
 * @param overflowing_items_ratio fraction of `l1_items` expected to reach the second level
 * @param loads                   per-level target loads; only `loads[1]` is read here
 * @return the quotient truncated toward zero
 */
template<typename Table>
inline size_t get_l2_slots(size_t l1_items, const double overflowing_items_ratio, const float loads[2]) {
    const double overflow_estimate = l1_items * overflowing_items_ratio;
    const double padded_slots = overflow_estimate / loads[1];
    return static_cast<size_t>(padded_slots);
}
262+
263+
template<>
264+
inline size_t get_l2_slots<cuckoofilter::CuckooFilterStable<u64, 12>>(size_t l1_items, const double overflowing_items_ratio, const float loads[2]) {
265+
constexpr auto expected_items100 = 0.07952;
266+
constexpr auto expected_items95 = 0.0586;
267+
constexpr auto spare_workload = 0.94;
268+
constexpr auto safety = 1.08;
269+
constexpr auto factor95 = safety * expected_items95 / spare_workload;
270+
const double expected_items_reaching_next_level = l1_items * factor95;
271+
return expected_items_reaching_next_level;
272+
}
273+
274+
template<>
275+
inline size_t get_l2_slots<TC_shortcut<>>(size_t l1_items, const double overflowing_items_ratio, const float loads[2]) {
276+
constexpr auto expected_items100 = 0.07952;
277+
constexpr auto safety = 1.08;
278+
constexpr auto expected_items95 = 0.0586;
279+
constexpr auto spare_workload = 0.935;
280+
constexpr auto factor95 = safety * expected_items95 / spare_workload;
281+
const double expected_items_reaching_next_level = l1_items * factor95;
282+
size_t slots_in_l2 = std::ceil(expected_items_reaching_next_level);
283+
return slots_in_l2;
284+
}
285+
286+
287+
template<>
288+
inline size_t get_l2_slots<SimdBlockFilter<>>(size_t l1_items, const double overflowing_items_ratio, const float loads[2]) {
289+
const double expected_items_reaching_next_level = l1_items * overflowing_items_ratio;
290+
size_t slots_in_l2 = (expected_items_reaching_next_level / loads[1]);
291+
return slots_in_l2 * 4;
292+
}
293+
294+
template<>
295+
inline size_t get_l2_slots<SimdBlockFilterFixed<>>(size_t l1_items, const double overflowing_items_ratio, const float loads[2]) {
296+
const double expected_items_reaching_next_level = l1_items * overflowing_items_ratio;
297+
size_t slots_in_l2 = (expected_items_reaching_next_level / loads[1]);
298+
return slots_in_l2 * 2;
299+
}
300+
301+
302+
template<typename Table, typename HashFamily = TwoIndependentMultiplyShift>
303+
class Prefix_Filter {
304+
const size_t filter_max_capacity;
305+
const size_t number_of_pd;
306+
size_t cap[2] = {0};
307+
308+
hashing::TwoIndependentMultiplyShift Hasher, H0;
309+
__m256i *pd_array;
310+
Table GenSpare;
311+
312+
static double constexpr overflowing_items_ratio = 0.0586;// = expected_items95
313+
314+
public:
315+
Prefix_Filter(size_t max_items, const float loads[2])
316+
: filter_max_capacity(max_items),
317+
number_of_pd(std::ceil(1.0 * max_items / (min_pd::MAX_CAP0 * loads[0]))),
318+
GenSpare(FilterAPI<Table>::ConstructFromAddCount(get_l2_slots<Table>(max_items, overflowing_items_ratio, loads))),
319+
Hasher(), H0() {
320+
321+
int ok = posix_memalign((void **) &pd_array, 32, 32 * number_of_pd);
322+
if (ok != 0) {
323+
std::cout << "Space allocation failed!" << std::endl;
324+
assert(false);
325+
exit(-3);
326+
}
327+
328+
constexpr uint64_t pd256_plus_init_header = (((INT64_C(1) << min_pd::QUOTS) - 1) << 6) | 32;
329+
// std_fill<__m256i *, __m256i>(pd_array, pd_array + number_of_pd, __m256i{0, 0, 0, 0});
330+
for (size_t i = 0; i < number_of_pd; i++){
331+
pd_array[i] = __m256i{pd256_plus_init_header, 0, 0, 0};
332+
}
333+
334+
}
335+
336+
~Prefix_Filter() {
337+
free(pd_array);
338+
}
339+
340+
__attribute__((always_inline)) inline static constexpr uint32_t reduce32(uint32_t hash, uint32_t n) {
341+
// http://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/
342+
return (uint32_t) (((uint64_t) hash * n) >> 32);
343+
}
344+
345+
346+
__attribute__((always_inline)) inline static constexpr uint16_t fixed_reduce(uint16_t hash) {
347+
// http://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/
348+
return (uint16_t) (((uint32_t) hash * 6400) >> 16);
349+
}
350+
351+
352+
inline auto Find(const u64 &item) const -> bool {
353+
const u64 s = H0(item);
354+
uint32_t out1 = s >> 32u, out2 = s;
355+
const uint32_t pd_index = reduce32(out1, (uint32_t) number_of_pd);
356+
const uint16_t qr = fixed_reduce(out2);
357+
const int64_t quot = qr >> 8;
358+
const uint8_t rem = qr;
359+
// return min_pd::pd_find_25(quot, rem, &pd_array[pd_index]);
360+
// return (!min_pd::cmp_qr1(qr, &pd_array[pd_index])) ? min_pd::pd_find_25(quot, rem, &pd_array[pd_index])
361+
return (!min_pd::cmp_qr1(qr, &pd_array[pd_index])) ? min_pd::find_core(quot, rem, &pd_array[pd_index])
362+
: incSpare_lookup(pd_index, qr);
363+
}
364+
365+
inline auto incSpare_lookup(size_t pd_index, u16 qr) const -> bool {
366+
const u64 data = (pd_index << 13u) | qr;
367+
return FilterAPI<Table>::Contain(data, &GenSpare);
368+
}
369+
370+
inline void incSpare_add(size_t pd_index, const min_pd::add_res &a_info) {
371+
cap[1]++;
372+
u16 qr = (((u16) a_info.quot) << 8u) | a_info.rem;
373+
const u64 data = (pd_index << 13u) | qr;
374+
return FilterAPI<Table>::Add(data, &GenSpare);
375+
}
376+
377+
void Add(const u64 &item) {
378+
const u64 s = H0(item);
379+
constexpr u64 full_mask = (1ULL << 55);
380+
uint32_t out1 = s >> 32u, out2 = s;
381+
382+
const uint32_t pd_index = reduce32(out1, (uint32_t) number_of_pd);
383+
384+
auto pd = pd_array + pd_index;
385+
const uint64_t header = reinterpret_cast<const u64 *>(pd)[0];
386+
const bool not_full = !(header & full_mask);
387+
388+
const uint16_t qr = fixed_reduce(out2);
389+
const int64_t quot = qr >> 8;
390+
const uint8_t rem = qr;
391+
392+
if (not_full) {
393+
cap[0]++;
394+
assert(!min_pd::is_pd_full(pd));
395+
size_t end = min_pd::pd_select64(header >> 6, quot);
396+
const size_t h_index = end + 6;
397+
const u64 mask = _bzhi_u64(-1, h_index);
398+
const u64 lo = header & mask;
399+
const u64 hi = ((header & ~mask) << 1u);// & h_mask;
400+
assert(!(lo & hi));
401+
const u64 h7 = lo | hi;
402+
memcpy(pd, &h7, 7);
403+
404+
const size_t body_index = end - quot;
405+
min_pd::body_add_case0_avx(body_index, rem, pd);
406+
assert(min_pd::find_core(quot, rem, pd));
407+
assert(Find(item));
408+
return;
409+
} else {
410+
auto add_res = min_pd::new_pd_swap_short(quot, rem, pd);
411+
incSpare_add(pd_index, add_res);
412+
assert(Find(item));
413+
}
414+
}
415+
416+
size_t SizeInBytes() const{
417+
size_t l1 = sizeof(__m256i) * number_of_pd;
418+
size_t l2 = GenSpare.SizeInBytes();
419+
auto res = l1 + l2;
420+
return res;
421+
}
422+
423+
};
424+
425+
426+
template<typename filterTable>
427+
struct FilterAPI<Prefix_Filter<filterTable>> {
428+
using Table = Prefix_Filter<filterTable>;
429+
430+
static Table ConstructFromAddCount(size_t add_count) {
431+
constexpr float loads[2] = {.95, .95};
432+
return Table(add_count, loads);
433+
}
434+
435+
static void Add(u64 key, Table *table) {
436+
table->Add(key);
437+
}
438+
439+
static void AddAll(const vector<uint64_t>& keys, const size_t start, const size_t end, Table* table) {
440+
for(size_t i = start; i < end; i++) { Add(keys[i],table); }
441+
}
442+
443+
static void Remove(u64 key, Table *table) {
444+
throw std::runtime_error("Unsupported");
445+
}
446+
447+
CONTAIN_ATTRIBUTES static bool Contain(u64 key, const Table *table) {
448+
return table->Find(key);
449+
}
450+
451+
};
452+
453+
209454
#endif
210455

211456
#ifdef __SSE41__

src/cuckoo/printutil.cc

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -6,21 +6,21 @@
66

77
namespace cuckoofilter {
88

9-
std::string PrintUtil::bytes_to_hex(const char *data, size_t len) {
10-
std::string hexstr = "";
11-
static const char hexes[] = "0123456789ABCDEF ";
9+
// std::string PrintUtil::bytes_to_hex(const char *data, size_t len) {
10+
// std::string hexstr = "";
11+
// static const char hexes[] = "0123456789ABCDEF ";
1212

13-
for (size_t i = 0; i < len; i++) {
14-
unsigned char c = data[i];
15-
hexstr.push_back(hexes[c >> 4]);
16-
hexstr.push_back(hexes[c & 0xf]);
17-
hexstr.push_back(hexes[16]);
18-
}
19-
return hexstr;
20-
};
13+
// for (size_t i = 0; i < len; i++) {
14+
// unsigned char c = data[i];
15+
// hexstr.push_back(hexes[c >> 4]);
16+
// hexstr.push_back(hexes[c & 0xf]);
17+
// hexstr.push_back(hexes[16]);
18+
// }
19+
// return hexstr;
20+
// };
2121

22-
std::string PrintUtil::bytes_to_hex(const std::string &s) {
23-
return bytes_to_hex((const char *)s.data(), s.size());
24-
};
22+
// std::string PrintUtil::bytes_to_hex(const std::string &s) {
23+
// return bytes_to_hex((const char *)s.data(), s.size());
24+
// };
2525

2626
} // namespace cuckoofilter

0 commit comments

Comments
 (0)