#include "vqf_cpp.h"
#include "simd-block.h"
#endif
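// __PF_AVX512__ is nonzero only when all four AVX-512 feature macros are
// defined (an undefined identifier evaluates to 0 inside #if), so the
// prefix-filter and TC-shortcut code below compiles only on AVX-512 targets.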
#define __PF_AVX512__ (__AVX512BW__ & __AVX512VL__ & __AVX512CD__ & __AVX512DQ__)
#if __PF_AVX512__
#include "prefix/min_pd256.hpp"
#include "tc-shortcut/tc-shortcut.hpp"
#endif
#include "simd-block-fixed-fpp.h"
#include "ribbon_impl.h"

@@ -206,6 +211,244 @@ struct FilterAPI<SimdBlockFilterFixed<HashFamily>> {
        return table->Find(key);
    }
};

#endif
#if __PF_AVX512__
template <typename HashFamily>
struct FilterAPI<TC_shortcut<HashFamily>> {
    using Table = TC_shortcut<HashFamily>;

    static Table ConstructFromAddCount(size_t add_count) {
        constexpr float load = .935;
        return Table(add_count, load);
    }
    static void Add(uint64_t key, Table *table) {
        if (!table->insert(key)) {
            std::cout << table->info() << std::endl;
            throw std::logic_error(table->get_name() + " is too small to hold all of the elements");
        }
    }
    static void AddAll(const vector<uint64_t> &keys, const size_t start, const size_t end, Table *table) {
        for (size_t i = start; i < end; i++) { Add(keys[i], table); }
    }

    static bool Add_attempt(uint64_t key, Table *table) {
        if (!table->insert(key)) {
            std::cout << "load when failed: \t" << table->get_effective_load() << std::endl;
            std::cout << table->info() << std::endl;
            return false;
        }
        return true;
    }

    static void Remove(uint64_t key, Table *table) {
        table->remove(key);
    }
    CONTAIN_ATTRIBUTES static bool Contain(uint64_t key, const Table *table) {
        return table->lookup(key);
    }
};
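// Note on the adapter above: TC_shortcut is built for a 0.935 load factor, so
// insert() can fail once the table saturates. Add() treats that as fatal and
// throws, while Add_attempt() merely reports the failure and the effective
// load that was reached.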
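// get_l2_slots() sizes the second-level ("spare") filter: roughly
// overflowing_items_ratio * l1_items keys are expected to spill out of the
// first level, and the spare must accommodate them at its own target load
// (loads[1]). The specializations below bake in per-filter constants.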
template <typename Table>
inline size_t get_l2_slots(size_t l1_items, const double overflowing_items_ratio, const float loads[2]) {
    const double expected_items_reaching_next_level = l1_items * overflowing_items_ratio;
    size_t slots_in_l2 = (expected_items_reaching_next_level / loads[1]);
    return slots_in_l2;
}

template <>
inline size_t get_l2_slots<cuckoofilter::CuckooFilterStable<u64, 12>>(size_t l1_items, const double overflowing_items_ratio, const float loads[2]) {
    constexpr auto expected_items100 = 0.07952;
    constexpr auto expected_items95 = 0.0586;
    constexpr auto spare_workload = 0.94;
    constexpr auto safety = 1.08;
    constexpr auto factor95 = safety * expected_items95 / spare_workload;
    const double expected_items_reaching_next_level = l1_items * factor95;
    return expected_items_reaching_next_level;
}

template <>
inline size_t get_l2_slots<TC_shortcut<>>(size_t l1_items, const double overflowing_items_ratio, const float loads[2]) {
    constexpr auto expected_items100 = 0.07952;
    constexpr auto safety = 1.08;
    constexpr auto expected_items95 = 0.0586;
    constexpr auto spare_workload = 0.935;
    constexpr auto factor95 = safety * expected_items95 / spare_workload;
    const double expected_items_reaching_next_level = l1_items * factor95;
    size_t slots_in_l2 = std::ceil(expected_items_reaching_next_level);
    return slots_in_l2;
}

template <>
inline size_t get_l2_slots<SimdBlockFilter<>>(size_t l1_items, const double overflowing_items_ratio, const float loads[2]) {
    const double expected_items_reaching_next_level = l1_items * overflowing_items_ratio;
    size_t slots_in_l2 = (expected_items_reaching_next_level / loads[1]);
    return slots_in_l2 * 4;
}

template <>
inline size_t get_l2_slots<SimdBlockFilterFixed<>>(size_t l1_items, const double overflowing_items_ratio, const float loads[2]) {
    const double expected_items_reaching_next_level = l1_items * overflowing_items_ratio;
    size_t slots_in_l2 = (expected_items_reaching_next_level / loads[1]);
    return slots_in_l2 * 2;
}
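
// The blocked-Bloom spares get 4x / 2x extra slots. A plausible reading (not
// stated in this commit): unlike the cuckoo-style spares they never reject an
// insertion, so the extra space is what keeps their false-positive rate low.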

template <typename Table, typename HashFamily = hashing::TwoIndependentMultiplyShift>
class Prefix_Filter {
    const size_t filter_max_capacity;
    const size_t number_of_pd;
    size_t cap[2] = {0};

    hashing::TwoIndependentMultiplyShift Hasher, H0;
    __m256i *pd_array;
    Table GenSpare;

    static double constexpr overflowing_items_ratio = 0.0586; // = expected_items95

public:
    Prefix_Filter(size_t max_items, const float loads[2])
        : filter_max_capacity(max_items),
          number_of_pd(std::ceil(1.0 * max_items / (min_pd::MAX_CAP0 * loads[0]))),
          GenSpare(FilterAPI<Table>::ConstructFromAddCount(get_l2_slots<Table>(max_items, overflowing_items_ratio, loads))),
          Hasher(), H0() {

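        // pd_array holds one 32-byte pocket dictionary (__m256i) per PD, so it
        // is allocated 32-byte aligned for the vector loads/stores below.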
        int ok = posix_memalign((void **) &pd_array, 32, 32 * number_of_pd);
        if (ok != 0) {
            std::cout << "Space allocation failed!" << std::endl;
            assert(false);
            exit(-3);
        }

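        // Empty-PD header value: min_pd::QUOTS one-bits shifted past the low
        // 6 bits, OR'ed with 32; the exact field meaning comes from the
        // pd256_plus header layout defined in min_pd256.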
        constexpr uint64_t pd256_plus_init_header = (((INT64_C(1) << min_pd::QUOTS) - 1) << 6) | 32;
        for (size_t i = 0; i < number_of_pd; i++) {
            pd_array[i] = __m256i{pd256_plus_init_header, 0, 0, 0};
        }
    }

    ~Prefix_Filter() {
        free(pd_array);
    }

    __attribute__((always_inline)) inline static constexpr uint32_t reduce32(uint32_t hash, uint32_t n) {
        // http://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/
        return (uint32_t) (((uint64_t) hash * n) >> 32);
    }

    __attribute__((always_inline)) inline static constexpr uint16_t fixed_reduce(uint16_t hash) {
        // http://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/
        return (uint16_t) (((uint32_t) hash * 6400) >> 16);
    }
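    // fixed_reduce() maps a 16-bit hash into [0, 6400) = [0, 25 * 256): the
    // high part (qr >> 8, in [0, 25)) serves as the quotient and the low byte
    // as the remainder in Find() and Add() below.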
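    // Lookup is two-level: min_pd::cmp_qr1() inspects the PD's header, and
    // only when it signals that the key may have been pushed out of this PD
    // does the query fall through to the spare.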
    inline auto Find(const u64 &item) const -> bool {
        const u64 s = H0(item);
        uint32_t out1 = s >> 32u, out2 = s;
        const uint32_t pd_index = reduce32(out1, (uint32_t) number_of_pd);
        const uint16_t qr = fixed_reduce(out2);
        const int64_t quot = qr >> 8;
        const uint8_t rem = qr;
        // return min_pd::pd_find_25(quot, rem, &pd_array[pd_index]);
        // return (!min_pd::cmp_qr1(qr, &pd_array[pd_index])) ? min_pd::pd_find_25(quot, rem, &pd_array[pd_index])
        return (!min_pd::cmp_qr1(qr, &pd_array[pd_index])) ? min_pd::find_core(quot, rem, &pd_array[pd_index])
                                                           : incSpare_lookup(pd_index, qr);
    }

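    // Keys forwarded to the spare are encoded as (pd_index << 13) | qr; qr
    // always fits in 13 bits because fixed_reduce() keeps it below 6400 < 2^13.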
    inline auto incSpare_lookup(size_t pd_index, u16 qr) const -> bool {
        const u64 data = (pd_index << 13u) | qr;
        return FilterAPI<Table>::Contain(data, &GenSpare);
    }

    inline void incSpare_add(size_t pd_index, const min_pd::add_res &a_info) {
        cap[1]++;
        u16 qr = (((u16) a_info.quot) << 8u) | a_info.rem;
        const u64 data = (pd_index << 13u) | qr;
        return FilterAPI<Table>::Add(data, &GenSpare);
    }

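    // Add(): bit 55 of the first header word marks a full PD. Fast path:
    // splice a zero into the unary header at the end of quot's run
    // (pd_select64 + _bzhi_u64) and write rem into the matching body slot.
    // Slow path: min_pd::new_pd_swap_short() makes room in the PD and returns
    // the (quot, rem) pair that has to move to the spare.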
    void Add(const u64 &item) {
        const u64 s = H0(item);
        constexpr u64 full_mask = (1ULL << 55);
        uint32_t out1 = s >> 32u, out2 = s;

        const uint32_t pd_index = reduce32(out1, (uint32_t) number_of_pd);

        auto pd = pd_array + pd_index;
        const uint64_t header = reinterpret_cast<const u64 *>(pd)[0];
        const bool not_full = !(header & full_mask);

        const uint16_t qr = fixed_reduce(out2);
        const int64_t quot = qr >> 8;
        const uint8_t rem = qr;

        if (not_full) {
            cap[0]++;
            assert(!min_pd::is_pd_full(pd));
            size_t end = min_pd::pd_select64(header >> 6, quot);
            const size_t h_index = end + 6;
            const u64 mask = _bzhi_u64(-1, h_index);
            const u64 lo = header & mask;
            const u64 hi = ((header & ~mask) << 1u); // & h_mask;
            assert(!(lo & hi));
            const u64 h7 = lo | hi;
            memcpy(pd, &h7, 7);

            const size_t body_index = end - quot;
            min_pd::body_add_case0_avx(body_index, rem, pd);
            assert(min_pd::find_core(quot, rem, pd));
            assert(Find(item));
            return;
        } else {
            auto add_res = min_pd::new_pd_swap_short(quot, rem, pd);
            incSpare_add(pd_index, add_res);
            assert(Find(item));
        }
    }

    size_t SizeInBytes() const {
        size_t l1 = sizeof(__m256i) * number_of_pd;
        size_t l2 = GenSpare.SizeInBytes();
        auto res = l1 + l2;
        return res;
    }
};

template <typename filterTable>
struct FilterAPI<Prefix_Filter<filterTable>> {
    using Table = Prefix_Filter<filterTable>;

    static Table ConstructFromAddCount(size_t add_count) {
        constexpr float loads[2] = {.95, .95};
        return Table(add_count, loads);
    }

    static void Add(u64 key, Table *table) {
        table->Add(key);
    }

    static void AddAll(const vector<uint64_t> &keys, const size_t start, const size_t end, Table *table) {
        for (size_t i = start; i < end; i++) { Add(keys[i], table); }
    }

    static void Remove(u64 key, Table *table) {
        throw std::runtime_error("Unsupported");
    }

    CONTAIN_ATTRIBUTES static bool Contain(u64 key, const Table *table) {
        return table->Find(key);
    }
};
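
// Sketch of how the adapter above is driven (illustrative only: `n` and
// `keys` are placeholder names, and the callers are presumably the benchmark
// routines that include this header):
//
//     using PF = Prefix_Filter<TC_shortcut<>>;
//     PF f = FilterAPI<PF>::ConstructFromAddCount(n);     // L1 sized for ~0.95 load
//     for (uint64_t k : keys) FilterAPI<PF>::Add(k, &f);  // overflow spills to the spare
//     bool maybe = FilterAPI<PF>::Contain(keys[0], &f);   // no false negatives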

#endif

#ifdef __SSE41__
@@ -969,4 +1212,4 @@ struct FilterAPI<SuccinctCountingBlockedBloomRankFilter<ItemType, bits_per_item,
};


#endif