|
27 | 27 | #include "vqf_cpp.h" |
28 | 28 | #include "simd-block.h" |
29 | 29 | #endif |
// Feature gate for the Prefix-Filter / TC-shortcut code paths.
//
// The macro is defined (to 1) only when the compiler advertises the AVX-512
// extensions tested below, so both `#ifdef __PF_AVX512__` and
// `#if __PF_AVX512__` behave as a real feature test. Previously the macro was
// defined unconditionally, which made every `#ifdef __PF_AVX512__` true even
// on targets without AVX-512.
//
// NOTE(review): the earlier expression listed __AVX512DQ__ three times; other
// extensions (e.g. __AVX512BW__ / __AVX512VL__) may have been intended -
// confirm against the intrinsics used by the headers included below.
#if defined(__AVX512CD__) && defined(__AVX512DQ__)
#define __PF_AVX512__ 1
#endif
#ifdef __PF_AVX512__
#include "prefix/min_pd256.hpp"
#include "tcShortcut/TC-shortcut.hpp"
#endif
30 | 35 | #include "simd-block-fixed-fpp.h" |
31 | 36 | #include "ribbon_impl.h" |
32 | 37 |
|
@@ -206,6 +211,246 @@ struct FilterAPI<SimdBlockFilterFixed<HashFamily>> { |
206 | 211 | return table->Find(key); |
207 | 212 | } |
208 | 213 | }; |
| 214 | + |
| 215 | +#endif |
| 216 | +#ifdef __PF_AVX512__ |
| 217 | +template<typename HashFamily> |
| 218 | +struct FilterAPI<TC_shortcut<HashFamily>> { |
| 219 | + using Table = TC_shortcut<HashFamily>; |
| 220 | + |
| 221 | + static Table ConstructFromAddCount(size_t add_count) { |
| 222 | + constexpr float load = .935; |
| 223 | + return Table(add_count, load); |
| 224 | + } |
| 225 | + static void Add(uint64_t key, Table *table) { |
| 226 | + if (!table->insert(key)) { |
| 227 | + std::cout << table->info() << std::endl; |
| 228 | + // std::cout << "max_load: \t" << 0.945 << std::endl; |
| 229 | + throw std::logic_error(table->get_name() + " is too small to hold all of the elements"); |
| 230 | + } |
| 231 | + } |
| 232 | + static void AddAll(const vector<uint64_t>& keys, const size_t start, const size_t end, Table* table) { |
| 233 | + for(size_t i = start; i < end; i++) { Add(keys[i],table); } |
| 234 | + } |
| 235 | + |
| 236 | + static bool Add_attempt(uint64_t key, Table *table) { |
| 237 | + if (!table->insert(key)) { |
| 238 | + std::cout << "load when failed: \t" << table->get_effective_load() << std::endl; |
| 239 | + std::cout << table->info() << std::endl; |
| 240 | + return false; |
| 241 | + } |
| 242 | + return true; |
| 243 | + } |
| 244 | + |
| 245 | + |
| 246 | + static void Remove(uint64_t key, Table *table) { |
| 247 | + table->remove(key); |
| 248 | + } |
| 249 | + CONTAIN_ATTRIBUTES static bool Contain(uint64_t key, const Table *table){ |
| 250 | + return table->lookup(key); |
| 251 | + } |
| 252 | +}; |
| 253 | + |
| 254 | + |
| 255 | + |
/**
 * Generic sizing rule for the second-level (spare) filter.
 *
 * @param l1_items                 number of items offered to the first level
 * @param overflowing_items_ratio  fraction of those items expected to reach level 2
 * @param loads                    two load values; only loads[1] is read here
 * @return l1_items * overflowing_items_ratio / loads[1], truncated toward zero
 */
template<typename Table>
inline size_t get_l2_slots(size_t l1_items, const double overflowing_items_ratio, const float loads[2]) {
    const double overflow_estimate = l1_items * overflowing_items_ratio;
    return static_cast<size_t>(overflow_estimate / loads[1]);
}
| 262 | + |
| 263 | +template<> |
| 264 | +inline size_t get_l2_slots<cuckoofilter::CuckooFilterStable<u64, 12>>(size_t l1_items, const double overflowing_items_ratio, const float loads[2]) { |
| 265 | + constexpr auto expected_items100 = 0.07952; |
| 266 | + constexpr auto expected_items95 = 0.0586; |
| 267 | + constexpr auto spare_workload = 0.94; |
| 268 | + constexpr auto safety = 1.08; |
| 269 | + constexpr auto factor95 = safety * expected_items95 / spare_workload; |
| 270 | + const double expected_items_reaching_next_level = l1_items * factor95; |
| 271 | + return expected_items_reaching_next_level; |
| 272 | +} |
| 273 | + |
| 274 | +template<> |
| 275 | +inline size_t get_l2_slots<TC_shortcut<>>(size_t l1_items, const double overflowing_items_ratio, const float loads[2]) { |
| 276 | + constexpr auto expected_items100 = 0.07952; |
| 277 | + constexpr auto safety = 1.08; |
| 278 | + constexpr auto expected_items95 = 0.0586; |
| 279 | + constexpr auto spare_workload = 0.935; |
| 280 | + constexpr auto factor95 = safety * expected_items95 / spare_workload; |
| 281 | + const double expected_items_reaching_next_level = l1_items * factor95; |
| 282 | + size_t slots_in_l2 = std::ceil(expected_items_reaching_next_level); |
| 283 | + return slots_in_l2; |
| 284 | +} |
| 285 | + |
| 286 | + |
| 287 | +template<> |
| 288 | +inline size_t get_l2_slots<SimdBlockFilter<>>(size_t l1_items, const double overflowing_items_ratio, const float loads[2]) { |
| 289 | + const double expected_items_reaching_next_level = l1_items * overflowing_items_ratio; |
| 290 | + size_t slots_in_l2 = (expected_items_reaching_next_level / loads[1]); |
| 291 | + return slots_in_l2 * 4; |
| 292 | +} |
| 293 | + |
| 294 | +template<> |
| 295 | +inline size_t get_l2_slots<SimdBlockFilterFixed<>>(size_t l1_items, const double overflowing_items_ratio, const float loads[2]) { |
| 296 | + const double expected_items_reaching_next_level = l1_items * overflowing_items_ratio; |
| 297 | + size_t slots_in_l2 = (expected_items_reaching_next_level / loads[1]); |
| 298 | + return slots_in_l2 * 2; |
| 299 | +} |
| 300 | + |
| 301 | + |
| 302 | +template<typename Table, typename HashFamily = TwoIndependentMultiplyShift> |
| 303 | +class Prefix_Filter { |
| 304 | + const size_t filter_max_capacity; |
| 305 | + const size_t number_of_pd; |
| 306 | + size_t cap[2] = {0}; |
| 307 | + |
| 308 | + hashing::TwoIndependentMultiplyShift Hasher, H0; |
| 309 | + __m256i *pd_array; |
| 310 | + Table GenSpare; |
| 311 | + |
| 312 | + static double constexpr overflowing_items_ratio = 0.0586;// = expected_items95 |
| 313 | + |
| 314 | +public: |
| 315 | + Prefix_Filter(size_t max_items, const float loads[2]) |
| 316 | + : filter_max_capacity(max_items), |
| 317 | + number_of_pd(std::ceil(1.0 * max_items / (min_pd::MAX_CAP0 * loads[0]))), |
| 318 | + GenSpare(FilterAPI<Table>::ConstructFromAddCount(get_l2_slots<Table>(max_items, overflowing_items_ratio, loads))), |
| 319 | + Hasher(), H0() { |
| 320 | + |
| 321 | + int ok = posix_memalign((void **) &pd_array, 32, 32 * number_of_pd); |
| 322 | + if (ok != 0) { |
| 323 | + std::cout << "Space allocation failed!" << std::endl; |
| 324 | + assert(false); |
| 325 | + exit(-3); |
| 326 | + } |
| 327 | + |
| 328 | + constexpr uint64_t pd256_plus_init_header = (((INT64_C(1) << min_pd::QUOTS) - 1) << 6) | 32; |
| 329 | +// std_fill<__m256i *, __m256i>(pd_array, pd_array + number_of_pd, __m256i{0, 0, 0, 0}); |
| 330 | + for (size_t i = 0; i < number_of_pd; i++){ |
| 331 | + pd_array[i] = __m256i{pd256_plus_init_header, 0, 0, 0}; |
| 332 | + } |
| 333 | + |
| 334 | + } |
| 335 | + |
| 336 | + ~Prefix_Filter() { |
| 337 | + free(pd_array); |
| 338 | + } |
| 339 | + |
| 340 | + __attribute__((always_inline)) inline static constexpr uint32_t reduce32(uint32_t hash, uint32_t n) { |
| 341 | + // http://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/ |
| 342 | + return (uint32_t) (((uint64_t) hash * n) >> 32); |
| 343 | + } |
| 344 | + |
| 345 | + |
| 346 | + __attribute__((always_inline)) inline static constexpr uint16_t fixed_reduce(uint16_t hash) { |
| 347 | + // http://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/ |
| 348 | + return (uint16_t) (((uint32_t) hash * 6400) >> 16); |
| 349 | + } |
| 350 | + |
| 351 | + |
| 352 | + inline auto Find(const u64 &item) const -> bool { |
| 353 | + const u64 s = H0(item); |
| 354 | + uint32_t out1 = s >> 32u, out2 = s; |
| 355 | + const uint32_t pd_index = reduce32(out1, (uint32_t) number_of_pd); |
| 356 | + const uint16_t qr = fixed_reduce(out2); |
| 357 | + const int64_t quot = qr >> 8; |
| 358 | + const uint8_t rem = qr; |
| 359 | + // return min_pd::pd_find_25(quot, rem, &pd_array[pd_index]); |
| 360 | + // return (!min_pd::cmp_qr1(qr, &pd_array[pd_index])) ? min_pd::pd_find_25(quot, rem, &pd_array[pd_index]) |
| 361 | + return (!min_pd::cmp_qr1(qr, &pd_array[pd_index])) ? min_pd::find_core(quot, rem, &pd_array[pd_index]) |
| 362 | + : incSpare_lookup(pd_index, qr); |
| 363 | + } |
| 364 | + |
| 365 | + inline auto incSpare_lookup(size_t pd_index, u16 qr) const -> bool { |
| 366 | + const u64 data = (pd_index << 13u) | qr; |
| 367 | + return FilterAPI<Table>::Contain(data, &GenSpare); |
| 368 | + } |
| 369 | + |
| 370 | + inline void incSpare_add(size_t pd_index, const min_pd::add_res &a_info) { |
| 371 | + cap[1]++; |
| 372 | + u16 qr = (((u16) a_info.quot) << 8u) | a_info.rem; |
| 373 | + const u64 data = (pd_index << 13u) | qr; |
| 374 | + return FilterAPI<Table>::Add(data, &GenSpare); |
| 375 | + } |
| 376 | + |
| 377 | + void Add(const u64 &item) { |
| 378 | + const u64 s = H0(item); |
| 379 | + constexpr u64 full_mask = (1ULL << 55); |
| 380 | + uint32_t out1 = s >> 32u, out2 = s; |
| 381 | + |
| 382 | + const uint32_t pd_index = reduce32(out1, (uint32_t) number_of_pd); |
| 383 | + |
| 384 | + auto pd = pd_array + pd_index; |
| 385 | + const uint64_t header = reinterpret_cast<const u64 *>(pd)[0]; |
| 386 | + const bool not_full = !(header & full_mask); |
| 387 | + |
| 388 | + const uint16_t qr = fixed_reduce(out2); |
| 389 | + const int64_t quot = qr >> 8; |
| 390 | + const uint8_t rem = qr; |
| 391 | + |
| 392 | + if (not_full) { |
| 393 | + cap[0]++; |
| 394 | + assert(!min_pd::is_pd_full(pd)); |
| 395 | + size_t end = min_pd::pd_select64(header >> 6, quot); |
| 396 | + const size_t h_index = end + 6; |
| 397 | + const u64 mask = _bzhi_u64(-1, h_index); |
| 398 | + const u64 lo = header & mask; |
| 399 | + const u64 hi = ((header & ~mask) << 1u);// & h_mask; |
| 400 | + assert(!(lo & hi)); |
| 401 | + const u64 h7 = lo | hi; |
| 402 | + memcpy(pd, &h7, 7); |
| 403 | + |
| 404 | + const size_t body_index = end - quot; |
| 405 | + min_pd::body_add_case0_avx(body_index, rem, pd); |
| 406 | + assert(min_pd::find_core(quot, rem, pd)); |
| 407 | + assert(Find(item)); |
| 408 | + return; |
| 409 | + } else { |
| 410 | + auto add_res = min_pd::new_pd_swap_short(quot, rem, pd); |
| 411 | + incSpare_add(pd_index, add_res); |
| 412 | + assert(Find(item)); |
| 413 | + } |
| 414 | + } |
| 415 | + |
| 416 | + size_t SizeInBytes() const{ |
| 417 | + size_t l1 = sizeof(__m256i) * number_of_pd; |
| 418 | + size_t l2 = GenSpare.SizeInBytes(); |
| 419 | + auto res = l1 + l2; |
| 420 | + return res; |
| 421 | + } |
| 422 | + |
| 423 | +}; |
| 424 | + |
| 425 | + |
| 426 | +template<typename filterTable> |
| 427 | +struct FilterAPI<Prefix_Filter<filterTable>> { |
| 428 | + using Table = Prefix_Filter<filterTable>; |
| 429 | + |
| 430 | + static Table ConstructFromAddCount(size_t add_count) { |
| 431 | + constexpr float loads[2] = {.95, .95}; |
| 432 | + return Table(add_count, loads); |
| 433 | + } |
| 434 | + |
| 435 | + static void Add(u64 key, Table *table) { |
| 436 | + table->Add(key); |
| 437 | + } |
| 438 | + |
| 439 | + static void AddAll(const vector<uint64_t>& keys, const size_t start, const size_t end, Table* table) { |
| 440 | + for(size_t i = start; i < end; i++) { Add(keys[i],table); } |
| 441 | + } |
| 442 | + |
| 443 | + static void Remove(u64 key, Table *table) { |
| 444 | + throw std::runtime_error("Unsupported"); |
| 445 | + } |
| 446 | + |
| 447 | + CONTAIN_ATTRIBUTES static bool Contain(u64 key, const Table *table) { |
| 448 | + return table->Find(key); |
| 449 | + } |
| 450 | + |
| 451 | +}; |
| 452 | + |
| 453 | + |
209 | 454 | #endif |
210 | 455 |
|
211 | 456 | #ifdef __SSE41__ |
|
0 commit comments