@@ -141,168 +141,3 @@ SimdBlockFilter<HashFamily>::Find(const uint64_t key) const noexcept {
141141}
142142
143143
144-
145-
146- // / Rest is copied and pasted to work over 64-byte blocks
147-
148- template <typename HashFamily = ::hashing::TwoIndependentMultiplyShift>
149- class SimdBlockFilter64 {
150- private:
151- // The filter is divided up into Buckets:
152- using Bucket = uint32_t [16 ];
153-
154- // log2(number of bytes in a bucket):
155- static constexpr int LOG_BUCKET_BYTE_SIZE = 6 ;
156-
157- static_assert ((1 << LOG_BUCKET_BYTE_SIZE) == sizeof (Bucket) &&
158- sizeof (Bucket) == 2 * sizeof (__m256i),
159- " Bucket sizing has gone awry." );
160-
161- // log_num_buckets_ is the log (base 2) of the number of buckets in the
162- // directory:
163- const int log_num_buckets_;
164-
165- // directory_mask_ is (1 << log_num_buckets_) - 1. It is precomputed in the
166- // contructor for efficiency reasons:
167- const uint32_t directory_mask_;
168-
169- Bucket *directory_;
170-
171- HashFamily hasher_;
172-
173- public:
174- // Consumes at most (1 << log_heap_space) bytes on the heap:
175- explicit SimdBlockFilter64 (const int log_heap_space);
176- SimdBlockFilter64 (SimdBlockFilter64 &&that)
177- : log_num_buckets_(that.log_num_buckets_),
178- directory_mask_ (that.directory_mask_), directory_(that.directory_),
179- hasher_(that.hasher_) {}
180- ~SimdBlockFilter64 () noexcept ;
181- void Add (const uint64_t key) noexcept ;
182- bool Find (const uint64_t key) const noexcept ;
183- uint64_t SizeInBytes () const {
184- return sizeof (Bucket) * (1ull << log_num_buckets_);
185- }
186-
187- private:
188- static void MakeMask (const uint32_t hash, __m256i *out1,
189- __m256i *out2) noexcept ;
190-
191- SimdBlockFilter64 (const SimdBlockFilter64 &) = delete;
192- void operator =(const SimdBlockFilter64 &) = delete ;
193- };
194-
195- template <typename HashFamily>
196- SimdBlockFilter64<HashFamily>::SimdBlockFilter64(const int log_heap_space)
197- : // Since log_heap_space is in bytes, we need to convert it to the number
198- // of Buckets we will use.
199- log_num_buckets_ (::std::max(1 , log_heap_space - LOG_BUCKET_BYTE_SIZE)),
200- // Don't use log_num_buckets_ if it will lead to undefined behavior by a
201- // shift that is too large.
202- directory_mask_((1ull << ::std::min(63 , log_num_buckets_)) - 1),
203- directory_(nullptr ), hasher_() {
204- if (!__builtin_cpu_supports (" avx2" )) {
205- throw ::std::runtime_error (
206- " SimdBlockFilter64 does not work without AVX2 instructions" );
207- }
208- const size_t alloc_size = 1ull << (log_num_buckets_ + LOG_BUCKET_BYTE_SIZE);
209- const int malloc_failed =
210- posix_memalign (reinterpret_cast <void **>(&directory_), 64 , alloc_size);
211- if (malloc_failed)
212- throw ::std::bad_alloc ();
213- memset (directory_, 0 , alloc_size);
214- }
215-
216- template <typename HashFamily>
217- SimdBlockFilter64<HashFamily>::~SimdBlockFilter64 () noexcept {
218- free (directory_);
219- directory_ = nullptr ;
220- }
221-
222- // with AVX-512, this becomes a single instruction
223- static inline __m256i hacked_mm256_mullo_epi64 (__m256i x, __m256i ml,
224- __m256i mh) {
225- __m256i xl = x;
226- // _mm256_and_si256(x, _mm256_set1_epi64x(UINT64_C(0x00000000ffffffff)));
227- // __m256i xh = _mm256_srli_epi64(x, 32);
228- // __m256i hl = _mm256_slli_epi64(_mm256_mul_epu32(xh, ml), 32);
229- __m256i lh = _mm256_slli_epi64 (_mm256_mul_epu32 (xl, mh), 32 );
230- __m256i ll = _mm256_mul_epu32 (xl, ml);
231- // return _mm256_add_epi64(ll, _mm256_add_epi64(hl, lh));
232- return _mm256_add_epi64 (lh, ll);
233- }
234-
235- // The SIMD reinterpret_casts technically violate C++'s strict aliasing rules.
236- // However, we compile with -fno-strict-aliasing.
237- template <typename HashFamily>
238- [[gnu::always_inline]] inline void
239- SimdBlockFilter64<HashFamily>::MakeMask(const uint32_t hash, __m256i *out1,
240- __m256i *out2) noexcept {
241- const __m256i ones = _mm256_set1_epi64x (1 );
242- // Odd contants for hashing:
243- const __m256i rehash1_l = _mm256_setr_epi64x (
244- 0x53214365047b6137 & 0xffffffff , 0x2c5635344974d91 & 0xffffffff ,
245- 0x7fe299d78824ad5b & 0xffffffff , 0xc01ac48e4d29f115 & 0xffffffff );
246- const __m256i rehash1_h = _mm256_setr_epi64x (
247- UINT64_C (0x53214365047b6137 ) >> 32 , UINT64_C (0x2c5635344974d91 ) >> 32 ,
248- UINT64_C (0x7fe299d78824ad5b ) >> 32 , UINT64_C (0xc01ac48e4d29f115 ) >> 32 );
249-
250- const __m256i rehash2_l = _mm256_setr_epi64x (
251- 0x7bdeb6734f95e2e3 & 0xffffffff , 0x2ec75a90a4e6ad3d & 0xffffffff ,
252- 0x3d485cae00ae48fd & 0xffffffff , 0xe7d0f0c09b59d29b & 0xffffffff );
253- const __m256i rehash2_h = _mm256_setr_epi64x (
254- UINT64_C (0x7bdeb6734f95e2e3 ) >> 32 , UINT64_C (0x2ec75a90a4e6ad3d ) >> 32 ,
255- UINT64_C (0x3d485cae00ae48fd ) >> 32 , UINT64_C (0xe7d0f0c09b59d29b ) >> 32 );
256-
257- __m256i hash_data = _mm256_set1_epi64x (hash);
258-
259- // Multiply-shift hashing ala Dietzfelbinger et al.: multiply 'hash' by eight
260- // different odd constants, then keep the 6 most significant bits from each
261- // product.
262- __m256i hash_data1 = hacked_mm256_mullo_epi64 (
263- hash_data, rehash1_l,
264- rehash1_h); // _mm256_mullo_epi64(rehash1, hash_data);
265- __m256i hash_data2 = hacked_mm256_mullo_epi64 (
266- hash_data, rehash2_l,
267- rehash2_h); // _mm256_mullo_epi64(rehash2, hash_data);
268-
269- hash_data1 = _mm256_and_si256 (_mm256_srli_epi64 (hash_data1, 32 ),
270- _mm256_set1_epi64x (63 ));
271- hash_data2 = _mm256_and_si256 (_mm256_srli_epi64 (hash_data2, 32 ),
272- _mm256_set1_epi64x (63 ));
273- // Use these 6 bits to shift a single bit to a location in each 32-bit lane
274- *out1 = _mm256_sllv_epi64 (ones, hash_data1);
275- *out2 = _mm256_sllv_epi64 (ones, hash_data2);
276- }
277-
278-
279- template <typename HashFamily>
280- [[gnu::always_inline]] inline void
281- SimdBlockFilter64<HashFamily>::Add(const uint64_t key) noexcept {
282- const auto hash = hasher_ (key);
283- const uint32_t bucket_idx = hash & directory_mask_;
284- __m256i mask1, mask2;
285- MakeMask (hash >> log_num_buckets_, &mask1, &mask2);
286- __m256i *const bucket1 =
287- &reinterpret_cast <__m256i *>(directory_)[2 * bucket_idx];
288- __m256i *const bucket2 =
289- &reinterpret_cast <__m256i *>(directory_)[2 * bucket_idx + 1 ];
290-
291- _mm256_store_si256 (bucket1, _mm256_or_si256 (*bucket1, mask1));
292- _mm256_store_si256 (bucket2, _mm256_or_si256 (*bucket2, mask2));
293- }
294-
295- template <typename HashFamily>
296- [[gnu::always_inline]] inline bool
297- SimdBlockFilter64<HashFamily>::Find(const uint64_t key) const noexcept {
298- const auto hash = hasher_ (key);
299- const uint32_t bucket_idx = hash & directory_mask_;
300- __m256i mask1, mask2;
301- MakeMask (hash >> log_num_buckets_, &mask1, &mask2);
302- const __m256i bucket1 =
303- reinterpret_cast <__m256i *>(directory_)[2 * bucket_idx];
304- const __m256i bucket2 =
305- reinterpret_cast <__m256i *>(directory_)[2 * bucket_idx + 1 ];
306- return _mm256_testc_si256 (bucket1, mask1) &
307- _mm256_testc_si256 (bucket2, mask2);
308- }
0 commit comments