@@ -108,208 +108,6 @@ class Rank9 {
108108
109109};
110110
// Adapted from https://github.com/rob-p/rank_speed_test/blob/master/src/poppy.cpp
//   License: GNU Lesser General Public License v3.0
// That file in turn appears to be derived from
//   https://github.com/efficient/rankselect
//   License: Copyright (C) 2013, Carnegie Mellon University.
//            Licensed under the Apache License, Version 2.0 (the "License").
119-
// Layout parameters shared by the Poppy rank structure below.
const int kWordSize = 64;                                        // bits per uint64_t word
const int kBasicBlockBits = 9;                                   // log2 of the basic block size
const int kBasicBlockSize = 1 << kBasicBlockBits;                // 512 bits per basic block
const int kBasicBlockMask = (1 << kBasicBlockBits) - 1;          // low-bit mask: position inside a basic block
const int kWordCountPerBasicBlock = kBasicBlockSize / kWordSize; // 8 words per basic block
const int kCacheLineSize = 64;                                   // alignment handed to posix_memalign
126-
// Uncomment to make XorFilterPlus use Poppy instead of Rank9 as its rank structure.
// #define USE_POPPY
128-
/**
 * Rank structure over a bit vector using a two-level count index.
 *
 * The constructor makes its own copy of the bits (stored most-significant-bit
 * first inside each 64-bit word, which is the order get() reads them in) and
 * builds the L1/L2 count tables that rank() consults.
 *
 * The object owns every buffer it allocates, so it releases them in the
 * destructor and is deliberately non-copyable (a copy would free the same
 * buffers twice).
 */
class Poppy {
 private:
    // Private copy of the bit vector, allocated with posix_memalign.
    uint64_t *bits_ = NULL;
    // Number of bits covered by the index.
    uint64_t num_bits_ = 0;
    // Total number of set bits counted while building the index.
    uint64_t num_counts_ = 0;

    // One packed entry per group of four basic blocks (2048 bits).
    uint64_t *l2Entries_ = NULL;
    uint64_t l2EntryCount_ = 0;
    // One cumulative count per 2^32-bit range.
    uint64_t *l1Entries_ = NULL;
    uint64_t l1EntryCount_ = 0;
    uint64_t basicBlockCount_ = 0;

    // Per-L1-range samples of set-bit positions (every kLocFreq-th one),
    // allocated with new[] by the constructor for the first l1EntryCount_ slots.
    uint32_t *loc_[1 << 16];
    uint32_t locCount_[1 << 16];

    static const int kLocFreq = 8192;
    static const int kLocFreqMask = 8191;
    static const int kL2EntryCountPerL1Entry = 1 << 21;
    static const int kBasicBlockCountPerL1Entry = 1 << 23;

 public:
    /**
     * Builds the index over the first num_bits bits of `bits`.
     * The input array is only read during construction.
     */
    Poppy(uint64_t * const bits, const uint64_t num_bits);

    /** Releases the bit copy, both count tables and the position samples. */
    ~Poppy() {
        const uint64_t kLocSlots = 1 << 16;
        for (uint64_t i = 0; i < l1EntryCount_ && i < kLocSlots; i++) {
            delete[] loc_[i];
        }
        // These three buffers come from posix_memalign, so they go back via free().
        free(l2Entries_);
        free(l1Entries_);
        free(bits_);
    }

    // The object owns raw buffers; copying would lead to a double free.
    Poppy(const Poppy &) = delete;
    Poppy &operator=(const Poppy &) = delete;

    /** Number of set bits in positions [0, pos). Defined below the class. */
    inline uint64_t rank(uint64_t pos);

    /** Returns bit `pos` (0 or 1) from the internal MSB-first copy. */
    inline uint64_t get(uint64_t pos) const {
        return (bits_[(size_t) (pos >> 6)] >> (63 - (pos & 63))) & 1;
    }

    /** Returns the total number of set bits counted at construction time. */
    uint64_t getBitCount() const {
        return num_counts_;
    }
};
162-
163- #define popcountsize 64ULL
164- #define popcountmask (popcountsize - 1 )
165- #define _mm_popcnt_u64 bitCount64
166-
167- inline uint64_t popcountLinear (uint64_t *bits, uint64_t x, uint64_t nbits) {
168- if (nbits == 0 )
169- return 0 ;
170-
171- uint64_t lastword = (nbits - 1 ) / popcountsize;
172- uint64_t p = 0 ;
173-
174- for (int i = 0 ; i < lastword; i++) {
175- p += _mm_popcnt_u64 (bits[x+i]);
176- }
177-
178- uint64_t lastshifted = bits[x+lastword] >> (63 - ((nbits - 1 ) & popcountmask));
179- p += _mm_popcnt_u64 (lastshifted);
180- return p;
181- }
182-
183- Poppy::Poppy (uint64_t * const bits, uint64_t num_bits) {
184- size_t bitsArraySize = (size_t ) ((num_bits + 511 ) / 512 ) * 512 / 64 ;
185- // TODO use posix_memalign
186- posix_memalign ((void **) &bits_, kCacheLineSize , (bitsArraySize + 16 ) * sizeof (uint64_t ));
187- // bits_ = new uint64_t[bitsArraySize + 16]();
188-
189- for (int i=0 ; i<bitsArraySize; i++) {
190- uint64_t x = bits[i];
191- uint64_t y = 0 ;
192- for (int j=0 ; j<64 ; j++) {
193- y = (y << 1 ) | (x & 0x1 );
194- x >>= 1 ;
195- }
196- bits_[i] = y;
197- }
198- num_bits = (bitsArraySize + 16 ) * 64 ;
199-
200- // memcpy(bits_, bits, (bitsArraySize - 1) * sizeof(uint64_t));
201- // bits_[bitsArraySize - 1] = 0;
202-
203- // bits_ = bits;
204- // std::cout << "poppy.init0 " << num_bits << "\n";
205-
206-
207- num_bits_ = num_bits;
208- num_counts_ = 0 ;
209-
210- l1EntryCount_ = std::max (num_bits_ >> 32 , (uint64_t ) 1 );
211- l2EntryCount_ = num_bits_ >> 11 ;
212- basicBlockCount_ = num_bits_ / kBasicBlockSize ;
213-
214- // assert(
215- posix_memalign ((void **) &l1Entries_, kCacheLineSize , l1EntryCount_ * sizeof (uint64_t ));
216- // >= 0);
217- // assert(
218- posix_memalign ((void **) &l2Entries_, kCacheLineSize , l2EntryCount_ * sizeof (uint64_t ));
219- // >= 0);
220-
221- uint64_t l2Id = 0 ;
222- uint64_t basicBlockId = 0 ;
223-
224- memset (locCount_, 0 , sizeof (locCount_));
225-
226- for (uint64_t i = 0 ; i < l1EntryCount_; i++) {
227- l1Entries_[i] = num_counts_;
228- uint32_t cum = 0 ;
229- for (int k = 0 ; k < kL2EntryCountPerL1Entry ; k++) {
230- l2Entries_[l2Id] = cum;
231- for (int offset = 0 ; offset < 30 ; offset += 10 ) {
232- int c = popcountLinear (bits_,
233- basicBlockId * kWordCountPerBasicBlock ,
234- kBasicBlockSize );
235- cum += c;
236- basicBlockId++;
237- l2Entries_[l2Id] |= (uint64_t ) c << (32 + offset);
238- }
239- cum += popcountLinear (bits_, basicBlockId * kWordCountPerBasicBlock , kBasicBlockSize );
240- basicBlockId++;
241-
242- if (++l2Id >= l2EntryCount_) break ;
243- }
244-
245- locCount_[i] = (cum + kLocFreq - 1 ) / kLocFreq ;
246- num_counts_ += cum;
247- }
248- basicBlockId = 0 ;
249- for (uint64_t i = 0 ; i < l1EntryCount_; i++) {
250- loc_[i] = new uint32_t [locCount_[i]];
251- locCount_[i] = 0 ;
252-
253- uint32_t oneCount = 0 ;
254-
255- for (uint32_t k = 0 ; k < kBasicBlockCountPerL1Entry ; k++) {
256- uint64_t woff = basicBlockId * kWordCountPerBasicBlock ;
257- for (int widx = 0 ; widx < kWordCountPerBasicBlock ; widx++)
258- for (int bit = 0 ; bit < kWordSize ; bit++)
259- if (bits_[woff + widx] & (1ULL << (63 - bit))) {
260- oneCount++;
261- if ((oneCount & kLocFreqMask ) == 1 ) {
262- loc_[i][locCount_[i]] = k * kBasicBlockSize + widx * kWordSize + bit;
263- locCount_[i]++;
264- }
265- }
266-
267- basicBlockId++;
268- if (basicBlockId >= basicBlockCount_) break ;
269- }
270- }
271-
272- // to ensure everything is OK
273- Rank9 *r = new Rank9 (bits, num_bits);
274- for (int i=0 ; i<num_bits; i++) {
275- if (r->rank (i) != rank (i)) {
276- std::cout << " rank " << i << " of " << num_bits << " r9 " << r->rank (i) << " poppy " << rank (i) << " \n " ;
277- break ;
278- }
279- }
280- for (int i=0 ; i<num_bits; i++) {
281- if (r->get (i) != get (i)) {
282- std::cout << " get " << i << " of " << num_bits << " r9 " << r->get (i) << " poppy " << get (i) << " \n " ;
283- break ;
284- }
285- }
286- delete r;
287-
288- }
289-
290- inline uint64_t Poppy::rank (uint64_t pos) {
291- // assert(pos <= num_bits_);
292- // --pos;
293- // std::cout << "poppy.rank " << pos << "\n";
294-
295-
296- uint64_t l1Id = pos >> 32 ;
297- uint64_t l2Id = pos >> 11 ;
298- uint64_t x = l2Entries_[l2Id];
299-
300- uint64_t res = l1Entries_[l1Id] + (x & 0xFFFFFFFFULL );
301- x >>= 32 ;
302-
303- int groupId = (pos & 2047 ) / 512 ;
304- for (int i = 0 ; i < groupId; i++) {
305- res += x & 1023 ;
306- x >>= 10 ;
307- }
308- res += popcountLinear (bits_, (l2Id * 4 + groupId) * kWordCountPerBasicBlock , (pos & 511 ));
309-
310- return res;
311- }
312-
313111inline uint64_t rotl64 (uint64_t n, unsigned int c) {
314112 // assumes width is a power of 2
315113 const unsigned int mask = (CHAR_BIT * sizeof (n) - 1 );
@@ -368,11 +166,7 @@ class XorFilterPlus {
368166 size_t arrayLength;
369167 size_t blockLength;
370168 FingerprintType *fingerprints = NULL ;
371- #ifdef USE_POPPY
372- Poppy *rank = NULL ;
373- #else
374169 Rank9 *rank = NULL ;
375- #endif
376170 size_t totalSizeInBytes;
377171
378172 HashFamily* hasher;
@@ -579,11 +373,7 @@ Status XorFilterPlus<ItemType, FingerprintType, HashFamily>::AddAll(
579373 }
580374 }
581375 delete [] fp;
582- #ifdef USE_POPPY
583- rank = new Poppy (bits, bitCount);
584- #else
585376 rank = new Rank9 (bits, bitCount);
586- #endif
587377 delete [] bits;
588378 totalSizeInBytes = (2 * blockLength + setBits) * sizeof (FingerprintType)
589379 + rank->getBitCount () / 8 ;
@@ -603,18 +393,11 @@ Status XorFilterPlus<ItemType, FingerprintType, HashFamily>::Contain(
603393 uint32_t h1 = reduce (r1, blockLength) + blockLength;
604394 uint32_t h2a = reduce (r2, blockLength);
605395 f ^= fingerprints[h0] ^ fingerprints[h1];
606- #ifdef USE_POPPY
607- if (rank->get (h2a)) {
608- uint32_t h2x = (uint32_t ) rank->rank (h2a);
609- f ^= fingerprints[h2x + 2 * blockLength];
610- }
611- #else
612396 uint64_t bitAndPartialRank = rank->getAndPartialRank (h2a);
613397 if ((bitAndPartialRank & 1 ) == 1 ) {
614398 uint32_t h2x = (uint32_t ) ((bitAndPartialRank >> 1 ) + rank->remainingRank (h2a));
615399 f ^= fingerprints[h2x + 2 * blockLength];
616400 }
617- #endif
618401 return f == 0 ? Ok : NotFound;
619402}
620403
0 commit comments