@@ -108,6 +108,208 @@ class Rank9 {
108108
109109};
110110
111+ // from https://github.com/rob-p/rank_speed_test/blob/master/src/poppy.cpp
112+ // license:
113+ // GNU Lesser General Public License v3.0
114+ // which looks like it's from
115+ // https://github.com/efficient/rankselect
116+ // license:
117+ // Copyright (C) 2013, Carnegie Mellon University
118+ // Licensed under the Apache License, Version 2.0 (the "License")
119+
// Geometry of the Poppy rank index defined below.
// A "basic block" is the unit whose population count is stored in the index.
const int kBasicBlockBits = 9;                                    // log2(bits per basic block)
const int kBasicBlockSize = 1 << kBasicBlockBits;                 // bits per basic block
const int kBasicBlockMask = kBasicBlockSize - 1;                  // bit offset inside a basic block
const int kWordSize = 64;                                         // bits per uint64_t word
const int kWordCountPerBasicBlock = kBasicBlockSize / kWordSize;  // words per basic block
const int kCacheLineSize = 64;                                    // alignment (bytes) passed to posix_memalign
126+
127+ // #define USE_POPPY
128+
// Rank index over a bit vector, using a two-level directory of popcounts.
//
// The constructor copies the caller's words into a private, cache-line aligned
// buffer with the bit order of every word reversed, so position 0 is the most
// significant bit of word 0 (see get()).
//
// The object owns every buffer it allocates. It releases them in the
// destructor and is therefore not copyable.
class Poppy {
 private:
  uint64_t *bits_ = NULL;        // private bit-reversed copy of the input (posix_memalign)
  uint64_t num_bits_ = 0;        // number of bits covered by the index, padding included
  uint64_t num_counts_ = 0;      // total number of set bits found while building the index

  // One 64-bit entry per 2048 bits: the low 32 bits hold the number of ones
  // since the start of the enclosing L1 entry; bits 32.., 42.. and 52.. hold
  // the 10-bit popcounts of the first three 512-bit basic blocks of the group.
  uint64_t *l2Entries_ = NULL;
  uint64_t l2EntryCount_ = 0;
  // One entry per 2^32 bits: number of ones before that region.
  uint64_t *l1Entries_ = NULL;
  uint64_t l1EntryCount_ = 0;
  uint64_t basicBlockCount_ = 0;

  // Per L1 entry: sampled positions of every kLocFreq-th set bit. Only the
  // first l1EntryCount_ slots are ever allocated by the constructor.
  uint32_t *loc_[1 << 16];
  uint32_t locCount_[1 << 16];

  static const int kLocFreq = 8192;
  static const int kLocFreqMask = 8191;
  static const int kL2EntryCountPerL1Entry = 1 << 21;
  static const int kBasicBlockCountPerL1Entry = 1 << 23;

 public:
  // Builds the index over the first num_bits bits of `bits`; the input is
  // copied, so the caller keeps ownership of `bits`.
  Poppy(uint64_t * const bits, const uint64_t num_bits);

  // Releases the sample arrays (new[]) and the aligned buffers
  // (posix_memalign memory must be returned with free()).
  ~Poppy() {
    for (uint64_t i = 0; i < l1EntryCount_; i++) {
      delete[] loc_[i];
    }
    free(l2Entries_);
    free(l1Entries_);
    free(bits_);
  }

  // The class holds raw owning pointers; a member-wise copy would double-free.
  Poppy(const Poppy &) = delete;
  Poppy &operator=(const Poppy &) = delete;

  // Number of set bits in positions [0, pos).
  inline uint64_t rank(uint64_t pos);

  // Value (0 or 1) of the bit at position pos.
  inline uint64_t get(uint64_t pos) const {
    return (bits_[(size_t) (pos >> 6)] >> (63 - (pos & 63))) & 1;
  }

  // Returns the total number of set bits counted by the constructor.
  // NOTE(review): callers appear to use this as a size estimate in bits --
  // confirm that the population count is really what is wanted there.
  uint64_t getBitCount() const {
    return num_counts_;
  }
};
162+
163+ #define popcountsize 64ULL
164+ #define popcountmask (popcountsize - 1 )
165+ #define _mm_popcnt_u64 bitCount64
166+
167+ inline uint64_t popcountLinear (uint64_t *bits, uint64_t x, uint64_t nbits) {
168+ if (nbits == 0 )
169+ return 0 ;
170+
171+ uint64_t lastword = (nbits - 1 ) / popcountsize;
172+ uint64_t p = 0 ;
173+
174+ for (int i = 0 ; i < lastword; i++) {
175+ p += _mm_popcnt_u64 (bits[x+i]);
176+ }
177+
178+ uint64_t lastshifted = bits[x+lastword] >> (63 - ((nbits - 1 ) & popcountmask));
179+ p += _mm_popcnt_u64 (lastshifted);
180+ return p;
181+ }
182+
183+ Poppy::Poppy (uint64_t * const bits, uint64_t num_bits) {
184+ size_t bitsArraySize = (size_t ) ((num_bits + 511 ) / 512 ) * 512 / 64 ;
185+ // TODO use posix_memalign
186+ posix_memalign ((void **) &bits_, kCacheLineSize , (bitsArraySize + 16 ) * sizeof (uint64_t ));
187+ // bits_ = new uint64_t[bitsArraySize + 16]();
188+
189+ for (int i=0 ; i<bitsArraySize; i++) {
190+ uint64_t x = bits[i];
191+ uint64_t y = 0 ;
192+ for (int j=0 ; j<64 ; j++) {
193+ y = (y << 1 ) | (x & 0x1 );
194+ x >>= 1 ;
195+ }
196+ bits_[i] = y;
197+ }
198+ num_bits = (bitsArraySize + 16 ) * 64 ;
199+
200+ // memcpy(bits_, bits, (bitsArraySize - 1) * sizeof(uint64_t));
201+ // bits_[bitsArraySize - 1] = 0;
202+
203+ // bits_ = bits;
204+ // std::cout << "poppy.init0 " << num_bits << "\n";
205+
206+
207+ num_bits_ = num_bits;
208+ num_counts_ = 0 ;
209+
210+ l1EntryCount_ = std::max (num_bits_ >> 32 , (uint64_t ) 1 );
211+ l2EntryCount_ = num_bits_ >> 11 ;
212+ basicBlockCount_ = num_bits_ / kBasicBlockSize ;
213+
214+ // assert(
215+ posix_memalign ((void **) &l1Entries_, kCacheLineSize , l1EntryCount_ * sizeof (uint64_t ));
216+ // >= 0);
217+ // assert(
218+ posix_memalign ((void **) &l2Entries_, kCacheLineSize , l2EntryCount_ * sizeof (uint64_t ));
219+ // >= 0);
220+
221+ uint64_t l2Id = 0 ;
222+ uint64_t basicBlockId = 0 ;
223+
224+ memset (locCount_, 0 , sizeof (locCount_));
225+
226+ for (uint64_t i = 0 ; i < l1EntryCount_; i++) {
227+ l1Entries_[i] = num_counts_;
228+ uint32_t cum = 0 ;
229+ for (int k = 0 ; k < kL2EntryCountPerL1Entry ; k++) {
230+ l2Entries_[l2Id] = cum;
231+ for (int offset = 0 ; offset < 30 ; offset += 10 ) {
232+ int c = popcountLinear (bits_,
233+ basicBlockId * kWordCountPerBasicBlock ,
234+ kBasicBlockSize );
235+ cum += c;
236+ basicBlockId++;
237+ l2Entries_[l2Id] |= (uint64_t ) c << (32 + offset);
238+ }
239+ cum += popcountLinear (bits_, basicBlockId * kWordCountPerBasicBlock , kBasicBlockSize );
240+ basicBlockId++;
241+
242+ if (++l2Id >= l2EntryCount_) break ;
243+ }
244+
245+ locCount_[i] = (cum + kLocFreq - 1 ) / kLocFreq ;
246+ num_counts_ += cum;
247+ }
248+ basicBlockId = 0 ;
249+ for (uint64_t i = 0 ; i < l1EntryCount_; i++) {
250+ loc_[i] = new uint32_t [locCount_[i]];
251+ locCount_[i] = 0 ;
252+
253+ uint32_t oneCount = 0 ;
254+
255+ for (uint32_t k = 0 ; k < kBasicBlockCountPerL1Entry ; k++) {
256+ uint64_t woff = basicBlockId * kWordCountPerBasicBlock ;
257+ for (int widx = 0 ; widx < kWordCountPerBasicBlock ; widx++)
258+ for (int bit = 0 ; bit < kWordSize ; bit++)
259+ if (bits_[woff + widx] & (1ULL << (63 - bit))) {
260+ oneCount++;
261+ if ((oneCount & kLocFreqMask ) == 1 ) {
262+ loc_[i][locCount_[i]] = k * kBasicBlockSize + widx * kWordSize + bit;
263+ locCount_[i]++;
264+ }
265+ }
266+
267+ basicBlockId++;
268+ if (basicBlockId >= basicBlockCount_) break ;
269+ }
270+ }
271+
272+ // to ensure everything is OK
273+ Rank9 *r = new Rank9 (bits, num_bits);
274+ for (int i=0 ; i<num_bits; i++) {
275+ if (r->rank (i) != rank (i)) {
276+ std::cout << " rank " << i << " of " << num_bits << " r9 " << r->rank (i) << " poppy " << rank (i) << " \n " ;
277+ break ;
278+ }
279+ }
280+ for (int i=0 ; i<num_bits; i++) {
281+ if (r->get (i) != get (i)) {
282+ std::cout << " get " << i << " of " << num_bits << " r9 " << r->get (i) << " poppy " << get (i) << " \n " ;
283+ break ;
284+ }
285+ }
286+ delete r;
287+
288+ }
289+
290+ inline uint64_t Poppy::rank (uint64_t pos) {
291+ // assert(pos <= num_bits_);
292+ // --pos;
293+ // std::cout << "poppy.rank " << pos << "\n";
294+
295+
296+ uint64_t l1Id = pos >> 32 ;
297+ uint64_t l2Id = pos >> 11 ;
298+ uint64_t x = l2Entries_[l2Id];
299+
300+ uint64_t res = l1Entries_[l1Id] + (x & 0xFFFFFFFFULL );
301+ x >>= 32 ;
302+
303+ int groupId = (pos & 2047 ) / 512 ;
304+ for (int i = 0 ; i < groupId; i++) {
305+ res += x & 1023 ;
306+ x >>= 10 ;
307+ }
308+ res += popcountLinear (bits_, (l2Id * 4 + groupId) * kWordCountPerBasicBlock , (pos & 511 ));
309+
310+ return res;
311+ }
312+
111313inline uint64_t rotl64 (uint64_t n, unsigned int c) {
112314 // assumes width is a power of 2
113315 const unsigned int mask = (CHAR_BIT * sizeof (n) - 1 );
@@ -166,7 +368,11 @@ class XorFilterPlus {
166368 size_t arrayLength;
167369 size_t blockLength;
168370 FingerprintType *fingerprints = NULL ;
371+ #ifdef USE_POPPY
372+ Poppy *rank = NULL ;
373+ #else
169374 Rank9 *rank = NULL ;
375+ #endif
170376 size_t totalSizeInBytes;
171377
172378 HashFamily* hasher;
@@ -373,7 +579,11 @@ Status XorFilterPlus<ItemType, FingerprintType, HashFamily>::AddAll(
373579 }
374580 }
375581 delete [] fp;
582+ #ifdef USE_POPPY
583+ rank = new Poppy (bits, bitCount);
584+ #else
376585 rank = new Rank9 (bits, bitCount);
586+ #endif
377587 delete [] bits;
378588 totalSizeInBytes = (2 * blockLength + setBits) * sizeof (FingerprintType)
379589 + rank->getBitCount () / 8 ;
@@ -393,11 +603,18 @@ Status XorFilterPlus<ItemType, FingerprintType, HashFamily>::Contain(
393603 uint32_t h1 = reduce (r1, blockLength) + blockLength;
394604 uint32_t h2a = reduce (r2, blockLength);
395605 f ^= fingerprints[h0] ^ fingerprints[h1];
396- uint64_t AndPartialRank = rank->getAndPartialRank (h2a);
397- if ((AndPartialRank & 1 ) == 1 ) {
398- uint32_t h2x = (uint32_t ) ((AndPartialRank >> 1 ) + rank->remainingRank (h2a));
606+ #ifdef USE_POPPY
607+ if (rank->get (h2a)) {
608+ uint32_t h2x = (uint32_t ) rank->rank (h2a);
609+ f ^= fingerprints[h2x + 2 * blockLength];
610+ }
611+ #else
612+ uint64_t bitAndPartialRank = rank->getAndPartialRank (h2a);
613+ if ((bitAndPartialRank & 1 ) == 1 ) {
614+ uint32_t h2x = (uint32_t ) ((bitAndPartialRank >> 1 ) + rank->remainingRank (h2a));
399615 f ^= fingerprints[h2x + 2 * blockLength];
400616 }
617+ #endif
401618 return f == 0 ? Ok : NotFound;
402619}
403620
0 commit comments