Skip to content

Commit f1cfb60

Browse files
committed
Xor+ remove the option to use Poppy as an alternative to Rank9 (Poppy is slower)
1 parent 61240bd commit f1cfb60

File tree

1 file changed

+0
-217
lines changed

1 file changed

+0
-217
lines changed

src/xorfilter/xorfilter_plus.h

Lines changed: 0 additions & 217 deletions
Original file line numberDiff line numberDiff line change
@@ -108,208 +108,6 @@ class Rank9 {
108108

109109
};
110110

111-
// from https://github.com/rob-p/rank_speed_test/blob/master/src/poppy.cpp
112-
// license:
113-
// GNU Lesser General Public License v3.0
114-
// which looks like it's from
115-
// https://github.com/efficient/rankselect
116-
// license:
117-
// Copyright (C) 2013, Carnegie Mellon University
118-
// Licensed under the Apache License, Version 2.0 (the "License")
119-
120-
// Geometry constants used by the Poppy rank structure below.

// Number of bits in one uint64_t word of the bit array.
const int kWordSize = 64;

// A "basic block" is 2^kBasicBlockBits = 512 bits.
const int kBasicBlockBits = 9;
const int kBasicBlockSize = 1 << kBasicBlockBits;
const int kBasicBlockMask = kBasicBlockSize - 1;

// Number of 64-bit words that make up one basic block (512 / 64 = 8).
const int kWordCountPerBasicBlock = kBasicBlockSize / kWordSize;

// Alignment, in bytes, requested from posix_memalign for the Poppy arrays.
const int kCacheLineSize = 64;
126-
127-
// #define USE_POPPY
128-
129-
/**
 * Poppy rank structure over a bit sequence.
 *
 * The constructor makes a private, bit-reversed copy of the caller's words
 * (so that bit `pos` lives at position 63 - (pos & 63) of word pos >> 6) and
 * builds a two-level index on top of it:
 *   - l1Entries_[i]: number of set bits before the i-th 2^32-bit region;
 *   - l2Entries_[j]: for the j-th 2048-bit region, the low 32 bits hold the
 *     number of set bits since the start of the enclosing L1 region and the
 *     next three 10-bit fields hold the popcounts of its first three 512-bit
 *     basic blocks.
 *
 * The object owns every buffer it allocates and releases them in the
 * destructor; it is therefore non-copyable.
 *
 * NOTE(review): loc_ and locCount_ are fixed-size arrays with 2^16 slots, so
 * a single Poppy object is several hundred kilobytes; allocate it on the heap.
 */
class Poppy {
private:
    uint64_t *bits_;            // bit-reversed copy of the input (posix_memalign)
    uint64_t num_bits_;         // size of bits_ in bits, including padding
    uint64_t num_counts_;       // total number of set bits found in bits_

    uint64_t *l2Entries_;       // one packed entry per 2048 bits (posix_memalign)
    uint64_t l2EntryCount_;
    uint64_t *l1Entries_;       // one cumulative count per 2^32 bits (posix_memalign)
    uint64_t l1EntryCount_;
    uint64_t basicBlockCount_;  // number of 512-bit basic blocks in bits_

    // Per L1 region: positions of every kLocFreq-th set bit (new[]).
    // Only the first l1EntryCount_ slots are ever initialised.
    // NOTE(review): presumably sampling hints for select(); nothing in the
    // visible code reads them back.
    uint32_t *loc_[1 << 16];
    uint32_t locCount_[1 << 16];

    static const int kLocFreq = 8192;
    static const int kLocFreqMask = 8191;
    static const int kL2EntryCountPerL1Entry = 1 << 21;     // 2^21 * 2048 bits = 2^32 bits
    static const int kBasicBlockCountPerL1Entry = 1 << 23;  // 2^23 * 512 bits  = 2^32 bits

    // Non-copyable: the destructor frees the owned buffers, so an implicit
    // member-wise copy would lead to a double free. Declared, never defined.
    Poppy(const Poppy &);
    Poppy &operator=(const Poppy &);

public:
    /**
     * Builds the structure for the first num_bits bits stored in `bits`.
     * The input array is only read; the caller keeps ownership of it.
     */
    Poppy(uint64_t * const bits, const uint64_t num_bits);

    /**
     * Releases the buffers obtained by the constructor: the loc_ arrays come
     * from new[], the three word arrays from posix_memalign (hence free()).
     */
    ~Poppy() {
        for (uint64_t i = 0; i < l1EntryCount_; i++) {
            delete [] loc_[i];
        }
        free(l2Entries_);
        free(l1Entries_);
        free(bits_);
    }

    /** Number of set bits in positions [0, pos). */
    inline uint64_t rank(uint64_t pos);

    /** Value (0 or 1) of the bit at position pos. */
    inline uint64_t get(uint64_t pos) const {
        // bits_ is stored most-significant-bit first within each word.
        return (bits_[(size_t) (pos >> 6)] >> (63 - (pos & 63))) & 1;
    }

    /**
     * Returns num_counts_, i.e. the total number of set bits accumulated by
     * the constructor (despite the name, this is not the size in bits).
     */
    uint64_t getBitCount() const {
        return num_counts_;
    }

};
162-
163-
#define popcountsize 64ULL
164-
#define popcountmask (popcountsize - 1)
165-
#define _mm_popcnt_u64 bitCount64
166-
167-
inline uint64_t popcountLinear(uint64_t *bits, uint64_t x, uint64_t nbits) {
168-
if (nbits == 0)
169-
return 0;
170-
171-
uint64_t lastword = (nbits - 1) / popcountsize;
172-
uint64_t p = 0;
173-
174-
for (int i = 0; i < lastword; i++) {
175-
p += _mm_popcnt_u64(bits[x+i]);
176-
}
177-
178-
uint64_t lastshifted = bits[x+lastword] >> (63 - ((nbits - 1) & popcountmask));
179-
p += _mm_popcnt_u64(lastshifted);
180-
return p;
181-
}
182-
183-
Poppy::Poppy(uint64_t * const bits, uint64_t num_bits) {
184-
size_t bitsArraySize = (size_t) ((num_bits + 511) / 512) * 512 / 64;
185-
// TODO use posix_memalign
186-
posix_memalign((void **) &bits_, kCacheLineSize, (bitsArraySize + 16) * sizeof(uint64_t));
187-
// bits_ = new uint64_t[bitsArraySize + 16]();
188-
189-
for(int i=0; i<bitsArraySize; i++) {
190-
uint64_t x = bits[i];
191-
uint64_t y = 0;
192-
for(int j=0; j<64; j++) {
193-
y = (y << 1) | (x & 0x1);
194-
x >>= 1;
195-
}
196-
bits_[i] = y;
197-
}
198-
num_bits = (bitsArraySize + 16) * 64;
199-
200-
// memcpy(bits_, bits, (bitsArraySize - 1) * sizeof(uint64_t));
201-
// bits_[bitsArraySize - 1] = 0;
202-
203-
// bits_ = bits;
204-
// std::cout << "poppy.init0 " << num_bits << "\n";
205-
206-
207-
num_bits_ = num_bits;
208-
num_counts_ = 0;
209-
210-
l1EntryCount_ = std::max(num_bits_ >> 32, (uint64_t) 1);
211-
l2EntryCount_ = num_bits_ >> 11;
212-
basicBlockCount_ = num_bits_ / kBasicBlockSize;
213-
214-
// assert(
215-
posix_memalign((void **) &l1Entries_, kCacheLineSize, l1EntryCount_ * sizeof(uint64_t));
216-
// >= 0);
217-
// assert(
218-
posix_memalign((void **) &l2Entries_, kCacheLineSize, l2EntryCount_ * sizeof(uint64_t));
219-
// >= 0);
220-
221-
uint64_t l2Id = 0;
222-
uint64_t basicBlockId = 0;
223-
224-
memset(locCount_, 0, sizeof(locCount_));
225-
226-
for (uint64_t i = 0; i < l1EntryCount_; i++) {
227-
l1Entries_[i] = num_counts_;
228-
uint32_t cum = 0;
229-
for (int k = 0; k < kL2EntryCountPerL1Entry; k++) {
230-
l2Entries_[l2Id] = cum;
231-
for (int offset = 0; offset < 30; offset += 10) {
232-
int c = popcountLinear(bits_,
233-
basicBlockId * kWordCountPerBasicBlock,
234-
kBasicBlockSize);
235-
cum += c;
236-
basicBlockId++;
237-
l2Entries_[l2Id] |= (uint64_t) c << (32 + offset);
238-
}
239-
cum += popcountLinear(bits_, basicBlockId * kWordCountPerBasicBlock, kBasicBlockSize);
240-
basicBlockId++;
241-
242-
if (++l2Id >= l2EntryCount_) break;
243-
}
244-
245-
locCount_[i] = (cum + kLocFreq - 1) / kLocFreq;
246-
num_counts_ += cum;
247-
}
248-
basicBlockId = 0;
249-
for (uint64_t i = 0; i < l1EntryCount_; i++) {
250-
loc_[i] = new uint32_t[locCount_[i]];
251-
locCount_[i] = 0;
252-
253-
uint32_t oneCount = 0;
254-
255-
for (uint32_t k = 0; k < kBasicBlockCountPerL1Entry; k++) {
256-
uint64_t woff = basicBlockId * kWordCountPerBasicBlock;
257-
for (int widx = 0; widx < kWordCountPerBasicBlock; widx++)
258-
for (int bit = 0; bit < kWordSize; bit++)
259-
if (bits_[woff + widx] & (1ULL << (63 - bit))) {
260-
oneCount++;
261-
if ((oneCount & kLocFreqMask) == 1) {
262-
loc_[i][locCount_[i]] = k * kBasicBlockSize + widx * kWordSize + bit;
263-
locCount_[i]++;
264-
}
265-
}
266-
267-
basicBlockId++;
268-
if (basicBlockId >= basicBlockCount_) break;
269-
}
270-
}
271-
272-
// to ensure everything is OK
273-
Rank9 *r = new Rank9(bits, num_bits);
274-
for(int i=0; i<num_bits; i++) {
275-
if (r->rank(i) != rank(i)) {
276-
std::cout << "rank " << i << " of " << num_bits << " r9 " << r->rank(i) << " poppy " << rank(i) << "\n";
277-
break;
278-
}
279-
}
280-
for(int i=0; i<num_bits; i++) {
281-
if (r->get(i) != get(i)) {
282-
std::cout << "get " << i << " of " << num_bits << " r9 " << r->get(i) << " poppy " << get(i) << "\n";
283-
break;
284-
}
285-
}
286-
delete r;
287-
288-
}
289-
290-
inline uint64_t Poppy::rank(uint64_t pos) {
291-
// assert(pos <= num_bits_);
292-
// --pos;
293-
// std::cout << "poppy.rank " << pos << "\n";
294-
295-
296-
uint64_t l1Id = pos >> 32;
297-
uint64_t l2Id = pos >> 11;
298-
uint64_t x = l2Entries_[l2Id];
299-
300-
uint64_t res = l1Entries_[l1Id] + (x & 0xFFFFFFFFULL);
301-
x >>= 32;
302-
303-
int groupId = (pos & 2047) / 512;
304-
for (int i = 0; i < groupId; i++) {
305-
res += x & 1023;
306-
x >>= 10;
307-
}
308-
res += popcountLinear(bits_, (l2Id * 4 + groupId) * kWordCountPerBasicBlock, (pos & 511));
309-
310-
return res;
311-
}
312-
313111
inline uint64_t rotl64(uint64_t n, unsigned int c) {
314112
// assumes width is a power of 2
315113
const unsigned int mask = (CHAR_BIT * sizeof(n) - 1);
@@ -368,11 +166,7 @@ class XorFilterPlus {
368166
size_t arrayLength;
369167
size_t blockLength;
370168
FingerprintType *fingerprints = NULL;
371-
#ifdef USE_POPPY
372-
Poppy *rank = NULL;
373-
#else
374169
Rank9 *rank = NULL;
375-
#endif
376170
size_t totalSizeInBytes;
377171

378172
HashFamily* hasher;
@@ -579,11 +373,7 @@ Status XorFilterPlus<ItemType, FingerprintType, HashFamily>::AddAll(
579373
}
580374
}
581375
delete [] fp;
582-
#ifdef USE_POPPY
583-
rank = new Poppy(bits, bitCount);
584-
#else
585376
rank = new Rank9(bits, bitCount);
586-
#endif
587377
delete [] bits;
588378
totalSizeInBytes = (2 * blockLength + setBits) * sizeof(FingerprintType)
589379
+ rank->getBitCount() / 8;
@@ -603,18 +393,11 @@ Status XorFilterPlus<ItemType, FingerprintType, HashFamily>::Contain(
603393
uint32_t h1 = reduce(r1, blockLength) + blockLength;
604394
uint32_t h2a = reduce(r2, blockLength);
605395
f ^= fingerprints[h0] ^ fingerprints[h1];
606-
#ifdef USE_POPPY
607-
if (rank->get(h2a)) {
608-
uint32_t h2x = (uint32_t) rank->rank(h2a);
609-
f ^= fingerprints[h2x + 2 * blockLength];
610-
}
611-
#else
612396
uint64_t bitAndPartialRank = rank->getAndPartialRank(h2a);
613397
if ((bitAndPartialRank & 1) == 1) {
614398
uint32_t h2x = (uint32_t) ((bitAndPartialRank >> 1) + rank->remainingRank(h2a));
615399
f ^= fingerprints[h2x + 2 * blockLength];
616400
}
617-
#endif
618401
return f == 0 ? Ok : NotFound;
619402
}
620403

0 commit comments

Comments
 (0)