Skip to content

Commit e9b4222

Browse files
committed
Speed up construction (work in progress)
1 parent 83af74c commit e9b4222

File tree

1 file changed

+196
-0
lines changed

1 file changed

+196
-0
lines changed

src/xorfilter.h

Lines changed: 196 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,32 @@ void applyBlock(uint64_t* tmp, int b, int len, t2val_t * t2vals) {
101101
}
102102
}
103103

104+
int applyBlock2(uint64_t* tmp, int b, int len, t2val_t * t2vals, int* alone, int alonePos) {
105+
for (int i = 0; i < len; i += 2) {
106+
uint64_t hash = tmp[(b << BLOCK_SHIFT) + i];
107+
int index = (int) tmp[(b << BLOCK_SHIFT) + i + 1];
108+
int oldCount = t2vals[index].t2count;
109+
// std::cout << " consume index " << index << " hash " << hash << " oldCount " << oldCount << " i " << i << "\n";
110+
/*
111+
int newCount = --t2vals[h].t2count;
112+
if (newCount == 1) {
113+
alone[alonePos++] = h;
114+
}
115+
t2vals[h].t2 ^= hash;
116+
*/
117+
118+
if (oldCount >= 1) {
119+
int newCount = oldCount - 1;
120+
t2vals[index].t2count = newCount;
121+
if (newCount == 1) {
122+
alone[alonePos++] = index;
123+
}
124+
t2vals[index].t2 ^= hash;
125+
}
126+
}
127+
return alonePos;
128+
}
129+
104130
template <typename ItemType, typename FingerprintType,
105131
typename HashFamily>
106132
Status XorFilter<ItemType, FingerprintType, HashFamily>::AddAll(
@@ -139,9 +165,119 @@ Status XorFilter<ItemType, FingerprintType, HashFamily>::AddAll(
139165
}
140166
delete[] tmp;
141167
delete[] tmpc;
168+
reverseOrderPos = 0;
169+
170+
int* alone = new int[arrayLength];
171+
int alonePos = 0;
172+
for (size_t i = 0; i < arrayLength; i++) {
173+
if (t2vals[i].t2count == 1) {
174+
alone[alonePos++] = i;
175+
}
176+
}
177+
178+
tmp = new uint64_t[blocks * BLOCK_LEN];
179+
tmpc = new int[blocks]();
142180

143181
reverseOrderPos = 0;
182+
while (reverseOrderPos < size) {
183+
184+
if (alonePos == 0) {
185+
int bestb = -1, bb = -1;
186+
for (int b = 0; b < blocks && alonePos == 0; b++) {
187+
if (tmpc[b] > bestb) {
188+
bestb = tmpc[b];
189+
bb = b;
190+
}
191+
}
192+
if (tmpc[bb] > 0) {
193+
alonePos = applyBlock2(tmp, bb, tmpc[bb], t2vals, alone, alonePos);
194+
tmpc[bb] = 0;
195+
}
196+
if (alonePos == 0) {
197+
for (int b = 0; b < blocks && alonePos == 0; b++) {
198+
if (tmpc[b] > 0) {
199+
alonePos = applyBlock2(tmp, b, tmpc[b], t2vals, alone, alonePos);
200+
tmpc[b] = 0;
201+
}
202+
}
203+
}
204+
// std::cout << "now alone " << alonePos << "\n";
205+
}
206+
207+
if (alonePos == 0) {
208+
break;
209+
}
210+
211+
int i = alone[--alonePos];
212+
213+
int b = i >> BLOCK_SHIFT;
214+
if (tmpc[b] > 0) {
215+
alonePos = applyBlock2(tmp, b, tmpc[b], t2vals, alone, alonePos);
216+
tmpc[b] = 0;
217+
}
218+
219+
uint8_t found = -1;
220+
if (t2vals[i].t2count == 0) {
221+
continue;
222+
}
223+
// if (t2vals[i].t2count > 100 || t2vals[i].t2count < 0) {
224+
//std::cout << "UNEXPECTED " << i << " = " << t2vals[i].t2count << "\n";
225+
//}
226+
long hash = t2vals[i].t2;
227+
//if (hash == 0) {
228+
// std::cout << "UNEXPECTED hash " << i << " = " << t2vals[i].t2count << "\n";
229+
//}
230+
231+
for (int hi = 0; hi < 3; hi++) {
232+
int h = getHashFromHash(hash, hi, blockLength);
233+
if (h == i) {
234+
found = (uint8_t) hi;
235+
//if (t2vals[i].t2count != 1) {
236+
// std::cout << " NOT 1 " << t2vals[i].t2count << "\n";
237+
//}
238+
t2vals[i].t2count = 0;
239+
} else {
240+
//std::cout << " add index " << h << " hash " << hash << " hi " << hi << "\n";
241+
242+
int b = h >> BLOCK_SHIFT;
243+
int i2 = tmpc[b];
244+
tmp[(b << BLOCK_SHIFT) + i2] = hash;
245+
tmp[(b << BLOCK_SHIFT) + i2 + 1] = h;
246+
tmpc[b] += 2;
247+
if (tmpc[b] >= BLOCK_LEN) {
248+
alonePos = applyBlock2(tmp, b, tmpc[b], t2vals, alone, alonePos);
249+
tmpc[b] = 0;
250+
}
251+
/*
252+
253+
int newCount = --t2vals[h].t2count;
254+
if (newCount == 1) {
255+
alone[alonePos++] = h;
256+
}
257+
t2vals[h].t2 ^= hash;
258+
*/
259+
}
260+
}
261+
// std::cout << " add " << hash << " found " << (int) found << "\n";
144262

263+
reverseOrder[reverseOrderPos] = hash;
264+
265+
//if (found < 0) {
266+
// std::cout << " NOT FOUND " << hash << "\n";
267+
//}
268+
reverseH[reverseOrderPos] = found;
269+
reverseOrderPos++;
270+
271+
272+
}
273+
274+
delete[] tmp;
275+
delete[] tmpc;
276+
277+
delete [] alone;
278+
279+
280+
/*
145281
int* alone = new int[arrayLength];
146282
int alonePos = 0;
147283
for (size_t i = 0; i < arrayLength; i++) {
@@ -174,6 +310,66 @@ Status XorFilter<ItemType, FingerprintType, HashFamily>::AddAll(
174310
reverseOrderPos++;
175311
}
176312
delete [] alone;
313+
*/
314+
315+
316+
317+
/*
318+
int* alone = new int[blocks * BLOCK_LEN];
319+
int* alonePos = new int[blocks]();
320+
for (size_t i = 0; i < arrayLength; i++) {
321+
if (t2vals[i].t2count == 1) {
322+
int b = i >> BLOCK_SHIFT;
323+
// TODO could in theory go over the limit
324+
int p = alonePos[b]++;
325+
alone[(b << BLOCK_SHIFT) + p] = i;
326+
}
327+
}
328+
reverseOrderPos = 0;
329+
330+
int currentBlock = 0;
331+
while (reverseOrderPos < size) {
332+
if (alonePos[currentBlock] == 0) {
333+
for(int i=0, b=currentBlock + 1; i<blocks; i++, b++) {
334+
if (b > blocks) {
335+
b = 0;
336+
}
337+
if (alonePos[b] > 0) {
338+
currentBlock = b;
339+
break;
340+
}
341+
}
342+
}
343+
if (alonePos[currentBlock] == 0) {
344+
break;
345+
}
346+
int i = (b << BLOCK_SHIFT) + alone[--alonePos[currentBlock]];
347+
if (t2vals[i].t2count == 0) {
348+
continue;
349+
}
350+
long hash = t2vals[i].t2;
351+
uint8_t found = -1;
352+
for (int hi = 0; hi < 3; hi++) {
353+
int h = getHashFromHash(hash, hi, blockLength);
354+
int newCount = --t2vals[h].t2count;
355+
if (newCount == 0) {
356+
found = (uint8_t) hi;
357+
} else {
358+
if (newCount == 1) {
359+
alone[alonePos++] = h;
360+
}
361+
t2vals[h].t2 ^= hash;
362+
}
363+
}
364+
reverseOrder[reverseOrderPos] = hash;
365+
reverseH[reverseOrderPos] = found;
366+
reverseOrderPos++;
367+
}
368+
delete [] alone;
369+
*/
370+
371+
372+
177373

178374
if (reverseOrderPos == size) {
179375
break;

0 commit comments

Comments
 (0)