@@ -101,6 +101,32 @@ void applyBlock(uint64_t* tmp, int b, int len, t2val_t * t2vals) {
101101 }
102102}
103103
104+ int applyBlock2 (uint64_t * tmp, int b, int len, t2val_t * t2vals, int * alone, int alonePos) {
105+ for (int i = 0 ; i < len; i += 2 ) {
106+ uint64_t hash = tmp[(b << BLOCK_SHIFT) + i];
107+ int index = (int ) tmp[(b << BLOCK_SHIFT) + i + 1 ];
108+ int oldCount = t2vals[index].t2count ;
109+ // std::cout << " consume index " << index << " hash " << hash << " oldCount " << oldCount << " i " << i << "\n";
110+ /*
111+ int newCount = --t2vals[h].t2count;
112+ if (newCount == 1) {
113+ alone[alonePos++] = h;
114+ }
115+ t2vals[h].t2 ^= hash;
116+ */
117+
118+ if (oldCount >= 1 ) {
119+ int newCount = oldCount - 1 ;
120+ t2vals[index].t2count = newCount;
121+ if (newCount == 1 ) {
122+ alone[alonePos++] = index;
123+ }
124+ t2vals[index].t2 ^= hash;
125+ }
126+ }
127+ return alonePos;
128+ }
129+
104130template <typename ItemType, typename FingerprintType,
105131 typename HashFamily>
106132Status XorFilter<ItemType, FingerprintType, HashFamily>::AddAll(
@@ -139,9 +165,119 @@ Status XorFilter<ItemType, FingerprintType, HashFamily>::AddAll(
139165 }
140166 delete[] tmp;
141167 delete[] tmpc;
168+ reverseOrderPos = 0 ;
169+
170+ int * alone = new int [arrayLength];
171+ int alonePos = 0 ;
172+ for (size_t i = 0 ; i < arrayLength; i++) {
173+ if (t2vals[i].t2count == 1 ) {
174+ alone[alonePos++] = i;
175+ }
176+ }
177+
178+ tmp = new uint64_t [blocks * BLOCK_LEN];
179+ tmpc = new int [blocks]();
142180
143181 reverseOrderPos = 0 ;
182+ while (reverseOrderPos < size) {
183+
184+ if (alonePos == 0 ) {
185+ int bestb = -1 , bb = -1 ;
186+ for (int b = 0 ; b < blocks && alonePos == 0 ; b++) {
187+ if (tmpc[b] > bestb) {
188+ bestb = tmpc[b];
189+ bb = b;
190+ }
191+ }
192+ if (tmpc[bb] > 0 ) {
193+ alonePos = applyBlock2 (tmp, bb, tmpc[bb], t2vals, alone, alonePos);
194+ tmpc[bb] = 0 ;
195+ }
196+ if (alonePos == 0 ) {
197+ for (int b = 0 ; b < blocks && alonePos == 0 ; b++) {
198+ if (tmpc[b] > 0 ) {
199+ alonePos = applyBlock2 (tmp, b, tmpc[b], t2vals, alone, alonePos);
200+ tmpc[b] = 0 ;
201+ }
202+ }
203+ }
204+ // std::cout << "now alone " << alonePos << "\n";
205+ }
206+
207+ if (alonePos == 0 ) {
208+ break ;
209+ }
210+
211+ int i = alone[--alonePos];
212+
213+ int b = i >> BLOCK_SHIFT;
214+ if (tmpc[b] > 0 ) {
215+ alonePos = applyBlock2 (tmp, b, tmpc[b], t2vals, alone, alonePos);
216+ tmpc[b] = 0 ;
217+ }
218+
219+ uint8_t found = -1 ;
220+ if (t2vals[i].t2count == 0 ) {
221+ continue ;
222+ }
223+ // if (t2vals[i].t2count > 100 || t2vals[i].t2count < 0) {
224+ // std::cout << "UNEXPECTED " << i << " = " << t2vals[i].t2count << "\n";
225+ // }
226+ long hash = t2vals[i].t2 ;
227+ // if (hash == 0) {
228+ // std::cout << "UNEXPECTED hash " << i << " = " << t2vals[i].t2count << "\n";
229+ // }
230+
231+ for (int hi = 0 ; hi < 3 ; hi++) {
232+ int h = getHashFromHash (hash, hi, blockLength);
233+ if (h == i) {
234+ found = (uint8_t ) hi;
235+ // if (t2vals[i].t2count != 1) {
236+ // std::cout << " NOT 1 " << t2vals[i].t2count << "\n";
237+ // }
238+ t2vals[i].t2count = 0 ;
239+ } else {
240+ // std::cout << " add index " << h << " hash " << hash << " hi " << hi << "\n";
241+
242+ int b = h >> BLOCK_SHIFT;
243+ int i2 = tmpc[b];
244+ tmp[(b << BLOCK_SHIFT) + i2] = hash;
245+ tmp[(b << BLOCK_SHIFT) + i2 + 1 ] = h;
246+ tmpc[b] += 2 ;
247+ if (tmpc[b] >= BLOCK_LEN) {
248+ alonePos = applyBlock2 (tmp, b, tmpc[b], t2vals, alone, alonePos);
249+ tmpc[b] = 0 ;
250+ }
251+ /*
252+
253+ int newCount = --t2vals[h].t2count;
254+ if (newCount == 1) {
255+ alone[alonePos++] = h;
256+ }
257+ t2vals[h].t2 ^= hash;
258+ */
259+ }
260+ }
261+ // std::cout << " add " << hash << " found " << (int) found << "\n";
144262
263+ reverseOrder[reverseOrderPos] = hash;
264+
265+ // if (found < 0) {
266+ // std::cout << " NOT FOUND " << hash << "\n";
267+ // }
268+ reverseH[reverseOrderPos] = found;
269+ reverseOrderPos++;
270+
271+
272+ }
273+
274+ delete[] tmp;
275+ delete[] tmpc;
276+
277+ delete [] alone;
278+
279+
280+ /*
145281 int* alone = new int[arrayLength];
146282 int alonePos = 0;
147283 for (size_t i = 0; i < arrayLength; i++) {
@@ -174,6 +310,66 @@ Status XorFilter<ItemType, FingerprintType, HashFamily>::AddAll(
174310 reverseOrderPos++;
175311 }
176312 delete [] alone;
313+ */
314+
315+
316+
317+ /*
318+ int* alone = new int[blocks * BLOCK_LEN];
319+ int* alonePos = new int[blocks]();
320+ for (size_t i = 0; i < arrayLength; i++) {
321+ if (t2vals[i].t2count == 1) {
322+ int b = i >> BLOCK_SHIFT;
323+ // TODO could in theory go over the limit
324+ int p = alonePos[b]++;
325+ alone[(b << BLOCK_SHIFT) + p] = i;
326+ }
327+ }
328+ reverseOrderPos = 0;
329+
330+ int currentBlock = 0;
331+ while (reverseOrderPos < size) {
332+ if (alonePos[currentBlock] == 0) {
333+ for(int i=0, b=currentBlock + 1; i<blocks; i++, b++) {
334+ if (b > blocks) {
335+ b = 0;
336+ }
337+ if (alonePos[b] > 0) {
338+ currentBlock = b;
339+ break;
340+ }
341+ }
342+ }
343+ if (alonePos[currentBlock] == 0) {
344+ break;
345+ }
346+ int i = (b << BLOCK_SHIFT) + alone[--alonePos[currentBlock]];
347+ if (t2vals[i].t2count == 0) {
348+ continue;
349+ }
350+ long hash = t2vals[i].t2;
351+ uint8_t found = -1;
352+ for (int hi = 0; hi < 3; hi++) {
353+ int h = getHashFromHash(hash, hi, blockLength);
354+ int newCount = --t2vals[h].t2count;
355+ if (newCount == 0) {
356+ found = (uint8_t) hi;
357+ } else {
358+ if (newCount == 1) {
359+ alone[alonePos++] = h;
360+ }
361+ t2vals[h].t2 ^= hash;
362+ }
363+ }
364+ reverseOrder[reverseOrderPos] = hash;
365+ reverseH[reverseOrderPos] = found;
366+ reverseOrderPos++;
367+ }
368+ delete [] alone;
369+ */
370+
371+
372+
177373
178374 if (reverseOrderPos == size) {
179375 break ;
0 commit comments