@@ -104,18 +104,34 @@ struct t2val {
104104
105105typedef struct t2val t2val_t ;
106106
107- #define BLOCK_SHIFT 18
108- #define BLOCK_LEN (1 << BLOCK_SHIFT)
// log2 of the number of uint64_t slots in one block of the temporary buffer:
// an index is mapped to its block with `index >> blockShift`, and a block
// holds (1 << blockShift) slots, filled two at a time as (hash, index) pairs.
107+ const int blockShift = 18 ;
109108
110109void applyBlock (uint64_t * tmp, int b, int len, t2val_t * t2vals) {
111110 for (int i = 0 ; i < len; i += 2 ) {
112- uint64_t x = tmp[(b << BLOCK_SHIFT ) + i];
113- int index = (int ) tmp[(b << BLOCK_SHIFT ) + i + 1 ];
111+ uint64_t x = tmp[(b << blockShift ) + i];
112+ int index = (int ) tmp[(b << blockShift ) + i + 1 ];
114113 t2vals[index].t2count ++;
115114 t2vals[index].t2 ^= x;
116115 }
117116}
118117
118+ int applyBlock2 (uint64_t * tmp, int b, int len, t2val_t * t2vals, int * alone, int alonePos) {
119+ for (int i = 0 ; i < len; i += 2 ) {
120+ uint64_t hash = tmp[(b << blockShift) + i];
121+ int index = (int ) tmp[(b << blockShift) + i + 1 ];
122+ int oldCount = t2vals[index].t2count ;
123+ if (oldCount >= 1 ) {
124+ int newCount = oldCount - 1 ;
125+ t2vals[index].t2count = newCount;
126+ if (newCount == 1 ) {
127+ alone[alonePos++] = index;
128+ }
129+ t2vals[index].t2 ^= hash;
130+ }
131+ }
132+ return alonePos;
133+ }
134+
119135template <typename ItemType, typename FingerprintType,
120136 typename FingerprintStorageType, typename HashFamily>
121137Status XorFilter2<ItemType, FingerprintType, FingerprintStorageType, HashFamily>::AddAll(
@@ -128,20 +144,20 @@ Status XorFilter2<ItemType, FingerprintType, FingerprintStorageType, HashFamily>
128144 t2val_t * t2vals = new t2val_t [m];
129145 while (true ) {
130146 memset (t2vals, 0 , sizeof (t2val_t [m]));
131- int blocks = 1 + (3 * blockLength) / BLOCK_LEN ;
132- uint64_t * tmp = new uint64_t [blocks * BLOCK_LEN ];
147+ int blocks = 1 + (( 3 * blockLength) >> blockShift) ;
148+ uint64_t * tmp = new uint64_t [blocks << blockShift ];
133149 int * tmpc = new int [blocks]();
134150 for (size_t i = start; i < end; i++) {
135151 uint64_t k = keys[i];
136152 uint64_t hash = (*hasher)(k);
137153 for (int hi = 0 ; hi < 3 ; hi++) {
138154 int index = getHashFromHash (hash, hi, blockLength);
139- int b = index >> BLOCK_SHIFT ;
155+ int b = index >> blockShift ;
140156 int i2 = tmpc[b];
141- tmp[(b << BLOCK_SHIFT ) + i2] = hash;
142- tmp[(b << BLOCK_SHIFT ) + i2 + 1 ] = index;
157+ tmp[(b << blockShift ) + i2] = hash;
158+ tmp[(b << blockShift ) + i2 + 1 ] = index;
143159 tmpc[b] += 2 ;
144- if (i2 + 2 == BLOCK_LEN ) {
160+ if (i2 + 2 == ( 1 << blockShift) ) {
145161 applyBlock (tmp, b, i2 + 2 , t2vals);
146162 tmpc[b] = 0 ;
147163 }
@@ -153,8 +169,94 @@ Status XorFilter2<ItemType, FingerprintType, FingerprintStorageType, HashFamily>
153169 }
154170 delete[] tmp;
155171 delete[] tmpc;
172+ reverseOrderPos = 0 ;
156173
174+ int * alone = new int [arrayLength];
175+ int alonePos = 0 ;
176+ for (size_t i = 0 ; i < arrayLength; i++) {
177+ if (t2vals[i].t2count == 1 ) {
178+ alone[alonePos++] = i;
179+ }
180+ }
181+ tmp = new uint64_t [blocks << blockShift];
182+ tmpc = new int [blocks]();
157183 reverseOrderPos = 0 ;
184+ int bestBlock = -1 ;
185+ while (reverseOrderPos < size) {
186+ if (alonePos == 0 ) {
187+ // we need to apply blocks until we have an entry that is alone
188+ // (that is, until alonePos > 0)
189+ // so, find a large block (the larger the better)
190+ // but don't need to search very long
191+ // start searching where we stopped the last time
192+ // (to make it more even)
193+ for (int i = 0 , b = bestBlock + 1 , best = -1 ; i < blocks; i++) {
194+ if (b >= blocks) {
195+ b = 0 ;
196+ }
197+ if (tmpc[b] > best) {
198+ best = tmpc[b];
199+ bestBlock = b;
200+ if (best > (1 << (blockShift - 1 ))) {
201+ // sufficiently large: stop
202+ break ;
203+ }
204+ }
205+ }
206+ if (tmpc[bestBlock] > 0 ) {
207+ alonePos = applyBlock2 (tmp, bestBlock, tmpc[bestBlock], t2vals, alone, alonePos);
208+ tmpc[bestBlock] = 0 ;
209+ }
210+ // applying a block may not actually result in a new entry that is alone
211+ if (alonePos == 0 ) {
212+ for (int b = 0 ; b < blocks && alonePos == 0 ; b++) {
213+ if (tmpc[b] > 0 ) {
214+ alonePos = applyBlock2 (tmp, b, tmpc[b], t2vals, alone, alonePos);
215+ tmpc[b] = 0 ;
216+ }
217+ }
218+ }
219+ }
220+ if (alonePos == 0 ) {
221+ break ;
222+ }
223+ int i = alone[--alonePos];
224+ int b = i >> blockShift;
225+ if (tmpc[b] > 0 ) {
226+ alonePos = applyBlock2 (tmp, b, tmpc[b], t2vals, alone, alonePos);
227+ tmpc[b] = 0 ;
228+ }
229+ uint8_t found = -1 ;
230+ if (t2vals[i].t2count == 0 ) {
231+ continue ;
232+ }
233+ long hash = t2vals[i].t2 ;
234+ for (int hi = 0 ; hi < 3 ; hi++) {
235+ int h = getHashFromHash (hash, hi, blockLength);
236+ if (h == i) {
237+ found = (uint8_t ) hi;
238+ t2vals[i].t2count = 0 ;
239+ } else {
240+ int b = h >> blockShift;
241+ int i2 = tmpc[b];
242+ tmp[(b << blockShift) + i2] = hash;
243+ tmp[(b << blockShift) + i2 + 1 ] = h;
244+ tmpc[b] += 2 ;
245+ if (tmpc[b] >= 1 << blockShift) {
246+ alonePos = applyBlock2 (tmp, b, tmpc[b], t2vals, alone, alonePos);
247+ tmpc[b] = 0 ;
248+ }
249+ }
250+ }
251+ reverseOrder[reverseOrderPos] = hash;
252+ reverseH[reverseOrderPos] = found;
253+ reverseOrderPos++;
254+ }
255+ delete[] tmp;
256+ delete[] tmpc;
257+ delete[] alone;
258+
259+ /*
158260 int* alone = new int[arrayLength];
159261 int alonePos = 0;
160262 reverseOrderPos = 0;
@@ -190,6 +292,9 @@ Status XorFilter2<ItemType, FingerprintType, FingerprintStorageType, HashFamily>
190292 }
191293 }
192294 delete [] alone;
295+
296+ */
297+
193298 if (reverseOrderPos == size) {
194299 break ;
195300 }
0 commit comments