Skip to content

Commit 0c72e37

Browse files
committed
Succinct counting Bloom filters: small simplifications
1 parent d5275a1 commit 0c72e37

File tree

2 files changed

+22
-84
lines changed

2 files changed

+22
-84
lines changed

src/main/java/org/fastfilter/bloom/SuccinctCountingBlockedBloom.java

Lines changed: 21 additions & 82 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,7 @@ public class SuccinctCountingBlockedBloom implements Filter {
3131

3232
public static SuccinctCountingBlockedBloom construct(long[] keys, int bitsPerKey) {
3333
long n = keys.length;
34-
long m = n * bitsPerKey;
35-
int k = getBestK(m, n);
34+
int k = getBestK(bitsPerKey);
3635
SuccinctCountingBlockedBloom f = new SuccinctCountingBlockedBloom((int) n, bitsPerKey, k);
3736
for(long x : keys) {
3837
f.add(x);
@@ -43,8 +42,8 @@ public static SuccinctCountingBlockedBloom construct(long[] keys, int bitsPerKey
4342
return f;
4443
}
4544

46-
private static int getBestK(long m, long n) {
47-
return Math.max(1, (int) Math.round((double) m / n * Math.log(2)));
45+
private static int getBestK(double bitsPerKey) {
46+
return Math.max(1, (int) Math.round(bitsPerKey * Math.log(2)));
4847
}
4948

5049
private final int k;
@@ -54,7 +53,7 @@ private static int getBestK(long m, long n) {
5453

5554
// the counter bits
5655
// the same size as the "data bits" currently
57-
private final BitField counts;
56+
private final long[] counts;
5857

5958
private int nextFreeOverflow;
6059
private final long[] overflow;
@@ -65,7 +64,7 @@ private static int getBestK(long m, long n) {
6564

6665

6766
public long getBitCount() {
68-
return data.capacity() * 64L + counts.getBitCount() + 64 * overflow.length;
67+
return 64L * data.capacity() + 64L * counts.length + 64L * overflow.length;
6968
}
7069

7170
SuccinctCountingBlockedBloom(int entryCount, int bitsPerKey, int k) {
@@ -76,11 +75,11 @@ public long getBitCount() {
7675
this.blocks = (int) (bits + BITS_PER_BLOCK - 1) / BITS_PER_BLOCK;
7776
// ByteBuffer.allocateDirect allocations are cache-line aligned
7877
// (regular arrays are aligned to 8 bytes; finding the offset is tricky, and GC could move arrays)
79-
data = ByteBuffer.allocateDirect((int) (blocks * BYTES_PER_BLOCK) + 8).asLongBuffer();
78+
data = ByteBuffer.allocateDirect((int) (blocks * BYTES_PER_BLOCK)).asLongBuffer();
8079
// data = ByteBuffer.allocate((int) (blocks * BYTES_PER_BLOCK) + 8);
8180

82-
int arraySize = (blocks * BITS_PER_BLOCK + 63) / 64;
83-
counts = new BitField(64 * (arraySize + 10));
81+
int arraySize = (blocks * BYTES_PER_BLOCK) / 8;
82+
counts = new long[arraySize];
8483
overflow = new long[100 + arraySize / 100 * 24];
8584
for (int i = 0; i < overflow.length; i += 8) {
8685
overflow[i] = i + 8;
@@ -129,7 +128,7 @@ public void remove(long key) {
129128
for (int i = 0; i < k; i++) {
130129
int index = start + ((a & BLOCK_MASK) >>> 6);
131130
if (VERIFY_COUNTS) {
132-
realCounts[(index << 6) + (a & 63)]++;
131+
realCounts[(index << 6) + (a & 63)]--;
133132
}
134133
decrement(index, a);
135134
a += b;
@@ -145,7 +144,10 @@ public long cardinality() {
145144
for (int i = 0; i < data.capacity(); i++) {
146145
sum += data.get(i);
147146
}
148-
return sum + counts.cardinality();
147+
for(long x : counts) {
148+
sum += Long.bitCount(x);
149+
}
150+
return sum;
149151
}
150152

151153
@Override
@@ -170,7 +172,7 @@ public boolean mayContain(long key) {
170172
private void increment(int group, int x) {
171173
long m = data.get(group);
172174
long d = (m >>> x) & 1;
173-
long c = counts.getLong(group);
175+
long c = counts[group];
174176
if ((c & 0xc000000000000000L) != 0) {
175177
// an overflow entry, or overflowing now
176178
int index;
@@ -184,13 +186,12 @@ private void increment(int group, int x) {
184186
}
185187
long count = 64;
186188
c = 0x8000000000000000L | (count << 32) | index;
187-
counts.setLong(group, c);
188189
} else {
189190
// already
190191
index = (int) (c & 0x0fffffff);
191192
c += 1L << 32;
192-
counts.setLong(group, c);
193193
}
194+
counts[group] = c;
194195
int bitIndex = x & 63;
195196
overflow[index + bitIndex / 8] += getBit(bitIndex);
196197
data.put(group, data.get(group) | (1L << x));
@@ -204,7 +205,7 @@ private void increment(int group, int x) {
204205
long left = c & ~mask;
205206
long right = c & mask;
206207
c = (left << 1) | ((1 ^ d) << insertAt) | right;
207-
counts.setLong(group, c);
208+
counts[group] = c;
208209
}
209210

210211
private int allocateOverflow() {
@@ -218,12 +219,12 @@ private int allocateOverflow() {
218219

219220
private void decrement(int group, int x) {
220221
long m = data.get(group);
221-
long c = counts.getLong(group);
222+
long c = counts[group];
222223
if ((c & 0x8000000000000000L) != 0) {
223224
// an overflow entry
224225
int count = (int) (c >>> 32) & 0x0fffffff;
225226
c -= 1L << 32;
226-
counts.setLong(group, c);
227+
counts[group] = c;
227228
int index = (int) (c & 0x0fffffff);
228229
int bitIndex = x & 63;
229230
long n = overflow[index + bitIndex / 8];
@@ -241,7 +242,7 @@ private void decrement(int group, int x) {
241242
c2 = ((c2 << 1) | 1) << (cj - 1);
242243
}
243244
}
244-
counts.setLong(group, c2);
245+
counts[group] = c2;
245246
freeOverflow(index);
246247
}
247248
return;
@@ -253,7 +254,7 @@ private void decrement(int group, int x) {
253254
long mask = (1L << removeAt) - 1;
254255
long left = (c >>> 1) & ~mask;
255256
long right= c & mask;
256-
counts.setLong(group, left | right);
257+
counts[group] = left | right;
257258
long removed = (c >> removeAt) & 1;
258259
// possibly reset the data bit
259260
data.put(group, m & ~(removed << x));
@@ -286,7 +287,7 @@ private int readCount(int x) {
286287
if (d == 0) {
287288
return 0;
288289
}
289-
long c = counts.getLong(group);
290+
long c = counts[group];
290291
if ((c & 0x8000000000000000L) != 0) {
291292
int index = (int) (c & 0x0fffffff);
292293
int bitIndex = x & 63;
@@ -300,66 +301,4 @@ private int readCount(int x) {
300301
return Long.numberOfLeadingZeros(y) + 1;
301302
}
302303

303-
public static class BitField {
304-
305-
private final long[] data;
306-
307-
BitField(int bitCount) {
308-
data = new long[(bitCount + 63) / 64];
309-
}
310-
311-
public long cardinality() {
312-
long sum = 0;
313-
for(long x : data) {
314-
sum += Long.bitCount(x);
315-
}
316-
return sum;
317-
}
318-
319-
void clear(int index) {
320-
data[index >>> 6] &= ~(1L << index);
321-
}
322-
323-
public void setLong(int longIndex, long x) {
324-
data[longIndex] = x;
325-
}
326-
327-
public long getLong(int longIndex) {
328-
return data[longIndex];
329-
}
330-
331-
public long getBitCount() {
332-
return data.length << 6;
333-
}
334-
335-
long get(int index) {
336-
return (data[index >>> 6] >> index) & 1;
337-
}
338-
339-
void set(int index) {
340-
data[index >>> 6] |= 1L << index;
341-
}
342-
343-
public String toString() {
344-
StringBuilder buff = new StringBuilder();
345-
for (int i = 0; i < data.length * 64; i++) {
346-
if ((i & 63) == 0) {
347-
if (data[i >>> 6] == 0) {
348-
i += 63;
349-
} else {
350-
buff.append("\n" + i + ":");
351-
}
352-
} else {
353-
if (get(i) == 0) {
354-
buff.append('0');
355-
} else {
356-
buff.append('1');
357-
}
358-
}
359-
}
360-
return buff.toString();
361-
}
362-
363-
}
364-
365304
}

src/main/java/org/fastfilter/bloom/SuccinctCountingBloom.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -146,13 +146,12 @@ private void increment(int x) {
146146
}
147147
long count = 64;
148148
c = 0x8000000000000000L | (count << 32) | index;
149-
counts.setLong(group, c);
150149
} else {
151150
// already
152151
index = (int) (c & 0x0fffffff);
153152
c += 1L << 32;
154-
counts.setLong(group, c);
155153
}
154+
counts.setLong(group, c);
156155
int bitIndex = x & 63;
157156
overflow[index + bitIndex / 16] += getBit(bitIndex);
158157
data.set(x);

0 commit comments

Comments
 (0)