Skip to content

Commit 7e551d0

Browse files
committed
Improved succinct counting blocked Bloom filter
1 parent 27bc546 commit 7e551d0

File tree

4 files changed

+318
-28
lines changed

4 files changed

+318
-28
lines changed

src/main/java/org/fastfilter/FilterType.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import org.fastfilter.bloom.Bloom;
66
import org.fastfilter.bloom.CountingBloom;
77
import org.fastfilter.bloom.SuccinctCountingBlockedBloom;
8+
import org.fastfilter.bloom.SuccinctCountingBlockedBloomV2;
89
import org.fastfilter.bloom.SuccinctCountingBloom;
910
import org.fastfilter.cuckoo.Cuckoo16;
1011
import org.fastfilter.cuckoo.Cuckoo8;
@@ -59,6 +60,12 @@ public Filter construct(long[] keys, int setting) {
5960
return SuccinctCountingBlockedBloom.construct(keys, setting);
6061
}
6162
},
// Filter type added by this commit: construction is delegated to
// SuccinctCountingBlockedBloomV2.construct with the same keys and setting.
63+
SUCCINCT_COUNTING_BLOCKED_BLOOM_V2 {
64+
@Override
65+
public Filter construct(long[] keys, int setting) {
66+
return SuccinctCountingBlockedBloomV2.construct(keys, setting);
67+
}
68+
},
6269
XOR_SIMPLE {
6370
@Override
6471
public Filter construct(long[] keys, int setting) {

src/main/java/org/fastfilter/bloom/BlockedBloomV2.java

Lines changed: 21 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,10 @@
44
import org.fastfilter.utils.Hash;
55

66
/**
7-
* A special kind of blocked Bloom filter. It sets 6 bits in 3 consecutive
8-
* 64-bit words, and exactly 2 bits per word. It is faster than a regular Bloom
9-
* filter, but needs slightly more space / has a slightly worse false positive
10-
* rate.
7+
* A special kind of blocked Bloom filter. It sets 2 to 4 (usually 4) bits in
8+
* two 64-bit words; 1 or 2 (usually 2) per word. It is faster than a regular
9+
* Bloom filter, but needs slightly more space / has a slightly worse false
10+
* positive rate.
1111
*/
1212
public class BlockedBloomV2 implements Filter {
1313

@@ -20,7 +20,7 @@ public static BlockedBloomV2 construct(long[] keys, int bitsPerKey) {
2020
return f;
2121
}
2222

23-
private final int blocks;
23+
private final int buckets;
2424
private final long seed;
2525
private final long[] data;
2626

@@ -29,11 +29,12 @@ public long getBitCount() {
2929
}
3030

/**
 * Creates an empty filter sized for the given number of entries.
 *
 * @param entryCount expected number of keys; values below 1 are treated as 1
 * @param bitsPerKey number of bits to reserve per key
 */
3131
BlockedBloomV2(int entryCount, int bitsPerKey) {
32+
// bitsPerKey = 11;
3233
entryCount = Math.max(1, entryCount);
3334
this.seed = Hash.randomSeed();
3435
long bits = (long) entryCount * bitsPerKey;
35-
this.blocks = (int) bits / 64;
36-
data = new long[(int) (blocks + 8)];
// Divide in long arithmetic first and narrow afterwards. The previous form
// "(int) bits / 64" applied the cast to bits before dividing, so the value
// was truncated as soon as entryCount * bitsPerKey exceeded the int range.
36+
this.buckets = (int) (bits / 64);
// 16 spare words: add/mayContain address a second word up to 16 positions
// past the start index.
37+
data = new long[(int) (buckets + 16)];
3738
}
3839

3940
@Override
@@ -44,31 +45,24 @@ public boolean supportsAdd() {
/**
 * Adds a key to the filter by setting bits in two words of the data array.
 *
 * @param key the key to add
 */
4445
@Override
4546
public void add(long key) {
4647
long hash = Hash.hash64(key, seed);
// Removed implementation ("-" lines): walked three consecutive words starting
// at the reduced index and OR-ed a two-bit mask into each of them.
47-
int start = Hash.reduce((int) hash, blocks);
48-
int a = (int) hash;
49-
int b = (int) (hash >>> 32);
50-
for (int i = 0; i < 3; i++) {
51-
a += b;
52-
data[start] |= (1L << a) | (1L << (a >> 8));
53-
start++;
54-
}
// New implementation ("+" lines): the low 32 bits of the hash select the
// first word; the hash is then mixed with its 32-bit rotation before the
// bit positions are derived from it.
48+
int start = Hash.reduce((int) hash, buckets);
49+
hash = hash ^ Long.rotateLeft(hash, 32);
// A long shift only uses the low six bits of the shift count, so each term
// selects one of 64 bit positions; the two terms of a mask may coincide,
// in which case the mask has a single bit.
50+
long m1 = (1L << hash) | (1L << (hash >> 6));
51+
long m2 = (1L << (hash >> 12)) | (1L << (hash >> 18));
52+
data[start] |= m1;
// The second word lies 1 to 16 positions after the first, chosen by the top
// four bits of the mixed hash.
53+
data[start + 1 + (int) (hash >>> 60)] |= m2;
5554
}
5655

/**
 * Tests whether all bits that add() would set for this key are present.
 *
 * @param key the key to look up
 * @return false if at least one of the key's bits is missing from its two
 *         words, true otherwise
 */
5756
@Override
5857
public boolean mayContain(long key) {
5958
long hash = Hash.hash64(key, seed);
// Removed implementation ("-" lines): checked two bits in each of three
// consecutive words and returned false on the first word missing a bit.
60-
int start = Hash.reduce((int) hash, blocks);
61-
int a = (int) hash;
62-
int b = (int) (hash >>> 32);
63-
for (int i = 0; i < 3; i++) {
64-
a += b;
65-
long x = data[start];
66-
if (((x >> a) & (x >> (a >> 8)) & 1) == 0) {
67-
return false;
68-
}
69-
start++;
70-
}
71-
return true;
// New implementation ("+" lines): derives the same two word indexes and the
// same masks m1/m2 as add(), using the identical sequence of operations.
59+
int start = Hash.reduce((int) hash, buckets);
60+
hash = hash ^ Long.rotateLeft(hash, 32);
// Both words are read before the masks are computed.
61+
long a = data[start];
62+
long b = data[start + 1 + (int) (hash >>> 60)];
63+
long m1 = (1L << hash) | (1L << (hash >> 6));
64+
long m2 = (1L << (hash >> 12)) | (1L << (hash >> 18));
// Every bit of m1 must be set in the first word and every bit of m2 in the
// second word.
65+
return ((m1 & a) == m1) && ((m2 & b) == m2);
7266
}
7367

7468
}

src/main/java/org/fastfilter/bloom/SuccinctCountingBlockedBloom.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,7 @@ public long cardinality() {
142142
}
143143
long sum = 0;
144144
for (int i = 0; i < data.capacity(); i++) {
145-
sum += data.get(i);
145+
sum += Long.bitCount(data.get(i));
146146
}
147147
for(long x : counts) {
148148
sum += Long.bitCount(x);

0 commit comments

Comments
 (0)