Skip to content

Commit 27bc546

Browse files
committed
Add a new type of blocked Bloom filter (about twice as fast as the regular Bloom filter)
1 parent d7cb21f commit 27bc546

File tree

2 files changed

+82
-1
lines changed

2 files changed

+82
-1
lines changed

src/main/java/org/fastfilter/FilterType.java

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
11
package org.fastfilter;
22

33
import org.fastfilter.bloom.BlockedBloom;
4-
import org.fastfilter.bloom.SuccinctCountingBlockedBloom;
4+
import org.fastfilter.bloom.BlockedBloomV2;
55
import org.fastfilter.bloom.Bloom;
66
import org.fastfilter.bloom.CountingBloom;
7+
import org.fastfilter.bloom.SuccinctCountingBlockedBloom;
78
import org.fastfilter.bloom.SuccinctCountingBloom;
89
import org.fastfilter.cuckoo.Cuckoo16;
910
import org.fastfilter.cuckoo.Cuckoo8;
@@ -46,6 +47,12 @@ public Filter construct(long[] keys, int setting) {
4647
return BlockedBloom.construct(keys, setting);
4748
}
4849
},
50+
// Filter type whose construct() builds a BlockedBloomV2 instance.
BLOCKED_BLOOM_V2 {
51+
@Override
52+
public Filter construct(long[] keys, int setting) {
53+
// 'setting' is passed through as the bitsPerKey argument of BlockedBloomV2.construct.
return BlockedBloomV2.construct(keys, setting);
54+
}
55+
},
4956
SUCCINCT_COUNTING_BLOCKED_BLOOM {
5057
@Override
5158
public Filter construct(long[] keys, int setting) {
src/main/java/org/fastfilter/bloom/BlockedBloomV2.java

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
package org.fastfilter.bloom;
2+
3+
import org.fastfilter.Filter;
4+
import org.fastfilter.utils.Hash;
5+
6+
/**
7+
* A special kind of blocked Bloom filter. It sets 6 bits in 3 consecutive
8+
* 64-bit words, and exactly 2 bits per word. It is faster than a regular Bloom
9+
* filter, but needs slightly more space / has a slightly worse false positive
10+
* rate.
11+
*/
12+
public class BlockedBloomV2 implements Filter {
13+
14+
public static BlockedBloomV2 construct(long[] keys, int bitsPerKey) {
15+
long n = keys.length;
16+
BlockedBloomV2 f = new BlockedBloomV2((int) n, bitsPerKey);
17+
for(long x : keys) {
18+
f.add(x);
19+
}
20+
return f;
21+
}
22+
23+
private final int blocks;
24+
private final long seed;
25+
private final long[] data;
26+
27+
public long getBitCount() {
28+
return data.length * 64L;
29+
}
30+
31+
BlockedBloomV2(int entryCount, int bitsPerKey) {
32+
entryCount = Math.max(1, entryCount);
33+
this.seed = Hash.randomSeed();
34+
long bits = (long) entryCount * bitsPerKey;
35+
this.blocks = (int) bits / 64;
36+
data = new long[(int) (blocks + 8)];
37+
}
38+
39+
@Override
40+
public boolean supportsAdd() {
41+
return true;
42+
}
43+
44+
@Override
45+
public void add(long key) {
46+
long hash = Hash.hash64(key, seed);
47+
int start = Hash.reduce((int) hash, blocks);
48+
int a = (int) hash;
49+
int b = (int) (hash >>> 32);
50+
for (int i = 0; i < 3; i++) {
51+
a += b;
52+
data[start] |= (1L << a) | (1L << (a >> 8));
53+
start++;
54+
}
55+
}
56+
57+
@Override
58+
public boolean mayContain(long key) {
59+
long hash = Hash.hash64(key, seed);
60+
int start = Hash.reduce((int) hash, blocks);
61+
int a = (int) hash;
62+
int b = (int) (hash >>> 32);
63+
for (int i = 0; i < 3; i++) {
64+
a += b;
65+
long x = data[start];
66+
if (((x >> a) & (x >> (a >> 8)) & 1) == 0) {
67+
return false;
68+
}
69+
start++;
70+
}
71+
return true;
72+
}
73+
74+
}

0 commit comments

Comments
 (0)