44import org .fastfilter .utils .Hash ;
55
66/**
7- * A special kind of blocked Bloom filter. It sets 6 bits in 3 consecutive
8- * 64-bit words, and exactly 2 bits per word. It is faster than a regular Bloom
9- * filter, but needs slightly more space / has a slightly worse false positive
10- * rate.
7+ * A special kind of blocked Bloom filter. It sets 2 to 4 (usually 4) bits in
8+ * two 64-bit words; 1 or 2 (usually 2) per word. It is faster than a regular
9+ * Bloom filter, but needs slightly more space / has a slightly worse false
10+ * positive rate.
1111 */
1212public class BlockedBloomV2 implements Filter {
1313
@@ -20,7 +20,7 @@ public static BlockedBloomV2 construct(long[] keys, int bitsPerKey) {
2020 return f ;
2121 }
2222
23- private final int blocks ;
23+ private final int buckets ;
2424 private final long seed ;
2525 private final long [] data ;
2626
@@ -29,11 +29,12 @@ public long getBitCount() {
2929 }
3030
3131 BlockedBloomV2 (int entryCount , int bitsPerKey ) {
32+ // bitsPerKey = 11;
3233 entryCount = Math .max (1 , entryCount );
3334 this .seed = Hash .randomSeed ();
3435 long bits = (long ) entryCount * bitsPerKey ;
35- this .blocks = (int ) bits / 64 ;
36- data = new long [(int ) (blocks + 8 )];
36+ this .buckets = (int ) bits / 64 ;
37+ data = new long [(int ) (buckets + 16 )];
3738 }
3839
3940 @ Override
@@ -44,31 +45,24 @@ public boolean supportsAdd() {
4445 @ Override
4546 public void add (long key ) {
4647 long hash = Hash .hash64 (key , seed );
47- int start = Hash .reduce ((int ) hash , blocks );
48- int a = (int ) hash ;
49- int b = (int ) (hash >>> 32 );
50- for (int i = 0 ; i < 3 ; i ++) {
51- a += b ;
52- data [start ] |= (1L << a ) | (1L << (a >> 8 ));
53- start ++;
54- }
48+ int start = Hash .reduce ((int ) hash , buckets );
49+ hash = hash ^ Long .rotateLeft (hash , 32 );
50+ long m1 = (1L << hash ) | (1L << (hash >> 6 ));
51+ long m2 = (1L << (hash >> 12 )) | (1L << (hash >> 18 ));
52+ data [start ] |= m1 ;
53+ data [start + 1 + (int ) (hash >>> 60 )] |= m2 ;
5554 }
5655
5756 @ Override
5857 public boolean mayContain (long key ) {
5958 long hash = Hash .hash64 (key , seed );
60- int start = Hash .reduce ((int ) hash , blocks );
61- int a = (int ) hash ;
62- int b = (int ) (hash >>> 32 );
63- for (int i = 0 ; i < 3 ; i ++) {
64- a += b ;
65- long x = data [start ];
66- if (((x >> a ) & (x >> (a >> 8 )) & 1 ) == 0 ) {
67- return false ;
68- }
69- start ++;
70- }
71- return true ;
59+ int start = Hash .reduce ((int ) hash , buckets );
60+ hash = hash ^ Long .rotateLeft (hash , 32 );
61+ long a = data [start ];
62+ long b = data [start + 1 + (int ) (hash >>> 60 )];
63+ long m1 = (1L << hash ) | (1L << (hash >> 6 ));
64+ long m2 = (1L << (hash >> 12 )) | (1L << (hash >> 18 ));
65+ return ((m1 & a ) == m1 ) && ((m2 & b ) == m2 );
7266 }
7367
7468}
0 commit comments