44import org .fastfilter .utils .Hash ;
55
66/**
7- * A blocked Bloom filter. Compared to a regular Bloom filter, it is a little bit
8- * faster, but needs more space. Not that useful beyond about 20 bits per key,
9- * as fpp doesn't decrease further.
7+ * A special kind of blocked Bloom filter. It sets 2 to 4 (usually 4) bits in
8+ * two 64-bit words; 1 or 2 (usually 2) per word. It is faster than a regular
9+ * Bloom filter, but needs slightly more space / has a slightly worse false
10+ * positive rate.
1011 */
1112public class BlockedBloom implements Filter {
1213
13- // TODO not cache line aligned
14-
15- // Should match the size of a cache line
16- private static final int BITS_PER_BLOCK = 64 * 8 ;
17- private static final int LONGS_PER_BLOCK = BITS_PER_BLOCK / 64 ;
18- private static final int BLOCK_MASK = BITS_PER_BLOCK - 1 ;
19-
2014 public static BlockedBloom construct (long [] keys , int bitsPerKey ) { // Builds a filter sized for keys.length entries at bitsPerKey bits each and inserts every key.
2115 long n = keys .length ;
22- long m = n * bitsPerKey ;
23- int k = getBestK (m , n );
24- BlockedBloom f = new BlockedBloom ((int ) n , bitsPerKey , k );
16+ BlockedBloom f = new BlockedBloom ((int ) n , bitsPerKey ); // new version: no k parameter is passed to the constructor
2517 for (long x : keys ) {
2618 f .add (x );
2719 }
2820 return f ;
2921 }
3022
31- private static int getBestK (long m , long n ) { // (removed) picked k as round(m / n * ln 2), never less than 1
32- return Math .max (1 , (int ) Math .round ((double ) m / n * Math .log (2 )));
33- }
34-
35- private final int k ;
36- private final int blocks ;
23+ private final int buckets ; // requested bit count divided by 64; passed to Hash.reduce to choose a key's first word
3724 private final long seed ; // obtained from Hash.randomSeed(); passed to Hash.hash64 for every key
3825 private final long [] data ; // the filter's bits, 64 per array element
3926
4027 public long getBitCount () { // Returns the number of bits in the backing array (64 per long), padding words included.
4128 return data .length * 64L ; // 64L keeps the multiplication in long arithmetic
4229 }
4330
44- BlockedBloom (int entryCount , int bitsPerKey , int k ) {
31+ BlockedBloom (int entryCount , int bitsPerKey ) { // Allocates an empty filter sized for entryCount keys at bitsPerKey bits each.
32+ // bitsPerKey = 11;
4533 entryCount = Math .max (1 , entryCount ); // clamp to at least 1
46- this .k = k ;
4734 this .seed = Hash .randomSeed ();
4835 long bits = (long ) entryCount * bitsPerKey ; // widened to long so the product cannot overflow int
49- this .blocks = (int ) ( bits + BITS_PER_BLOCK - 1 ) / BITS_PER_BLOCK ;
50- data = new long [(int ) (blocks * LONGS_PER_BLOCK ) + 8 ];
36+ this .buckets = (int ) (bits / 64 ); // divide before narrowing: "(int) bits / 64" casts first and truncates bit counts above Integer.MAX_VALUE
37+ data = new long [(int ) (buckets + 16 ) ]; // 16 padding words: add/mayContain reach as far as data[start + 16]
5138 }
5239
5340 @ Override
@@ -58,32 +45,24 @@ public boolean supportsAdd() {
5845 @ Override
5946 public void add (long key ) { // Inserts key by setting up to four bits spread over two words of data.
6047 long hash = Hash .hash64 (key , seed ); // hash the key with this instance's seed
61- int start = Hash .reduce ((int ) hash , blocks ) * LONGS_PER_BLOCK ;
62- int a = (int ) hash ;
63- int b = (int ) (hash >>> 32 );
64- for (int i = 0 ; i < k ; i ++) {
65- data [start + ((a & BLOCK_MASK ) >>> 6 )] |= getBit (a );
66- a += b ;
67- }
48+ int start = Hash .reduce ((int ) hash , buckets ); // index of the first word; presumably reduce maps the low 32 bits into [0, buckets) - confirm in Hash
49+ hash = hash ^ Long .rotateLeft (hash , 32 ); // XOR with the half-swapped value, so both 32-bit halves become (high ^ low)
50+ long m1 = (1L << hash ) | (1L << (hash >> 6 )); // a long shift uses only the low 6 bits of its distance: positions come from hash bits 0-5 and 6-11 (one bit if they coincide)
51+ long m2 = (1L << (hash >> 12 )) | (1L << (hash >> 18 )); // positions from hash bits 12-17 and 18-23
52+ data [start ] |= m1 ; // first word is always data[start]
53+ data [start + 1 + (int ) (hash >>> 60 )] |= m2 ; // second word lies 1..16 words after start, selected by the top 4 hash bits
6854 }
6955
7056 @ Override
7157 public boolean mayContain (long key ) { // Returns true if all bits that add(key) would set are already set; false means key was never added.
7258 long hash = Hash .hash64 (key , seed ); // same hash and seed as add, so the same words and bits are derived
73- int start = Hash .reduce ((int ) hash , blocks ) * LONGS_PER_BLOCK ;
74- int a = (int ) hash ;
75- int b = (int ) (hash >>> 32 );
76- for (int i = 0 ; i < k ; i ++) {
77- if ((data [start + ((a & BLOCK_MASK ) >>> 6 )] & getBit (a )) == 0 ) {
78- return false ;
79- }
80- a += b ;
81- }
82- return true ;
83- }
84-
85- private static long getBit (int index ) {
86- return 1L << index ;
59+ int start = Hash .reduce ((int ) hash , buckets ); // index of the first word; presumably in [0, buckets) - confirm in Hash
60+ hash = hash ^ Long .rotateLeft (hash , 32 ); // XOR with the half-swapped value, so both 32-bit halves become (high ^ low)
61+ long a = data [start ]; // first word
62+ long b = data [start + 1 + (int ) (hash >>> 60 )]; // second word, 1..16 words after start, selected by the top 4 hash bits
63+ long m1 = (1L << hash ) | (1L << (hash >> 6 )); // a long shift uses only the low 6 bits of its distance: hash bits 0-5 and 6-11
64+ long m2 = (1L << (hash >> 12 )) | (1L << (hash >> 18 )); // hash bits 12-17 and 18-23
65+ return ((m1 & a ) == m1 ) && ((m2 & b ) == m2 ); // every bit of m1 must be set in a and every bit of m2 in b
8766 }
8867
8968}
0 commit comments