|
| 1 | +package org.fastfilter.bloom; |
| 2 | + |
| 3 | +import org.fastfilter.Filter; |
| 4 | +import org.fastfilter.utils.Hash; |
| 5 | + |
| 6 | +/** |
| 7 | + * A special kind of blocked Bloom filter. It sets 6 bits in 3 consecutive |
| 8 | + * 64-bit words, and exactly 2 bits per word. It is faster than a regular Bloom |
| 9 | + * filter, but needs slightly more space / has a slightly worse false positive |
| 10 | + * rate. |
| 11 | + */ |
| 12 | +public class BlockedBloomV2 implements Filter { |
| 13 | + |
| 14 | + public static BlockedBloomV2 construct(long[] keys, int bitsPerKey) { |
| 15 | + long n = keys.length; |
| 16 | + BlockedBloomV2 f = new BlockedBloomV2((int) n, bitsPerKey); |
| 17 | + for(long x : keys) { |
| 18 | + f.add(x); |
| 19 | + } |
| 20 | + return f; |
| 21 | + } |
| 22 | + |
| 23 | + private final int blocks; |
| 24 | + private final long seed; |
| 25 | + private final long[] data; |
| 26 | + |
| 27 | + public long getBitCount() { |
| 28 | + return data.length * 64L; |
| 29 | + } |
| 30 | + |
| 31 | + BlockedBloomV2(int entryCount, int bitsPerKey) { |
| 32 | + entryCount = Math.max(1, entryCount); |
| 33 | + this.seed = Hash.randomSeed(); |
| 34 | + long bits = (long) entryCount * bitsPerKey; |
| 35 | + this.blocks = (int) bits / 64; |
| 36 | + data = new long[(int) (blocks + 8)]; |
| 37 | + } |
| 38 | + |
| 39 | + @Override |
| 40 | + public boolean supportsAdd() { |
| 41 | + return true; |
| 42 | + } |
| 43 | + |
| 44 | + @Override |
| 45 | + public void add(long key) { |
| 46 | + long hash = Hash.hash64(key, seed); |
| 47 | + int start = Hash.reduce((int) hash, blocks); |
| 48 | + int a = (int) hash; |
| 49 | + int b = (int) (hash >>> 32); |
| 50 | + for (int i = 0; i < 3; i++) { |
| 51 | + a += b; |
| 52 | + data[start] |= (1L << a) | (1L << (a >> 8)); |
| 53 | + start++; |
| 54 | + } |
| 55 | + } |
| 56 | + |
| 57 | + @Override |
| 58 | + public boolean mayContain(long key) { |
| 59 | + long hash = Hash.hash64(key, seed); |
| 60 | + int start = Hash.reduce((int) hash, blocks); |
| 61 | + int a = (int) hash; |
| 62 | + int b = (int) (hash >>> 32); |
| 63 | + for (int i = 0; i < 3; i++) { |
| 64 | + a += b; |
| 65 | + long x = data[start]; |
| 66 | + if (((x >> a) & (x >> (a >> 8)) & 1) == 0) { |
| 67 | + return false; |
| 68 | + } |
| 69 | + start++; |
| 70 | + } |
| 71 | + return true; |
| 72 | + } |
| 73 | + |
| 74 | +} |
0 commit comments