Skip to content

Commit 534822f

Browse files
committed
Rename things and improved analysis tools
1 parent bf0b022 commit 534822f

14 files changed

+824
-488
lines changed

src/main/java/org/fastfilter/FilterType.java

Lines changed: 11 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,12 @@
11
package org.fastfilter;
22

33
import org.fastfilter.bloom.BlockedBloom;
4-
import org.fastfilter.bloom.BlockedBloomV2;
54
import org.fastfilter.bloom.Bloom;
6-
import org.fastfilter.bloom.CountingBloom;
7-
import org.fastfilter.bloom.SuccinctCountingBlockedBloom;
8-
import org.fastfilter.bloom.SuccinctCountingBlockedBloomRankedV2;
9-
import org.fastfilter.bloom.SuccinctCountingBlockedBloomV2;
10-
import org.fastfilter.bloom.SuccinctCountingBloom;
5+
import org.fastfilter.bloom.count.CountingBloom;
6+
import org.fastfilter.bloom.count.SuccinctCountingBlockedBloom;
7+
import org.fastfilter.bloom.count.SuccinctCountingBlockedBloomRanked;
8+
import org.fastfilter.bloom.count.SuccinctCountingBloom;
9+
import org.fastfilter.bloom.count.SuccinctCountingBloomRanked;
1110
import org.fastfilter.cuckoo.Cuckoo16;
1211
import org.fastfilter.cuckoo.Cuckoo8;
1312
import org.fastfilter.cuckoo.CuckooPlus16;
@@ -43,16 +42,16 @@ public Filter construct(long[] keys, int setting) {
4342
return SuccinctCountingBloom.construct(keys, setting);
4443
}
4544
},
46-
BLOCKED_BLOOM {
45+
SUCCINCT_COUNTING_BLOOM_RANKED {
4746
@Override
4847
public Filter construct(long[] keys, int setting) {
49-
return BlockedBloom.construct(keys, setting);
48+
return SuccinctCountingBloomRanked.construct(keys, setting);
5049
}
5150
},
52-
BLOCKED_BLOOM_V2 {
51+
BLOCKED_BLOOM {
5352
@Override
5453
public Filter construct(long[] keys, int setting) {
55-
return BlockedBloomV2.construct(keys, setting);
54+
return BlockedBloom.construct(keys, setting);
5655
}
5756
},
5857
SUCCINCT_COUNTING_BLOCKED_BLOOM {
@@ -61,16 +60,10 @@ public Filter construct(long[] keys, int setting) {
6160
return SuccinctCountingBlockedBloom.construct(keys, setting);
6261
}
6362
},
64-
SUCCINCT_COUNTING_BLOCKED_BLOOM_V2 {
65-
@Override
66-
public Filter construct(long[] keys, int setting) {
67-
return SuccinctCountingBlockedBloomV2.construct(keys, setting);
68-
}
69-
},
70-
SUCCINCT_COUNTING_BLOCKED_BLOOM_RANKED_V2 {
63+
SUCCINCT_COUNTING_BLOCKED_BLOOM_RANKED {
7164
@Override
7265
public Filter construct(long[] keys, int setting) {
73-
return SuccinctCountingBlockedBloomRankedV2.construct(keys, setting);
66+
return SuccinctCountingBlockedBloomRanked.construct(keys, setting);
7467
}
7568
},
7669
XOR_SIMPLE {

src/main/java/org/fastfilter/bloom/BlockedBloom.java

Lines changed: 23 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -4,50 +4,37 @@
44
import org.fastfilter.utils.Hash;
55

66
/**
7-
* A blocked Bloom filter. Compared to a regular Bloom filter, it is a little bit
8-
* faster, but needs more space. Not that useful beyond about 20 bits per key,
9-
* as the fpp doesn't decrease further.
7+
* A special kind of blocked Bloom filter. It sets 2 to 4 (usually 4) bits in
8+
* two 64-bit words; 1 or 2 (usually 2) per word. It is faster than a regular
9+
* Bloom filter, but needs slightly more space / has a slightly worse false
10+
* positive rate.
1011
*/
1112
public class BlockedBloom implements Filter {
1213

13-
// TODO not cache line aligned
14-
15-
// Should match the size of a cache line
16-
private static final int BITS_PER_BLOCK = 64 * 8;
17-
private static final int LONGS_PER_BLOCK = BITS_PER_BLOCK / 64;
18-
private static final int BLOCK_MASK = BITS_PER_BLOCK - 1;
19-
2014
public static BlockedBloom construct(long[] keys, int bitsPerKey) {
2115
long n = keys.length;
22-
long m = n * bitsPerKey;
23-
int k = getBestK(m, n);
24-
BlockedBloom f = new BlockedBloom((int) n, bitsPerKey, k);
16+
BlockedBloom f = new BlockedBloom((int) n, bitsPerKey);
2517
for(long x : keys) {
2618
f.add(x);
2719
}
2820
return f;
2921
}
3022

31-
private static int getBestK(long m, long n) {
32-
return Math.max(1, (int) Math.round((double) m / n * Math.log(2)));
33-
}
34-
35-
private final int k;
36-
private final int blocks;
23+
private final int buckets;
3724
private final long seed;
3825
private final long[] data;
3926

4027
public long getBitCount() {
4128
return data.length * 64L;
4229
}
4330

44-
BlockedBloom(int entryCount, int bitsPerKey, int k) {
31+
BlockedBloom(int entryCount, int bitsPerKey) {
32+
// bitsPerKey = 11;
4533
entryCount = Math.max(1, entryCount);
46-
this.k = k;
4734
this.seed = Hash.randomSeed();
4835
long bits = (long) entryCount * bitsPerKey;
49-
this.blocks = (int) (bits + BITS_PER_BLOCK - 1) / BITS_PER_BLOCK;
50-
data = new long[(int) (blocks * LONGS_PER_BLOCK) + 8];
36+
this.buckets = (int) bits / 64;
37+
data = new long[(int) (buckets + 16)];
5138
}
5239

5340
@Override
@@ -58,32 +45,24 @@ public boolean supportsAdd() {
5845
@Override
5946
public void add(long key) {
6047
long hash = Hash.hash64(key, seed);
61-
int start = Hash.reduce((int) hash, blocks) * LONGS_PER_BLOCK;
62-
int a = (int) hash;
63-
int b = (int) (hash >>> 32);
64-
for (int i = 0; i < k; i++) {
65-
data[start + ((a & BLOCK_MASK) >>> 6)] |= getBit(a);
66-
a += b;
67-
}
48+
int start = Hash.reduce((int) hash, buckets);
49+
hash = hash ^ Long.rotateLeft(hash, 32);
50+
long m1 = (1L << hash) | (1L << (hash >> 6));
51+
long m2 = (1L << (hash >> 12)) | (1L << (hash >> 18));
52+
data[start] |= m1;
53+
data[start + 1 + (int) (hash >>> 60)] |= m2;
6854
}
6955

7056
@Override
7157
public boolean mayContain(long key) {
7258
long hash = Hash.hash64(key, seed);
73-
int start = Hash.reduce((int) hash, blocks) * LONGS_PER_BLOCK;
74-
int a = (int) hash;
75-
int b = (int) (hash >>> 32);
76-
for (int i = 0; i < k; i++) {
77-
if ((data[start + ((a & BLOCK_MASK) >>> 6)] & getBit(a)) == 0) {
78-
return false;
79-
}
80-
a += b;
81-
}
82-
return true;
83-
}
84-
85-
private static long getBit(int index) {
86-
return 1L << index;
59+
int start = Hash.reduce((int) hash, buckets);
60+
hash = hash ^ Long.rotateLeft(hash, 32);
61+
long a = data[start];
62+
long b = data[start + 1 + (int) (hash >>> 60)];
63+
long m1 = (1L << hash) | (1L << (hash >> 6));
64+
long m2 = (1L << (hash >> 12)) | (1L << (hash >> 18));
65+
return ((m1 & a) == m1) && ((m2 & b) == m2);
8766
}
8867

8968
}

src/main/java/org/fastfilter/bloom/BlockedBloomV2.java

Lines changed: 0 additions & 68 deletions
This file was deleted.

0 commit comments

Comments
 (0)