Skip to content

Commit 71f78bc

Browse files
change fixes to blocked Bloom filters, fix out-of-bounds exception (OOBE) bugs in cuckoo+, add simple fuzzer back, ignoring MPHF and GCS2
1 parent adc4d9b commit 71f78bc

File tree

9 files changed

+77
-19
lines changed

9 files changed

+77
-19
lines changed

fastfilter/src/main/java/org/fastfilter/bloom/BlockedBloom.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ public long getBitCount() {
3434
this.seed = Hash.randomSeed();
3535
long bits = (long) entryCount * bitsPerKey;
3636
this.buckets = (int) bits / 64;
37-
data = new long[(buckets + 16)];
37+
data = new long[buckets + 16 + 1];
3838
}
3939

4040
@Override
@@ -50,7 +50,7 @@ public void add(long key) {
5050
long m1 = (1L << hash) | (1L << (hash >> 6));
5151
long m2 = (1L << (hash >> 12)) | (1L << (hash >> 18));
5252
data[start] |= m1;
53-
data[start + (int) (hash >>> 60)] |= m2;
53+
data[start + 1 + (int) (hash >>> 60)] |= m2;
5454
}
5555

5656
@Override
@@ -59,7 +59,7 @@ public boolean mayContain(long key) {
5959
int start = Hash.reduce((int) hash, buckets);
6060
hash = hash ^ Long.rotateLeft(hash, 32);
6161
long a = data[start];
62-
long b = data[start + (int) (hash >>> 60)];
62+
long b = data[start + 1 + (int) (hash >>> 60)];
6363
long m1 = (1L << hash) | (1L << (hash >> 6));
6464
long m2 = (1L << (hash >> 12)) | (1L << (hash >> 18));
6565
return ((m1 & a) == m1) && ((m2 & b) == m2);

fastfilter/src/main/java/org/fastfilter/bloom/count/Select.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -149,7 +149,8 @@ public class Select {
149149
* @return the position (0 for first bit, 63 for last)
150150
*/
151151
public static int selectInLong(long x, int n) {
152-
assert n < Long.bitCount(x);
152+
// TODO: the assertion message adds bytecode weight, which influences inlining decisions
153+
assert n < Long.bitCount(x): n + " >= " + Long.bitCount(x);
153154
// Phase 1: sums by byte
154155
long byteSums = x - ((x & 0xa * ONES_STEP_4) >>> 1);
155156
byteSums = (byteSums & 3 * ONES_STEP_4) +

fastfilter/src/main/java/org/fastfilter/bloom/count/SuccinctCountingBlockedBloom.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ public long getBitCount() {
6161
this.seed = Hash.randomSeed();
6262
long bits = (long) entryCount * bitsPerKey;
6363
this.buckets = (int) bits / 64;
64-
int arrayLength = (buckets + 16);
64+
int arrayLength = buckets + 16 + 1;
6565
data = new long[arrayLength];
6666
counts = new long[arrayLength];
6767
overflow = new long[100 + arrayLength * 10 / 100];
@@ -87,7 +87,7 @@ public void add(long key) {
8787
if (a2 != a1) {
8888
increment(start, a2);
8989
}
90-
int second = start + (int) (hash >>> 60);
90+
int second = start + 1 + (int) (hash >>> 60);
9191
int a3 = (int) ((hash >> 12) & 63);
9292
int a4 = (int) ((hash >> 18) & 63);
9393
increment(second, a3);
@@ -112,7 +112,7 @@ public void remove(long key) {
112112
if (a2 != a1) {
113113
decrement(start, a2);
114114
}
115-
int second = start + (int) (hash >>> 60);
115+
int second = start + 1 + (int) (hash >>> 60);
116116
int a3 = (int) ((hash >> 12) & 63);
117117
int a4 = (int) ((hash >> 18) & 63);
118118
decrement(second, a3);
@@ -142,7 +142,7 @@ public boolean mayContain(long key) {
142142
int start = Hash.reduce((int) hash, buckets);
143143
hash = hash ^ Long.rotateLeft(hash, 32);
144144
long a = data[start];
145-
long b = data[start + (int) (hash >>> 60)];
145+
long b = data[start + 1 + (int) (hash >>> 60)];
146146
long m1 = (1L << hash) | (1L << (hash >> 6));
147147
long m2 = (1L << (hash >> 12)) | (1L << (hash >> 18));
148148
return ((m1 & a) == m1) && ((m2 & b) == m2);

fastfilter/src/main/java/org/fastfilter/bloom/count/SuccinctCountingBlockedBloomRanked.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ public long getBitCount() {
6363
this.seed = Hash.randomSeed();
6464
long bits = (long) entryCount * bitsPerKey;
6565
this.buckets = (int) bits / 64;
66-
int arrayLength = buckets + 16;
66+
int arrayLength = buckets + 16 + 1;
6767
data = new long[arrayLength];
6868
counts = new long[arrayLength];
6969
overflow = new long[100 + arrayLength * 10 / 100];
@@ -89,7 +89,7 @@ public void add(long key) {
8989
if (a2 != a1) {
9090
increment(start, a2);
9191
}
92-
int second = start + (int) (hash >>> 60);
92+
int second = start + 1 + (int) (hash >>> 60);
9393
int a3 = (int) ((hash >> 12) & 63);
9494
int a4 = (int) ((hash >> 18) & 63);
9595
increment(second, a3);
@@ -114,7 +114,7 @@ public void remove(long key) {
114114
if (a2 != a1) {
115115
decrement(start, a2);
116116
}
117-
int second = start + (int) (hash >>> 60);
117+
int second = start + 1 + (int) (hash >>> 60);
118118
int a3 = (int) ((hash >> 12) & 63);
119119
int a4 = (int) ((hash >> 18) & 63);
120120
decrement(second, a3);
@@ -144,7 +144,7 @@ public boolean mayContain(long key) {
144144
int start = Hash.reduce((int) hash, buckets);
145145
hash = hash ^ Long.rotateLeft(hash, 32);
146146
long a = data[start];
147-
long b = data[start + (int) (hash >>> 60)];
147+
long b = data[start + 1 + (int) (hash >>> 60)];
148148
long m1 = (1L << hash) | (1L << (hash >> 6));
149149
long m2 = (1L << (hash >> 12)) | (1L << (hash >> 18));
150150
return ((m1 & a) == m1) && ((m2 & b) == m2);

fastfilter/src/main/java/org/fastfilter/cuckoo/CuckooPlus16.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ public static CuckooPlus16 construct(long[] keys) {
4242
public CuckooPlus16(int capacity) {
4343
// bucketCount needs to be even for bucket2 to work
4444
bucketCount = (int) Math.ceil((double) capacity) / 2 * 2;
45-
this.data = new short[bucketCount + 1];
45+
this.data = new short[bucketCount + 2];
4646
this.seed = Hash.randomSeed();
4747
}
4848

fastfilter/src/main/java/org/fastfilter/cuckoo/CuckooPlus8.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ public static CuckooPlus8 construct(long[] keys) {
4242
public CuckooPlus8(int capacity) {
4343
// bucketCount needs to be even for bucket2 to work
4444
bucketCount = (int) Math.ceil((double) capacity) / 2 * 2;
45-
this.data = new byte[bucketCount + 1];
45+
this.data = new byte[bucketCount + 2];
4646
this.seed = Hash.randomSeed();
4747
}
4848

fastfilter/src/test/java/org/fastfilter/RegressionTests.java

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,15 +14,31 @@ public class RegressionTests {
1414

1515
@Parameterized.Parameters(name = "{0}/{1}/*")
1616
public static Object[][] regressionCases() {
17-
return new Object[][] {
17+
return new Object[][]{
1818
{BLOCKED_BLOOM, 872153271794238865L, new long[]{1, 2, 3}},
1919
{SUCCINCT_COUNTING_BLOCKED_BLOOM_RANKED, -401700599714690558L, new long[]{1, 2, 3}},
2020
{SUCCINCT_COUNTING_BLOCKED_BLOOM, 6049486880293779298L, new long[]{1, 2, 3}},
21+
{SUCCINCT_COUNTING_BLOCKED_BLOOM, 353772444652436712L, new long[]{5828366214313827392L, -8467365400393984494L, -424469057572555653L}},
2122
// actually, this one is impossible to reproduce because of the volatile seed
2223
{XOR_SIMPLE, 6831634639270950343L, new long[]{1, 2, 3}},
2324
{CUCKOO_8, 6335419348330489927L, new long[]{1, 2, 3}},
24-
{CUCKOO_16, -9087718164446355442L, new long[]{1, 2, 3}}
25-
};
25+
{CUCKOO_16, -9087718164446355442L, new long[]{1, 2, 3}},
26+
{CUCKOO_PLUS_8, -4031187722136552688L, new long[]{2173645522219008926L, 589862361776609381L, -1776331367981897399L, -7505626095864333717L, 6968992741301426055L, -3110009760358584538L,
27+
4126573288832158972L, -7561361506777543806L, -5363365907738450196L, 4406554949060325754L, 6610203208080690753L, 3455015316204788042L, 7863420196911575708L, 1875128261287193281L,
28+
6163360156169844663L, -24248169001003216L, -62326545792238735L, 5810209567031734221L, -2543215903193150719L, 8066741310405890113L, -1700763885488699715L, 331022494986758365L,
29+
6921011948518481376L, -4135401271689018905L, -3648707841443156724L, 8304743068009082509L, -6681730404693737112L, 1427756985322103926L, 7726889622988885916L, 4123575358133211499L,
30+
4537462330215573723L, 9078573934276235401L, 32187183317483562L, -1841847540329070596L, -8420216857639877248L, -8421265231581213825L, -8233517952154774510L, -4678911007264536715L,
31+
-8526674353687284449L, -27365118851637401L, -254145228777582712L, 2965855027055207977L, -3466341725845433998L, 7006973965168506949L, -3585814173337365788L, 7264252236018528601L,
32+
4058857911179366207L, 561654263008010300L, 2389635521107751132L, 7314182055688934933L, 5884448457819665732L, -7686492008813074402L, 298658331691777464L, -5830719925234073017L,
33+
-6985871982812486035L, -4355730107235544811L, -6914420638144647786L, 7092124037956934799L, 5352744066168866120L, 4081227363605418964L, 2175125725804301191L, -5792740580295507772L,
34+
-6183692349471335223L, -1221949547344177675L, -8340921677695714065L, 6519388252075884491L, -4726807568999917298L, 2930512993631049657L, -7721504975700326069L, -8479276039617916927L,
35+
-2112370952694584366L, -9059529185598491289L, -6189590607337131826L, -5949793064086556159L, 1557391959671056410L, 4107630139293131578L, 4738411557430294180L, -3606951019798437215L,
36+
-1742301458061239008L, -7389522306890543715L, 3726370125210336256L, -2051912870295294004L, -7639673055712206584L, -2767802468218389090L, 3131241789318669061L, -8316329307438505860L,
37+
-4007166641668927959L, -6102930542977036947L, 7088919565484666773L, -3593550123383986925L, 6613817918373076399L, -7596314495989542882L, -5059595045899697395L, -547306193171270722L,
38+
8660029473572898552L, -7731225535097214079L, 2058313776967259523L, 2964665398310080884L, 6291785408569188246L, -329774438524923459L, -5664134174314856593L, -5756681006397171776L, 6223635625117218437L}}
39+
}
40+
41+
;
2642
}
2743

2844
private final FilterType type;
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
package org.fastfilter;
2+
3+
import org.fastfilter.utils.Hash;
4+
5+
import java.util.Arrays;
6+
import java.util.EnumSet;
7+
import java.util.concurrent.ThreadLocalRandom;
8+
import java.util.stream.LongStream;
9+
10+
import static junit.framework.TestCase.assertTrue;
11+
12+
public class SimpleFuzzer {
13+
14+
// implementations with bugs which may not be worth fixing
15+
private static final EnumSet<FilterType> IGNORED = EnumSet.of(FilterType.GCS2, FilterType.MPHF);
16+
17+
public static void main(String... args) {
18+
long seed = 0;
19+
for (int keyLength = 3; keyLength < 1_000_000; keyLength += 100) {
20+
long[] keys = LongStream.range(0, keyLength).map(i -> ThreadLocalRandom.current().nextLong()).toArray();
21+
for (FilterType type : FilterType.values()) {
22+
if (IGNORED.contains(type)) {
23+
continue;
24+
}
25+
try {
26+
for (int i = 0; i < 1_000; ++i) {
27+
seed = ThreadLocalRandom.current().nextLong();
28+
Hash.setSeed(seed);
29+
Filter filter = type.construct(keys, 8);
30+
for (long key : keys) {
31+
assertTrue(seed + "/" + type + "/" + Arrays.toString(keys), filter.mayContain(key));
32+
}
33+
}
34+
} catch (Exception e) {
35+
System.out.println(seed + "/" + type + "/" + Arrays.toString(keys));
36+
throw e;
37+
}
38+
}
39+
}
40+
}
41+
}

fastfilter/src/test/java/org/fastfilter/TestAllFilters.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -90,8 +90,8 @@ public static void main(String... args) {
9090
for (int size = 1_000_000; size <= 10_000_000; size *= 10) {
9191
System.out.println("size " + size);
9292
for (int test = 0; test < 10; test++) {
93-
// test(FilterType.BLOOM, size, test, true);
94-
// test(FilterType.BLOCKED_BLOOM, size, test, true);
93+
test(FilterType.BLOOM, size, test, true);
94+
test(FilterType.BLOCKED_BLOOM, size, test, true);
9595
test(FilterType.COUNTING_BLOOM, size, test, true);
9696
test(FilterType.SUCCINCT_COUNTING_BLOOM, size, test, true);
9797
test(FilterType.SUCCINCT_COUNTING_BLOOM_RANKED, size, test, true);

0 commit comments

Comments
 (0)