Skip to content

Commit 9b6c6d8

Browse files
Merge pull request #14 from richardstartin/blocked-bloom-bug
fix array OOBE in blocked bloom filter when top 4 bits of hash are set
2 parents 50ebb06 + 5100ef9 commit 9b6c6d8

File tree

11 files changed

+128
-12
lines changed

11 files changed

+128
-12
lines changed

fastfilter/src/main/java/org/fastfilter/bloom/BlockedBloom.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,8 @@
1212
public class BlockedBloom implements Filter {
1313

1414
public static BlockedBloom construct(long[] keys, int bitsPerKey) {
15-
long n = keys.length;
16-
BlockedBloom f = new BlockedBloom((int) n, bitsPerKey);
15+
int n = keys.length;
16+
BlockedBloom f = new BlockedBloom(n, bitsPerKey);
1717
for(long x : keys) {
1818
f.add(x);
1919
}
@@ -34,7 +34,7 @@ public long getBitCount() {
3434
this.seed = Hash.randomSeed();
3535
long bits = (long) entryCount * bitsPerKey;
3636
this.buckets = (int) bits / 64;
37-
data = new long[(int) (buckets + 16)];
37+
data = new long[buckets + 16 + 1];
3838
}
3939

4040
@Override

fastfilter/src/main/java/org/fastfilter/bloom/count/Select.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -149,7 +149,8 @@ public class Select {
149149
* @return the position (0 for first bit, 63 for last)
150150
*/
151151
public static int selectInLong(long x, int n) {
152-
assert n < Long.bitCount(x);
152+
// TODO this adds bytecode weight which influence inlining decisions
153+
assert n < Long.bitCount(x): n + " >= " + Long.bitCount(x);
153154
// Phase 1: sums by byte
154155
long byteSums = x - ((x & 0xa * ONES_STEP_4) >>> 1);
155156
byteSums = (byteSums & 3 * ONES_STEP_4) +

fastfilter/src/main/java/org/fastfilter/bloom/count/SuccinctCountingBlockedBloom.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ public long getBitCount() {
6161
this.seed = Hash.randomSeed();
6262
long bits = (long) entryCount * bitsPerKey;
6363
this.buckets = (int) bits / 64;
64-
int arrayLength = (int) (buckets + 16);
64+
int arrayLength = buckets + 16 + 1;
6565
data = new long[arrayLength];
6666
counts = new long[arrayLength];
6767
overflow = new long[100 + arrayLength * 10 / 100];

fastfilter/src/main/java/org/fastfilter/bloom/count/SuccinctCountingBlockedBloomRanked.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ public long getBitCount() {
6363
this.seed = Hash.randomSeed();
6464
long bits = (long) entryCount * bitsPerKey;
6565
this.buckets = (int) bits / 64;
66-
int arrayLength = buckets + 16;
66+
int arrayLength = buckets + 16 + 1;
6767
data = new long[arrayLength];
6868
counts = new long[arrayLength];
6969
overflow = new long[100 + arrayLength * 10 / 100];

fastfilter/src/main/java/org/fastfilter/cuckoo/Cuckoo16.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ public static Cuckoo16 construct(long[] keys) {
3939

4040
public Cuckoo16(int capacity) {
4141
// bucketCount needs to be even for bucket2 to work
42-
bucketCount = (int) Math.ceil((double) capacity / ENTRIES_PER_BUCKET) / 2 * 2;
42+
bucketCount = Math.max(1, (int) Math.ceil((double) capacity / ENTRIES_PER_BUCKET) / 2 * 2);
4343
this.data = new long[bucketCount];
4444
this.seed = Hash.randomSeed();
4545
}

fastfilter/src/main/java/org/fastfilter/cuckoo/Cuckoo8.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ public static Cuckoo8 construct(long[] keys) {
3939

4040
public Cuckoo8(int capacity) {
4141
// bucketCount needs to be even for bucket2 to work
42-
bucketCount = (int) Math.ceil((double) capacity / ENTRIES_PER_BUCKET) / 2 * 2;
42+
bucketCount = Math.max(1, (int) Math.ceil((double) capacity / ENTRIES_PER_BUCKET) / 2 * 2);
4343
this.data = new int[bucketCount];
4444
this.seed = Hash.randomSeed();
4545
}

fastfilter/src/main/java/org/fastfilter/cuckoo/CuckooPlus16.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ public static CuckooPlus16 construct(long[] keys) {
4242
public CuckooPlus16(int capacity) {
4343
// bucketCount needs to be even for bucket2 to work
4444
bucketCount = (int) Math.ceil((double) capacity) / 2 * 2;
45-
this.data = new short[bucketCount + 1];
45+
this.data = new short[bucketCount + 2];
4646
this.seed = Hash.randomSeed();
4747
}
4848

fastfilter/src/main/java/org/fastfilter/cuckoo/CuckooPlus8.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ public static CuckooPlus8 construct(long[] keys) {
4242
public CuckooPlus8(int capacity) {
4343
// bucketCount needs to be even for bucket2 to work
4444
bucketCount = (int) Math.ceil((double) capacity) / 2 * 2;
45-
this.data = new byte[bucketCount + 1];
45+
this.data = new byte[bucketCount + 2];
4646
this.seed = Hash.randomSeed();
4747
}
4848

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
package org.fastfilter;
2+
3+
import org.fastfilter.utils.Hash;
4+
import org.junit.Test;
5+
import org.junit.runner.RunWith;
6+
import org.junit.runners.Parameterized;
7+
8+
import static org.fastfilter.FilterType.*;
9+
import static org.junit.Assert.assertTrue;
10+
11+
@RunWith(Parameterized.class)
12+
public class RegressionTests {
13+
14+
15+
@Parameterized.Parameters(name = "{0}/seed={1}/{3} bits per key")
16+
public static Object[][] regressionCases() {
17+
return new Object[][]{
18+
{BLOCKED_BLOOM, 872153271794238865L, new long[]{1, 2, 3}, 8},
19+
{SUCCINCT_COUNTING_BLOCKED_BLOOM_RANKED, -401700599714690558L, new long[]{1, 2, 3}, 8},
20+
{SUCCINCT_COUNTING_BLOCKED_BLOOM, 6049486880293779298L, new long[]{1, 2, 3}, 8},
21+
{SUCCINCT_COUNTING_BLOCKED_BLOOM, 353772444652436712L, new long[]{5828366214313827392L, -8467365400393984494L, -424469057572555653L}, 8},
22+
// actually this one is impossible to reproduce because of the volatile seed
23+
{XOR_SIMPLE, 6831634639270950343L, new long[]{1, 2, 3}, 8},
24+
{CUCKOO_8, 6335419348330489927L, new long[]{1, 2, 3}, 8},
25+
{CUCKOO_16, -9087718164446355442L, new long[]{1, 2, 3}, 8},
26+
{CUCKOO_PLUS_8, -4031187722136552688L, new long[]{2173645522219008926L, 589862361776609381L, -1776331367981897399L, -7505626095864333717L, 6968992741301426055L, -3110009760358584538L,
27+
4126573288832158972L, -7561361506777543806L, -5363365907738450196L, 4406554949060325754L, 6610203208080690753L, 3455015316204788042L, 7863420196911575708L, 1875128261287193281L,
28+
6163360156169844663L, -24248169001003216L, -62326545792238735L, 5810209567031734221L, -2543215903193150719L, 8066741310405890113L, -1700763885488699715L, 331022494986758365L,
29+
6921011948518481376L, -4135401271689018905L, -3648707841443156724L, 8304743068009082509L, -6681730404693737112L, 1427756985322103926L, 7726889622988885916L, 4123575358133211499L,
30+
4537462330215573723L, 9078573934276235401L, 32187183317483562L, -1841847540329070596L, -8420216857639877248L, -8421265231581213825L, -8233517952154774510L, -4678911007264536715L,
31+
-8526674353687284449L, -27365118851637401L, -254145228777582712L, 2965855027055207977L, -3466341725845433998L, 7006973965168506949L, -3585814173337365788L, 7264252236018528601L,
32+
4058857911179366207L, 561654263008010300L, 2389635521107751132L, 7314182055688934933L, 5884448457819665732L, -7686492008813074402L, 298658331691777464L, -5830719925234073017L,
33+
-6985871982812486035L, -4355730107235544811L, -6914420638144647786L, 7092124037956934799L, 5352744066168866120L, 4081227363605418964L, 2175125725804301191L, -5792740580295507772L,
34+
-6183692349471335223L, -1221949547344177675L, -8340921677695714065L, 6519388252075884491L, -4726807568999917298L, 2930512993631049657L, -7721504975700326069L, -8479276039617916927L,
35+
-2112370952694584366L, -9059529185598491289L, -6189590607337131826L, -5949793064086556159L, 1557391959671056410L, 4107630139293131578L, 4738411557430294180L, -3606951019798437215L,
36+
-1742301458061239008L, -7389522306890543715L, 3726370125210336256L, -2051912870295294004L, -7639673055712206584L, -2767802468218389090L, 3131241789318669061L, -8316329307438505860L,
37+
-4007166641668927959L, -6102930542977036947L, 7088919565484666773L, -3593550123383986925L, 6613817918373076399L, -7596314495989542882L, -5059595045899697395L, -547306193171270722L,
38+
8660029473572898552L, -7731225535097214079L, 2058313776967259523L, 2964665398310080884L, 6291785408569188246L, -329774438524923459L, -5664134174314856593L, -5756681006397171776L, 6223635625117218437L}, 8},
39+
{MPHF, 5400005265475528641L, new long[]{1773227589100607582L, 1401008621823229258L, 901259869510331588L, 1197333276475942193L, 1651119322544330030L, 986112488938952069L,
40+
1675726966169519337L, 1888976485651830901L, 1912475806632315628L, 74149177065144196L, 942187212974983392L, 4215890488646823727L, 3694125823111201993L, 3793738020275325587L,
41+
2995933316126352930L, 4017238031310632606L, 3798301062142417109L, 4113831042388378630L, 2707645218409175553L, 3919094501360474098L, 4252303149040498185L, 4199952774063362014L,
42+
3327107703856825600L, 3964961892107416731L, 3966935050689896802L, 5921581983460164542L, 5314808407468600915L, 4696106051339789101L, 6634550099558541650L, 6382215924765560390L,
43+
5154426188333895839L, 6466726512887879802L, 4836037707257613543L, 5608288809216362089L, 6793579614382201757L, 6709676086154795823L, 5972763369063718749L, 4765003610184494484L,
44+
5635899990946803784L, 5349364953307177057L, 6264947502670452080L, 6912802837350428240L, 5429101923532929753L, 5668285853203792528L, 6563481559119688471L, 6317103420640399795L,
45+
8937635149702679081L, 8062485652179232600L, 8942552659025336850L, 8508924203915110088L, 8938353353354172574L, 7907183519152868142L, 8654059200278009367L, 9151769575477085925L,
46+
8494748655862745947L, 8180511740959930009L, 8244780136171765059L, 9165671267726030534L, 8022333815153416350L, -7348602598025993307L, -7137527130402610919L, -8864995500791741494L,
47+
-7906426467332813681L, -7343692788430814188L, -9007903685362026026L, -9178084101442809748L, -7526812997805935236L, -7640655228186765204L, -6001026700792546473L, -6870431948453764034L,
48+
-5271447769651360857L, -5591560689279781023L, -5868299437269234751L, -6226415928272647338L, -5431159857161381398L, -6370987534222793305L, -3043487285958836631L, -4301361355076290527L,
49+
-3682760495848399784L, -3038236626480548566L, -3895662199162059335L, -3192071612777396897L, -2729235696166508115L, -3087500698602513665L, -4156274151845244416L, -3309406490623888358L,
50+
-2528282539021436624L, -1633985981412420612L, -360913997783076114L, -111396594598251164L, -1339842643116805785L, -1403112313973786426L, -856792793066744400L, -392622225906607155L,
51+
-863763710126232180L, -400874713595065720L, -373641626604004087L, -1951676159570020905L, -1774490078013273270L, -468961924964997308L, -1210600430103212706L, -384877607682781339L, -1945436007627906978L}, 8},
52+
{COUNTING_BLOOM, 6360526788365209414L, new long[]{-4535795219140351433L, 4882771549875911188L, -6502814355560814028L}, 16},
53+
{GCS2, -2130647756636796307L, new long[]{1, 2, 3}, 8}
54+
};
55+
}
56+
57+
private final FilterType type;
58+
private final long seed;
59+
private final long[] keys;
60+
private final int bitsPerKey;
61+
62+
public RegressionTests(FilterType type, long seed, long[] keys, int bitsPerKey) {
63+
this.type = type;
64+
this.seed = seed;
65+
this.keys = keys;
66+
this.bitsPerKey = bitsPerKey;
67+
}
68+
69+
@Test
70+
public void regressionTest() {
71+
Hash.setSeed(seed);
72+
Filter filter = type.construct(keys, bitsPerKey);
73+
for (long key : keys) {
74+
assertTrue(filter.mayContain(key));
75+
}
76+
}
77+
}
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
package org.fastfilter;
2+
3+
import org.fastfilter.utils.Hash;
4+
5+
import java.util.Arrays;
6+
import java.util.EnumSet;
7+
import java.util.concurrent.ThreadLocalRandom;
8+
import java.util.stream.LongStream;
9+
10+
import static junit.framework.TestCase.assertTrue;
11+
import static org.fastfilter.FilterType.*;
12+
13+
public class SimpleFuzzer {
14+
15+
public static void main(String... args) {
16+
long seed = 0;
17+
for (int bitsPerKey = 8; bitsPerKey < 32; bitsPerKey += 8) {
18+
for (int keyLength = 3; keyLength < 1_000_000; keyLength += ThreadLocalRandom.current().nextInt(10000)) {
19+
long[] keys = LongStream.range(0, keyLength).map(i -> ThreadLocalRandom.current().nextLong()).toArray();
20+
for (FilterType type : FilterType.values()) {
21+
try {
22+
for (int i = 0; i < 1_000_000; ++i) {
23+
seed = ThreadLocalRandom.current().nextLong();
24+
Hash.setSeed(seed);
25+
Filter filter = type.construct(keys, bitsPerKey);
26+
for (long key : keys) {
27+
assertTrue(seed + "/" + type + "/" + Arrays.toString(keys), filter.mayContain(key));
28+
}
29+
}
30+
} catch (Exception e) {
31+
System.out.println(seed + "/" + type + "/" + Arrays.toString(keys));
32+
throw e;
33+
}
34+
}
35+
}
36+
}
37+
}
38+
}

0 commit comments

Comments
 (0)