Skip to content

Commit 435a4e3

Browse files
committed
Update comments, and more consistent use of 'this.' when assigning instance fields
1 parent 1f92499 commit 435a4e3

File tree

1 file changed

+22
-16
lines changed

1 file changed

+22
-16
lines changed

fastfilter/src/main/java/org/fastfilter/xorplus/XorPlus8.java

Lines changed: 22 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,10 @@ public class XorPlus8 implements Filter {
2525

2626
// TODO compression; now we have 9% / 11.5% / 36.5% free entries
2727

28+
// the number of hash slots which are XORed to compute the stored hash for each key
2829
private static final int HASHES = 3;
2930

31+
// this figure is added when computing the size of the table, i.e. it is the 32 in "1.23 * size + 32"
3032
private static final int OFFSET = 32;
3133

3234
// the table needs to be 1.23 times the number of keys to store
@@ -38,19 +40,19 @@ public class XorPlus8 implements Filter {
3840
// the number of keys in the filter
3941
private final int size;
4042

41-
// the table (array) length, that is size * 1.23
43+
// the table (array) length, that is size * 1.23 + 32
4244
private final int arrayLength;
4345

44-
// if the table is divided into 3 blocks (one block for each hash)
45-
// this allows to better compress the filter,
46-
// because the last block contains more zero entries than the first two
46+
// the table is divided into 3 (HASHES) blocks, one block for each hash. Each block holds this number of entries.
47+
// this allows to better compress the filter, because the last block contains more zero entries than the first two.
4748
private final int blockLength;
4849

4950
private long seed;
5051

51-
// the fingerprints (internally an array of long)
52+
// the fingerprints
5253
private byte[] fingerprints;
5354

55+
// the table (array) length, in bits
5456
private int bitCount;
5557

5658
private Rank9 rank;
@@ -81,7 +83,7 @@ public static XorPlus8 construct(long[] keys) {
8183
public XorPlus8(int size, byte[] fingerprints) {
8284
this.size = size;
8385
this.arrayLength = getArrayLength(size);
84-
bitCount = arrayLength * BITS_PER_FINGERPRINT;
86+
this.bitCount = arrayLength * BITS_PER_FINGERPRINT;
8587
this.blockLength = arrayLength / HASHES;
8688
this.fingerprints = fingerprints;
8789
}
@@ -105,9 +107,9 @@ public XorPlus8(int size, byte[] fingerprints) {
105107
*/
106108
public XorPlus8(long[] keys) {
107109
this.size = keys.length;
108-
arrayLength = getArrayLength(size);
109-
bitCount = arrayLength * BITS_PER_FINGERPRINT;
110-
blockLength = arrayLength / HASHES;
110+
this.arrayLength = getArrayLength(size);
111+
this.bitCount = arrayLength * BITS_PER_FINGERPRINT;
112+
this.blockLength = arrayLength / HASHES;
111113
int m = arrayLength;
112114

113115
// the order in which the fingerprints are inserted, where
@@ -120,8 +122,8 @@ public XorPlus8(long[] keys) {
120122
int reverseOrderPos;
121123

122124
// == mapping step ==
123-
// hashIndex is usually 0; only if we detect a cycle
124-
// (which is extremely unlikely) we would have to use a larger hashIndex
125+
// we usually execute this loop just once. If we detect a cycle (which is extremely unlikely)
126+
// then we try again, with a new random seed.
125127
long seed = 0;
126128
do {
127129
seed = Hash.randomSeed();
@@ -140,8 +142,9 @@ public XorPlus8(long[] keys) {
140142
int h = getHash(k, seed, hi);
141143
t2[h] ^= k;
142144
if (t2count[h] > 120) {
143-
// probably something wrong with the hash function
144-
throw new IllegalArgumentException();
145+
// probably something wrong with the hash function; or, the keys[] array contains many copies
146+
// of the same value
147+
throw new IllegalArgumentException("More than 120 keys hashed to the same location; indicates duplicate keys, or a bad hash function");
145148
}
146149
t2count[h]++;
147150
}
@@ -183,7 +186,10 @@ public XorPlus8(long[] keys) {
183186
break;
184187
}
185188
if (t2count[i] <= 0) {
186-
continue;
189+
continue; // if a key is the sole occupant for more than one of its hashes, it will wind up
190+
// being listed in multiple slots of the "alone" table; in that case, when we come
191+
// to the second or third "alone" entry for that key, it will already have been
192+
// removed, and so t2count will be 0.
187193
}
188194
long k = t2[i];
189195
if (t2count[i] != 1) {
@@ -251,9 +257,9 @@ public XorPlus8(long[] keys) {
251257
set.set(i);
252258
}
253259
}
254-
rank = new Rank9(set, blockLength);
260+
this.rank = new Rank9(set, blockLength);
255261

256-
fingerprints = new byte[2 * blockLength + set.cardinality()];
262+
this.fingerprints = new byte[2 * blockLength + set.cardinality()];
257263
if (2 * blockLength >= 0) {
258264
System.arraycopy(fp, 0, fingerprints, 0, 2 * blockLength);
259265
}

0 commit comments

Comments
 (0)