@@ -25,8 +25,10 @@ public class XorPlus8 implements Filter {
2525
2626 // TODO compression; now we have 9% / 11.5% / 36.5% free entries
2727
28+ // the number of hash slots which are XORed to compute the stored hash for each key
2829 private static final int HASHES = 3 ;
2930
31+ // this figure is added when computing the size of the table, i.e. it is the 32 in "1.23 * size + 32"
3032 private static final int OFFSET = 32 ;
3133
3234 // the table needs to hold 1.23 times as many entries as there are keys to store
@@ -38,19 +40,19 @@ public class XorPlus8 implements Filter {
3840 // the number of keys in the filter
3941 private final int size ;
4042
41- // the table (array) length, that is size * 1.23
43+ // the table (array) length, that is size * 1.23 + 32
4244 private final int arrayLength ;
4345
44- // if the table is divided into 3 blocks (one block for each hash)
45- // this allows to better compress the filter,
46- // because the last block contains more zero entries than the first two
46+ // the table is divided into 3 (HASHES) blocks, one block for each hash. Each block holds this number of entries.
47+ // this layout allows the filter to be compressed better, because the last block contains more zero entries than the first two.
4748 private final int blockLength ;
4849
4950 private long seed ;
5051
51- // the fingerprints (internally an array of long)
52+ // the fingerprints
5253 private byte [] fingerprints ;
5354
55+ // the table (array) length, in bits
5456 private int bitCount ;
5557
5658 private Rank9 rank ;
@@ -81,7 +83,7 @@ public static XorPlus8 construct(long[] keys) {
8183 public XorPlus8 (int size , byte [] fingerprints ) {
8284 this .size = size ;
8385 this .arrayLength = getArrayLength (size );
84- bitCount = arrayLength * BITS_PER_FINGERPRINT ;
86+ this . bitCount = arrayLength * BITS_PER_FINGERPRINT ;
8587 this .blockLength = arrayLength / HASHES ;
8688 this .fingerprints = fingerprints ;
8789 }
@@ -105,9 +107,9 @@ public XorPlus8(int size, byte[] fingerprints) {
105107 */
106108 public XorPlus8 (long [] keys ) {
107109 this .size = keys .length ;
108- arrayLength = getArrayLength (size );
109- bitCount = arrayLength * BITS_PER_FINGERPRINT ;
110- blockLength = arrayLength / HASHES ;
110+ this . arrayLength = getArrayLength (size );
111+ this . bitCount = arrayLength * BITS_PER_FINGERPRINT ;
112+ this . blockLength = arrayLength / HASHES ;
111113 int m = arrayLength ;
112114
113115 // the order in which the fingerprints are inserted, where
@@ -120,8 +122,8 @@ public XorPlus8(long[] keys) {
120122 int reverseOrderPos ;
121123
122124 // == mapping step ==
123- // hashIndex is usually 0; only if we detect a cycle
124- // (which is extremely unlikely) we would have to use a larger hashIndex
125+ // we usually execute this loop just once. If we detect a cycle (which is extremely unlikely)
126+ // then we try again, with a new random seed.
125127 long seed = 0 ;
126128 do {
127129 seed = Hash .randomSeed ();
@@ -140,8 +142,9 @@ public XorPlus8(long[] keys) {
140142 int h = getHash (k , seed , hi );
141143 t2 [h ] ^= k ;
142144 if (t2count [h ] > 120 ) {
143- // probably something wrong with the hash function
144- throw new IllegalArgumentException ();
145+ // probably something wrong with the hash function; or, the keys[] array contains many copies
146+ // of the same value
147+ throw new IllegalArgumentException ("More than 120 keys hashed to the same location; indicates duplicate keys, or a bad hash function" );
145148 }
146149 t2count [h ]++;
147150 }
@@ -183,7 +186,10 @@ public XorPlus8(long[] keys) {
183186 break ;
184187 }
185188 if (t2count [i ] <= 0 ) {
186- continue ;
189+ continue ; // if a key is the sole occupant for more than one of its hashes, it will wind up
190+ // being listed in multiple slots of the "alone" table; in that case, when we come
191+ // to the second or third "alone" entry for that key, it will already have been
192+ // removed, and so t2count will be 0.
187193 }
188194 long k = t2 [i ];
189195 if (t2count [i ] != 1 ) {
@@ -251,9 +257,9 @@ public XorPlus8(long[] keys) {
251257 set .set (i );
252258 }
253259 }
254- rank = new Rank9 (set , blockLength );
260+ this . rank = new Rank9 (set , blockLength );
255261
256- fingerprints = new byte [2 * blockLength + set .cardinality ()];
262+ this . fingerprints = new byte [2 * blockLength + set .cardinality ()];
257263 if (2 * blockLength >= 0 ) {
258264 System .arraycopy (fp , 0 , fingerprints , 0 , 2 * blockLength );
259265 }
0 commit comments