Skip to content

Commit 8adeb0f

Browse files
committed
Visual peeling
1 parent 399fc9f commit 8adeb0f

File tree

1 file changed

+233
-0
lines changed

1 file changed

+233
-0
lines changed
Lines changed: 233 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,233 @@
1+
package org.fastfilter.xor;
2+
3+
import java.util.Locale;
4+
5+
import org.fastfilter.utils.Hash;
6+
import org.fastfilter.utils.RandomGenerator;
7+
8+
/**
9+
* Visualize the peeling / burning of fuse filters.
10+
*
11+
* See also "Peeling Close to the Orientability Threshold - Spatial Coupling in
12+
* Hashing-Based Data Structures"
13+
*
14+
*/
15+
public class VisualPeeling {
16+
17+
// true for regular xor filter, false for everything else
18+
private static final boolean BPZ = false;
19+
20+
// true for fuse filter, false for coupled
21+
private static final boolean FUSE = true;
22+
23+
// only for coupled: true for mirrored (one fuse), two for regular (two fuses)
24+
private static final boolean MIRROR = true;
25+
26+
private static final int HASHES = 3;
27+
28+
static void testOneMillion() {
29+
int size = 1_000_000;
30+
int segmentLengthBits = 13;
31+
int segmentLength = 1 << segmentLengthBits;
32+
System.out.println("cf2 segmentLength " + segmentLength + " ");
33+
double min = 0.87, max = 0.888, step = 0.001;
34+
if (FUSE) {
35+
min = 0.885;
36+
max = 0.9;
37+
step = 0.001;
38+
}
39+
if (BPZ) {
40+
min = 0.81;
41+
max = 0.82;
42+
step = 0.002;
43+
}
44+
for (double load = min; load <= max; load += step) {
45+
Data d = getProbability(size, segmentLengthBits, load, null);
46+
System.out.println(d);
47+
}
48+
}
49+
50+
public static void main(String... args) {
51+
testOneMillion();
52+
}
53+
54+
static Data getProbability(int size, int segmentLengthBits, double load, Data best) {
55+
int segmentLength = 1 << segmentLengthBits;
56+
int arrayLength = (int) (size / load);
57+
if (arrayLength <= 0) {
58+
return null;
59+
}
60+
int segmentCount = arrayLength - 1 * segmentLength;
61+
if (FUSE) {
62+
segmentCount = (arrayLength - 2 * segmentLength) / segmentLength;
63+
}
64+
if (BPZ) {
65+
segmentCount = 3;
66+
segmentLength = (arrayLength - 2) / 3;
67+
}
68+
if (segmentCount <= 0) {
69+
return null;
70+
}
71+
Data d = new Data();
72+
d.size = size;
73+
d.load = load;
74+
d.segmentLength = segmentLength;
75+
d.bitsPerKey = (double) arrayLength * 8 / size;
76+
if (best != null && d.bitsPerKey > best.bitsPerKey) {
77+
return null;
78+
}
79+
// System.out.println(" test " + d);
80+
int successCount = 0;
81+
int testCount = Math.max(5, 10_000_000 / size);
82+
for(int seed = 0; seed < testCount; seed++) {
83+
long[] keys = new long[size];
84+
RandomGenerator.createRandomUniqueListFast(keys, seed);
85+
int[] success = testMapping(keys, segmentLengthBits, segmentCount, arrayLength, seed);
86+
if (success != null) {
87+
d.data = success;
88+
successCount++;
89+
}
90+
}
91+
double p = 1.0 * successCount / testCount;
92+
d.p = p;
93+
return d;
94+
}
95+
96+
public static int[] testMapping(long[] keys, int segmentLengthBits, int segmentCount, int arrayLength, long seed) {
97+
int segmentLength = 1 << segmentLengthBits;
98+
if (BPZ) {
99+
segmentLength = (arrayLength - 2) / 3;
100+
}
101+
int size = keys.length;
102+
int m = arrayLength;
103+
seed = Hash.randomSeed();
104+
byte[] t2count = new byte[m];
105+
long[] t2 = new long[m];
106+
for (long k : keys) {
107+
for (int hi = 0; hi < HASHES; hi++) {
108+
int h = getHash(segmentLengthBits, segmentLength, segmentCount, k, seed, hi);
109+
t2[h] ^= k;
110+
if (t2count[h] > 120) {
111+
// probably something wrong with the hash function
112+
throw new IllegalArgumentException();
113+
}
114+
t2count[h]++;
115+
}
116+
}
117+
int count = 0;
118+
int[] alone = new int[arrayLength];
119+
int[] alone2 = new int[arrayLength];
120+
int alonePos = 0;
121+
for (int i = 0; i < arrayLength; i++) {
122+
if (t2count[i] == 1) {
123+
alone[alonePos++] = i;
124+
}
125+
}
126+
System.out.println();
127+
int levels = 0;
128+
while (count < size) {
129+
int mod = BPZ ? 3 : 200;
130+
if (levels % mod == 0) {
131+
int[] nonZeroCount = new int[40];
132+
for (int i = 0; i < t2count.length; i++) {
133+
if (t2count[i] > 0) {
134+
nonZeroCount[i * 40 / t2count.length]++;
135+
}
136+
}
137+
int max = t2count.length / 40;
138+
for (int i = 0; i < 40; i++) {
139+
System.out.print(nonZeroCount[i] == 0 ? 0 : (1 + nonZeroCount[i] * 8 / max));
140+
}
141+
System.out.println();
142+
}
143+
levels++;
144+
if (alonePos == 0) {
145+
System.out.println("FAIL: levels=" + levels);
146+
return null;
147+
}
148+
int alonePos2 = 0;
149+
while (alonePos > 0) {
150+
int i = alone[--alonePos];
151+
if (t2count[i] <= 0) {
152+
continue;
153+
}
154+
if (t2count[i] != 1) {
155+
throw new AssertionError();
156+
}
157+
--t2count[i];
158+
count++;
159+
long k = t2[i];
160+
for (int hi = 0; hi < HASHES; hi++) {
161+
int h = getHash(segmentLengthBits, segmentLength, segmentCount, k, seed, hi);
162+
int newCount = --t2count[h];
163+
if (h == i) {
164+
// ignore
165+
} else {
166+
if (newCount == 1) {
167+
alone2[alonePos2++] = h;
168+
}
169+
t2[h] ^= k;
170+
}
171+
}
172+
}
173+
System.arraycopy(alone2, 0, alone, 0, alonePos2);
174+
alonePos = alonePos2;
175+
176+
}
177+
System.out.println("SUCCESS: levels=" + levels);
178+
return new int[0];
179+
}
180+
181+
private static int getHash(int segmentLengthBits, int segmentLength, int segmentCount, long key, long seed, int index) {
182+
if (BPZ) {
183+
long hash = Hash.hash64(key, seed + index);
184+
return index * segmentLength + Hash.reduce((int) hash, segmentLength);
185+
}
186+
if (FUSE) {
187+
long hash = Hash.hash64(key, seed);
188+
int seg = Hash.reduce((int) hash, segmentCount);
189+
long hh = (hash ^ (hash >>> 32));
190+
int h0 = (seg + 0) * segmentLength + (int) ((hh >> (0 * segmentLengthBits)) & (segmentLength - 1));
191+
int h1 = (seg + 1) * segmentLength + (int) ((hh >> (1 * segmentLengthBits)) & (segmentLength - 1));
192+
int h2 = (seg + 2) * segmentLength + (int) ((hh >> (2 * segmentLengthBits)) & (segmentLength - 1));
193+
return index == 0 ? h0 : index == 1 ? h1 : h2;
194+
}
195+
if (MIRROR) {
196+
long hash = Hash.hash64(key, seed);
197+
int r0 = (int) Hash.hash64(hash, 1);
198+
int x = Hash.reduce(r0, segmentCount * 2 + segmentLength - 1);
199+
int h0 = x + (int) (Hash.hash64(hash, 2) & (segmentLength - 1));
200+
int h1 = x + (int) (Hash.hash64(hash, 3) & (segmentLength - 1));
201+
int h2 = x + (int) (Hash.hash64(hash, 4) & (segmentLength - 1));
202+
h0 = Math.abs(h0 - segmentCount - segmentLength + 1);
203+
h1 = Math.abs(h1 - segmentCount - segmentLength + 1);
204+
h2 = Math.abs(h2 - segmentCount - segmentLength + 1);
205+
return index == 0 ? h0 : index == 1 ? h1 : h2;
206+
} else {
207+
long hash = Hash.hash64(key, seed);
208+
int r0 = (int) Hash.hash64(hash, 1);
209+
int x = Hash.reduce(r0, segmentCount);
210+
int h0 = x + (int) (Hash.hash64(hash, 2) & (segmentLength - 1));
211+
int h1 = x + (int) (Hash.hash64(hash, 3) & (segmentLength - 1));
212+
int h2 = x + (int) (Hash.hash64(hash, 4) & (segmentLength - 1));
213+
return index == 0 ? h0 : index == 1 ? h1 : h2;
214+
}
215+
}
216+
217+
static class Data {
218+
int size;
219+
double load;
220+
int segmentLength;
221+
double bitsPerKey;
222+
double p;
223+
int[] data;
224+
225+
public String toString() {
226+
return String.format(Locale.ENGLISH, "size %d load %.3f " +
227+
"segmentLength %d bits/key %.1f p %.2f"
228+
, size, load, segmentLength, bitsPerKey, p);
229+
}
230+
231+
}
232+
233+
}

0 commit comments

Comments
 (0)