Skip to content

Commit 2f5910f

Browse files
committed
Decent.
1 parent dd4637e commit 2f5910f

File tree

4 files changed

+142
-4
lines changed

4 files changed

+142
-4
lines changed

.github/workflows/ubuntu.yml

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
name: Ubuntu 22.04 Sanitized CI (GCC 11)
2+
3+
on:
4+
push:
5+
branches:
6+
- master
7+
pull_request:
8+
branches:
9+
- master
10+
11+
jobs:
12+
ubuntu-build:
13+
if: >-
14+
! contains(toJSON(github.event.commits.*.message), '[skip ci]') &&
15+
! contains(toJSON(github.event.commits.*.message), '[skip github]')
16+
runs-on: ubuntu-22.04
17+
strategy:
18+
matrix:
19+
include:
20+
- {shared: ON}
21+
- {shared: OFF}
22+
steps:
23+
- uses: actions/checkout@v3
24+
- name: Use make
25+
run: |
26+
make &&
27+
./build_filter -o filter.bin sample.txt &&
28+
./query_filter filter.bin AF8978B1797B72AC

README.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ Though the filter can use little memory (less than a GB), it seems unwarranted t
5757

5858
There are two executables:
5959

60-
- `build_filter` is the expensive program that parses the large text files containing password hashes.
60+
- `build_filter` is the expensive program that parses the large text files containing password hashes. If you want to check the filter at construction time, you can use the `-V` flag. You may also specify the filter type with the `-f` flag: `-f binaryfuse8`, `-f binaryfuse16`, `-f xor8`, `-f bloom12`.
6161
- `query_filter` is a simple program that takes a 64-bit hash in hexadecimal form (the first 16 hexadecimal characters of the SHA1 hash) and checks whether the hash is contained in our set.
6262

6363
Do
@@ -89,6 +89,7 @@ Expected number of queries per second: 17241.379
8989
```
9090
9191
92+
9293
## Performance comparisons
9394
9495
For a comparable false positive probability (about 0.3%), the Bloom filter uses more space

src/build_filter.cpp

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -236,6 +236,78 @@ int main(int argc, char **argv) {
236236
return EXIT_FAILURE;
237237
}
238238
binary_fuse8_free(&filter);
239+
} else if (strcmp("binaryfuse16", filtername) == 0) {
240+
start = clock();
241+
binary_fuse16_t filter;
242+
if(!binary_fuse16_allocate(array_size, &filter)) {
243+
printf("failed to allocate memory.\n");
244+
return EXIT_FAILURE;
245+
}
246+
if(!binary_fuse16_populate(array, array_size, &filter)){
247+
printf("failed to build the filter, do you have sufficient memory?\n");
248+
return EXIT_FAILURE;
249+
}
250+
end = clock();
251+
printf("Done in %.3f seconds.\n", (float)(end - start) / CLOCKS_PER_SEC);
252+
if (verify) {
253+
printf("Checking for false negatives\n");
254+
for (size_t i = 0; i < array_size; i++) {
255+
if (!binary_fuse16_contain(array[i], &filter)) {
256+
printf("Detected a false negative. You probably have a bug. "
257+
"Aborting.\n");
258+
return EXIT_FAILURE;
259+
}
260+
}
261+
printf("Verified with success: no false negatives\n");
262+
size_t matches = 0;
263+
size_t volume = 100000;
264+
for (size_t t = 0; t < volume; t++) {
265+
if (binary_fuse16_contain(t * 10001 + 13 + array_size, &filter)) {
266+
matches++;
267+
}
268+
}
269+
printf("estimated false positive rate: %.3f percent\n",
270+
matches * 100.0 / volume);
271+
}
272+
free(array);
273+
274+
FILE *write_ptr;
275+
write_ptr = fopen(outputfilename, "wb");
276+
if (write_ptr == NULL) {
277+
printf("Cannot write to the output file %s.", outputfilename);
278+
return EXIT_FAILURE;
279+
}
280+
uint64_t cookie = 1234570;
281+
bool isok = true;
282+
size_t total_bytes =
283+
sizeof(cookie) + sizeof(filter.Seed) + sizeof(filter.SegmentLength) +
284+
sizeof(filter.SegmentLengthMask) + sizeof(filter.SegmentCount) +
285+
sizeof(filter.SegmentCountLength) + sizeof(filter.ArrayLength) +
286+
sizeof(uint16_t) * filter.ArrayLength;
287+
288+
isok &= fwrite(&cookie, sizeof(cookie), 1, write_ptr);
289+
isok &= fwrite(&filter.Seed, sizeof(filter.Seed), 1, write_ptr);
290+
isok &= fwrite(&filter.SegmentLength, sizeof(filter.SegmentLength), 1,
291+
write_ptr);
292+
isok &= fwrite(&filter.SegmentLengthMask, sizeof(filter.SegmentLengthMask),
293+
1, write_ptr);
294+
isok &=
295+
fwrite(&filter.SegmentCount, sizeof(filter.SegmentCount), 1, write_ptr);
296+
isok &= fwrite(&filter.SegmentCountLength,
297+
sizeof(filter.SegmentCountLength), 1, write_ptr);
298+
isok &=
299+
fwrite(&filter.ArrayLength, sizeof(filter.ArrayLength), 1, write_ptr);
300+
isok &= fwrite(filter.Fingerprints, sizeof(uint16_t) * filter.ArrayLength, 1,
301+
write_ptr);
302+
isok &= (fclose(write_ptr) == 0);
303+
if (isok) {
304+
printf("filter data saved to %s. Total bytes = %zu. \n", outputfilename,
305+
total_bytes);
306+
} else {
307+
printf("failed to write filter data to %s.\n", outputfilename);
308+
return EXIT_FAILURE;
309+
}
310+
binary_fuse16_free(&filter);
239311
} else if (strcmp("xor8", filtername) == 0) {
240312
start = clock();
241313
xor8_t filter;

src/query_filter.cpp

Lines changed: 40 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -88,14 +88,17 @@ int main(int argc, char **argv) {
8888
return EXIT_FAILURE;
8989
}
9090
bool xor8 = false;
91+
bool bin16 = false;
9192
bool bloom12 = false;
9293
if (fread(&cookie, sizeof(cookie), 1, fp) != 1)
9394
printf("failed read.\n");
9495
if (fread(&seed, sizeof(seed), 1, fp) != 1)
9596
printf("failed read.\n");
9697

9798
if (cookie != 1234569) {
98-
if (cookie == 1234567) {
99+
if(cookie == 1234570) {
100+
bin16 = true;
101+
} else if (cookie == 1234567) {
99102
xor8 = true;
100103
if (fread(&BlockLength, sizeof(BlockLength), 1, fp) != 1)
101104
printf("failed read.\n");
@@ -106,17 +109,38 @@ int main(int argc, char **argv) {
106109
printf("failed read.\n");
107110

108111
} else {
109-
printf("Not a filter file.\n");
112+
printf("Not a filter file. Cookie found: %llu.\n", cookie);
110113
return EXIT_FAILURE;
111114
}
112115
}
113116
size_t length = 0;
114117
binary_fuse8_t binfilter;
118+
binary_fuse16_t binfilter16;
115119

116120
if (bloom12) {
117121
length = BlockLength * sizeof(uint64_t) + 3 * sizeof(uint64_t);
118122
} else if (xor8) {
119123
length = 3 * BlockLength * sizeof(uint8_t) + 3 * sizeof(uint64_t);
124+
} else if(bin16) {
125+
bool isok = true;
126+
binfilter16.Seed = seed;
127+
isok &=
128+
fread(&binfilter16.SegmentLength, sizeof(binfilter16.SegmentLength), 1, fp);
129+
isok &=
130+
fread(&binfilter16.SegmentLengthMask, sizeof(binfilter16.SegmentLengthMask), 1, fp);
131+
isok &=
132+
fread(&binfilter16.SegmentCount, sizeof(binfilter16.SegmentCount), 1, fp);
133+
isok &= fread(&binfilter16.SegmentCountLength,
134+
sizeof(binfilter16.SegmentCountLength), 1, fp);
135+
isok &= fread(&binfilter16.ArrayLength, sizeof(binfilter16.ArrayLength), 1, fp);
136+
if (!isok)
137+
printf("failed read.\n");
138+
length =
139+
sizeof(cookie) + sizeof(binfilter16.Seed) +
140+
sizeof(binfilter16.SegmentLength) + sizeof(binfilter16.SegmentLengthMask) +
141+
sizeof(binfilter16.SegmentCount) + sizeof(binfilter16.SegmentCountLength) +
142+
sizeof(binfilter16.ArrayLength) + sizeof(uint16_t) * binfilter16.ArrayLength;
143+
120144
} else {
121145
bool isok = true;
122146
binfilter.Seed = seed;
@@ -142,8 +166,10 @@ int main(int argc, char **argv) {
142166
printf("Bloom filter detected.\n");
143167
else if (xor8)
144168
printf("Xor filter detected.\n");
169+
else if(bin16)
170+
printf("16-bit binary fuse filter detected.\n");
145171
else
146-
printf("Binary fuse filter detected.\n");
172+
printf("8-bit binary fuse filter detected.\n");
147173
fclose(fp);
148174
int fd = open(filename, O_RDONLY);
149175
bool shared = false;
@@ -183,6 +209,17 @@ int main(int argc, char **argv) {
183209
} else {
184210
printf("Surely not in the set.\n");
185211
}
212+
} else if (bin16) {
213+
binfilter16.Fingerprints = reinterpret_cast<uint16_t*>(
214+
addr + sizeof(cookie) + sizeof(binfilter16.Seed) +
215+
sizeof(binfilter16.SegmentLength) + sizeof(binfilter16.SegmentLengthMask) +
216+
sizeof(binfilter16.SegmentCount) + sizeof(binfilter16.SegmentCountLength) +
217+
sizeof(binfilter16.ArrayLength));
218+
if (binary_fuse16_contain(hexval, &binfilter16)) {
219+
printf("Probably in the set.\n");
220+
} else {
221+
printf("Surely not in the set.\n");
222+
}
186223
} else {
187224
binfilter.Fingerprints =
188225
addr + sizeof(cookie) + sizeof(binfilter.Seed) +

0 commit comments

Comments
 (0)