Skip to content

Commit e57d3e3

Browse files
committed
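Minor tweaks: reformat whitespace and line wrapping, reorder includes, and change the binary fuse loader in query_filter.cpp to take Seed from the existing `seed` variable (it no longer reads Seed or SegmentLengthMask from the file).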
1 parent 846e10f commit e57d3e3

File tree

2 files changed

+93
-75
lines changed

2 files changed

+93
-75
lines changed

src/build_filter.cpp

Lines changed: 57 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -11,13 +11,12 @@
1111

1212
#include "bloom/bloom.h"
1313
#include "hexutil.h"
14-
#include "xorfilter/xorfilter.h"
15-
#include "xor_singleheader/include/xorfilter.h"
1614
#include "xor_singleheader/include/binaryfusefilter.h"
15+
#include "xor_singleheader/include/xorfilter.h"
16+
#include "xorfilter/xorfilter.h"
1717

1818
#include "mappeablebloomfilter.h"
1919

20-
2120
static void printusage(char *command) {
2221
printf(" Try %s -f xor8 -o filter.bin mydatabase \n", command);
2322
;
@@ -26,9 +25,8 @@ static void printusage(char *command) {
2625
printf("The -V flag verifies the resulting filter.\n");
2726
}
2827

29-
30-
31-
uint64_t * read_data(const char *filename, size_t & array_size, size_t maxline, bool printall) {
28+
uint64_t *read_data(const char *filename, size_t &array_size, size_t maxline,
29+
bool printall) {
3230
char *line = NULL;
3331
size_t line_capacity = 0;
3432
int read;
@@ -140,11 +138,11 @@ int main(int argc, char **argv) {
140138
}
141139

142140
size_t array_size;
143-
uint64_t * array;
144-
if(synthetic) {
141+
uint64_t *array;
142+
if (synthetic) {
145143
array_size = synthetic_size;
146144
array = (uint64_t *)malloc(array_size * sizeof(uint64_t));
147-
for(size_t i = 0; i < array_size; i++) {
145+
for (size_t i = 0; i < array_size; i++) {
148146
array[i] = i;
149147
}
150148
} else {
@@ -154,13 +152,12 @@ int main(int argc, char **argv) {
154152
}
155153
const char *filename = argv[optind];
156154
array = read_data(filename, array_size, maxline, printall);
157-
if(array == nullptr) {
155+
if (array == nullptr) {
158156
return EXIT_FAILURE;
159157
}
160158
}
161159
clock_t start, end;
162160

163-
164161
printf("Constructing the filter...\n");
165162
fflush(NULL);
166163
if (strcmp("binaryfuse8", filtername) == 0) {
@@ -170,23 +167,25 @@ int main(int argc, char **argv) {
170167
binary_fuse8_populate(array, array_size, &filter);
171168
end = clock();
172169
printf("Done in %.3f seconds.\n", (float)(end - start) / CLOCKS_PER_SEC);
173-
if(verify) {
170+
if (verify) {
174171
printf("Checking for false negatives\n");
175-
for(size_t i = 0; i < array_size; i++) {
176-
if(!binary_fuse8_contain(array[i],&filter)) {
177-
printf("Detected a false negative. You probably have a bug. Aborting.\n");
172+
for (size_t i = 0; i < array_size; i++) {
173+
if (!binary_fuse8_contain(array[i], &filter)) {
174+
printf("Detected a false negative. You probably have a bug. "
175+
"Aborting.\n");
178176
return EXIT_FAILURE;
179177
}
180178
}
181179
printf("Verified with success: no false negatives\n");
182180
size_t matches = 0;
183181
size_t volume = 100000;
184-
for(size_t t = 0; t < volume; t++) {
185-
if(binary_fuse8_contain( t * 10001 + 13 + array_size,&filter)) {
182+
for (size_t t = 0; t < volume; t++) {
183+
if (binary_fuse8_contain(t * 10001 + 13 + array_size, &filter)) {
186184
matches++;
187185
}
188186
}
189-
printf("estimated false positive rate: %.3f percent\n", matches * 100.0 / volume);
187+
printf("estimated false positive rate: %.3f percent\n",
188+
matches * 100.0 / volume);
190189
}
191190
free(array);
192191

@@ -198,21 +197,24 @@ int main(int argc, char **argv) {
198197
}
199198
uint64_t cookie = 1234569;
200199
bool isok = true;
201-
size_t total_bytes = sizeof(cookie) + sizeof(filter.Seed) + sizeof(filter.SegmentLength)
202-
+ sizeof(filter.SegmentLengthMask) + sizeof(filter.SegmentCount)
203-
+ sizeof(filter.SegmentCountLength) + sizeof(filter.ArrayLength)
204-
+ sizeof(uint8_t) * filter.ArrayLength;
205-
200+
size_t total_bytes =
201+
sizeof(cookie) + sizeof(filter.Seed) + sizeof(filter.SegmentLength) +
202+
sizeof(filter.SegmentLengthMask) + sizeof(filter.SegmentCount) +
203+
sizeof(filter.SegmentCountLength) + sizeof(filter.ArrayLength) +
204+
sizeof(uint8_t) * filter.ArrayLength;
206205

207206
isok &= fwrite(&cookie, sizeof(cookie), 1, write_ptr);
208207
isok &= fwrite(&filter.Seed, sizeof(filter.Seed), 1, write_ptr);
209-
isok &= fwrite(&filter.SegmentLength, sizeof(filter.SegmentLength), 1, write_ptr);
210-
isok &= fwrite(&filter.SegmentLengthMask, sizeof(filter.SegmentLengthMask), 1, write_ptr);
211-
isok &= fwrite(&filter.SegmentCount, sizeof(filter.SegmentCount), 1, write_ptr);
212-
isok &= fwrite(&filter.SegmentCountLength, sizeof(filter.SegmentCountLength), 1, write_ptr);
213-
isok &= fwrite(&filter.ArrayLength, sizeof(filter.ArrayLength), 1, write_ptr);
214-
215-
208+
isok &= fwrite(&filter.SegmentLength, sizeof(filter.SegmentLength), 1,
209+
write_ptr);
210+
isok &= fwrite(&filter.SegmentLengthMask, sizeof(filter.SegmentLengthMask),
211+
1, write_ptr);
212+
isok &=
213+
fwrite(&filter.SegmentCount, sizeof(filter.SegmentCount), 1, write_ptr);
214+
isok &= fwrite(&filter.SegmentCountLength,
215+
sizeof(filter.SegmentCountLength), 1, write_ptr);
216+
isok &=
217+
fwrite(&filter.ArrayLength, sizeof(filter.ArrayLength), 1, write_ptr);
216218
isok &= fwrite(filter.Fingerprints, sizeof(uint8_t) * filter.ArrayLength, 1,
217219
write_ptr);
218220
isok &= (fclose(write_ptr) == 0);
@@ -231,23 +233,25 @@ int main(int argc, char **argv) {
231233
xor8_buffered_populate(array, array_size, &filter);
232234
end = clock();
233235
printf("Done in %.3f seconds.\n", (float)(end - start) / CLOCKS_PER_SEC);
234-
if(verify) {
236+
if (verify) {
235237
printf("Checking for false negatives\n");
236-
for(size_t i = 0; i < array_size; i++) {
237-
if(!xor8_contain(array[i],&filter)) {
238-
printf("Detected a false negative. You probably have a bug. Aborting.\n");
238+
for (size_t i = 0; i < array_size; i++) {
239+
if (!xor8_contain(array[i], &filter)) {
240+
printf("Detected a false negative. You probably have a bug. "
241+
"Aborting.\n");
239242
return EXIT_FAILURE;
240243
}
241244
}
242245
printf("Verified with success: no false negatives\n");
243246
size_t matches = 0;
244247
size_t volume = 100000;
245-
for(size_t t = 0; t < volume; t++) {
246-
if(xor8_contain( t * 10001 + 13 + array_size,&filter)) {
248+
for (size_t t = 0; t < volume; t++) {
249+
if (xor8_contain(t * 10001 + 13 + array_size, &filter)) {
247250
matches++;
248251
}
249252
}
250-
printf("estimated false positive rate: %.3f percent\n", matches * 100.0 / volume);
253+
printf("estimated false positive rate: %.3f percent\n",
254+
matches * 100.0 / volume);
251255
}
252256
free(array);
253257

@@ -281,36 +285,39 @@ int main(int argc, char **argv) {
281285
start = clock();
282286
using Table = bloomfilter::BloomFilter<uint64_t, 12, false, SimpleMixSplit>;
283287
Table table(array_size);
284-
for(size_t i = 0; i < array_size; i++) {
288+
for (size_t i = 0; i < array_size; i++) {
285289
table.Add(array[i]);
286290
}
287291
end = clock();
288292
printf("Done in %.3f seconds.\n", (float)(end - start) / CLOCKS_PER_SEC);
289-
if(verify) {
293+
if (verify) {
290294
printf("Checking for false negatives\n");
291-
for(size_t i = 0; i < array_size; i++) {
292-
if(table.Contain(array[i]) != bloomfilter::Ok) {
293-
printf("Detected a false negative. You probably have a bug. Aborting.\n");
295+
for (size_t i = 0; i < array_size; i++) {
296+
if (table.Contain(array[i]) != bloomfilter::Ok) {
297+
printf("Detected a false negative. You probably have a bug. "
298+
"Aborting.\n");
294299
return EXIT_FAILURE;
295300
}
296301
}
297-
MappeableBloomFilter<12> filter(
298-
table.SizeInBytes() / 8, table.hasher.seed, table.data);
299-
for(size_t i = 0; i < array_size; i++) {
300-
if(!filter.Contain(array[i])) {
301-
printf("Detected a false negative. You probably have a bug. Aborting.\n");
302+
MappeableBloomFilter<12> filter(table.SizeInBytes() / 8,
303+
table.hasher.seed, table.data);
304+
for (size_t i = 0; i < array_size; i++) {
305+
if (!filter.Contain(array[i])) {
306+
printf("Detected a false negative. You probably have a bug. "
307+
"Aborting.\n");
302308
return EXIT_FAILURE;
303309
}
304310
}
305311
printf("Verified with success: no false negatives\n");
306312
size_t matches = 0;
307313
size_t volume = 100000;
308-
for(size_t t = 0; t < volume; t++) {
309-
if(filter.Contain( t * 10001 + 13 + array_size)) {
314+
for (size_t t = 0; t < volume; t++) {
315+
if (filter.Contain(t * 10001 + 13 + array_size)) {
310316
matches++;
311317
}
312318
}
313-
printf("estimated false positive rate: %.3f percent\n", matches * 100.0 / volume);
319+
printf("estimated false positive rate: %.3f percent\n",
320+
matches * 100.0 / volume);
314321
}
315322
free(array);
316323
FILE *write_ptr;

src/query_filter.cpp

Lines changed: 36 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22
#include "hexutil.h"
33
#include "mappeablebloomfilter.h"
44
#include "sha.h"
5+
#include "xor_singleheader/include/binaryfusefilter.h"
6+
#include "xor_singleheader/include/xorfilter.h"
57
#include <fcntl.h>
68
#include <getopt.h>
79
#include <inttypes.h>
@@ -12,8 +14,6 @@
1214
#include <stdlib.h>
1315
#include <string.h>
1416
#include <sys/mman.h>
15-
#include "xor_singleheader/include/xorfilter.h"
16-
#include "xor_singleheader/include/binaryfusefilter.h"
1717

1818
static void printusage(char *command) {
1919
printf(" Try %s filter.bin 7C4A8D09CA3762AF \n", command);
@@ -47,7 +47,8 @@ int main(int argc, char **argv) {
4747
printf("We are going to hash your input.\n");
4848
sha1::SHA1 s;
4949
const char *tobehashed = argv[optind + 1];
50-
printf("hashing this word: %s (length in bytes = %zu)\n", tobehashed, strlen(tobehashed));
50+
printf("hashing this word: %s (length in bytes = %zu)\n", tobehashed,
51+
strlen(tobehashed));
5152
s.processBytes(tobehashed, strlen(tobehashed));
5253
uint32_t digest[5];
5354
s.getDigest(digest);
@@ -94,12 +95,16 @@ int main(int argc, char **argv) {
9495
printf("failed read.\n");
9596

9697
if (cookie != 1234569) {
97-
if(cookie == 1234567) {
98+
if (cookie == 1234567) {
9899
xor8 = true;
99-
if (fread(&BlockLength, sizeof(BlockLength), 1, fp) != 1) printf("failed read.\n");
100+
if (fread(&BlockLength, sizeof(BlockLength), 1, fp) != 1)
101+
printf("failed read.\n");
102+
100103
} else if (cookie == 1234567 + 1) {
101104
bloom12 = true;
102-
if (fread(&BlockLength, sizeof(BlockLength), 1, fp) != 1) printf("failed read.\n");
105+
if (fread(&BlockLength, sizeof(BlockLength), 1, fp) != 1)
106+
printf("failed read.\n");
107+
103108
} else {
104109
printf("Not a filter file.\n");
105110
return EXIT_FAILURE;
@@ -108,30 +113,34 @@ int main(int argc, char **argv) {
108113
size_t length = 0;
109114
binary_fuse8_t binfilter;
110115

111-
if(bloom12) {
116+
if (bloom12) {
112117
length = BlockLength * sizeof(uint64_t) + 3 * sizeof(uint64_t);
113-
} else if(xor8) {
114-
length = 3 * BlockLength * sizeof(uint8_t) + 3 * sizeof(uint64_t);
118+
} else if (xor8) {
119+
length = 3 * BlockLength * sizeof(uint8_t) + 3 * sizeof(uint64_t);
115120
} else {
116121
bool isok = true;
117-
isok &= fread(&binfilter.Seed, sizeof(binfilter.Seed), 1, fp);
118-
isok &= fread(&binfilter.SegmentLength, sizeof(binfilter.SegmentLength), 1, fp);
119-
isok &= fread(&binfilter.SegmentLengthMask, sizeof(binfilter.SegmentLengthMask), 1, fp);
120-
isok &= fread(&binfilter.SegmentCount, sizeof(binfilter.SegmentCount), 1, fp);
121-
isok &= fread(&binfilter.SegmentCountLength, sizeof(binfilter.SegmentCountLength), 1, fp);
122+
binfilter.Seed = seed;
123+
isok &=
124+
fread(&binfilter.SegmentLength, sizeof(binfilter.SegmentLength), 1, fp);
125+
isok &=
126+
fread(&binfilter.SegmentCount, sizeof(binfilter.SegmentCount), 1, fp);
127+
isok &= fread(&binfilter.SegmentCountLength,
128+
sizeof(binfilter.SegmentCountLength), 1, fp);
122129
isok &= fread(&binfilter.ArrayLength, sizeof(binfilter.ArrayLength), 1, fp);
123-
if (!isok) printf("failed read.\n");
124-
length = sizeof(cookie) + sizeof(binfilter.Seed) + sizeof(binfilter.SegmentLength)
125-
+ sizeof(binfilter.SegmentLengthMask) + sizeof(binfilter.SegmentCount)
126-
+ sizeof(binfilter.SegmentCountLength) + sizeof(binfilter.ArrayLength)
127-
+ sizeof(uint8_t) * binfilter.ArrayLength;
130+
if (!isok)
131+
printf("failed read.\n");
132+
length =
133+
sizeof(cookie) + sizeof(binfilter.Seed) +
134+
sizeof(binfilter.SegmentLength) + sizeof(binfilter.SegmentLengthMask) +
135+
sizeof(binfilter.SegmentCount) + sizeof(binfilter.SegmentCountLength) +
136+
sizeof(binfilter.ArrayLength) + sizeof(uint8_t) * binfilter.ArrayLength;
128137
}
129138

130139
if (bloom12)
131140
printf("Bloom filter detected.\n");
132141
else if (xor8)
133142
printf("Xor filter detected.\n");
134-
else
143+
else
135144
printf("Binary fuse filter detected.\n");
136145
fclose(fp);
137146
int fd = open(filename, O_RDONLY);
@@ -162,7 +171,7 @@ int main(int argc, char **argv) {
162171
} else {
163172
printf("Surely not in the set.\n");
164173
}
165-
} else if(xor8) {
174+
} else if (xor8) {
166175
xor8_t filter;
167176
filter.seed = seed;
168177
filter.blockLength = BlockLength;
@@ -173,14 +182,16 @@ int main(int argc, char **argv) {
173182
printf("Surely not in the set.\n");
174183
}
175184
} else {
176-
binfilter.Fingerprints = addr + sizeof(cookie) + sizeof(binfilter.Seed) + sizeof(binfilter.SegmentLength)
177-
+ sizeof(binfilter.SegmentLengthMask) + sizeof(binfilter.SegmentCount)
178-
+ sizeof(binfilter.SegmentCountLength) + sizeof(binfilter.ArrayLength);
185+
binfilter.Fingerprints =
186+
addr + sizeof(cookie) + sizeof(binfilter.Seed) +
187+
sizeof(binfilter.SegmentLength) + sizeof(binfilter.SegmentLengthMask) +
188+
sizeof(binfilter.SegmentCount) + sizeof(binfilter.SegmentCountLength) +
189+
sizeof(binfilter.ArrayLength);
179190
if (binary_fuse8_contain(hexval, &binfilter)) {
180191
printf("Probably in the set.\n");
181192
} else {
182193
printf("Surely not in the set.\n");
183-
}
194+
}
184195
}
185196
clock_t end = clock();
186197

0 commit comments

Comments
 (0)