Skip to content

Commit 85ec0a5

Browse files
committed
Solving issue 2 #2
1 parent 8aa3c24 commit 85ec0a5

File tree

3 files changed

+59
-4
lines changed

3 files changed

+59
-4
lines changed

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ dependencies/xor_singleheader/include/xorfilter.h:
77
git submodule update --init --recursive
88

99
query_filter: src/query_filter.cpp src/hexutil.h dependencies/xor_singleheader/include/xorfilter.h
10-
c++ -O3 -o query_filter src/query_filter.cpp -Wall -std=c++11 -Idependencies
10+
c++ -O3 -o query_filter src/query_filter.cpp -Wall -std=c++11 -Idependencies/fastfilter_cpp/src -Idependencies
1111

1212
build_filter: src/build_filter.cpp dependencies/fastfilter_cpp/src/xorfilter.h dependencies/fastfilter_cpp/src/xorfilter_plus.h src/hexutil.h dependencies/xor_singleheader/include/xorfilter.h
1313
c++ -O3 -o build_filter src/build_filter.cpp -std=c++11 -Wall -Idependencies/fastfilter_cpp/src -Idependencies

src/build_filter.cpp

Lines changed: 47 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,15 @@
1313
#include "hexutil.h"
1414
#include "xorfilter.h"
1515
#include "xor_singleheader/include/xorfilter.h"
16+
#include "mappeablebloomfilter.h"
17+
1618

1719
// Prints a short usage message to standard output.
//
// command: the program name inserted into the example invocation
//          (callers typically pass argv[0]).
static void printusage(char *command) {
  // Example invocation showing the -f (filter type) and -o (output file) flags.
  printf(" Try %s -f xor8 -o filter.bin mydatabase \n", command);
  printf("The supported filters are xor8 and bloom12.\n");

  printf("The -V flag verifies the resulting filter.\n");
}
2226

2327
int main(int argc, char **argv) {
@@ -26,15 +30,19 @@ int main(int argc, char **argv) {
2630
1000 * 1000 * 1000; // one billion lines ought to be more than enough?
2731
const char *filtername = "xor8";
2832
bool printall = false;
33+
bool verify = false;
2934
const char *outputfilename = "filter.bin";
30-
while ((c = getopt(argc, argv, "af:ho:m:")) != -1)
35+
while ((c = getopt(argc, argv, "af:ho:m:V")) != -1)
3136
switch (c) {
3237
case 'f':
3338
filtername = optarg;
3439
break;
3540
case 'o':
3641
outputfilename = optarg;
3742
break;
43+
case 'V':
44+
verify = true;
45+
break;
3846
case 'm':
3947
maxline = atoll(optarg);
4048
printf("setting the max. number of entries to %zu \n", maxline);
@@ -130,6 +138,16 @@ int main(int argc, char **argv) {
130138
xor8_buffered_populate(array, array_size, &filter);
131139
end = clock();
132140
printf("Done in %.3f seconds.\n", (float)(end - start) / CLOCKS_PER_SEC);
141+
if(verify) {
142+
printf("Checking for false negatives\n");
143+
for(size_t i = 0; i < array_size; i++) {
144+
if(!xor8_contain(array[i],&filter)) {
145+
printf("Detected a false negative. You probably have a bug. Aborting.\n");
146+
return EXIT_FAILURE;
147+
}
148+
}
149+
printf("Verified with success: no false negatives\n");
150+
}
133151
free(array);
134152

135153
FILE *write_ptr;
@@ -165,6 +183,16 @@ int main(int argc, char **argv) {
165183
table.AddAll(array, 0, array_size);
166184
end = clock();
167185
printf("Done in %.3f seconds.\n", (float)(end - start) / CLOCKS_PER_SEC);
186+
if(verify) {
187+
printf("Checking for false negatives\n");
188+
for(size_t i = 0; i < array_size; i++) {
189+
if(table.Contain(array[i]) != xorfilter::Ok) {
190+
printf("Detected a false negative. You probably have a bug. Aborting.\n");
191+
return EXIT_FAILURE;
192+
}
193+
}
194+
printf("Verified with success: no false negatives\n");
195+
}
168196
free(array);
169197

170198
FILE *write_ptr;
@@ -199,6 +227,24 @@ int main(int argc, char **argv) {
199227
table.AddAll(array, 0, array_size);
200228
end = clock();
201229
printf("Done in %.3f seconds.\n", (float)(end - start) / CLOCKS_PER_SEC);
230+
if(verify) {
231+
printf("Checking for false negatives\n");
232+
for(size_t i = 0; i < array_size; i++) {
233+
if(table.Contain(array[i]) != bloomfilter::Ok) {
234+
printf("Detected a false negative. You probably have a bug. Aborting.\n");
235+
return EXIT_FAILURE;
236+
}
237+
}
238+
MappeableBloomFilter<12> filter(
239+
table.SizeInBytes() / 8, table.hasher.seed, table.data);
240+
for(size_t i = 0; i < array_size; i++) {
241+
if(!filter.Contain(array[i])) {
242+
printf("Detected a false negative. You probably have a bug. Aborting.\n");
243+
return EXIT_FAILURE;
244+
}
245+
}
246+
printf("Verified with success: no false negatives\n");
247+
}
202248
free(array);
203249
FILE *write_ptr;
204250
write_ptr = fopen(outputfilename, "wb");

src/mappeablebloomfilter.h

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,17 +3,26 @@
33
#include <inttypes.h>
44
#include <limits.h>
55
#include <sstream>
6+
#include <cmath>
7+
8+
// Returns the number of hash functions to use for a Bloom filter that spends
// `bitsPerItem` bits per stored key: bitsPerItem * ln(2), rounded to the
// nearest integer and never less than 1.
//
// Declared `inline` at namespace scope (rather than `static` inside an
// unnamed namespace) so that every translation unit including this header
// refers to the same function.
inline size_t getBestK(size_t bitsPerItem) {
  const int rounded = static_cast<int>(
      std::round(static_cast<double>(bitsPerItem) * std::log(2.0)));
  // Clamp so that tiny budgets (0 or 1 bit per item) still use one hash.
  return static_cast<size_t>(rounded < 1 ? 1 : rounded);
}
13+
614
// Returns a 64-bit word with exactly one bit set, at position (index mod 64).
// The shifted operand is an explicit uint64_t so the shift is well defined
// for every position 0..63, including on platforms where `long` is 32 bits.
inline uint64_t getBit(uint32_t index) { return uint64_t{1} << (index & 63); }
715

8-
template <int k> class MappeableBloomFilter {
16+
template <int bitsPerItem> class MappeableBloomFilter {
917
public:
1018
size_t arrayLength;
1119
const uint64_t *data;
1220
MixSplit hasher;
21+
int k;
1322

1423
explicit MappeableBloomFilter(const size_t arrayLength, const uint64_t seed,
1524
const uint64_t *fps)
16-
: arrayLength(arrayLength), data(fps), hasher(seed) {}
25+
: arrayLength(arrayLength), data(fps), hasher(seed), k(getBestK(bitsPerItem)) {}
1726

1827
// Report if the item is inserted, with false positive rate.
1928
bool Contain(const uint64_t key) const {

0 commit comments

Comments
 (0)