Skip to content

Commit ea53455

Browse files
committed
Adding the ability to run tests.
1 parent 6a22f5a commit ea53455

File tree

1 file changed

+86
-41
lines changed

1 file changed

+86
-41
lines changed

src/build_filter.cpp

Lines changed: 86 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -24,60 +24,26 @@ static void printusage(char *command) {
2424
printf("The -V flag verifies the resulting filter.\n");
2525
}
2626

27-
int main(int argc, char **argv) {
28-
int c;
29-
size_t maxline =
30-
1000 * 1000 * 1000; // one billion lines ought to be more than enough?
31-
const char *filtername = "xor8";
32-
bool printall = false;
33-
bool verify = false;
34-
const char *outputfilename = "filter.bin";
35-
while ((c = getopt(argc, argv, "af:ho:m:V")) != -1)
36-
switch (c) {
37-
case 'f':
38-
filtername = optarg;
39-
break;
40-
case 'o':
41-
outputfilename = optarg;
42-
break;
43-
case 'V':
44-
verify = true;
45-
break;
46-
case 'm':
47-
maxline = atoll(optarg);
48-
printf("setting the max. number of entries to %zu \n", maxline);
49-
break;
50-
case 'a':
51-
printall = true;
52-
break;
53-
case 'h':
54-
default:
55-
printusage(argv[0]);
56-
return 0;
57-
}
58-
if (optind >= argc) {
59-
printusage(argv[0]);
60-
return -1;
61-
}
62-
const char *filename = argv[optind];
6327

28+
29+
uint64_t * read_data(const char *filename, size_t & array_size, size_t maxline, bool printall) {
6430
char *line = NULL;
6531
size_t line_capacity = 0;
6632
int read;
6733

6834
size_t array_capacity = 600 * 1024 * 1024;
6935
uint64_t *array = (uint64_t *)malloc(array_capacity * sizeof(uint64_t));
7036
if (array == NULL) {
71-
printf("Cannot allocate 5GB. Use a machine with plenty of RAM.");
72-
return EXIT_FAILURE;
37+
printf("Cannot allocate memory. Use a machine with plenty of RAM.");
38+
return nullptr;
7339
}
74-
size_t array_size = 0;
40+
array_size = 0;
7541

7642
FILE *fp = fopen(filename, "r");
7743
if (fp == NULL) {
7844
printf("Cannot read the input file %s.", filename);
7945
free(array);
80-
return EXIT_FAILURE;
46+
return nullptr;
8147
}
8248
clock_t start = clock();
8349

@@ -104,7 +70,7 @@ int main(int argc, char **argv) {
10470
uint64_t *newarray = (uint64_t *)realloc(array, array_capacity);
10571
if (newarray == NULL) {
10672
printf("Reallocation failed. Aborting.\n");
107-
return EXIT_FAILURE;
73+
return nullptr;
10874
}
10975
array = newarray;
11076
}
@@ -129,6 +95,69 @@ int main(int argc, char **argv) {
12995
printf("\rI read %zu hashes in total (%.3f seconds).\n", array_size,
13096
(float)(end - start) / CLOCKS_PER_SEC);
13197
printf("Bytes read = %zu.\n", numberofbytes);
98+
return array;
99+
}
100+
101+
int main(int argc, char **argv) {
102+
int c;
103+
size_t maxline =
104+
1000 * 1000 * 1000; // one billion lines ought to be more than enough?
105+
const char *filtername = "xor8";
106+
bool printall = false;
107+
bool verify = false;
108+
bool synthetic = false;
109+
size_t synthetic_size = 0;
110+
111+
const char *outputfilename = "filter.bin";
112+
while ((c = getopt(argc, argv, "af:ho:m:Vs:")) != -1)
113+
switch (c) {
114+
case 'f':
115+
filtername = optarg;
116+
break;
117+
case 's':
118+
synthetic = true;
119+
synthetic_size = atoll(optarg);
120+
break;
121+
case 'o':
122+
outputfilename = optarg;
123+
break;
124+
case 'V':
125+
verify = true;
126+
break;
127+
case 'm':
128+
maxline = atoll(optarg);
129+
printf("setting the max. number of entries to %zu \n", maxline);
130+
break;
131+
case 'a':
132+
printall = true;
133+
break;
134+
case 'h':
135+
default:
136+
printusage(argv[0]);
137+
return 0;
138+
}
139+
if (optind >= argc) {
140+
printusage(argv[0]);
141+
return -1;
142+
}
143+
size_t array_size;
144+
uint64_t * array;
145+
if(synthetic) {
146+
array_size = synthetic_size;
147+
array = (uint64_t *)malloc(array_size * sizeof(uint64_t));
148+
for(size_t i = 0; i < array_size; i++) {
149+
array[i] = i;
150+
}
151+
} else {
152+
const char *filename = argv[optind];
153+
array = read_data(filename, array_size, maxline, printall);
154+
if(array == nullptr) {
155+
return EXIT_FAILURE;
156+
}
157+
}
158+
clock_t start, end;
159+
160+
132161
printf("Constructing the filter...\n");
133162
fflush(NULL);
134163
if (strcmp("xor8", filtername) == 0) {
@@ -147,6 +176,14 @@ int main(int argc, char **argv) {
147176
}
148177
}
149178
printf("Verified with success: no false negatives\n");
179+
size_t matches = 0;
180+
size_t volume = 100000;
181+
for(size_t t = 0; t < volume; t++) {
182+
if(xor8_contain( t * 10001 + 13 + array_size,&filter)) {
183+
matches++;
184+
}
185+
}
186+
printf("estimated false positive rate: %.3f percent\n", matches * 100.0 / volume);
150187
}
151188
free(array);
152189

@@ -202,6 +239,14 @@ int main(int argc, char **argv) {
202239
}
203240
}
204241
printf("Verified with success: no false negatives\n");
242+
size_t matches = 0;
243+
size_t volume = 100000;
244+
for(size_t t = 0; t < volume; t++) {
245+
if(filter.Contain( t * 10001 + 13 + array_size)) {
246+
matches++;
247+
}
248+
}
249+
printf("estimated false positive rate: %.3f percent\n", matches * 100.0 / volume);
205250
}
206251
free(array);
207252
FILE *write_ptr;

0 commit comments

Comments
 (0)