@@ -24,60 +24,26 @@ static void printusage(char *command) {
2424 printf (" The -V flag verifies the resulting filter.\n " );
2525}
2626
27- int main (int argc, char **argv) {
28- int c;
29- size_t maxline =
30- 1000 * 1000 * 1000 ; // one billion lines ought to be more than enough?
31- const char *filtername = " xor8" ;
32- bool printall = false ;
33- bool verify = false ;
34- const char *outputfilename = " filter.bin" ;
35- while ((c = getopt (argc, argv, " af:ho:m:V" )) != -1 )
36- switch (c) {
37- case ' f' :
38- filtername = optarg;
39- break ;
40- case ' o' :
41- outputfilename = optarg;
42- break ;
43- case ' V' :
44- verify = true ;
45- break ;
46- case ' m' :
47- maxline = atoll (optarg);
48- printf (" setting the max. number of entries to %zu \n " , maxline);
49- break ;
50- case ' a' :
51- printall = true ;
52- break ;
53- case ' h' :
54- default :
55- printusage (argv[0 ]);
56- return 0 ;
57- }
58- if (optind >= argc) {
59- printusage (argv[0 ]);
60- return -1 ;
61- }
62- const char *filename = argv[optind];
6327
28+
29+ uint64_t * read_data (const char *filename, size_t & array_size, size_t maxline, bool printall) {
6430 char *line = NULL ;
6531 size_t line_capacity = 0 ;
6632 int read;
6733
6834 size_t array_capacity = 600 * 1024 * 1024 ;
6935 uint64_t *array = (uint64_t *)malloc (array_capacity * sizeof (uint64_t ));
7036 if (array == NULL ) {
71- printf (" Cannot allocate 5GB . Use a machine with plenty of RAM." );
72- return EXIT_FAILURE ;
37+ printf (" Cannot allocate memory . Use a machine with plenty of RAM." );
38+ return nullptr ;
7339 }
74- size_t array_size = 0 ;
40+ array_size = 0 ;
7541
7642 FILE *fp = fopen (filename, " r" );
7743 if (fp == NULL ) {
7844 printf (" Cannot read the input file %s." , filename);
7945 free (array);
80- return EXIT_FAILURE ;
46+ return nullptr ;
8147 }
8248 clock_t start = clock ();
8349
@@ -104,7 +70,7 @@ int main(int argc, char **argv) {
10470 uint64_t *newarray = (uint64_t *)realloc (array, array_capacity);
10571 if (newarray == NULL ) {
10672 printf (" Reallocation failed. Aborting.\n " );
107- return EXIT_FAILURE ;
73+ return nullptr ;
10874 }
10975 array = newarray;
11076 }
@@ -129,6 +95,69 @@ int main(int argc, char **argv) {
12995 printf (" \r I read %zu hashes in total (%.3f seconds).\n " , array_size,
13096 (float )(end - start) / CLOCKS_PER_SEC);
13197 printf (" Bytes read = %zu.\n " , numberofbytes);
98+ return array;
99+ }
100+
101+ int main (int argc, char **argv) {
102+ int c;
103+ size_t maxline =
104+ 1000 * 1000 * 1000 ; // one billion lines ought to be more than enough?
105+ const char *filtername = " xor8" ;
106+ bool printall = false ;
107+ bool verify = false ;
108+ bool synthetic = false ;
109+ size_t synthetic_size = 0 ;
110+
111+ const char *outputfilename = " filter.bin" ;
112+ while ((c = getopt (argc, argv, " af:ho:m:Vs:" )) != -1 )
113+ switch (c) {
114+ case ' f' :
115+ filtername = optarg;
116+ break ;
117+ case ' s' :
118+ synthetic = true ;
119+ synthetic_size = atoll (optarg);
120+ break ;
121+ case ' o' :
122+ outputfilename = optarg;
123+ break ;
124+ case ' V' :
125+ verify = true ;
126+ break ;
127+ case ' m' :
128+ maxline = atoll (optarg);
129+ printf (" setting the max. number of entries to %zu \n " , maxline);
130+ break ;
131+ case ' a' :
132+ printall = true ;
133+ break ;
134+ case ' h' :
135+ default :
136+ printusage (argv[0 ]);
137+ return 0 ;
138+ }
139+ if (optind >= argc) {
140+ printusage (argv[0 ]);
141+ return -1 ;
142+ }
143+ size_t array_size;
144+ uint64_t * array;
145+ if (synthetic) {
146+ array_size = synthetic_size;
147+ array = (uint64_t *)malloc (array_size * sizeof (uint64_t ));
148+ for (size_t i = 0 ; i < array_size; i++) {
149+ array[i] = i;
150+ }
151+ } else {
152+ const char *filename = argv[optind];
153+ array = read_data (filename, array_size, maxline, printall);
154+ if (array == nullptr ) {
155+ return EXIT_FAILURE;
156+ }
157+ }
158+ clock_t start, end;
159+
160+
132161 printf (" Constructing the filter...\n " );
133162 fflush (NULL );
134163 if (strcmp (" xor8" , filtername) == 0 ) {
@@ -147,6 +176,14 @@ int main(int argc, char **argv) {
147176 }
148177 }
149178 printf (" Verified with success: no false negatives\n " );
179+ size_t matches = 0 ;
180+ size_t volume = 100000 ;
181+ for (size_t t = 0 ; t < volume; t++) {
182+ if (xor8_contain ( t * 10001 + 13 + array_size,&filter)) {
183+ matches++;
184+ }
185+ }
186+ printf (" estimated false positive rate: %.3f percent\n " , matches * 100.0 / volume);
150187 }
151188 free (array);
152189
@@ -202,6 +239,14 @@ int main(int argc, char **argv) {
202239 }
203240 }
204241 printf (" Verified with success: no false negatives\n " );
242+ size_t matches = 0 ;
243+ size_t volume = 100000 ;
244+ for (size_t t = 0 ; t < volume; t++) {
245+ if (filter.Contain ( t * 10001 + 13 + array_size)) {
246+ matches++;
247+ }
248+ }
249+ printf (" estimated false positive rate: %.3f percent\n " , matches * 100.0 / volume);
205250 }
206251 free (array);
207252 FILE *write_ptr;
0 commit comments