@@ -43,7 +43,8 @@ int usage(int exit_code) {
4343" -s Use sparse representation for smaller cardinalities\n "
4444" -t Test mode - print cardinalities regularily\n "
4545" -y Show relative error along with cardinality estimates\n "
46- " -e Use improved cardinality estimator by Otmar Ertl, too\n " ;
46+ " -e Use improved cardinality estimator by Otmar Ertl, too\n "
47+ " -E Use exact cardinality counting (implemented w/ unordered_set, not working w/ test mode)\n " ;
4748 return exit_code;
4849 }
4950
@@ -119,6 +120,7 @@ int main(int argc, char **argv) {
119120 bool test_mode = false ;
120121 bool heule_too = true ;
121122 bool ertl_too = false ;
123+ bool exact_counting = false ;
122124 bool flajolet_too = false ;
123125 bool show_rel_error = false ;
124126 bool use_stdin = true ;
@@ -127,11 +129,12 @@ int main(int argc, char **argv) {
127129
128130 int c;
129131
130- while ((c = getopt (argc, argv, " shtep:r:yx:f " )) != -1 )
132+ while ((c = getopt (argc, argv, " shtep:r:yx:fE " )) != -1 )
131133 switch (c) {
132134 case ' s' : sparse = true ; break ;
133135 case ' t' : test_mode = true ; break ;
134136 case ' e' : ertl_too = true ; break ;
137+ case ' E' : exact_counting = true ; break ;
135138 case ' f' : flajolet_too = true ; break ;
136139 case ' y' : show_rel_error = true ; break ;
137140 case ' p' : p = stoi (optarg); break ;
@@ -157,7 +160,7 @@ int main(int argc, char **argv) {
157160 HyperLogLogPlusMinus<uint64_t > hll (p, sparse); // unique k-mer count per taxon
158161 // HyperLogLogPlusMinus<uint64_t> hll(p, sparse, wang_mixer); // unique k-mer count per taxon
159162
160- if (test_mode) {
163+ if (test_mode && ! exact_counting ) {
161164 cout << " observed\t estimate_heule" ;
162165 if (flajolet_too) {
163166 cout << " \t estimate_flajolet" ;
@@ -178,14 +181,24 @@ int main(int argc, char **argv) {
178181 cout << ' \n ' ;
179182 }
180183 uint64_t ctr = 0 ;
181-
184+ unordered_set<uint64_t > exact_counter;
185+
182186 if (use_stdin) {
183187 uint64_t nr;
184188 while (cin >> nr) {
185- add_to_hll (hll, nr, ctr, test_mode, show_rel_error, heule_too, flajolet_too, ertl_too);
189+ if (exact_counting) {
190+ exact_counter.insert (nr);
191+ } else {
192+ add_to_hll (hll, nr, ctr, test_mode, show_rel_error, heule_too, flajolet_too, ertl_too);
193+ }
194+ }
195+ if (!test_mode) {
196+ if (exact_counting) {
197+ cout << exact_counter.size () << " \n " ;
198+ } else {
199+ print_card (hll, ctr, show_rel_error, heule_too, flajolet_too, ertl_too);
200+ }
186201 }
187- if (!test_mode)
188- print_card (hll, ctr, show_rel_error, heule_too, flajolet_too, ertl_too);
189202 } else {
190203 // get random seed from random_device RNG
191204 std::random_device rd;
@@ -197,11 +210,21 @@ int main(int argc, char **argv) {
197210 for (size_t j = 0 ; j < n_redo; ++j) {
198211
199212 for (size_t i = 0 ; i < n_rand; i++) {
200- add_to_hll (hll, distr (rng), ctr, test_mode, show_rel_error, heule_too, flajolet_too, ertl_too);
213+ if (exact_counting) {
214+ exact_counter.insert (distr (rng));
215+ } else {
216+ add_to_hll (hll, distr (rng), ctr, test_mode, show_rel_error, heule_too, flajolet_too, ertl_too);
217+ }
218+ }
219+ if (!test_mode) {
220+ if (exact_counting) {
221+ cout << exact_counter.size () << " \n " ;
222+ } else {
223+ print_card (hll, ctr, show_rel_error, heule_too, flajolet_too, ertl_too);
224+ }
201225 }
202- if (!test_mode)
203- print_card (hll, ctr, show_rel_error, heule_too, flajolet_too, ertl_too);
204226 hll.reset ();
227+ exact_counter.clear ();
205228 ctr = 0 ;
206229 }
207230 }
0 commit comments