Skip to content

Commit 8dc7ea9

Browse files
committed
Add exact counting to count_unique
1 parent 42ff23e · commit 8dc7ea9

File tree

2 files changed

+34
-11
lines changed

2 files changed

+34
-11
lines changed

VERSION

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1 +1 @@
1-
0.4.9
1+
0.4.10

src/count_unique.cpp

Lines changed: 33 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -43,7 +43,8 @@ int usage(int exit_code) {
4343
" -s Use sparse representation for smaller cardinalities\n"
4444
" -t Test mode - print cardinalities regularily\n"
4545
" -y Show relative error along with cardinality estimates\n"
46-
" -e Use improved cardinality estimator by Otmar Ertl, too\n";
46+
" -e Use improved cardinality estimator by Otmar Ertl, too\n"
47+
" -E Use exact cardinality counting (implemented w/ unordered_set, not working w/ test mode)\n ";
4748
return exit_code;
4849
}
4950

@@ -119,6 +120,7 @@ int main(int argc, char **argv) {
119120
bool test_mode = false;
120121
bool heule_too = true;
121122
bool ertl_too = false;
123+
bool exact_counting = false;
122124
bool flajolet_too = false;
123125
bool show_rel_error = false;
124126
bool use_stdin = true;
@@ -127,11 +129,12 @@ int main(int argc, char **argv) {
127129

128130
int c;
129131

130-
while ((c = getopt (argc, argv, "shtep:r:yx:f")) != -1)
132+
while ((c = getopt (argc, argv, "shtep:r:yx:fE")) != -1)
131133
switch (c) {
132134
case 's': sparse = true; break;
133135
case 't': test_mode = true; break;
134136
case 'e': ertl_too = true; break;
137+
case 'E': exact_counting = true; break;
135138
case 'f': flajolet_too = true; break;
136139
case 'y': show_rel_error = true; break;
137140
case 'p': p = stoi(optarg); break;
@@ -157,7 +160,7 @@ int main(int argc, char **argv) {
157160
HyperLogLogPlusMinus<uint64_t> hll(p, sparse); // unique k-mer count per taxon
158161
//HyperLogLogPlusMinus<uint64_t> hll(p, sparse, wang_mixer); // unique k-mer count per taxon
159162

160-
if (test_mode) {
163+
if (test_mode && ! exact_counting) {
161164
cout << "observed\testimate_heule";
162165
if (flajolet_too) {
163166
cout << "\testimate_flajolet";
@@ -178,14 +181,24 @@ int main(int argc, char **argv) {
178181
cout << '\n';
179182
}
180183
uint64_t ctr = 0;
181-
184+
unordered_set<uint64_t> exact_counter;
185+
182186
if (use_stdin) {
183187
uint64_t nr;
184188
while (cin >> nr) {
185-
add_to_hll(hll, nr, ctr, test_mode, show_rel_error, heule_too, flajolet_too, ertl_too);
189+
if (exact_counting) {
190+
exact_counter.insert(nr);
191+
} else {
192+
add_to_hll(hll, nr, ctr, test_mode, show_rel_error, heule_too, flajolet_too, ertl_too);
193+
}
194+
}
195+
if (!test_mode) {
196+
if (exact_counting) {
197+
cout << exact_counter.size() << "\n";
198+
} else {
199+
print_card(hll, ctr, show_rel_error, heule_too, flajolet_too, ertl_too);
200+
}
186201
}
187-
if (!test_mode)
188-
print_card(hll, ctr, show_rel_error, heule_too, flajolet_too, ertl_too);
189202
} else {
190203
// get random seed from random_device RNG
191204
std::random_device rd;
@@ -197,11 +210,21 @@ int main(int argc, char **argv) {
197210
for (size_t j = 0; j < n_redo; ++j) {
198211

199212
for(size_t i = 0; i < n_rand; i++) {
200-
add_to_hll(hll, distr(rng), ctr, test_mode, show_rel_error, heule_too, flajolet_too, ertl_too);
213+
if (exact_counting) {
214+
exact_counter.insert(distr(rng));
215+
} else {
216+
add_to_hll(hll, distr(rng), ctr, test_mode, show_rel_error, heule_too, flajolet_too, ertl_too);
217+
}
218+
}
219+
if (!test_mode) {
220+
if (exact_counting) {
221+
cout << exact_counter.size() << "\n";
222+
} else {
223+
print_card(hll, ctr, show_rel_error, heule_too, flajolet_too, ertl_too);
224+
}
201225
}
202-
if (!test_mode)
203-
print_card(hll, ctr, show_rel_error, heule_too, flajolet_too, ertl_too);
204226
hll.reset();
227+
exact_counter.clear();
205228
ctr = 0;
206229
}
207230
}

0 commit comments

Comments
 (0)