Skip to content

Commit a32e775

Browse files
authored
Beta 18 (#60)
* init commit * Added an FDR estimator (--fdr) and improved the behvaior of --tune_indel
1 parent 75fb17a commit a32e775

File tree

5 files changed

+45
-20
lines changed

5 files changed

+45
-20
lines changed

src/cli.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,10 @@ void Settings::prepare_settings() {
100100
"Do not output BED or JSON annotation")
101101
->group("Output");
102102

103+
app.add_flag("--fdr", this->estimate_fdr,
104+
"Estimate the False Discovery rate (runtime will be twice as long)")
105+
->group("Output");
106+
103107
// *************
104108
// Mask options
105109
// *************
@@ -480,7 +484,7 @@ bool Settings::parse_input(int argc, const char **argv) {
480484
passed = false;
481485
}
482486

483-
if (this->tune_only || this->tune_medium || this->tune_large) {
487+
if (this->tune_only || this->tune_medium || this->tune_large || this->tune_indels) {
484488
this->tune = true;
485489
}
486490

src/cli.hpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
#ifndef ULTRA_CLI_HPP
66
#define ULTRA_CLI_HPP
77

8-
#define ULTRA_VERSION_STRING "1.0.0 (beta 17)"
8+
#define ULTRA_VERSION_STRING "1.0.0 (beta 18)"
99

1010

1111
#include "../lib/CLI11.hpp"
@@ -26,6 +26,7 @@ struct Settings {
2626
float p_value_loc = 4.27294921875;
2727
float p_value_scale = 1.8913602828979492;
2828
float p_value_freq = 0.000330256;
29+
bool estimate_fdr = false;
2930

3031
bool json = false;
3132
bool hide_seq = false;

src/main.cpp

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -133,10 +133,13 @@ int main(int argc, const char *argv[]) {
133133
}
134134

135135
// Perform the actual run
136+
137+
136138
auto *ultra = new Ultra(settings);
137139
ultra->AnalyzeFile();
138140
ultra->OutputRepeats(true);
139-
141+
unsigned long long true_coverage = ultra->Coverage();
142+
unsigned long long shuff_coverage = 0;
140143
// Check to see if we are making a masked file
141144
if (settings->produce_mask) {
142145
FILE *f = fopen(settings->mask_file.c_str(), "w");
@@ -146,6 +149,21 @@ int main(int argc, const char *argv[]) {
146149
}
147150

148151
delete ultra;
152+
if (settings->estimate_fdr) {
153+
settings->suppress_out = true;
154+
settings->hide_settings = true;
155+
settings->produce_mask = true;
156+
settings->no_split = true;
157+
settings->max_split = 0;
158+
settings->json = false;
159+
ultra = new Ultra(settings);
160+
ultra->shuffleSequence = true;
161+
ultra->AnalyzeFile();
162+
ultra->OutputRepeats(true);
163+
shuff_coverage = ultra->Coverage();
164+
float fdr = (float)shuff_coverage / (float)true_coverage;
165+
printf("Estimated false discovery rate: %g = (%llu / %llu)\n", fdr, shuff_coverage, true_coverage);
166+
}
149167

150168
return 0;
151169
}

src/ultra.cpp

Lines changed: 15 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -297,6 +297,20 @@ void Ultra::OutputULTRASettings() {
297297
}
298298

299299
void Ultra::OutputRepeat(RepeatRegion *r, bool isSubRep) {
300+
301+
if (r->readID > last_read_id) {
302+
last_rep_end = 0;
303+
}
304+
305+
auto rep_start = r->sequenceStart;
306+
if (rep_start < last_rep_end) {
307+
rep_start = last_rep_end;
308+
}
309+
310+
if (rep_start < r->sequenceStart + r->repeatLength) {
311+
total_coverage += (r->sequenceStart + r->repeatLength) - rep_start;
312+
}
313+
300314
if (!settings->suppress_out)
301315
writer->WriteRepeat(r);
302316
if (settings->produce_mask) {
@@ -316,23 +330,7 @@ void Ultra::SortRepeatRegions() {
316330
}
317331

318332
unsigned long long Ultra::Coverage() {
319-
unsigned long long coverage = 0;
320-
321-
// Itterate through each sequence and find the coverage for that sequence
322-
for (auto tup : this->masks_for_seq) {
323-
auto seq_id = tup.first;
324-
auto regions = tup.second;
325-
auto cleaned_regions = CleanedMasks(regions);
326-
327-
for (auto region : *cleaned_regions) {
328-
coverage += region.end - region.start;
329-
}
330-
331-
cleaned_regions->clear();
332-
delete cleaned_regions;
333-
334-
}
335-
return coverage;
333+
return total_coverage;
336334
}
337335

338336
Ultra::Ultra(Settings *s) {

src/ultra.hpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,10 @@ class Ultra {
6767
bool firstRepeat = true;
6868
unsigned long repeatBuffer = 2000;
6969

70+
unsigned long long total_coverage;
71+
int last_read_id = 0;
72+
unsigned long long last_rep_end = 0;
73+
7074
bool storeTraceAndSequence = false;
7175

7276

0 commit comments

Comments
 (0)