Skip to content

Commit d87d1ca

Browse files
committed
removed extra pop_size module; removed break statement
1 parent ca68143 commit d87d1ca

File tree

1 file changed

+0
-240
lines changed

1 file changed

+0
-240
lines changed

src/preseq.cpp

Lines changed: 0 additions & 240 deletions
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,6 @@ vector_median_and_ci(const vector<vector<double> > &bootstrap_estimates,
127127

128128
const size_t n_est = bootstrap_estimates.size();
129129
vector<double> estimates_row(n_est, 0.0);
130-
double prev_estimate = 0;
131130
for (size_t i = 0; i < bootstrap_estimates[0].size(); i++) {
132131

133132
// estimates is in wrong order, work locally on const val
@@ -139,14 +138,9 @@ vector_median_and_ci(const vector<vector<double> > &bootstrap_estimates,
139138
lower_ci_estimate, upper_ci_estimate);
140139
sort(begin(estimates_row), end(estimates_row));
141140

142-
if(median_estimate - prev_estimate < 1.0)
143-
break;
144-
145141
yield_estimates.push_back(median_estimate);
146142
lower_ci_lognorm.push_back(lower_ci_estimate);
147143
upper_ci_lognorm.push_back(upper_ci_estimate);
148-
149-
prev_estimate = median_estimate;
150144
}
151145
}
152146

@@ -781,240 +775,6 @@ lc_extrap(const int argc, const char **argv) {
781775
return EXIT_SUCCESS;
782776
}
783777

784-
////////////////////////////////////////////////////////////////////////
785-
// POP_SIZE
786-
static int
787-
pop_size(const int argc, const char **argv) {
788-
789-
try {
790-
791-
static const size_t min_required_counts = 4;
792-
static const string min_required_counts_error_message =
793-
"max count before zero is less than min required count (" +
794-
to_string(min_required_counts) + ") duplicates removed";
795-
796-
string outfile;
797-
798-
size_t orig_max_terms = 100;
799-
double max_extrap = 1.0e10;
800-
size_t n_bootstraps = 100;
801-
int diagonal = 0;
802-
double c_level = 0.95;
803-
unsigned long int seed = 408;
804-
805-
/* FLAGS */
806-
bool VERBOSE = false;
807-
bool VALS_INPUT = false;
808-
bool PAIRED_END = false;
809-
bool HIST_INPUT = false;
810-
bool SINGLE_ESTIMATE = false;
811-
bool allow_defects = false;
812-
813-
#ifdef HAVE_HTSLIB
814-
bool BAM_FORMAT_INPUT = false;
815-
size_t MAX_SEGMENT_LENGTH = 5000;
816-
#endif
817-
818-
const string description =
819-
"Needs writing";
820-
821-
/********** GET COMMAND LINE ARGUMENTS FOR LC EXTRAP ***********/
822-
OptionParser opt_parse(strip_path(argv[1]), description, "<input-file>");
823-
opt_parse.add_opt("output", 'o', "output file (default: stdout)",
824-
false , outfile);
825-
opt_parse.add_opt("extrap",'e',"maximum extrapolation", false, max_extrap);
826-
opt_parse.add_opt("boots",'n',"number of bootstraps", false, n_bootstraps);
827-
opt_parse.add_opt("cval", 'c', "level for confidence intervals", false, c_level);
828-
opt_parse.add_opt("terms",'x',"maximum terms in estimator", false, orig_max_terms);
829-
opt_parse.add_opt("verbose", 'v', "print more info", false, VERBOSE);
830-
#ifdef HAVE_HTSLIB
831-
opt_parse.add_opt("bam", 'B', "input is in BAM format",
832-
false, BAM_FORMAT_INPUT);
833-
opt_parse.add_opt("seg_len", 'l', "maximum segment length when merging "
834-
"paired end bam reads",
835-
false, MAX_SEGMENT_LENGTH);
836-
#endif
837-
opt_parse.add_opt("pe", 'P', "input is paired end read file",
838-
false, PAIRED_END);
839-
opt_parse.add_opt("vals", 'V',
840-
"input is a text file containing only the observed counts",
841-
false, VALS_INPUT);
842-
opt_parse.add_opt("hist", 'H',
843-
"input is a text file containing the observed histogram",
844-
false, HIST_INPUT);
845-
opt_parse.add_opt("quick", 'Q',
846-
"quick mode (no bootstraps) for confidence intervals",
847-
false, SINGLE_ESTIMATE);
848-
opt_parse.add_opt("defects", 'D', "no testing for defects", false, allow_defects);
849-
opt_parse.add_opt("seed", 'r', "seed for random number generator",
850-
false, seed);
851-
opt_parse.set_show_defaults();
852-
vector<string> leftover_args;
853-
854-
// ADS: suspect bug below; "-about" isn't working.
855-
opt_parse.parse(argc-1, argv+1, leftover_args);
856-
if (argc == 2 || opt_parse.help_requested()) {
857-
cerr << opt_parse.help_message() << endl;
858-
return EXIT_SUCCESS;
859-
}
860-
if (opt_parse.about_requested()) {
861-
cerr << opt_parse.about_message() << endl;
862-
return EXIT_SUCCESS;
863-
}
864-
if (opt_parse.option_missing()) {
865-
cerr << opt_parse.option_missing_message() << endl;
866-
return EXIT_SUCCESS;
867-
}
868-
if (leftover_args.empty()) {
869-
cerr << opt_parse.help_message() << endl;
870-
return EXIT_SUCCESS;
871-
}
872-
const string input_file_name = leftover_args.front();
873-
/******************************************************************/
874-
875-
throw runtime_error("NOT YET IMPLEMENTED");
876-
877-
vector<double> counts_hist;
878-
size_t n_reads = 0;
879-
880-
/************ loading input ***************************************/
881-
if (HIST_INPUT) {
882-
if (VERBOSE)
883-
cerr << "HIST_INPUT" << endl;
884-
n_reads = load_histogram(input_file_name, counts_hist);
885-
}
886-
else if (VALS_INPUT) {
887-
if (VERBOSE)
888-
cerr << "VALS_INPUT" << endl;
889-
n_reads = load_counts(input_file_name, counts_hist);
890-
}
891-
#ifdef HAVE_HTSLIB
892-
else if (BAM_FORMAT_INPUT && PAIRED_END) {
893-
if (VERBOSE)
894-
cerr << "PAIRED_END_BAM_INPUT" << endl;
895-
const size_t MAX_READS_TO_HOLD = 5000000;
896-
size_t n_paired = 0;
897-
size_t n_mates = 0;
898-
n_reads = load_counts_BAM_pe(VERBOSE, input_file_name,
899-
MAX_SEGMENT_LENGTH,
900-
MAX_READS_TO_HOLD, n_paired,
901-
n_mates, counts_hist);
902-
if (VERBOSE) {
903-
cerr << "MERGED PAIRED END READS = " << n_paired << endl;
904-
cerr << "MATES PROCESSED = " << n_mates << endl;
905-
}
906-
}
907-
else if (BAM_FORMAT_INPUT) {
908-
if (VERBOSE)
909-
cerr << "BAM_INPUT" << endl;
910-
n_reads = load_counts_BAM_se(input_file_name, counts_hist);
911-
}
912-
#endif
913-
else if (PAIRED_END) {
914-
if (VERBOSE)
915-
cerr << "PAIRED_END_BED_INPUT" << endl;
916-
n_reads = load_counts_BED_pe(input_file_name, counts_hist);
917-
}
918-
else { // default is single end bed file
919-
if (VERBOSE)
920-
cerr << "BED_INPUT" << endl;
921-
n_reads = load_counts_BED_se(input_file_name, counts_hist);
922-
}
923-
/************ done loading input **********************************/
924-
925-
const size_t max_observed_count = counts_hist.size() - 1;
926-
const double distinct_reads =
927-
accumulate(begin(counts_hist), end(counts_hist), 0.0);
928-
929-
// ENSURE THAT THE MAX TERMS ARE ACCEPTABLE
930-
size_t first_zero = 1;
931-
while (first_zero < counts_hist.size() && counts_hist[first_zero] > 0)
932-
++first_zero;
933-
934-
orig_max_terms = std::min(orig_max_terms, first_zero - 1);
935-
orig_max_terms = orig_max_terms - (orig_max_terms % 2 == 1);
936-
937-
const size_t distinct_counts =
938-
std::count_if(begin(counts_hist), end(counts_hist),
939-
[](const double x) {return x > 0.0;});
940-
941-
if (VERBOSE)
942-
cerr << "TOTAL READS = " << n_reads << endl
943-
<< "DISTINCT READS = " << distinct_reads << endl
944-
<< "DISTINCT COUNTS = " << distinct_counts << endl
945-
<< "MAX COUNT = " << max_observed_count << endl
946-
<< "COUNTS OF 1 = " << counts_hist[1] << endl
947-
<< "MAX TERMS = " << orig_max_terms << endl;
948-
949-
// ADS: step size to be set based on the original number of
950-
// observations
951-
const double step_size = n_reads;
952-
953-
if (VERBOSE) {
954-
// OUTPUT THE ORIGINAL HISTOGRAM
955-
cerr << "OBSERVED COUNTS (" << counts_hist.size() << ")" << endl;
956-
for (size_t i = 0; i < counts_hist.size(); i++)
957-
if (counts_hist[i] > 0)
958-
cerr << i << '\t' << static_cast<size_t>(counts_hist[i]) << endl;
959-
cerr << endl;
960-
}
961-
962-
// check to make sure library is not overly saturated
963-
const double two_fold_extrap = GoodToulmin2xExtrap(counts_hist);
964-
if (two_fold_extrap < 0.0)
965-
throw runtime_error("Saturation expected at double initial sample size."
966-
" Unable to extrapolate");
967-
968-
// const size_t total_reads = get_counts_from_hist(counts_hist);
969-
970-
//assert(total_reads == n_reads); // ADS: why commented out?
971-
972-
// check that min required count is satisfied
973-
if (orig_max_terms < min_required_counts)
974-
throw runtime_error(min_required_counts_error_message);
975-
976-
if (VERBOSE)
977-
cerr << "[ESTIMATING YIELD CURVE]" << endl;
978-
vector<double> yield_estimates;
979-
980-
if (VERBOSE)
981-
cerr << "[BOOTSTRAPPING HISTOGRAM]" << endl;
982-
983-
const size_t max_iter = 100*n_bootstraps;
984-
985-
vector<vector <double> > bootstrap_estimates;
986-
extrap_bootstrap(VERBOSE, allow_defects, seed, counts_hist, n_bootstraps,
987-
orig_max_terms, diagonal, step_size, max_extrap,
988-
max_iter, bootstrap_estimates);
989-
990-
if (VERBOSE)
991-
cerr << "[COMPUTING CONFIDENCE INTERVALS]" << endl;
992-
// yield ci
993-
vector<double> yield_upper_ci_lognorm, yield_lower_ci_lognorm;
994-
995-
vector_median_and_ci(bootstrap_estimates, c_level, yield_estimates,
996-
yield_lower_ci_lognorm, yield_upper_ci_lognorm);
997-
998-
/////////////////////////////////////////////////////////////////////
999-
if (VERBOSE)
1000-
cerr << "[WRITING OUTPUT]" << endl;
1001-
1002-
write_predicted_complexity_curve(outfile, c_level, step_size,
1003-
yield_estimates, yield_lower_ci_lognorm,
1004-
yield_upper_ci_lognorm);
1005-
}
1006-
catch (runtime_error &e) {
1007-
cerr << "ERROR:\t" << e.what() << endl;
1008-
return EXIT_FAILURE;
1009-
}
1010-
catch (std::bad_alloc &ba) {
1011-
cerr << "ERROR: could not allocate memory" << endl;
1012-
return EXIT_FAILURE;
1013-
}
1014-
return EXIT_SUCCESS;
1015-
}
1016-
1017-
1018778
///////////////////////////////////////////////////////////////////////
1019779
//////////////////////////////////////////////////////////////////////
1020780
///// GC_EXTRAP: predicting genomic coverage

0 commit comments

Comments
 (0)