@@ -68,7 +68,7 @@ get_counts_from_hist(const vector<T> &h) {
6868
6969template <typename T> T
7070median_from_sorted_vector (const vector<T> sorted_data,
71- const size_t stride, const size_t n) {
71+ const size_t stride, const size_t n) {
7272
7373 if (n == 0 || sorted_data.empty ()) return 0.0 ;
7474
@@ -81,19 +81,19 @@ median_from_sorted_vector (const vector<T> sorted_data,
8181}
8282
8383template <typename T> T
84- quantile_from_sorted_vector (const vector<T> sorted_data,
85- const size_t stride, const size_t n,
86- const double f) {
84+ quantile_from_sorted_vector (const vector<T> sorted_data,
85+ const size_t stride, const size_t n,
86+ const double f) {
8787 const double index = f * (n - 1 );
8888 const size_t lhs = (int )index;
8989 const double delta = index - lhs;
9090
9191 if (n == 0 || sorted_data.empty ()) return 0.0 ;
9292
9393 if (lhs == n - 1 ) return sorted_data[lhs * stride];
94-
95- return (1 - delta) * sorted_data[lhs * stride]
96- + delta * sorted_data[(lhs + 1 ) * stride];
94+
95+ return (1 - delta) * sorted_data[lhs * stride]
96+ + delta * sorted_data[(lhs + 1 ) * stride];
9797}
9898
9999// Confidence interval stuff
@@ -190,21 +190,21 @@ factorial (double x) {
190190 x -= 1.0 ;
191191
192192 vector<double > lanczos {
193- 0.99999999999980993227684700473478 ,
194- 676.520368121885098567009190444019 ,
195- -1259.13921672240287047156078755283 ,
196- 771.3234287776530788486528258894 ,
197- -176.61502916214059906584551354 ,
198- 12.507343278686904814458936853 ,
199- -0.13857109526572011689554707 ,
200- 9.984369578019570859563e-6 ,
201- 1.50563273514931155834e-7
193+ 0.99999999999980993227684700473478 ,
194+ 676.520368121885098567009190444019 ,
195+ -1259.13921672240287047156078755283 ,
196+ 771.3234287776530788486528258894 ,
197+ -176.61502916214059906584551354 ,
198+ 12.507343278686904814458936853 ,
199+ -0.13857109526572011689554707 ,
200+ 9.984369578019570859563e-6 ,
201+ 1.50563273514931155834e-7
202202 };
203203
204204 double Ag = lanczos[0 ];
205205
206- for (size_t k=1 ; k < lanczos.size (); k++)
207- Ag += lanczos[k] / (x + k);
206+ for (size_t k=1 ; k < lanczos.size (); k++)
207+ Ag += lanczos[k] / (x + k);
208208
209209 double term1 = (x + 0.5 ) * log ((x + 7.5 ) / Euler);
210210 double term2 = LogRootTwoPi + log (Ag);
@@ -230,9 +230,9 @@ resample_hist(mt19937 &gen, const vector<size_t> &vals_hist_distinct_counts,
230230
231231 unsigned int distinct =
232232 accumulate (begin (distinct_counts_hist), end (distinct_counts_hist), 0.0 );
233-
233+
234234 multinomial (gen, distinct_counts_hist, distinct,
235- sample_distinct_counts_hist);
235+ sample_distinct_counts_hist);
236236
237237 out_hist.clear ();
238238 out_hist.resize (vals_hist_distinct_counts.back () + 1 , 0.0 );
@@ -546,6 +546,12 @@ lc_extrap(const bool pop_size, const int argc, const char **argv) {
546546 double c_level = 0.95 ;
547547 unsigned long int seed = 408 ;
548548
549+ if (pop_size) {
550+ // ADS: extrapolate far, without too many steps...
551+ max_extrap = 1.0e20 ;
552+ step_size = 1.0e18 ;
553+ }
554+
549555 /* FLAGS */
550556 bool VERBOSE = false ;
551557 bool VALS_INPUT = false ;
@@ -558,20 +564,20 @@ lc_extrap(const bool pop_size, const int argc, const char **argv) {
558564 bool BAM_FORMAT_INPUT = false ;
559565 size_t MAX_SEGMENT_LENGTH = 5000 ;
560566#endif
561-
567+
562568 string description;
563569 if (!pop_size) {
564- description =
565- " Extrapolate the complexity of a library. This is the approach \
566- described in Daley & Smith (2013). The method applies rational \
567- function approximation via continued fractions with the \
568- original goal of estimating the number of distinct reads that a \
569- sequencing library would yield upon deeper sequencing. This \
570- method has been used for many different purposes since then." ;
570+ description =
571+ " Extrapolate the complexity of a library. This is the approach \
572+ described in Daley & Smith (2013). The method applies rational \
573+ function approximation via continued fractions with the \
574+ original goal of estimating the number of distinct reads that a \
575+ sequencing library would yield upon deeper sequencing. This \
576+ method has been used for many different purposes since then." ;
571577 }
572578 else {
573- description =
574- " Determine the estimate of the number of unique classes in a library." ;
579+ description =
580+ " Determine the estimate of the number of unique classes in a library." ;
575581 }
576582
577583 /* ********* GET COMMAND LINE ARGUMENTS FOR LC EXTRAP ***********/
@@ -774,24 +780,27 @@ lc_extrap(const bool pop_size, const int argc, const char **argv) {
774780 cerr << " [WRITING OUTPUT]" << endl;
775781
776782 if (!pop_size) {
777- write_predicted_complexity_curve (outfile, c_level, step_size,
778- yield_estimates, yield_lower_ci_lognorm,
779- yield_upper_ci_lognorm);
783+ write_predicted_complexity_curve (outfile, c_level, step_size,
784+ yield_estimates, yield_lower_ci_lognorm,
785+ yield_upper_ci_lognorm);
780786 }
781787 else {
782- std::ofstream of;
788+ std::ofstream of;
783789 if (!outfile.empty ()) of.open (outfile.c_str ());
784790 std::ostream out (outfile.empty () ? std::cout.rdbuf () : of.rdbuf ());
785791
786792 out.setf (std::ios_base::fixed, std::ios_base::floatfield);
787793 out.precision (1 );
788794
789- out << " pop_size_estimate" << ' \t '
795+ out << " pop_size_estimate" << ' \t '
790796 << " lower_ci" << ' \t ' << " upper_ci" << endl;
791- out << yield_estimates.back () << ' \t '
792- << yield_lower_ci_lognorm.back () << ' \t '
797+ out << yield_estimates[yield_estimates.size () - 2 ] << ' \t '
798+ << yield_lower_ci_lognorm[yield_estimates.size () - 2 ] << ' \t '
799+ << yield_upper_ci_lognorm[yield_estimates.size () - 2 ] << endl;
800+ out << yield_estimates.back () << ' \t '
801+ << yield_lower_ci_lognorm.back () << ' \t '
793802 << yield_upper_ci_lognorm.back () << endl;
794- }
803+ }
795804 }
796805 }
797806 catch (runtime_error &e) {
@@ -1023,7 +1032,7 @@ gc_extrap(const int argc, const char **argv) {
10231032 bin_size, coverage_estimates,
10241033 coverage_lower_ci_lognorm,
10251034 coverage_upper_ci_lognorm);
1026-
1035+
10271036 }
10281037 }
10291038 catch (runtime_error &e) {
@@ -1501,7 +1510,7 @@ bound_pop(const int argc, const char **argv) {
15011510 bootstrap_moments.push_back (exp (factorial (i + 3 ) +
15021511 log (sample_hist[i + 2 ]) -
15031512 log (sample_hist[1 ])) );
1504- }
1513+ }
15051514
15061515 size_t n_points = 0 ;
15071516 n_points = ensure_pos_def_mom_seq (bootstrap_moments, tolerance, VERBOSE);
0 commit comments