Skip to content

Commit 0a9a44d

Browse files
Setting some initial values for the pop_size
1 parent 309753d commit 0a9a44d

File tree

1 file changed

+49
-40
lines changed

1 file changed

+49
-40
lines changed

src/preseq.cpp

Lines changed: 49 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ get_counts_from_hist(const vector<T> &h) {
6868

6969
template<typename T> T
7070
median_from_sorted_vector (const vector<T> sorted_data,
71-
const size_t stride, const size_t n) {
71+
const size_t stride, const size_t n) {
7272

7373
if (n == 0 || sorted_data.empty()) return 0.0;
7474

@@ -81,19 +81,19 @@ median_from_sorted_vector (const vector<T> sorted_data,
8181
}
8282

8383
template<typename T> T
84-
quantile_from_sorted_vector (const vector<T> sorted_data,
85-
const size_t stride, const size_t n,
86-
const double f) {
84+
quantile_from_sorted_vector (const vector<T> sorted_data,
85+
const size_t stride, const size_t n,
86+
const double f) {
8787
const double index = f * (n - 1);
8888
const size_t lhs = (int)index;
8989
const double delta = index - lhs;
9090

9191
if (n == 0 || sorted_data.empty()) return 0.0;
9292

9393
if (lhs == n - 1) return sorted_data[lhs * stride];
94-
95-
return (1 - delta) * sorted_data[lhs * stride]
96-
+ delta * sorted_data[(lhs + 1) * stride];
94+
95+
return (1 - delta) * sorted_data[lhs * stride]
96+
+ delta * sorted_data[(lhs + 1) * stride];
9797
}
9898

9999
// Confidence interval stuff
@@ -190,21 +190,21 @@ factorial (double x) {
190190
x -= 1.0;
191191

192192
vector<double> lanczos {
193-
0.99999999999980993227684700473478,
194-
676.520368121885098567009190444019,
195-
-1259.13921672240287047156078755283,
196-
771.3234287776530788486528258894,
197-
-176.61502916214059906584551354,
198-
12.507343278686904814458936853,
199-
-0.13857109526572011689554707,
200-
9.984369578019570859563e-6,
201-
1.50563273514931155834e-7
193+
0.99999999999980993227684700473478,
194+
676.520368121885098567009190444019,
195+
-1259.13921672240287047156078755283,
196+
771.3234287776530788486528258894,
197+
-176.61502916214059906584551354,
198+
12.507343278686904814458936853,
199+
-0.13857109526572011689554707,
200+
9.984369578019570859563e-6,
201+
1.50563273514931155834e-7
202202
};
203203

204204
double Ag = lanczos[0];
205205

206-
for (size_t k=1; k < lanczos.size(); k++)
207-
Ag += lanczos[k] / (x + k);
206+
for (size_t k=1; k < lanczos.size(); k++)
207+
Ag += lanczos[k] / (x + k);
208208

209209
double term1 = (x + 0.5) * log((x + 7.5) / Euler);
210210
double term2 = LogRootTwoPi + log(Ag);
@@ -230,9 +230,9 @@ resample_hist(mt19937 &gen, const vector<size_t> &vals_hist_distinct_counts,
230230

231231
unsigned int distinct =
232232
accumulate(begin(distinct_counts_hist), end(distinct_counts_hist), 0.0);
233-
233+
234234
multinomial(gen, distinct_counts_hist, distinct,
235-
sample_distinct_counts_hist);
235+
sample_distinct_counts_hist);
236236

237237
out_hist.clear();
238238
out_hist.resize(vals_hist_distinct_counts.back() + 1, 0.0);
@@ -546,6 +546,12 @@ lc_extrap(const bool pop_size, const int argc, const char **argv) {
546546
double c_level = 0.95;
547547
unsigned long int seed = 408;
548548

549+
if (pop_size) {
550+
// ADS: extrapolate far, without too many steps...
551+
max_extrap = 1.0e20;
552+
step_size = 1.0e18;
553+
}
554+
549555
/* FLAGS */
550556
bool VERBOSE = false;
551557
bool VALS_INPUT = false;
@@ -558,20 +564,20 @@ lc_extrap(const bool pop_size, const int argc, const char **argv) {
558564
bool BAM_FORMAT_INPUT = false;
559565
size_t MAX_SEGMENT_LENGTH = 5000;
560566
#endif
561-
567+
562568
string description;
563569
if (!pop_size) {
564-
description =
565-
"Extrapolate the complexity of a library. This is the approach \
566-
described in Daley & Smith (2013). The method applies rational \
567-
function approximation via continued fractions with the \
568-
original goal of estimating the number of distinct reads that a \
569-
sequencing library would yield upon deeper sequencing. This \
570-
method has been used for many different purposes since then.";
570+
description =
571+
"Extrapolate the complexity of a library. This is the approach \
572+
described in Daley & Smith (2013). The method applies rational \
573+
function approximation via continued fractions with the \
574+
original goal of estimating the number of distinct reads that a \
575+
sequencing library would yield upon deeper sequencing. This \
576+
method has been used for many different purposes since then.";
571577
}
572578
else {
573-
description =
574-
"Determine the estimate of the number of unique classes in a library.";
579+
description =
580+
"Determine the estimate of the number of unique classes in a library.";
575581
}
576582

577583
/********** GET COMMAND LINE ARGUMENTS FOR LC EXTRAP ***********/
@@ -774,24 +780,27 @@ lc_extrap(const bool pop_size, const int argc, const char **argv) {
774780
cerr << "[WRITING OUTPUT]" << endl;
775781

776782
if (!pop_size) {
777-
write_predicted_complexity_curve(outfile, c_level, step_size,
778-
yield_estimates, yield_lower_ci_lognorm,
779-
yield_upper_ci_lognorm);
783+
write_predicted_complexity_curve(outfile, c_level, step_size,
784+
yield_estimates, yield_lower_ci_lognorm,
785+
yield_upper_ci_lognorm);
780786
}
781787
else {
782-
std::ofstream of;
788+
std::ofstream of;
783789
if (!outfile.empty()) of.open(outfile.c_str());
784790
std::ostream out(outfile.empty() ? std::cout.rdbuf() : of.rdbuf());
785791

786792
out.setf(std::ios_base::fixed, std::ios_base::floatfield);
787793
out.precision(1);
788794

789-
out << "pop_size_estimate" << '\t'
795+
out << "pop_size_estimate" << '\t'
790796
<< "lower_ci" << '\t' << "upper_ci" << endl;
791-
out << yield_estimates.back() << '\t'
792-
<< yield_lower_ci_lognorm.back() << '\t'
797+
out << yield_estimates[yield_estimates.size() - 2] << '\t'
798+
<< yield_lower_ci_lognorm[yield_estimates.size() - 2] << '\t'
799+
<< yield_upper_ci_lognorm[yield_estimates.size() - 2] << endl;
800+
out << yield_estimates.back() << '\t'
801+
<< yield_lower_ci_lognorm.back() << '\t'
793802
<< yield_upper_ci_lognorm.back() << endl;
794-
}
803+
}
795804
}
796805
}
797806
catch (runtime_error &e) {
@@ -1023,7 +1032,7 @@ gc_extrap(const int argc, const char **argv) {
10231032
bin_size, coverage_estimates,
10241033
coverage_lower_ci_lognorm,
10251034
coverage_upper_ci_lognorm);
1026-
1035+
10271036
}
10281037
}
10291038
catch (runtime_error &e) {
@@ -1501,7 +1510,7 @@ bound_pop(const int argc, const char **argv) {
15011510
bootstrap_moments.push_back(exp(factorial(i + 3) +
15021511
log(sample_hist[i + 2]) -
15031512
log(sample_hist[1])) );
1504-
}
1513+
}
15051514

15061515
size_t n_points = 0;
15071516
n_points = ensure_pos_def_mom_seq(bootstrap_moments, tolerance, VERBOSE);

0 commit comments

Comments
 (0)