@@ -55,13 +55,15 @@ using std::to_string;
5555
5656using bamxx::bgzf_file;
5757
58+ template <typename T> using num_lim = std::numeric_limits<T>;
59+
5860struct pmd_summary {
5961 pmd_summary (const vector<GenomicRegion> &pmds) {
6062 pmd_count = pmds.size ();
6163 pmd_total_size = accumulate (cbegin (pmds), cend (pmds), 0 ,
6264 [](const uint64_t t, const GenomicRegion &p) {
6365 return t + p.get_width (); });
64- pmd_mean_size =
66+ pmd_mean_size =
6567 static_cast <double >(pmd_total_size)/std::max (1ul , pmd_count);
6668 }
6769 // pmd_count is the number of identified PMDs.
@@ -166,7 +168,7 @@ find_best_bound(const bool IS_RIGHT_BOUNDARY,
166168 }
167169
168170 size_t best_idx = 0 ;
169- double best_score = -numeric_limits <double >::max ();
171+ double best_score = -num_lim <double >::max ();
170172 if (meth_tot.size () > 0 )
171173 for (size_t i = 1 ; i < meth_tot.size ()-1 ; ++i) {
172174 size_t N_low, k_low, N_hi, k_hi;
@@ -199,8 +201,8 @@ find_best_bound(const bool IS_RIGHT_BOUNDARY,
199201 }
200202 }
201203 }
202- return (best_score > -numeric_limits <double >::max ()) ?
203- positions[best_idx] : numeric_limits <size_t >::max ();
204+ return (best_score > -num_lim <double >::max ()) ?
205+ positions[best_idx] : num_lim <size_t >::max ();
204206}
205207
206208
@@ -419,10 +421,10 @@ optimize_boundaries(const size_t bin_size,
419421 // ///
420422 for (size_t i = 0 ; i < pmds.size (); ++i) {
421423 const size_t start_site = bound_site[2 *i];
422- if (start_site != numeric_limits <size_t >::max ())
424+ if (start_site != num_lim <size_t >::max ())
423425 pmds[i].set_start (start_site);
424426 const size_t end_site = bound_site[2 *i + 1 ];
425- if (end_site != numeric_limits <size_t >::max ())
427+ if (end_site != num_lim <size_t >::max ())
426428 pmds[i].set_end (end_site + 1 );
427429 }
428430
@@ -476,15 +478,15 @@ optimize_boundaries(const size_t bin_size,
476478double
477479get_score_cutoff_for_fdr (const vector<double > &scores, const double fdr) {
478480 if (fdr <= 0 )
479- return numeric_limits <double >::max ();
481+ return num_lim <double >::max ();
480482 else if (fdr > 1 )
481- return numeric_limits <double >::min ();
483+ return num_lim <double >::min ();
482484 vector<double > local (scores);
483485 std::sort (begin (local), end (local));
484486 size_t i = 0 ;
485487 for (; i < local.size () - 1 &&
486488 local[i+1 ] < fdr*static_cast <double >(i+1 )/local.size (); ++i);
487- return local[i];
489+ return local[i] + 1.0 /scores. size () ;
488490}
489491
490492
@@ -616,7 +618,7 @@ separate_regions(const bool VERBOSE, const size_t desert_size,
616618 size_t prev_cpg = 0 ;
617619 for (size_t i = 0 ; i < bins[0 ].size (); ++i) {
618620 const size_t dist = (i > 0 && bins[0 ][i].same_chrom (bins[0 ][i - 1 ])) ?
619- bins[0 ][i].get_start () - prev_cpg : numeric_limits <size_t >::max ();
621+ bins[0 ][i].get_start () - prev_cpg : num_lim <size_t >::max ();
620622 if (dist > desert_size)
621623 reset_points.push_back (i);
622624 prev_cpg = bins[0 ][i].get_start ();
@@ -662,16 +664,14 @@ static void
662664assign_p_values (const vector<double > &random_scores,
663665 const vector<double > &observed_scores,
664666 vector<double > &p_values) {
665- const double n_randoms =
666- random_scores.size () == 0 ? 1 : random_scores.size ();
667- for (size_t i = 0 ; i < observed_scores.size (); ++i)
668- p_values.push_back ((end (random_scores) -
669- upper_bound (begin (random_scores),
670- end (random_scores),
671- observed_scores[i]))/n_randoms);
667+ const double n_randoms = random_scores.size () == 0 ? 1 : random_scores.size ();
668+ for (auto scr : observed_scores) {
669+ const auto scr_itr =
670+ upper_bound (cbegin (random_scores), cend (random_scores), scr);
671+ p_values.push_back (std::distance (scr_itr, cend (random_scores)) / n_randoms);
672+ }
672673}
673674
674-
675675static void
676676read_params_file (const bool VERBOSE,
677677 const string ¶ms_file,
@@ -794,8 +794,8 @@ load_array_data(const size_t bin_size,
794794 MSite site;
795795 while (read_site (in, site)) {
796796 // TODO: MN: I think that the block below should be placed later
797- // in this scope. At this location, the methylation level of the
798- // first site in a new chrom is contributed to the last bin of the
797+ // in this scope. At this location, the methylation level of the
798+ // first site in a new chrom is contributed to the last bin of the
799799 // previous chrom.
800800 if (site.n_reads > 0 ) { // its covered by a probe
801801 ++num_probes_in_bin;
@@ -1026,7 +1026,7 @@ get_min_reads_for_confidence(const double conf_level) {
10261026}
10271027
10281028
1029- // ADS: this function will return numeric_limits <size_t>::max() if the
1029+ // ADS: this function will return num_lim <size_t>::max() if the
10301030// fraction of "good" bins is zero for all attempted bin sizes.
10311031static size_t
10321032binsize_selection (const bool &VERBOSE, const size_t resolution,
@@ -1049,8 +1049,8 @@ binsize_selection(const bool &VERBOSE, const size_t resolution,
10491049 if (frac_passed < min_frac_passed)
10501050 bin_size += resolution;
10511051 }
1052- return frac_passed <= numeric_limits <double >::min () ?
1053- std::numeric_limits <size_t >::max () : bin_size;
1052+ return frac_passed <= num_lim <double >::min () ?
1053+ num_lim <size_t >::max () : bin_size;
10541054}
10551055
10561056
@@ -1090,7 +1090,7 @@ get_union_of_bins(const vector<vector<SimpleGenomicRegion> > &orig,
10901090 std::inplace_merge (first, middle, middle + orig[i].size ());
10911091 }
10921092 // ensure unique bins
1093- bins.resize ( std::distance (begin (bins), unique ( begin ( bins), end (bins)) ));
1093+ bins.erase ( unique (begin (bins), end ( bins)) , end (bins));
10941094 bins.shrink_to_fit ();
10951095
10961096 // make sure all bins are aligned at same boundaries
@@ -1243,7 +1243,7 @@ main_pmd(int argc, const char **argv) {
12431243 min_bin_size, max_bin_size,
12441244 confidence_interval, prop_accept,
12451245 cpgs_file[i]);
1246- if (bin_size == std::numeric_limits <size_t >::max ())
1246+ if (bin_size == num_lim <size_t >::max ())
12471247 insufficient_data = true ;
12481248 desert_size = 5 *bin_size; // TODO: explore extrapolation number
12491249 }
@@ -1288,7 +1288,7 @@ main_pmd(int argc, const char **argv) {
12881288 reads[i], array_status);
12891289 const double total_observations =
12901290 accumulate (begin (reads[i]), end (reads[i]), 0 );
1291- if (total_observations <= numeric_limits <double >::min ())
1291+ if (total_observations <= num_lim <double >::min ())
12921292 insufficient_data = true ;
12931293 if (VERBOSE)
12941294 cerr << " TOTAL BINS: " << bins[i].size () << endl
@@ -1336,7 +1336,7 @@ main_pmd(int argc, const char **argv) {
13361336 vector<double > reps_fg_beta (n_replicates, 0.95 );
13371337 vector<double > reps_bg_alpha (n_replicates, 0.95 );
13381338 vector<double > reps_bg_beta (n_replicates, 0.05 );
1339- double score_cutoff_for_fdr = numeric_limits <double >::max ();
1339+ double score_cutoff_for_fdr = num_lim <double >::max ();
13401340
13411341 if (!params_in_file.empty ()) {
13421342 // read parameters files
@@ -1387,9 +1387,8 @@ main_pmd(int argc, const char **argv) {
13871387 reps_bg_alpha, reps_bg_beta, classes,
13881388 scores, array_status);
13891389
1390- if (!posteriors_out_prefix.empty ()) {
1391- write_posteriors_file (posteriors_out_prefix + " .emissions" , bins, scores);
1392- }
1390+ if (!posteriors_out_prefix.empty ())
1391+ write_posteriors_file (posteriors_out_prefix + " .posteriors" , bins, scores);
13931392
13941393 vector<double > domain_scores;
13951394 get_domain_scores (classes, meth, reset_points, domain_scores);
@@ -1405,7 +1404,7 @@ main_pmd(int argc, const char **argv) {
14051404 vector<double > p_values;
14061405 assign_p_values (random_scores, domain_scores, p_values);
14071406
1408- if (score_cutoff_for_fdr == numeric_limits <double >::max () &&
1407+ if (score_cutoff_for_fdr == num_lim <double >::max () &&
14091408 !p_values.empty ())
14101409 score_cutoff_for_fdr = get_score_cutoff_for_fdr (p_values, 0.01 );
14111410
0 commit comments