2424#include < cmath>
2525#include < sys/stat.h>
2626#include < unistd.h>
27+ #include < filesystem>
2728
2829#include " OptionParser.hpp"
2930#include " smithlab_os.hpp"
@@ -58,8 +59,8 @@ wilson_ci_for_binomial(const double alpha, const double n,
5859static int
5960binom_null (const double alpha, const double n,
6061 const double p_hat, const double p) {
61- // ADS: function tests if final argument p is outside the (1 -
62- // alpha) CI for p_hat given counts n.
62+ // ADS: function tests if final argument p is outside the (1-alpha)
63+ // CI for p_hat given counts n.
6364 double lower = 0.0 ;
6465 double upper = 0.0 ;
6566 wilson_ci_for_binomial (alpha, n, p_hat, lower, upper);
@@ -140,6 +141,7 @@ static void
140141expectation (const size_t a, const size_t x,
141142 const double p, const double q,
142143 vector<vector<double >> &coeff) {
144+
143145 assert (p > 0.0 && q > 0.0 );
144146 assert (p + q <= 1.0 );
145147
@@ -156,8 +158,7 @@ expectation(const size_t a, const size_t x,
156158 coeff = vector<vector<double >>(x + 1 , vector<double >(a + 1 , 0.0 ));
157159
158160 for (size_t k = 0 ; k <= x; ++k) {
159- const double x_c_k =
160- lnchoose (x, k) + log_p*k + log_1mpq*(x - k) - log_1mq*x;
161+ const double x_c_k = lnchoose (x, k) + log_p*k + log_1mpq*(x-k) - log_1mq*x;
161162 for (size_t j = 0 ; j <= a; ++j)
162163 coeff[k][j] = exp (a_c_j[j] + x_c_k);
163164 }
@@ -201,8 +202,6 @@ expectation_maximization(const bool DEBUG,
201202 const double tolerance,
202203 double &p, double &q) {
203204 constexpr auto max_iterations = 500 ;
204- size_t iter = 0 ;
205- double delta = std::numeric_limits<double >::max ();
206205
207206 if (DEBUG) {
208207 cerr << " t:" << a << " , u:" << b
@@ -211,6 +210,8 @@ expectation_maximization(const bool DEBUG,
211210 << " p:" << p << " , q:" << q << endl;
212211 }
213212
213+ uint32_t iter = 0u ;
214+ double delta = std::numeric_limits<double >::max ();
214215 do {
215216 vector<vector<double >> coeff;
216217
@@ -226,9 +227,7 @@ expectation_maximization(const bool DEBUG,
226227 q = max (tolerance, min (q, 1 -tolerance-p));
227228 delta = max (fabs (p_old - p), fabs (q_old - q));
228229
229- ++iter;
230-
231- } while (delta > tolerance && iter <= max_iterations);
230+ } while (delta > tolerance && iter++ < max_iterations);
232231
233232 if (DEBUG) {
234233 cerr << iter << ' \t '
@@ -242,39 +241,33 @@ expectation_maximization(const bool DEBUG,
242241// / only 2 input files ///
243242// ////////////////////////
244243static double
245- log_L (const size_t x, const size_t y,
246- const size_t z, const size_t w,
244+ log_L (const size_t x, const size_t y, const size_t z, const size_t w,
247245 const double p, const double q) {
248- assert (p+ q <= 1 );
249- double log_lkhd = lnchoose (x+ y, x) + lnchoose (z+ w, z);
250- if (p > 0 ) log_lkhd += x* log (p);
251- if (p < 1 ) log_lkhd += y* log (1 - p);
246+ assert (p + q <= 1 );
247+ double log_lkhd = lnchoose (x + y, x) + lnchoose (z + w, z);
248+ if (p > 0 ) log_lkhd += x * log (p);
249+ if (p < 1 ) log_lkhd += y * log (1 - p);
252250
253- if (q > 0 ) log_lkhd += z* log (q);
254- if (q < 1 ) log_lkhd += w* log (1 - q);
251+ if (q > 0 ) log_lkhd += z * log (q);
252+ if (q < 1 ) log_lkhd += w * log (1 - q);
255253
256- return (log_lkhd);
254+ return (log_lkhd);
257255}
258256
259257/* Get start point for p and q, if only 2 inputs are available */
260- static void
261- get_start_point (const size_t x, const size_t y,
262- const size_t z, const size_t w,
263- const double tolerance,
264- double &p, double &q) {
265-
266- p = tolerance + 1.0 *x/(x + y);
267- q = tolerance + 1.0 *z/(z + w);
258+ static inline void
259+ get_start_point (const size_t x, const size_t y, const size_t z, const size_t w,
260+ const double tolerance, double &p, double &q) {
261+ p = tolerance + 1.0 * x / (x + y);
262+ q = tolerance + 1.0 * z / (z + w);
268263 if (p + q >= 1.0 ) {
269- p = max (tolerance, (1.0 - tolerance)*p/(p+ q));
264+ p = max (tolerance, (1.0 - tolerance) * p / (p + q));
270265 q = max (tolerance, 1.0 - p - tolerance);
271266 }
272-
273267}
274268
275269static void
276- expectation (const size_t y,
277- const double p, const double q,
270+ expectation (const size_t y, const double p, const double q,
278271 vector<double > &coeff) {
279272 assert (p > 0.0 && q > 0.0 );
280273 assert (p + q < 1.0 );
@@ -283,30 +276,24 @@ expectation(const size_t y,
283276 const double log_q = log (q);
284277 const double log_1mp = log (1.0 - p);
285278 for (size_t j = 0 ; j <= y; ++j)
286- coeff.push_back (exp ( lnchoose (y, j) + log_q*j
287- + log_1mpq*(y- j) - log_1mp* y));
279+ coeff.push_back (
280+ exp ( lnchoose (y, j) + log_q * j + log_1mpq * (y - j) - log_1mp * y));
288281}
289282
290- static double
291- maximization (const size_t x, const size_t y,
292- const vector<double > &coeff) {
293- double num = x, denom = x+y;
294- for (size_t j = 0 ; j <= y; ++j) {
295- denom -= coeff[j]*j;
296- }
297- return num/denom;
283+ static inline double
284+ maximization (const size_t x, const size_t y, const vector<double > &coeff) {
285+ double num = x, denom = x + y;
286+ for (size_t j = 0 ; j <= y; ++j) { denom -= coeff[j] * j; }
287+ return num / denom;
298288}
299289
300- static double
301- update_q (const size_t x, const size_t y,
302- const size_t z, const size_t w,
290+ static inline double
291+ update_q (const size_t x, const size_t y, const size_t z, const size_t w,
303292 const vector<double > &coeff) {
304293 double num = z;
305- double denom = x+y+z+w;
306- for (size_t j =0 ; j <=y; ++j) {
307- num += coeff[j]*j;
308- }
309- return num/denom;
294+ const double denom = x + y + z + w;
295+ for (size_t j = 0 ; j <= y; ++j) num += coeff[j] * j;
296+ return num / denom;
310297}
311298
312299static void
@@ -340,12 +327,9 @@ expectation_maximization(const bool DEBUG,
340327 }
341328}
342329
343-
344- static bool
330+ static inline bool
345331check_file_non_empty (const string &filename) {
346- struct stat buf;
347- stat (filename.c_str (), &buf);
348- return buf.st_size != 0 ;
332+ return std::filesystem::file_size (filename) > 0 ;
349333}
350334
351335static void
@@ -561,7 +545,8 @@ process_two_types(const double alpha,
561545 size_t &total_sites,
562546 size_t &overshoot_sites,
563547 size_t &conflict_sites) {
564- constexpr auto max_read_count = 500 ;
548+
549+ constexpr auto max_read_count = 500u ;
565550
566551 ofstream of;
567552 if (!outfile.empty ()) {
@@ -587,23 +572,30 @@ process_two_types(const double alpha,
587572 if (oxbs_file.empty ()) {
588573 s_rev = true ;
589574 f_in.open (hydroxy_file);
575+ if (!f_in) throw dnmt_error (" failed to open file: " + hydroxy_file);
590576 s_in.open (bs_seq_file);
577+ if (!s_in) throw dnmt_error (" failed to open file: " + bs_seq_file);
591578 }
592579 else if (hydroxy_file.empty ()) {
593580 f_rev = true ;
594581 f_in.open (bs_seq_file);
582+ if (!f_in) throw dnmt_error (" failed to open file: " + bs_seq_file);
595583 s_in.open (oxbs_file);
584+ if (!s_in) throw dnmt_error (" failed to open file: " + oxbs_file);
596585 }
597586 else {
598587 f_in.open (oxbs_file);
588+ if (!f_in) throw dnmt_error (" failed to open file: " + oxbs_file);
599589 s_in.open (hydroxy_file);
590+ if (!s_in) throw dnmt_error (" failed to open file: " + hydroxy_file);
600591 }
601592
593+
602594 MSite f, s;
603595 while (f_in >> f && s_in >> s) {
604596
605597 if (f.chrom != s.chrom || f.pos != s.pos )
606- throw runtime_error (" error: sites not synchronized between files" );
598+ throw dnmt_error (" error: sites not synchronized between files" );
607599
608600 if (f.n_reads > max_read_count) f.n_reads = max_read_count;
609601 size_t x = f.n_meth ();
0 commit comments