2424#include < numeric>
2525#include < utility>
2626#include < stdexcept>
27+ #include < regex>
2728
2829#include " OptionParser.hpp"
2930#include " smithlab_utils.hpp"
@@ -47,6 +48,7 @@ using std::runtime_error;
4748using std::ifstream;
4849using std::isfinite;
4950using std::is_sorted;
51+ using std::regex_match;
5052
5153static pair<bool , bool >
5254meth_unmeth_calls (const size_t n_meth, const size_t n_unmeth) {
@@ -360,6 +362,59 @@ process_with_cpgs_on_disk(const bool PRINT_NUMERIC_ONLY,
360362// /
361363// //////////////////////////////////////////////////////////////////////
362364
// Reports whether the whole of `str` parses as a floating-point value
// with std::stof. Returns false when std::stof rejects the text
// (std::invalid_argument), when the value is out of range for float
// (std::out_of_range), or when characters are left over after the
// parsed number.
static inline bool
is_float(const string &str) {
  size_t n_consumed = 0;
  try {
    std::stof(str, &n_consumed);
  }
  catch (const std::out_of_range &) {
    return false;  // parsed, but not representable as float
  }
  catch (const std::invalid_argument &) {
    return false;  // nothing parseable at the start of str
  }
  // a partial parse (e.g. "0.5x") leaves n_consumed short of the end
  return n_consumed == str.size();
}
377+
// Reports whether the whole of `str` parses as a base-10 integer.
// Parsing uses std::stoll so that any value representable as long long
// is accepted; std::stoi would reject values above INT_MAX, which is
// too narrow for fields that the rest of this file holds in size_t.
// Returns false when no number can be parsed (std::invalid_argument,
// which includes the empty string), when the value does not fit in
// long long (std::out_of_range), or when characters are left over
// after the parsed number.
static inline bool
is_integer(const string &str) {
  try {
    size_t pos = 0;
    std::stoll(str, &pos);
    return pos == str.size();  // require the entire string to be consumed
  }
  catch (const std::invalid_argument &) {
    return false;  // no leading integer in str
  }
  catch (const std::out_of_range &) {
    return false;  // magnitude exceeds long long
  }
}
390+
391+
392+ static bool
393+ is_methylation_file (const string &file) {
394+ ifstream in (file);
395+ if (!in)
396+ throw runtime_error (" cannot open file: " + file);
397+
398+ string line;
399+ getline (in, line);
400+
401+ std::istringstream iss (line);
402+ string token;
403+
404+ vector<string> tokens;
405+ while (iss >> token) {
406+ tokens.push_back (token);
407+ }
408+
409+ std::regex pattern (" ^C[pHWX][GH]$" );
410+
411+ return tokens.size () == 6 &&
412+ is_integer (tokens[1 ]) &&
413+ (tokens[2 ] == " +" || tokens[2 ] == " -" ) &&
414+ regex_match (tokens[3 ], pattern) &&
415+ is_float (tokens[4 ]) &&
416+ is_integer (tokens[5 ]);
417+ }
363418
364419static size_t
365420check_bed_format (const string ®ions_file) {
@@ -474,6 +529,16 @@ Columns (beyond the first 6) in the BED format output:
474529 // bed format
475530 if (n_columns != 3 && n_columns < 6 )
476531 throw runtime_error (" format must be 3 or 6+ column bed: " + regions_file);
532+ if (is_methylation_file (regions_file)) {
533+ cerr << opt_parse.help_message () << endl;
534+ throw runtime_error (" The file seems to be a methylation file: " +
535+ regions_file + " \n Check the order of the input arguments" );
536+ }
537+ if (!is_methylation_file (cpgs_file)) {
538+ cerr << opt_parse.help_message () << endl;
539+ throw runtime_error (" The file is not a methylation file: " + cpgs_file);
540+ }
541+
477542
478543 vector<GenomicRegion> regions;
479544 ReadBEDFile (regions_file, regions);
0 commit comments