Skip to content

Commit 87373cd

Browse files
added a function to check if a file is a methylation file.
1 parent e6aad25 commit 87373cd

File tree

1 file changed

+65
-0
lines changed

1 file changed

+65
-0
lines changed

src/analysis/roimethstat.cpp

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#include <numeric>
2525
#include <utility>
2626
#include <stdexcept>
27+
#include <regex>
2728

2829
#include "OptionParser.hpp"
2930
#include "smithlab_utils.hpp"
@@ -47,6 +48,7 @@ using std::runtime_error;
4748
using std::ifstream;
4849
using std::isfinite;
4950
using std::is_sorted;
51+
using std::regex_match;
5052

5153
static pair<bool, bool>
5254
meth_unmeth_calls(const size_t n_meth, const size_t n_unmeth) {
@@ -360,6 +362,59 @@ process_with_cpgs_on_disk(const bool PRINT_NUMERIC_ONLY,
360362
///
361363
////////////////////////////////////////////////////////////////////////
362364

365+
// Report whether `str`, taken as a whole, parses as a floating-point
// number.
//
// Parsing is done in double precision: single-precision parsing
// (std::stof) reports out-of-range for text such as "1e-50" or "1e300",
// which is perfectly valid numeric text and must not cause a column to
// be classified as non-numeric.
//
// Accepts exactly what std::stod accepts (so leading whitespace, an
// optional sign, exponent notation and the inf/nan spellings are all
// allowed), provided nothing is left over after the number.
//
// Returns false for the empty string, for text with trailing
// characters, for non-numeric text, and for values outside the range of
// double. Never throws for malformed input.
static inline bool
is_float(const std::string &str) {
  try {
    size_t pos = 0;
    std::stod(str, &pos);
    // a partial parse ("0.5x") is not a number: require full consumption
    return pos == str.size();
  }
  catch (const std::invalid_argument &) {
    return false;  // no leading numeric text at all
  }
  catch (const std::out_of_range &) {
    return false;  // magnitude not representable even as double
  }
}
377+
378+
// Report whether `str`, taken as a whole, parses as a base-10 integer.
//
// Parsing uses std::stoll rather than std::stoi: with std::stoi any
// value above the range of `int` (2147483647 where int is 32 bits) is
// reported as out-of-range, so large but valid integer fields would be
// treated as non-integers.
//
// Accepts exactly what std::stoll accepts (leading whitespace and an
// optional sign are allowed), provided nothing is left over after the
// digits.
//
// Returns false for the empty string, for text with trailing characters
// (including a decimal point), for non-numeric text, and for values
// outside the range of long long. Never throws for malformed input.
static inline bool
is_integer(const std::string &str) {
  try {
    size_t pos = 0;
    std::stoll(str, &pos);
    // a partial parse ("12.5", "34x") is not an integer
    return pos == str.size();
  }
  catch (const std::invalid_argument &) {
    return false;  // no leading digits at all
  }
  catch (const std::out_of_range &) {
    return false;  // does not fit in long long
  }
}
390+
391+
392+
static bool
393+
is_methylation_file(const string &file) {
394+
ifstream in(file);
395+
if (!in)
396+
throw runtime_error("cannot open file: " + file);
397+
398+
string line;
399+
getline(in, line);
400+
401+
std::istringstream iss(line);
402+
string token;
403+
404+
vector<string> tokens;
405+
while(iss >> token) {
406+
tokens.push_back(token);
407+
}
408+
409+
std::regex pattern("^C[pHWX][GH]$");
410+
411+
return tokens.size() == 6 &&
412+
is_integer(tokens[1]) &&
413+
(tokens[2] == "+" || tokens[2] == "-") &&
414+
regex_match(tokens[3], pattern) &&
415+
is_float(tokens[4]) &&
416+
is_integer(tokens[5]);
417+
}
363418

364419
static size_t
365420
check_bed_format(const string &regions_file) {
@@ -474,6 +529,16 @@ Columns (beyond the first 6) in the BED format output:
474529
// bed format
475530
if (n_columns != 3 && n_columns < 6)
476531
throw runtime_error("format must be 3 or 6+ column bed: " + regions_file);
532+
if (is_methylation_file(regions_file)) {
533+
cerr << opt_parse.help_message() << endl;
534+
throw runtime_error("The file seems to be a methylation file: " +
535+
regions_file + "\nCheck the order of the input arguments");
536+
}
537+
if (!is_methylation_file(cpgs_file)) {
538+
cerr << opt_parse.help_message() << endl;
539+
throw runtime_error("The file is not a methylation file: " + cpgs_file);
540+
}
541+
477542

478543
vector<GenomicRegion> regions;
479544
ReadBEDFile(regions_file, regions);

0 commit comments

Comments
 (0)