Skip to content

Commit a77dfc9

Browse files
Update the file check function and move it to MSite.*pp
1 parent 87373cd commit a77dfc9

File tree

3 files changed

+54
-55
lines changed

3 files changed

+54
-55
lines changed

src/analysis/roimethstat.cpp

Lines changed: 5 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -362,59 +362,6 @@ process_with_cpgs_on_disk(const bool PRINT_NUMERIC_ONLY,
362362
///
363363
////////////////////////////////////////////////////////////////////////
364364

365-
// Returns true when `str` can be parsed by std::stof and the parse consumes
// every character of `str`; returns false when std::stof throws
// std::invalid_argument or std::out_of_range, or when characters are left over.
static inline bool
366-
is_float(const string &str) {
367-
try {
368-
// `pos` is filled in by std::stof with the number of characters it processed
size_t pos;
369-
std::stof(str, &pos);
370-
return pos == str.size(); // Check if entire string was consumed
371-
} catch (const std::invalid_argument &) {
372-
return false; // Conversion failed due to invalid argument
373-
} catch (const std::out_of_range &) {
374-
return false; // Conversion failed due to out of range
375-
}
376-
}
377-
378-
// Returns true when `str` can be parsed by std::stoi and the parse consumes
// every character of `str`; returns false when std::stoi throws
// std::invalid_argument or std::out_of_range, or when characters are left over.
// Because std::stoi is used, values outside the range of `int` are reported
// as "not an integer".
static inline bool
379-
is_integer(const string &str) {
380-
try {
381-
// `pos` is filled in by std::stoi with the number of characters it processed
size_t pos;
382-
std::stoi(str, &pos);
383-
return pos == str.size(); // Check if entire string was consumed
384-
} catch (const std::invalid_argument &) {
385-
return false; // Conversion failed due to invalid argument
386-
} catch (const std::out_of_range &) {
387-
return false; // Conversion failed due to out of range
388-
}
389-
}
390-
391-
392-
// Inspects only the first line of `file` and reports whether it has the
// six-column layout checked below: any first token, an integer, a strand
// ("+" or "-"), a context matching C[pHWX][GH], a float, and an integer.
// Throws runtime_error when the file cannot be opened.
static bool
393-
is_methylation_file(const string &file) {
394-
ifstream in(file);
395-
if (!in)
396-
throw runtime_error("cannot open file: " + file);
397-
398-
// Only the first line is examined; if the read fails `line` stays empty,
// which yields zero tokens and therefore a false result.
string line;
399-
getline(in, line);
400-
401-
// Split the line on whitespace
std::istringstream iss(line);
402-
string token;
403-
404-
vector<string> tokens;
405-
while(iss >> token) {
406-
tokens.push_back(token);
407-
}
408-
409-
// Accepted context strings: 'C', then one of p/H/W/X, then G or H
std::regex pattern("^C[pHWX][GH]$");
410-
411-
// The size test comes first so the indexed accesses below are only
// evaluated when all six tokens exist (short-circuit &&).
return tokens.size() == 6 &&
412-
is_integer(tokens[1]) &&
413-
(tokens[2] == "+" || tokens[2] == "-") &&
414-
regex_match(tokens[3], pattern) &&
415-
is_float(tokens[4]) &&
416-
is_integer(tokens[5]);
417-
}
418365

419366
static size_t
420367
check_bed_format(const string &regions_file) {
@@ -529,12 +476,12 @@ Columns (beyond the first 6) in the BED format output:
529476
// bed format
530477
if (n_columns != 3 && n_columns < 6)
531478
throw runtime_error("format must be 3 or 6+ column bed: " + regions_file);
532-
if (is_methylation_file(regions_file)) {
479+
if (is_msite_file(regions_file)) {
533480
cerr << opt_parse.help_message() << endl;
534481
throw runtime_error("The file seems to be a methylation file: " +
535482
regions_file + "\nCheck the order of the input arguments");
536483
}
537-
if (!is_methylation_file(cpgs_file)) {
484+
if (!is_msite_file(cpgs_file)) {
538485
cerr << opt_parse.help_message() << endl;
539486
throw runtime_error("The file is not a methylation file: " + cpgs_file);
540487
}
@@ -584,3 +531,6 @@ Columns (beyond the first 6) in the BED format output:
584531
}
585532
return EXIT_SUCCESS;
586533
}
534+
535+
536+

src/common/MSite.cpp

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,11 +20,13 @@
2020
#include <fstream>
2121
#include <sstream>
2222
#include <stdexcept>
23+
#include <regex>
2324

2425
#include "smithlab_utils.hpp"
2526

2627
using std::string;
2728
using std::runtime_error;
29+
using std::regex_match;
2830

2931
MSite::MSite(const string &line) {
3032
/* GS: this is faster but seems to be generating issues when
@@ -152,3 +154,45 @@ find_offset_for_msite(const std::string &chr,
152154
move_to_start_of_line(site_in);
153155
}
154156
}
157+
158+
159+
160+
/* Decide whether `file` looks like a methylation-site file by
 * inspecting only its first line.
 *
 * The first line must consist of exactly six whitespace-separated
 * fields, in this order:
 *
 *   chrom  pos  strand  context  level  n_reads
 *
 * where `pos` and `n_reads` are non-negative integers, `strand` is
 * "+" or "-", `context` matches C[pHWX][GH], and `level` lies in the
 * closed interval [0, 1].
 *
 * Parameters:
 *   file  path of the file to inspect
 *
 * Returns true when the first line has the layout above and false
 * otherwise (including when the file is empty).
 *
 * Throws std::runtime_error when the file cannot be opened.
 */
bool
is_msite_file(const std::string &file) {
  // Standard-library names are qualified explicitly so this function
  // does not depend on which using-declarations happen to be in scope.
  std::ifstream in(file);
  if (!in)
    throw std::runtime_error("cannot open file: " + file);

  std::string line;
  if (!std::getline(in, line)) return false;

  std::istringstream iss(line);

  // Any non-empty token is accepted as a chromosome name.
  std::string chrom;
  if (!(iss >> chrom)) return false;

  // A negative coordinate cannot be a site position.
  long int pos = 0;
  if (!(iss >> pos) || pos < 0) return false;

  std::string strand;
  if (!(iss >> strand) || (strand != "+" && strand != "-")) return false;

  // Compiled once: constructing a std::regex is costly and the
  // pattern never changes.
  static const std::regex context_pattern("^C[pHWX][GH]$");
  std::string context;
  if (!(iss >> context) || !std::regex_match(context, context_pattern))
    return false;

  double level = 0.0;
  if (!(iss >> level) || level < 0.0 || level > 1.0) return false;

  // A read count cannot be negative.
  long int n_reads = 0;
  if (!(iss >> n_reads) || n_reads < 0) return false;

  // Anything left over -- a seventh column, or trailing characters of
  // a partially parsed number such as "10abc" -- disqualifies the line.
  std::string extra;
  return !(iss >> extra);
}
198+

src/common/MSite.hpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,4 +138,9 @@ find_offset_for_msite(const std::string &chrom,
138138
const size_t start_pos,
139139
std::ifstream &site_in);
140140

141+
142+
// Checks whether the first line of `file` looks like a methylation site
// record: chrom, position, strand ("+" or "-"), context (matching
// C[pHWX][GH]), level within [0, 1], and read count, with no extra
// fields. Only the first line is read. Throws std::runtime_error when
// the file cannot be opened.
bool
143+
is_msite_file(const std::string &file);
144+
145+
141146
#endif

0 commit comments

Comments
 (0)