Skip to content

Commit 0e4b9e3

Browse files
Merge pull request #95 from smithlabcode/roi-verify-input
Roi verify input
2 parents d66bc56 + a77dfc9 commit 0e4b9e3

File tree

3 files changed

+64
-0
lines changed

3 files changed

+64
-0
lines changed

src/analysis/roimethstat.cpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#include <numeric>
2525
#include <utility>
2626
#include <stdexcept>
27+
#include <regex>
2728

2829
#include "OptionParser.hpp"
2930
#include "smithlab_utils.hpp"
@@ -47,6 +48,7 @@ using std::runtime_error;
4748
using std::ifstream;
4849
using std::isfinite;
4950
using std::is_sorted;
51+
using std::regex_match;
5052

5153
static pair<bool, bool>
5254
meth_unmeth_calls(const size_t n_meth, const size_t n_unmeth) {
@@ -474,6 +476,16 @@ Columns (beyond the first 6) in the BED format output:
474476
// bed format
475477
if (n_columns != 3 && n_columns < 6)
476478
throw runtime_error("format must be 3 or 6+ column bed: " + regions_file);
479+
if (is_msite_file(regions_file)) {
480+
cerr << opt_parse.help_message() << endl;
481+
throw runtime_error("The file seems to be a methylation file: " +
482+
regions_file + "\nCheck the order of the input arguments");
483+
}
484+
if (!is_msite_file(cpgs_file)) {
485+
cerr << opt_parse.help_message() << endl;
486+
throw runtime_error("The file is not a methylation file: " + cpgs_file);
487+
}
488+
477489

478490
vector<GenomicRegion> regions;
479491
ReadBEDFile(regions_file, regions);
@@ -519,3 +531,6 @@ Columns (beyond the first 6) in the BED format output:
519531
}
520532
return EXIT_SUCCESS;
521533
}
534+
535+
536+

src/common/MSite.cpp

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,11 +20,13 @@
2020
#include <fstream>
2121
#include <sstream>
2222
#include <stdexcept>
23+
#include <regex>
2324

2425
#include "smithlab_utils.hpp"
2526

2627
using std::string;
2728
using std::runtime_error;
29+
using std::regex_match;
2830

2931
MSite::MSite(const string &line) {
3032
/* GS: this is faster but seems to be generating issues when
@@ -152,3 +154,45 @@ find_offset_for_msite(const std::string &chr,
152154
move_to_start_of_line(site_in);
153155
}
154156
}
157+
158+
159+
160+
// Heuristically decides whether `file` is a methylation-site file by
// inspecting its first line only. The line must consist of exactly six
// whitespace-separated fields, in this order:
//
//   chrom  position  strand  context  level  n_reads
//
// where position and n_reads are non-negative integers, strand is "+"
// or "-", context matches C[pHWX][GH], and level lies in [0, 1].
//
// Returns true if the first line satisfies all of the above; false if
// the file is empty or the first line does not fit that layout.
// Throws std::runtime_error if the file cannot be opened.
bool
is_msite_file(const std::string &file) {
  std::ifstream in(file);
  if (!in)
    throw std::runtime_error("cannot open file: " + file);

  std::string line;
  if (!std::getline(in, line))
    return false;  // nothing to inspect

  std::istringstream iss(line);

  std::string chrom;
  if (!(iss >> chrom))
    return false;

  // a negative coordinate cannot be a valid site position
  long int pos = 0;
  if (!(iss >> pos) || pos < 0)
    return false;

  std::string strand;
  if (!(iss >> strand) || (strand != "+" && strand != "-"))
    return false;

  // compiled once; the pattern never changes between calls
  static const std::regex pattern("^C[pHWX][GH]$");
  std::string context;
  if (!(iss >> context) || !std::regex_match(context, pattern))
    return false;

  double level = 0.0;
  if (!(iss >> level) || level < 0 || level > 1)
    return false;

  // a read count is a count: negative values are rejected
  long int n_reads = 0;
  if (!(iss >> n_reads) || n_reads < 0)
    return false;

  // any seventh field means the line is not in the six-column layout
  std::string extra;
  return !(iss >> extra);
}
198+

src/common/MSite.hpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,4 +138,9 @@ find_offset_for_msite(const std::string &chrom,
138138
const size_t start_pos,
139139
std::ifstream &site_in);
140140

141+
142+
// Checks whether the first line of `file` has the layout of a
// methylation-site record: chrom, position, strand ("+" or "-"),
// cytosine context, methylation level in [0, 1] and read count, with
// no further fields. Only the first line is examined. Throws
// std::runtime_error if the file cannot be opened.
bool
143+
is_msite_file(const std::string &file);
144+
145+
141146
#endif

0 commit comments

Comments
 (0)