File tree Expand file tree Collapse file tree 3 files changed +64
-0
lines changed
Expand file tree Collapse file tree 3 files changed +64
-0
lines changed Original file line number Diff line number Diff line change 2424#include < numeric>
2525#include < utility>
2626#include < stdexcept>
27+ #include < regex>
2728
2829#include " OptionParser.hpp"
2930#include " smithlab_utils.hpp"
@@ -47,6 +48,7 @@ using std::runtime_error;
4748using std::ifstream;
4849using std::isfinite;
4950using std::is_sorted;
51+ using std::regex_match;
5052
5153static pair<bool , bool >
5254meth_unmeth_calls (const size_t n_meth, const size_t n_unmeth) {
@@ -474,6 +476,16 @@ Columns (beyond the first 6) in the BED format output:
474476 // bed format
475477 if (n_columns != 3 && n_columns < 6 )
476478 throw runtime_error (" format must be 3 or 6+ column bed: " + regions_file);
479+ if (is_msite_file (regions_file)) {
480+ cerr << opt_parse.help_message () << endl;
481+ throw runtime_error (" The file seems to be a methylation file: " +
482+ regions_file + " \n Check the order of the input arguments" );
483+ }
484+ if (!is_msite_file (cpgs_file)) {
485+ cerr << opt_parse.help_message () << endl;
486+ throw runtime_error (" The file is not a methylation file: " + cpgs_file);
487+ }
488+
477489
478490 vector<GenomicRegion> regions;
479491 ReadBEDFile (regions_file, regions);
@@ -519,3 +531,6 @@ Columns (beyond the first 6) in the BED format output:
519531 }
520532 return EXIT_SUCCESS;
521533}
534+
535+
536+
Original file line number Diff line number Diff line change 2020#include < fstream>
2121#include < sstream>
2222#include < stdexcept>
23+ #include < regex>
2324
2425#include " smithlab_utils.hpp"
2526
2627using std::string;
2728using std::runtime_error;
29+ using std::regex_match;
2830
2931MSite::MSite (const string &line) {
3032 /* GS: this is faster but seems to be genenerating issues when
@@ -152,3 +154,45 @@ find_offset_for_msite(const std::string &chr,
152154 move_to_start_of_line (site_in);
153155 }
154156}
157+
158+
159+
/* Report whether `file` looks like a methylation-sites file, judging
 * only by its first line.
 *
 * The first line must consist of exactly six whitespace-separated
 * fields, in this order:
 *   1. chromosome name (any token)
 *   2. position        (non-negative integer)
 *   3. strand          ("+" or "-")
 *   4. context         (three characters matching C[pHWX][GH])
 *   5. level           (real number in [0, 1])
 *   6. read count      (non-negative integer)
 *
 * Returns true when the first line has that shape; false otherwise,
 * including when the file is empty.
 *
 * Throws std::runtime_error if the file cannot be opened.
 */
bool
is_msite_file(const std::string &file) {
  std::ifstream in(file);
  if (!in)
    throw std::runtime_error("cannot open file: " + file);

  std::string line;
  if (!std::getline(in, line)) return false;

  std::istringstream iss(line);

  std::string chrom;
  if (!(iss >> chrom)) return false;

  // Parsed as signed so that a leading '-' is seen and rejected rather
  // than silently wrapped into a huge unsigned value.
  long int pos = 0;
  if (!(iss >> pos) || pos < 0) return false;

  std::string strand;
  if (!(iss >> strand) || (strand != "+" && strand != "-")) return false;

  // NOTE(review): this pattern accepts exactly three characters, so a
  // context token carrying any extra suffix is rejected -- confirm that
  // this is intended for every producer of these files.
  static const std::regex context_pattern("^C[pHWX][GH]$");
  std::string context;
  if (!(iss >> context) || !std::regex_match(context, context_pattern))
    return false;

  double level = 0.0;
  if (!(iss >> level) || level < 0 || level > 1) return false;

  long int n_reads = 0;
  if (!(iss >> n_reads) || n_reads < 0) return false;

  // Any seventh token means this is not a six-column record.
  std::string extra;
  return !(iss >> extra);
}
198+
Original file line number Diff line number Diff line change @@ -138,4 +138,9 @@ find_offset_for_msite(const std::string &chrom,
138138 const size_t start_pos,
139139 std::ifstream &site_in);
140140
141+
// Checks whether the first line of the named file parses as a
// methylation-site record: chromosome, position, strand ("+" or "-"),
// context, level in [0, 1] and read count, with no further columns.
// Only the first line is examined. Returns false when it does not have
// that shape or the file is empty; throws std::runtime_error when the
// file cannot be opened.
142+ bool
143+ is_msite_file (const std::string &file);
144+
145+
141146#endif
You can’t perform that action at this time.
0 commit comments