2525#include < algorithm>
2626#include < numeric>
2727#include < unordered_map>
28-
28+ # include < filesystem >
2929#include < bamxx.hpp>
3030
3131#include " OptionParser.hpp"
@@ -46,43 +46,37 @@ using std::unordered_map;
4646
4747using bamxx::bgzf_file;
4848
49+ namespace fs = std::filesystem;
50+
4951static void
5052collapsebed (vector<GenomicRegion> ®ions) {
5153 size_t j = 0 ;
5254 for (size_t i = 1 ; i < regions.size (); ++i) {
5355 if (regions[j].same_chrom (regions[i]) &&
5456 regions[i].get_start () <= regions[j].get_end ()) {
55- regions[j].set_end (std::max (regions[j].get_end (),
56- regions[i].get_end ()));
57- }
58- else {
59- regions[++j] = regions[i];
57+ regions[j].set_end (std::max (regions[j].get_end (), regions[i].get_end ()));
6058 }
59+ else { regions[++j] = regions[i]; }
6160 }
6261 regions.erase (begin (regions) + j + 1 , end (regions));
6362}
6463
65- static bool
64+ static inline bool
6665precedes (const GenomicRegion &r, const MSite &s) {
6766 return (r.get_chrom () < s.chrom ||
6867 (r.get_chrom () == s.chrom && r.get_end () <= s.pos ));
6968}
7069
71-
72- static bool
70+ static inline bool
7371contains (const GenomicRegion &r, const MSite &s) {
7472 return (r.get_chrom () == s.chrom &&
7573 (r.get_start () <= s.pos && s.pos < r.get_end ()));
7674}
7775
78-
79- template <class T >
80- static void
81- process_all_sites (const bool VERBOSE,
82- const string &sites_file,
76+ template <class T > static void
77+ process_all_sites (const bool VERBOSE, const string &sites_file,
8378 const unordered_map<string, vector<GenomicRegion>> ®ions,
8479 T &out) {
85-
8680 bgzf_file in (sites_file, " r" );
8781 if (!in) throw runtime_error (" cannot open file: " + sites_file);
8882
@@ -91,32 +85,28 @@ process_all_sites(const bool VERBOSE,
9185 bool chrom_is_relevant = false ;
9286 while (read_site (in, the_site)) {
9387 if (the_site.chrom != prev_site.chrom ) {
94- if (VERBOSE)
95- cerr << " processing " << the_site.chrom << endl;
96- auto r = regions.find (the_site.chrom );
97- chrom_is_relevant = (r != end (regions));
88+ if (VERBOSE) cerr << " processing " << the_site.chrom << endl;
89+ const auto r = regions.find (the_site.chrom );
90+ chrom_is_relevant = (r != cend (regions));
9891 if (chrom_is_relevant) {
99- i = begin (r->second );
100- i_lim = end (r->second );
92+ i = cbegin (r->second );
93+ i_lim = cend (r->second );
10194 }
10295 }
10396 if (chrom_is_relevant) {
10497 while (i != i_lim && precedes (*i, the_site))
10598 ++i;
106-
107- if (contains (*i, the_site))
99+ if (i != i_lim && contains (*i, the_site))
108100 write_site (out, the_site);
109101 }
110102 std::swap (prev_site, the_site);
111103 }
112104}
113105
114-
115106static void
116107get_sites_in_region (ifstream &site_in, const GenomicRegion ®ion,
117108 std::ostream &out) {
118-
119- string chrom (region.get_chrom ());
109+ const string chrom{region.get_chrom ()};
120110 const size_t start_pos = region.get_start ();
121111 const size_t end_pos = region.get_end ();
122112 find_offset_for_msite (chrom, start_pos, site_in);
@@ -127,11 +117,9 @@ get_sites_in_region(ifstream &site_in, const GenomicRegion ®ion,
127117 while (site_in >> the_site &&
128118 (the_site.chrom < chrom ||
129119 (the_site.chrom == chrom && the_site.pos < end_pos)))
130- if (start_pos <= the_site.pos )
131- out << the_site << endl;
120+ if (start_pos <= the_site.pos ) out << the_site << endl;
132121}
133122
134-
135123static void
136124process_with_sites_on_disk (const string &sites_file,
137125 vector<GenomicRegion> ®ions,
@@ -182,7 +170,7 @@ main_selectsites(int argc, const char **argv) {
182170 try {
183171
184172 bool VERBOSE = false ;
185- bool LOAD_ENTIRE_FILE = false ;
173+ bool load_entire_file = false ;
186174
187175 string outfile;
188176
@@ -198,7 +186,7 @@ main_selectsites(int argc, const char **argv) {
198186 false , outfile);
199187 opt_parse.add_opt (" preload" , ' p' ,
200188 " preload sites (use for large target intervals)" ,
201- false , LOAD_ENTIRE_FILE );
189+ false , load_entire_file );
202190 opt_parse.add_opt (" verbose" , ' v' , " print more run info" , false , VERBOSE);
203191 opt_parse.set_show_defaults ();
204192 vector<string> leftover_args;
@@ -224,11 +212,11 @@ main_selectsites(int argc, const char **argv) {
224212 const string sites_file = leftover_args.back ();
225213 /* ***************** END COMMAND LINE OPTIONS *****************/
226214
227- if (isdir (sites_file. c_str ()) || ! file_exists (sites_file))
215+ if (! fs::is_regular_file (sites_file))
228216 throw runtime_error (" bad input sites file: " + sites_file);
229217
230218 if (is_compressed_file (sites_file)) {
231- LOAD_ENTIRE_FILE = true ;
219+ load_entire_file = true ;
232220 if (VERBOSE)
233221 cerr << " input file is so must be loaded" << endl;
234222 }
@@ -245,7 +233,7 @@ main_selectsites(int argc, const char **argv) {
245233 << n_orig_regions - regions.size () << " ]" << endl;
246234
247235 unordered_map<string, vector<GenomicRegion>> regions_lookup;
248- if ((outfile.empty () || !has_gz_ext (outfile)) && LOAD_ENTIRE_FILE )
236+ if ((outfile.empty () || !has_gz_ext (outfile)) && load_entire_file )
249237 regions_by_chrom (regions, regions_lookup);
250238
251239 if (outfile.empty () || !has_gz_ext (outfile)) {
@@ -255,7 +243,7 @@ main_selectsites(int argc, const char **argv) {
255243 if (!outfile.empty () && !out)
256244 throw runtime_error (" failed to open output file: " + outfile);
257245
258- if (LOAD_ENTIRE_FILE )
246+ if (load_entire_file )
259247 process_all_sites (VERBOSE, sites_file, regions_lookup, out);
260248 else
261249 process_with_sites_on_disk (sites_file, regions, out);
0 commit comments