Skip to content

Commit 58b53a0

Browse files
Merge pull request #122 from smithlabcode/sym-check-sorted
sym: checking that the input is sorted
2 parents 025939e + e33654f commit 58b53a0

File tree

1 file changed

+43
-19
lines changed

1 file changed

+43
-19
lines changed

src/utils/symmetric-cpgs.cpp

Lines changed: 43 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,8 @@
2222
#include <iostream>
2323
#include <fstream>
2424
#include <stdexcept>
25-
25+
#include <unordered_set>
26+
#include <filesystem>
2627
#include <bamxx.hpp>
2728

2829
// from smithlab_cpp
@@ -36,56 +37,70 @@ using std::string;
3637
using std::cout;
3738
using std::cerr;
3839
using std::endl;
40+
using std::unordered_set;
3941

4042
using bamxx::bgzf_file;
4143

42-
inline bool
44+
static inline bool
4345
found_symmetric(const MSite &prev_cpg, const MSite &curr_cpg) {
4446
// assumes check for CpG already done
45-
return (prev_cpg.strand == '+' &&
46-
curr_cpg.strand == '-' &&
47+
return (prev_cpg.strand == '+' && curr_cpg.strand == '-' &&
4748
prev_cpg.pos + 1 == curr_cpg.pos);
4849
}
4950

50-
template <class T>
51-
static void
51+
template<class T> static bool
5252
process_sites(bgzf_file &in, T &out) {
53-
5453
MSite prev_site, curr_site;
5554
bool prev_is_cpg = false;
5655
if (read_site(in, prev_site))
57-
if (prev_site.is_cpg())
58-
prev_is_cpg = true;
56+
if (prev_site.is_cpg()) prev_is_cpg = true;
57+
58+
unordered_set<string> chroms_seen;
59+
bool sites_are_sorted = true;
5960

6061
while (read_site(in, curr_site)) {
6162
if (curr_site.is_cpg()) {
63+
if (curr_site.chrom != prev_site.chrom) {
64+
const auto chrom_itr = chroms_seen.find(curr_site.chrom);
65+
if (chrom_itr != cend(chroms_seen)) {
66+
sites_are_sorted = false;
67+
break;
68+
}
69+
else
70+
chroms_seen.insert(curr_site.chrom);
71+
}
72+
else if (curr_site.pos <= prev_site.pos) {
73+
sites_are_sorted = false;
74+
break;
75+
}
6276
if (prev_is_cpg && found_symmetric(prev_site, curr_site)) {
6377
prev_site.add(curr_site);
6478
write_site(out, prev_site);
6579
}
6680
prev_is_cpg = true;
6781
}
68-
else prev_is_cpg = false;
82+
else
83+
prev_is_cpg = false;
6984
std::swap(prev_site, curr_site);
7085
}
86+
87+
return sites_are_sorted;
7188
}
7289

7390
int
7491
main_symmetric_cpgs(int argc, const char **argv) {
75-
7692
try {
77-
7893
string outfile;
7994
// (not used) bool VERBOSE = false;
8095

8196
const string description =
8297
"Get CpG sites and make methylation levels symmetric.";
8398

8499
/****************** COMMAND LINE OPTIONS ********************/
85-
OptionParser opt_parse(strip_path(argv[0]),
86-
description, "<methcounts-file>");
87-
opt_parse.add_opt("output", 'o', "output file (default: stdout)",
88-
false, outfile);
100+
OptionParser opt_parse(strip_path(argv[0]), description,
101+
"<methcounts-file>");
102+
opt_parse.add_opt("output", 'o', "output file (default: stdout)", false,
103+
outfile);
89104
// opt_parse.add_opt("verbose", 'v', "print more run info", false, VERBOSE);
90105
std::vector<string> leftover_args;
91106
opt_parse.parse(argc, argv, leftover_args);
@@ -112,18 +127,27 @@ main_symmetric_cpgs(int argc, const char **argv) {
112127
bgzf_file in(filename, "r");
113128
if (!in) throw std::runtime_error("could not open file: " + filename);
114129

130+
bool sites_are_sorted = true;
115131
if (outfile.empty() || !has_gz_ext(outfile)) {
116132
std::ofstream of;
117133
if (!outfile.empty()) of.open(outfile.c_str());
118134
std::ostream out(outfile.empty() ? std::cout.rdbuf() : of.rdbuf());
119-
process_sites(in, out);
135+
sites_are_sorted = process_sites(in, out);
120136
}
121137
else {
122138
bgzf_file out(outfile, "w");
123-
process_sites(in, out);
139+
sites_are_sorted = process_sites(in, out);
140+
}
141+
142+
if (!sites_are_sorted) {
143+
cerr << "sites are not sorted in: " << filename << endl;
144+
namespace fs = std::filesystem;
145+
const fs::path outpath{outfile};
146+
if (fs::exists(outpath)) fs::remove(outpath);
147+
return EXIT_FAILURE;
124148
}
125149
}
126-
catch (const std::runtime_error &e) {
150+
catch (const std::runtime_error &e) {
127151
cerr << e.what() << endl;
128152
return EXIT_FAILURE;
129153
}

0 commit comments

Comments
 (0)