Skip to content

Commit c00622c

Browse files
multiple sources: replacing GenomicRegion and SimpleGenomicRegion with Interval6 and Interval
1 parent 8480e46 commit c00622c

20 files changed

+2143
-2281
lines changed

src/amrfinder/allelicmeth.cpp

Lines changed: 86 additions & 92 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,4 @@
1-
/* allelicmeth:
2-
*
3-
* Copyright (C) 2014-2022 University of Southern California,
1+
/* Copyright (C) 2014-2022 University of Southern California,
42
* Andrew D. Smith, and Benjamin E. Decato
53
*
64
* Authors: Andrew D. Smith and Benjamin E. Decato
@@ -19,7 +17,6 @@
1917
#include "Epiread.hpp"
2018
#include "MSite.hpp"
2119

22-
#include "GenomicRegion.hpp"
2320
#include "OptionParser.hpp"
2421
#include "smithlab_os.hpp"
2522
#include "smithlab_utils.hpp"
@@ -42,27 +39,16 @@
4239
#include <utility>
4340
#include <vector>
4441

45-
using std::cerr;
46-
using std::cout;
47-
using std::max;
48-
using std::min;
49-
using std::runtime_error;
50-
using std::string;
51-
using std::unordered_map;
52-
using std::unordered_set;
53-
using std::vector;
54-
55-
// NOLINTBEGIN(*-avoid-magic-numbers,*-narrowing-conversions,*-prefer-member-initializer)
42+
// NOLINTBEGIN(*-avoid-magic-numbers,*-narrowing-conversions)
5643

5744
static inline double
5845
log_sum_log(const double p, const double q) {
59-
if (p == 0) {
46+
if (p == 0)
6047
return q;
61-
}
62-
else if (q == 0) {
48+
if (q == 0)
6349
return p;
64-
}
65-
return p > q ? p + log1p(exp(q - p)) : q + log1p(exp(p - q));
50+
return p > q ? p + std::log1p(std::exp(q - p))
51+
: q + std::log1p(std::exp(p - q));
6652
}
6753

6854
static inline double
@@ -77,28 +63,29 @@ lnchoose(const unsigned int n, unsigned int m) {
7763

7864
// p(k) = C(n1, k) C(n2, t - k) / C(n1 + n2, t)
7965
static double
80-
log_hyper_g(const size_t k, const size_t n1, const size_t n2, const size_t t) {
66+
log_hyper_g(const std::size_t k, const std::size_t n1, const std::size_t n2,
67+
const std::size_t t) {
8168
return lnchoose(n1, k) + lnchoose(n2, t - k) - lnchoose(n1 + n2, t);
8269
}
8370

8471
static double
85-
fishers_exact(size_t a, size_t b, size_t c, size_t d) {
86-
const size_t m = a + c; // sum of first column
87-
const size_t n = b + d; // sum of second column
88-
const size_t k = a + b; // sum of first row
72+
fishers_exact(std::size_t a, std::size_t b, std::size_t c, std::size_t d) {
73+
const std::size_t m = a + c; // sum of first column
74+
const std::size_t n = b + d; // sum of second column
75+
const std::size_t k = a + b; // sum of first row
8976
// ADS: want more extreme than "observed"
9077
const double observed = log_hyper_g(a, m, n, k);
9178
double p = 0.0;
92-
for (size_t i = (n > k ? 0ul : k - n); i <= std::min(k, m); ++i) {
79+
for (std::size_t i = (n > k ? 0ul : k - n); i <= std::min(k, m); ++i) {
9380
const double curr = log_hyper_g(i, m, n, k);
9481
if (curr <= observed)
9582
p = log_sum_log(p, curr);
9683
}
97-
return exp(p);
84+
return std::exp(p);
9885
}
9986

100-
static size_t
101-
state_pair_to_index(const string &s, const size_t idx) {
87+
static std::size_t
88+
state_pair_to_index(const std::string &s, const std::size_t idx) {
10289
assert(idx < s.length() - 1);
10390
const char a = s[idx];
10491
if (a == 'C') {
@@ -136,13 +123,13 @@ template <class T> struct PairStateCounter {
136123
return CC + CT + TC + TT;
137124
}
138125

139-
string
126+
std::string
140127
tostring() const {
141128
return toa(CC) + '\t' + toa(CT) + '\t' + toa(TC) + '\t' + toa(TT);
142129
}
143130

144131
void
145-
increment(const size_t state) {
132+
increment(const std::size_t state) {
146133
if (state == 0)
147134
++CC;
148135
else if (state == 1)
@@ -156,19 +143,20 @@ template <class T> struct PairStateCounter {
156143

157144
template <typename T>
158145
void
159-
fit_states(const epiread &er, vector<PairStateCounter<T>> &counts) {
160-
for (size_t i = 0; i < er.length() - 1; ++i) {
161-
const size_t pos = er.pos + i;
146+
fit_states(const epiread &er, std::vector<PairStateCounter<T>> &counts) {
147+
for (std::size_t i = 0; i < er.length() - 1; ++i) {
148+
const std::size_t pos = er.pos + i;
162149
assert(pos < counts.size());
163-
const size_t curr_state = state_pair_to_index(er.seq, i);
150+
const std::size_t curr_state = state_pair_to_index(er.seq, i);
164151
counts[pos].increment(curr_state);
165152
}
166153
}
167154

168155
static void
169-
collect_cpgs(const string &s, unordered_map<size_t, size_t> &cpgs) {
170-
size_t cpg_idx = 0;
171-
size_t nuc_idx = 0;
156+
collect_cpgs(const std::string &s,
157+
std::unordered_map<std::size_t, std::size_t> &cpgs) {
158+
std::size_t cpg_idx = 0;
159+
std::size_t nuc_idx = 0;
172160
const auto lim = end(s) - (s.size() == 0 ? 0 : 1);
173161
for (auto itr = begin(s); itr != lim; ++itr, ++nuc_idx)
174162
if (*itr == 'C' && *(itr + 1) == 'G')
@@ -179,53 +167,58 @@ collect_cpgs(const string &s, unordered_map<size_t, size_t> &cpgs) {
179167
consecutive CpG sites. So there would be one fewer of them than the
180168
total CpGs in a chromosome. */
181169
static void
182-
convert_coordinates(const string &chrom, vector<MSite> &sites) {
183-
unordered_map<size_t, size_t> cpgs;
170+
convert_coordinates(const std::string &chrom, std::vector<MSite> &sites) {
171+
std::unordered_map<std::size_t, std::size_t> cpgs;
184172
collect_cpgs(chrom, cpgs);
185-
for (size_t i = 0; i < sites.size(); ++i) {
173+
for (std::size_t i = 0; i < sites.size(); ++i) {
186174
const auto pos_itr = cpgs.find(sites[i].pos);
187175
if (pos_itr == end(cpgs))
188-
throw runtime_error("failed converting site:\n" + sites[i].tostring());
176+
throw std::runtime_error("failed converting site:\n" +
177+
sites[i].tostring());
189178
sites[i].pos = pos_itr->second;
190179
}
191180
}
192181

193182
template <typename T>
194183
void
195-
add_cytosine(const string &chrom_name, const size_t start_cpg,
196-
vector<PairStateCounter<T>> &counts, vector<MSite> &cytosines) {
184+
add_cytosine(const std::string &chrom_name, const std::size_t start_cpg,
185+
std::vector<PairStateCounter<T>> &counts,
186+
std::vector<MSite> &cytosines) {
197187
std::ostringstream s;
198188
s << counts[start_cpg].score() << "\t" << counts[start_cpg].total() << "\t"
199189
<< counts[start_cpg].tostring();
200-
const string name(s.str());
190+
const std::string name(s.str());
201191
cytosines.push_back(MSite(chrom_name, start_cpg, '+', name, 0, 0));
202192
}
203193

204194
template <typename T>
205195
void
206-
process_chrom(const string &chrom_name, const vector<epiread> &epireads,
207-
vector<MSite> &cytosines, vector<PairStateCounter<T>> &counts) {
208-
for (size_t i = 0; i < epireads.size(); ++i)
196+
process_chrom(const std::string &chrom_name,
197+
const std::vector<epiread> &epireads,
198+
std::vector<MSite> &cytosines,
199+
std::vector<PairStateCounter<T>> &counts) {
200+
for (std::size_t i = 0; i < epireads.size(); ++i)
209201
fit_states(epireads[i], counts);
210-
for (size_t i = 0; i < counts.size(); ++i)
202+
for (std::size_t i = 0; i < counts.size(); ++i)
211203
add_cytosine(chrom_name, i, counts, cytosines);
212204
}
213205

214206
static void
215-
update_chroms_seen(const string &chrom_name,
216-
unordered_set<string> &chroms_seen) {
207+
update_chroms_seen(const std::string &chrom_name,
208+
std::unordered_set<std::string> &chroms_seen) {
217209
const auto chr_itr = chroms_seen.find(chrom_name);
218210
if (chr_itr != end(chroms_seen))
219-
throw runtime_error("chroms out of order: " + chrom_name);
211+
throw std::runtime_error("chroms out of order: " + chrom_name);
220212
chroms_seen.insert(chrom_name);
221213
}
222214

223215
static void
224-
verify_chroms_available(const string &chrom_name,
225-
unordered_map<string, size_t> &chrom_lookup) {
216+
verify_chroms_available(
217+
const std::string &chrom_name,
218+
std::unordered_map<std::string, std::size_t> &chrom_lookup) {
226219
const auto chr_itr = chrom_lookup.find(chrom_name);
227220
if (chr_itr == end(chrom_lookup))
228-
throw runtime_error("chrom not found: " + chrom_name);
221+
throw std::runtime_error("chrom not found: " + chrom_name);
229222
}
230223

231224
int
@@ -236,8 +229,8 @@ computes probability of allele-specific methylation at each tuple of CpGs
236229
)";
237230
bool VERBOSE = false;
238231

239-
string outfile;
240-
string chroms_dir;
232+
std::string outfile;
233+
std::string chroms_dir;
241234
/****************** COMMAND LINE OPTIONS ********************/
242235
OptionParser opt_parse(argv[0], // NOLINT(*-pointer-arithmetic)
243236
description, "<epireads>");
@@ -246,79 +239,79 @@ computes probability of allele-specific methylation at each tuple of CpGs
246239
opt_parse.add_opt("chrom", 'c', "genome sequence file/directory", true,
247240
chroms_dir);
248241
opt_parse.add_opt("verbose", 'v', "print more run info", false, VERBOSE);
249-
vector<string> leftover_args;
242+
std::vector<std::string> leftover_args;
250243
opt_parse.parse(argc, argv, leftover_args);
251244
if (argc == 1 || opt_parse.help_requested()) {
252-
cerr << opt_parse.help_message() << '\n'
253-
<< opt_parse.about_message() << '\n';
245+
std::cerr << opt_parse.help_message() << '\n'
246+
<< opt_parse.about_message() << '\n';
254247
return EXIT_SUCCESS;
255248
}
256249
if (opt_parse.about_requested()) {
257-
cerr << opt_parse.about_message() << '\n';
250+
std::cerr << opt_parse.about_message() << '\n';
258251
return EXIT_SUCCESS;
259252
}
260253
if (opt_parse.option_missing()) {
261-
cerr << opt_parse.option_missing_message() << '\n';
254+
std::cerr << opt_parse.option_missing_message() << '\n';
262255
return EXIT_SUCCESS;
263256
}
264257
if (leftover_args.size() != 1) {
265-
cerr << opt_parse.help_message() << '\n';
258+
std::cerr << opt_parse.help_message() << '\n';
266259
return EXIT_SUCCESS;
267260
}
268-
const string epi_file(leftover_args.front());
261+
const std::string epi_file(leftover_args.front());
269262
/****************** END COMMAND LINE OPTIONS *****************/
270263

271-
vector<string> chrom_names;
272-
vector<string> chroms;
264+
std::vector<std::string> chrom_names;
265+
std::vector<std::string> chroms;
273266
read_fasta_file_short_names(chroms_dir, chrom_names, chroms);
274267
for (auto &&i : chroms)
275268
transform(begin(i), end(i), begin(i),
276269
[](const char c) { return std::toupper(c); });
277270

278271
// lookup to map chrom names to chrom sequences
279-
unordered_map<string, size_t> chrom_lookup;
280-
for (size_t i = 0; i < chrom_names.size(); ++i)
272+
std::unordered_map<std::string, std::size_t> chrom_lookup;
273+
for (std::size_t i = 0; i < chrom_names.size(); ++i)
281274
chrom_lookup.insert(make_pair(chrom_names[i], i));
282275

283-
unordered_map<string, size_t> chrom_sizes;
284-
for (size_t i = 0; i < chrom_names.size(); ++i) {
285-
size_t cpg_count = 0;
286-
for (size_t j = 0; j < chroms[i].size() - 1; ++j)
276+
std::unordered_map<std::string, std::size_t> chrom_sizes;
277+
for (std::size_t i = 0; i < chrom_names.size(); ++i) {
278+
std::size_t cpg_count = 0;
279+
for (std::size_t j = 0; j < chroms[i].size() - 1; ++j)
287280
cpg_count += (chroms[i][j] == 'C' && chroms[i][j + 1] == 'G');
288281
chrom_sizes.insert(make_pair(chrom_names[i], cpg_count));
289282
}
290283

291284
if (VERBOSE)
292-
cerr << "number of chromosomes: " << chrom_sizes.size() << '\n';
285+
std::cerr << "number of chromosomes: " << chrom_sizes.size() << '\n';
293286

294287
std::ifstream in(epi_file);
295288
if (!in)
296-
throw runtime_error("cannot open input file: " + epi_file);
289+
throw std::runtime_error("cannot open input file: " + epi_file);
297290

298-
std::ofstream of;
299-
if (!outfile.empty())
300-
of.open(outfile);
301-
std::ostream out(outfile.empty() ? cout.rdbuf() : of.rdbuf());
291+
std::ofstream out(outfile);
292+
if (!out)
293+
throw std::runtime_error("failed to open output file: " + outfile);
302294

303-
unordered_set<string> chroms_seen;
304-
string chrom;
295+
std::unordered_set<std::string> chroms_seen;
296+
std::string chrom;
305297
epiread er;
306-
vector<epiread> epireads;
298+
std::vector<epiread> epireads;
307299
while (in >> er) {
308300
if (er.chr != chrom) {
309301
update_chroms_seen(er.chr, chroms_seen);
310302
verify_chroms_available(er.chr, chrom_lookup);
311303

312304
if (VERBOSE)
313-
cerr << "[processing " << er.chr << "]" << '\n';
305+
std::cerr << "[processing " << er.chr << "]" << '\n';
314306

315307
if (!chrom.empty()) {
316-
vector<PairStateCounter<uint32_t>> counts(chrom_sizes[chrom] - 1);
317-
vector<MSite> cytosines;
308+
std::vector<PairStateCounter<std::uint32_t>> counts(
309+
chrom_sizes[chrom] - 1);
310+
std::vector<MSite> cytosines;
318311
process_chrom(chrom, epireads, cytosines, counts);
319-
const size_t chrom_idx = chrom_lookup[chrom];
312+
const std::size_t chrom_idx = chrom_lookup[chrom];
320313
convert_coordinates(chroms[chrom_idx], cytosines);
321-
for (size_t i = 0; i < cytosines.size() - 1; ++i) {
314+
for (std::size_t i = 0; i < cytosines.size() - 1; ++i) {
322315
out << cytosines[i].chrom << "\t" << cytosines[i].pos
323316
<< "\t+\tCpG\t" << cytosines[i].context << '\n';
324317
}
@@ -329,22 +322,23 @@ computes probability of allele-specific methylation at each tuple of CpGs
329322
chrom.swap(er.chr);
330323
}
331324
if (!chrom.empty()) {
332-
vector<PairStateCounter<uint32_t>> counts(chrom_sizes[chrom] - 1);
333-
vector<MSite> cytosines;
325+
std::vector<PairStateCounter<std::uint32_t>> counts(chrom_sizes[chrom] -
326+
1);
327+
std::vector<MSite> cytosines;
334328
process_chrom(chrom, epireads, cytosines, counts);
335-
const size_t chrom_idx = chrom_lookup[chrom];
329+
const std::size_t chrom_idx = chrom_lookup[chrom];
336330
convert_coordinates(chroms[chrom_idx], cytosines);
337-
for (size_t i = 0; i < cytosines.size() - 1; ++i) {
331+
for (std::size_t i = 0; i < cytosines.size() - 1; ++i) {
338332
out << cytosines[i].chrom << "\t" << cytosines[i].pos << "\t+\tCpG\t"
339333
<< cytosines[i].context << '\n';
340334
}
341335
}
342336
}
343337
catch (const std::exception &e) {
344-
cerr << e.what() << '\n';
338+
std::cerr << e.what() << '\n';
345339
return EXIT_FAILURE;
346340
}
347341
return EXIT_SUCCESS;
348342
}
349343

350-
// NOLINTEND(*-avoid-magic-numbers,*-narrowing-conversions,*-prefer-member-initializer)
344+
// NOLINTEND(*-avoid-magic-numbers,*-narrowing-conversions)

0 commit comments

Comments
 (0)