Skip to content

Commit ebb2c58

Browse files
src/analysis/nanopore.cpp: modernizing
1 parent a85574f commit ebb2c58

File tree

1 file changed

+60
-63
lines changed

1 file changed

+60
-63
lines changed

src/analysis/nanopore.cpp

Lines changed: 60 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,6 @@ for details.
5555

5656
// NOLINTBEGIN(*-narrowing-conversions)
5757

58-
// clang-format off
5958
static constexpr std::array<std::uint8_t, 96> encoding = {
6059
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, // 16
6160
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, // 32
@@ -64,22 +63,21 @@ static constexpr std::array<std::uint8_t, 96> encoding = {
6463
4, 0, 4, 1, 4, 4, 4, 2, 4, 4, 4, 4, 4, 4, 4, 4, // 80
6564
4, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, // 96
6665
};
67-
// clang-format on
6866

6967
static constexpr auto n_nucs = 4u;
7068

71-
[[nodiscard]] inline bool
72-
is_cytosine(const char c) {
69+
[[nodiscard]] inline auto
70+
is_cytosine(const char c) -> bool {
7371
return c == 'c' || c == 'C';
7472
}
7573

76-
[[nodiscard]] inline bool
77-
is_guanine(const char c) {
74+
[[nodiscard]] inline auto
75+
is_guanine(const char c) -> bool {
7876
return c == 'g' || c == 'G';
7977
}
8078

81-
[[nodiscard]] inline bool
82-
is_cpg(const std::string &s, const std::size_t i) {
79+
[[nodiscard]] inline auto
80+
is_cpg(const std::string &s, const std::size_t i) -> bool {
8381
return i + 1 < std::size(s) && is_cytosine(s[i]) && is_guanine(s[i + 1]);
8482
}
8583

@@ -108,8 +106,8 @@ read_fasta_file(const std::string &filename, std::vector<std::string> &names,
108106
}
109107
}
110108

111-
[[nodiscard]] static std::string
112-
get_basecall_model(const bamxx::bam_header &hdr) {
109+
[[nodiscard]] static auto
110+
get_basecall_model(const bamxx::bam_header &hdr) -> std::string {
113111
kstring_t ks{};
114112

115113
ks = {0, 0, nullptr};
@@ -144,15 +142,15 @@ get_basecall_model(const bamxx::bam_header &hdr) {
144142

145143
// ADS: here the std::uint16_t allows for up to 256 reads, each contributing
146144
// up to 255 "counts" (one std::uint8_t value) in the probability encoding.
147-
typedef std::uint16_t count_type;
145+
using count_type = std::uint16_t;
148146

149-
[[nodiscard]] static inline bool
150-
eats_ref(const std::uint32_t c) {
147+
[[nodiscard]] static inline auto
148+
eats_ref(const std::uint32_t c) -> bool {
151149
return bam_cigar_type(bam_cigar_op(c)) & 2;
152150
}
153151

154-
[[nodiscard]] static inline bool
155-
eats_query(const std::uint32_t c) {
152+
[[nodiscard]] static inline auto
153+
eats_query(const std::uint32_t c) -> bool {
156154
return bam_cigar_type(bam_cigar_op(c)) & 1;
157155
}
158156

@@ -161,20 +159,20 @@ eats_query(const std::uint32_t c) {
161159
considers strand, and the CHH is not symmetric, then one needs this. Also,
162160
Qiang should be consulted on this because he spent much time thinking about
163161
it in the context of plants. */
164-
[[nodiscard]] static inline bool
165-
is_chh(const std::string &s, const std::size_t i) {
162+
[[nodiscard]] static inline auto
163+
is_chh(const std::string &s, const std::size_t i) -> bool {
166164
return i + 2 < std::size(s) && is_cytosine(s[i]) && !is_guanine(s[i + 1]) &&
167165
!is_guanine(s[i + 2]);
168166
}
169167

170-
[[nodiscard]] static inline bool
171-
is_ddg(const std::string &s, const std::size_t i) {
168+
[[nodiscard]] static inline auto
169+
is_ddg(const std::string &s, const std::size_t i) -> bool {
172170
return i + 2 < std::size(s) && !is_cytosine(s[i]) && !is_cytosine(s[i + 1]) &&
173171
is_guanine(s[i + 2]);
174172
}
175173

176-
[[nodiscard]] static inline bool
177-
is_c_at_g(const std::string &s, const std::size_t i) {
174+
[[nodiscard]] static inline auto
175+
is_c_at_g(const std::string &s, const std::size_t i) -> bool {
178176
return i + 2 < std::size(s) && is_cytosine(s[i]) && !is_cytosine(s[i + 1]) &&
179177
!is_guanine(s[i + 1]) && is_guanine(s[i + 2]);
180178
}
@@ -198,20 +196,20 @@ struct CountSet {
198196
methyl_rev += static_cast<std::uint8_t>(m);
199197
++n_reads_rev;
200198
}
201-
[[nodiscard]] double
202-
get_hydroxy(const bool is_c) const {
199+
[[nodiscard]] auto
200+
get_hydroxy(const bool is_c) const -> double {
203201
return (is_c ? hydroxy_fwd : hydroxy_rev) / max_prob_repr;
204202
}
205-
[[nodiscard]] double
206-
get_methyl(const bool is_c) const {
203+
[[nodiscard]] auto
204+
get_methyl(const bool is_c) const -> double {
207205
return (is_c ? methyl_fwd : methyl_rev) / max_prob_repr;
208206
}
209-
[[nodiscard]] double
210-
get_mods(const bool is_c) const {
207+
[[nodiscard]] auto
208+
get_mods(const bool is_c) const -> double {
211209
return get_hydroxy(is_c) + get_methyl(is_c);
212210
}
213-
[[nodiscard]] double
214-
get_n_reads(const bool is_c) const {
211+
[[nodiscard]] auto
212+
get_n_reads(const bool is_c) const -> double {
215213
return is_c ? n_reads_fwd : n_reads_rev;
216214
}
217215
count_type hydroxy_fwd{0};
@@ -222,14 +220,14 @@ struct CountSet {
222220
count_type n_reads_rev{0};
223221
};
224222

225-
/* The "tag" returned by this function should be exclusive, so that
226-
* the order of checking conditions doesn't matter. There is also a
227-
* bit of a hack in that the unsigned "pos" could wrap, but this still
228-
* works as long as the chromosome size is not the maximum size of a
229-
* std::size_t.
223+
/* The "tag" returned by this function should be exclusive, so that the order
224+
* of checking conditions doesn't matter. There is also a bit of a hack in
225+
* that the unsigned "pos" could wrap, but this still works as long as the
226+
* chromosome size is not the maximum size of a std::size_t.
230227
*/
231-
[[nodiscard]] static std::uint32_t
232-
get_tag_from_genome(const std::string &s, const std::size_t pos) {
228+
[[nodiscard]] static auto
229+
get_tag_from_genome(const std::string &s,
230+
const std::size_t pos) -> std::uint32_t {
233231
if (is_cytosine(s[pos])) {
234232
if (is_cpg(s, pos))
235233
return 0;
@@ -281,8 +279,8 @@ struct mod_prob_buffer {
281279
hydroxy_probs.reserve(init_capacity);
282280
}
283281

284-
[[nodiscard]] bool
285-
set_probs(const bamxx::bam_rec &aln) {
282+
[[nodiscard]] auto
283+
set_probs(const bamxx::bam_rec &aln) -> bool {
286284
static constexpr auto h_idx = 0;
287285
static constexpr auto m_idx = 1;
288286
const auto qlen = get_l_qseq(aln);
@@ -323,8 +321,8 @@ struct mod_prob_buffer {
323321
static void
324322
count_states_fwd(const bamxx::bam_rec &aln, std::vector<CountSet> &counts,
325323
mod_prob_buffer &mod_buf, const std::string &chrom) {
326-
/* Move through cigar, reference and read positions without
327-
inflating cigar or read sequence */
324+
// Move through cigar, reference and read positions without inflating cigar
325+
// or read sequence
328326
const auto beg_cig = bam_get_cigar(aln);
329327
const auto end_cig =
330328
beg_cig + get_n_cigar(aln); // NOLINT(*-pointer-arithmetic)
@@ -363,10 +361,10 @@ count_states_fwd(const bamxx::bam_rec &aln, std::vector<CountSet> &counts,
363361
ref_itr += n;
364362
}
365363
}
366-
// ADS: somehow previous code included a correction for rpos going
367-
// past the end of the chromosome; this should result at least in a
368-
// soft-clip by any mapper. I'm not checking it here as even if it
369-
// happens I don't want to terminate.
364+
// ADS: somehow previous code included a correction for rpos going past the
365+
// end of the chromosome; this should result at least in a soft-clip by any
366+
// mapper. I'm not checking it here as even if it happens I don't want to
367+
// terminate.
370368
assert(qpos == get_l_qseq(aln));
371369
}
372370

@@ -421,16 +419,15 @@ count_states_rev(const bamxx::bam_rec &aln, std::vector<CountSet> &counts,
421419
assert(qpos == 0);
422420
}
423421

424-
[[nodiscard]] static std::tuple<std::map<std::int32_t, std::size_t>,
425-
std::set<std::int32_t>>
426-
get_tid_to_idx(
427-
const bamxx::bam_header &hdr,
428-
const std::unordered_map<std::string, std::size_t> &name_to_idx) {
422+
[[nodiscard]] static auto
423+
get_tid_to_idx(const bamxx::bam_header &hdr,
424+
const std::unordered_map<std::string, std::size_t> &name_to_idx)
425+
-> std::tuple<std::map<std::int32_t, std::size_t>, std::set<std::int32_t>> {
429426
std::set<std::int32_t> missing_tids;
430427
std::map<std::int32_t, std::size_t> tid_to_idx;
431428
for (std::int32_t i = 0; i < get_n_targets(hdr); ++i) {
432-
// "curr_name" gives a "tid_to_name" mapping allowing to jump
433-
// through "name_to_idx" and get "tid_to_idx"
429+
// "curr_name" gives a "tid_to_name" mapping, allowing us to jump through
430+
// "name_to_idx" and get "tid_to_idx"
434431
// NOLINTNEXTLINE(*-pointer-arithmetic)
435432
const std::string curr_name(hdr.h->target_name[i]);
436433
const auto name_itr(name_to_idx.find(curr_name));
@@ -443,11 +440,11 @@ get_tid_to_idx(
443440
std::set<std::int32_t>>{tid_to_idx, missing_tids};
444441
}
445442

446-
[[nodiscard]] static bool
443+
[[nodiscard]] static auto
447444
consistent_targets(const bamxx::bam_header &hdr,
448445
const std::map<std::int32_t, std::size_t> &tid_to_idx,
449446
const std::vector<std::string> &names,
450-
const std::vector<std::size_t> &sizes) {
447+
const std::vector<std::size_t> &sizes) -> bool {
451448
const std::size_t n_targets = get_n_targets(hdr);
452449
if (n_targets != std::size(names))
453450
return false;
@@ -465,12 +462,12 @@ consistent_targets(const bamxx::bam_header &hdr,
465462
return true;
466463
}
467464

468-
[[nodiscard]] static bool
465+
[[nodiscard]] static auto
469466
consistent_existing_targets(
470467
const bamxx::bam_header &hdr,
471468
const std::map<std::int32_t, std::size_t> &tid_to_idx,
472469
const std::vector<std::string> &names,
473-
const std::vector<std::size_t> &sizes) {
470+
const std::vector<std::size_t> &sizes) -> bool {
474471
const std::size_t n_targets = get_n_targets(hdr);
475472
for (std::size_t tid = 0; tid < n_targets; ++tid) {
476473
const auto idx_itr = tid_to_idx.find(tid);
@@ -561,8 +558,8 @@ struct read_processor {
561558
int strand{};
562559
std::string expected_basecall_model{};
563560

564-
[[nodiscard]] std::string
565-
tostring() const {
561+
[[nodiscard]] auto
562+
tostring() const -> std::string {
566563
const auto strand_str = strand == 0 ? "both" : strand == 1 ? "fwd" : "rev";
567564
std::ostringstream oss;
568565
oss << std::boolalpha << "[verbose: " << verbose << "]\n"
@@ -582,8 +579,8 @@ struct read_processor {
582579

583580
read_processor() : expected_basecall_model{default_expected_basecall_model} {}
584581

585-
[[nodiscard]] std::string
586-
expected_basecall_model_str() const {
582+
[[nodiscard]] auto
583+
expected_basecall_model_str() const -> std::string {
587584
return expected_basecall_model.empty() ? "NA" : expected_basecall_model;
588585
}
589586

@@ -752,9 +749,9 @@ struct read_processor {
752749
write_output_all(hdr, out, tid, chrom, counts);
753750
}
754751

755-
[[nodiscard]] mod_prob_stats
752+
[[nodiscard]] auto
756753
operator()(const std::string &infile, const std::string &outfile,
757-
const std::string &chroms_file) const {
754+
const std::string &chroms_file) const -> mod_prob_stats {
758755
// first get the chromosome names and sequences from the FASTA file
759756
std::vector<std::string> chroms, names;
760757
read_fasta_file(chroms_file, names, chroms);
@@ -1003,8 +1000,8 @@ check_modification_sites(const std::string &infile,
10031000
return only_cpgs_counter == reads_processed;
10041001
}
10051002

1006-
int
1007-
main_nanocount(int argc, char *argv[]) { // NOLINT(*-avoid-c-arrays)
1003+
auto
1004+
main_nanocount(int argc, char *argv[]) -> int { // NOLINT(*-avoid-c-arrays)
10081005
static constexpr auto n_reads_to_check = 1000;
10091006
try {
10101007
read_processor rp;

0 commit comments

Comments
 (0)