Skip to content

Commit 4497381

Browse files
src/utils/xcounts.cpp and src/utils/unxcounts.cpp: linting
1 parent ace5dc0 commit 4497381

File tree

2 files changed

+107
-73
lines changed

2 files changed

+107
-73
lines changed

src/utils/unxcounts.cpp

Lines changed: 89 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,21 @@
1-
/* unxcounts: reverse the process of xcounts and generate the counts
2-
* file, including sites not covered.
1+
/* Copyright (C) 2023 Andrew D. Smith
32
*
4-
* Copyright (C) 2023 Andrew D. Smith
3+
* This program is free software: you can redistribute it and/or modify it
4+
* under the terms of the GNU General Public License as published by the Free
5+
* Software Foundation, either version 3 of the License, or (at your option)
6+
* any later version.
57
*
6-
* Authors: Andrew D. Smith
7-
*
8-
* This program is free software: you can redistribute it and/or
9-
* modify it under the terms of the GNU General Public License as
10-
* published by the Free Software Foundation, either version 3 of the
11-
* License, or (at your option) any later version.
12-
*
13-
* This program is distributed in the hope that it will be useful, but
14-
* WITHOUT ANY WARRANTY; without even the implied warranty of
15-
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16-
* General Public License for more details.
8+
* This program is distributed in the hope that it will be useful, but WITHOUT
9+
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10+
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11+
* more details.
1712
*/
1813

14+
[[maybe_unused]] static constexpr auto about = R"(
15+
unxcounts: reverse the process of xcounts and generate the counts file,
16+
including sites not covered.
17+
)";
18+
1919
#include "OptionParser.hpp"
2020
#include "bsutils.hpp"
2121
#include "counts_header.hpp"
@@ -28,6 +28,7 @@
2828
#include <htslib/sam.h>
2929

3030
#include <algorithm>
31+
#include <array>
3132
#include <cassert>
3233
#include <cctype>
3334
#include <charconv>
@@ -45,7 +46,7 @@
4546
#include <utility>
4647
#include <vector>
4748

48-
// NOLINTBEGIN(*-avoid-c-arrays,*-avoid-magic-numbers,*-avoid-non-const-global-variables,*-narrowing-conversions,*-constant-array-index,*-pointer-arithmetic)
49+
// NOLINTBEGIN(*-narrowing-conversions)
4950

5051
static void
5152
read_fasta_file_short_names_uppercase(const std::string &chroms_file,
@@ -55,8 +56,8 @@ read_fasta_file_short_names_uppercase(const std::string &chroms_file,
5556
names.clear();
5657
read_fasta_file_short_names(chroms_file, names, chroms);
5758
for (auto &i : chroms)
58-
transform(std::cbegin(i), std::cend(i), begin(i),
59-
[](const char c) { return std::toupper(c); });
59+
std::transform(std::cbegin(i), std::cend(i), begin(i),
60+
[](const char c) { return std::toupper(c); });
6061
}
6162

6263
static void
@@ -83,7 +84,7 @@ verify_chrom_orders(
8384
throw std::runtime_error("failed to acquire buffer");
8485

8586
while (bamxx::getline(in, line)) {
86-
if (std::isdigit(line.s[0]))
87+
if (std::isdigit(line.s[0])) // NOLINT(*-pointer-arithmetic)
8788
continue;
8889
if (is_counts_header_line(line.s))
8990
continue;
@@ -110,20 +111,22 @@ verify_chrom_orders(
110111
std::cerr << "chrom orders are consistent" << "\n";
111112
}
112113

113-
static const char *tag_values[] = {
114+
static constexpr auto tag_values = std::array<const char *, 5>{
114115
"CpG", // 0
115116
"CHH", // 1
116117
"CXG", // 2
117118
"CCG", // 3
118119
"N" // 4
119120
};
120121

121-
static const int tag_sizes[] = {3, 3, 3, 3, 1};
122+
static constexpr auto tag_sizes = std::array<int, 5>{
123+
3, 3, 3, 3, 1,
124+
};
122125

123-
// ADS: the values below allow for things like CHH where the is a N in
124-
// the triplet; I'm allowing that for consistency with the weird logic
125-
// from earlier versions.
126-
const std::uint32_t context_codes[] = {
126+
// ADS: the values below allow for things like CHH where there is an N in the
127+
// triplet; I'm allowing that for consistency with the weird logic from
128+
// earlier versions.
129+
static constexpr auto context_codes = std::array<std::uint32_t, 25>{
127130
/*CAA CHH*/ 1,
128131
/*CAC CHH*/ 1,
129132
/*CAG CXG*/ 2,
@@ -154,26 +157,28 @@ const std::uint32_t context_codes[] = {
154157
static inline std::uint32_t
155158
get_tag_from_genome_c(const std::string &s, const size_t pos) {
156159
const auto val = base2int(s[pos + 1]) * 5 + base2int(s[pos + 2]);
157-
return context_codes[val];
160+
return context_codes[val]; // NOLINT(*-constant-array-index)
158161
}
159162

160163
static inline std::uint32_t
161164
get_tag_from_genome_g(const std::string &s, const size_t pos) {
162165
const auto val =
163166
base2int(complement(s[pos - 1])) * 5 + base2int(complement(s[pos - 2]));
164-
return context_codes[val];
167+
return context_codes[val]; // NOLINT(*-constant-array-index)
165168
}
166169

167170
static bool
168171
write_missing(const std::uint32_t name_size, const std::string &chrom,
169172
const std::uint64_t start_pos, const std::uint64_t end_pos,
170173
std::vector<char> &buf, bamxx::bgzf_file &out) {
171174
static constexpr auto zeros = "\t0\t0\n";
175+
static constexpr auto zeros_sz = 5;
172176
static constexpr auto pos_strand = "\t+\t";
173177
static constexpr auto neg_strand = "\t-\t";
174-
const auto buf_end = buf.data() + size(buf);
178+
const auto buf_end =
179+
buf.data() + std::size(buf); // NOLINT(*-pointer-arithmetic)
175180
// chrom name is already in the buffer so move past it
176-
auto cursor = buf.data() + name_size + 1;
181+
auto cursor = buf.data() + name_size + 1; // NOLINT(*-pointer-arithmetic)
177182
for (auto pos = start_pos; pos < end_pos; ++pos) {
178183
const char base = chrom[pos];
179184
if (is_cytosine(base) || is_guanine(base)) {
@@ -182,10 +187,12 @@ write_missing(const std::uint32_t name_size, const std::string &chrom,
182187
: get_tag_from_genome_g(chrom, pos);
183188
#pragma GCC diagnostic push
184189
#pragma GCC diagnostic error "-Wstringop-overflow=0"
190+
// NOLINTBEGIN(*-constant-array-index)
185191
auto [ptr, ec] = std::to_chars(cursor, buf_end, pos);
186192
ptr = std::copy_n(is_c ? pos_strand : neg_strand, 3, ptr);
187193
ptr = std::copy_n(tag_values[the_tag], tag_sizes[the_tag], ptr);
188-
ptr = std::copy_n(zeros, 5, ptr);
194+
ptr = std::copy_n(zeros, zeros_sz, ptr);
195+
// NOLINTEND(*-constant-array-index)
189196
const auto sz = std::distance(buf.data(), ptr);
190197
#pragma GCC diagnostic push
191198

@@ -201,21 +208,26 @@ write_missing_cpg(const std::uint32_t &name_size, const std::string &chrom,
201208
const std::uint64_t start_pos, const std::uint64_t end_pos,
202209
std::vector<char> &buf, bamxx::bgzf_file &out) {
203210
static constexpr auto zeros = "\t0\t0\n";
211+
static constexpr auto zeros_sz = 5;
204212
static constexpr auto pos_strand = "\t+\t";
205-
const auto buf_end = buf.data() + size(buf);
213+
static constexpr auto pos_strand_sz = 3;
214+
const auto buf_end =
215+
buf.data() + std::size(buf); // NOLINT(*-pointer-arithmetic)
206216
// chrom name is already in the buffer so move past it
207-
auto cursor = buf.data() + name_size + 1;
217+
auto cursor = buf.data() + name_size + 1; // NOLINT(*-pointer-arithmetic)
208218
for (auto pos = start_pos; pos < end_pos - 1; ++pos) {
209-
// When this function is called, the "end_pos" is either the chrom
210-
// size or the position of a base known to be a C. So we never
211-
// have to allow pos+1 to equal end_pos.
219+
// When this function is called, the "end_pos" is either the chrom size or
220+
// the position of a base known to be a C. So we never have to allow pos+1
221+
// to equal end_pos.
212222
if (is_cytosine(chrom[pos]) && is_guanine(chrom[pos + 1])) {
213223
#pragma GCC diagnostic push
214224
#pragma GCC diagnostic error "-Wstringop-overflow=0"
225+
// NOLINTBEGIN(*-constant-array-index)
215226
auto [ptr, ec] = std::to_chars(cursor, buf_end, pos);
216-
ptr = std::copy_n(pos_strand, 3, ptr);
227+
ptr = std::copy_n(pos_strand, pos_strand_sz, ptr);
217228
ptr = std::copy_n("CpG", 3, ptr);
218-
ptr = std::copy_n(zeros, 5, ptr);
229+
ptr = std::copy_n(zeros, zeros_sz, ptr);
230+
// NOLINTEND(*-constant-array-index)
219231
const auto sz = std::distance(buf.data(), ptr);
220232
#pragma GCC diagnostic push
221233
if (bgzf_write(out.f, buf.data(), sz) != sz)
@@ -233,8 +245,11 @@ write_site(const std::uint32_t name_size, const std::string &chrom,
233245
static constexpr auto pos_strand = "\t+\t";
234246
static constexpr auto neg_strand = "\t-\t";
235247
static constexpr auto fmt = std::chars_format::general;
248+
// use default precision, 6, same as std::cout default
249+
static constexpr auto precision = 6;
236250

237-
const auto buf_end = buf.data() + size(buf);
251+
const auto buf_end =
252+
buf.data() + std::size(buf); // NOLINT(*-pointer-arithmetic)
238253
const char base = chrom[pos];
239254
assert(is_cytosine(base) || is_guanine(base));
240255
const bool is_c = is_cytosine(base);
@@ -246,17 +261,17 @@ write_site(const std::uint32_t name_size, const std::string &chrom,
246261
#pragma GCC diagnostic push
247262
#pragma GCC diagnostic error "-Wstringop-overflow=0"
248263
// chrom name is already in the buffer so move past it
249-
auto cursor = buf.data() + name_size + 1;
264+
auto cursor = buf.data() + name_size + 1; // NOLINT(*-pointer-arithmetic)
250265
{
251266
auto [ptr, ec] = std::to_chars(cursor, buf_end, pos);
252267
cursor = ptr;
253268
}
254269
cursor = std::copy_n(is_c ? pos_strand : neg_strand, 3, cursor);
270+
// NOLINTNEXTLINE(*-constant-array-index)
255271
cursor = std::copy_n(tag_values[the_tag], tag_sizes[the_tag], cursor);
256272
*cursor++ = '\t';
257273
{
258-
// use default precision, 6, same as std::cout default
259-
auto [ptr, ec] = std::to_chars(cursor, buf_end, meth, fmt, 6);
274+
auto [ptr, ec] = std::to_chars(cursor, buf_end, meth, fmt, precision);
260275
cursor = ptr;
261276
}
262277
*cursor++ = '\t';
@@ -318,11 +333,11 @@ get_lookups(const std::vector<std::string> &names,
318333
std::vector<std::uint64_t> &chrom_sizes) {
319334
chrom_lookup.clear();
320335
name_to_id.clear();
321-
chrom_sizes = std::vector<std::uint64_t>(size(chroms), 0);
322-
for (size_t i = 0; i < size(chroms); ++i) {
336+
chrom_sizes = std::vector<std::uint64_t>(std::size(chroms), 0);
337+
for (size_t i = 0; i < std::size(chroms); ++i) {
323338
chrom_lookup[names[i]] = std::cbegin(chroms) + i;
324339
name_to_id[names[i]] = i;
325-
chrom_sizes[i] = size(chroms[i]);
340+
chrom_sizes[i] = std::size(chroms[i]);
326341
}
327342
}
328343

@@ -332,7 +347,8 @@ process_header_line(
332347
const std::vector<std::uint64_t> &chrom_sizes, const kstring_t &line,
333348
bamxx::bgzf_file &out) {
334349
std::string hdr_line{line.s};
335-
if (size(hdr_line) > 1 && !verify_chrom(hdr_line, name_to_id, chrom_sizes))
350+
if (std::size(hdr_line) > 1 &&
351+
!verify_chrom(hdr_line, name_to_id, chrom_sizes))
336352
throw std::runtime_error{"failed to verify header for: " + hdr_line};
337353
if (!write_counts_header_line(hdr_line, out))
338354
throw std::runtime_error{"failed to write header line: " + hdr_line};
@@ -351,7 +367,8 @@ write_all_sites(const bool verbose, const std::uint32_t prev_chr_id,
351367
auto res =
352368
std::copy(std::cbegin(names[i]), std::cend(names[i]), buf.data());
353369
*res = '\t';
354-
write_missing(size(names[i]), chroms[i], 0u, size(chroms[i]), buf, out);
370+
write_missing(std::size(names[i]), chroms[i], 0u, std::size(chroms[i]), buf,
371+
out);
355372
}
356373
}
357374

@@ -364,11 +381,11 @@ process_sites(const bool verbose, const bool add_missing_chroms,
364381
std::vector<std::string> chroms, names;
365382
read_fasta_file_short_names_uppercase(chroms_file, names, chroms);
366383
if (verbose)
367-
std::cerr << "[n chroms in reference: " << chroms.size() << "]" << "\n";
384+
std::cerr << "[n chroms in reference: " << std::size(chroms) << "]" << "\n";
368385

369386
std::unordered_map<std::string, chrom_itr_t> chrom_lookup;
370387
std::unordered_map<std::string, std::int32_t> name_to_id;
371-
std::vector<std::uint64_t> chrom_sizes(size(chroms), 0);
388+
std::vector<std::uint64_t> chrom_sizes(std::size(chroms), 0);
372389
get_lookups(names, chroms, chrom_lookup, name_to_id, chrom_sizes);
373390

374391
if (add_missing_chroms)
@@ -412,15 +429,16 @@ process_sites(const bool verbose, const bool add_missing_chroms,
412429
while (getline(in, line)) {
413430
if (is_counts_header_line(line.s)) {
414431
process_header_line(name_to_id, chrom_sizes, line, out);
415-
continue; // ADS: early loop exit
432+
continue; // ADS: just skip headers
416433
}
417434

418-
if (!std::isdigit(line.s[0])) { // check if we have a chrom line
435+
// check if we have a chrom line
436+
if (!std::isdigit(line.s[0])) { // NOLINT(*-pointer-arithmetic)
419437
if (!require_covered && pos != std::numeric_limits<std::uint64_t>::max())
420-
write_missing(nm_sz, *ch_itr, pos + 1, size(*ch_itr), buf, out);
438+
write_missing(nm_sz, *ch_itr, pos + 1, std::size(*ch_itr), buf, out);
421439

422440
chrom_name = std::string{line.s};
423-
nm_sz = size(chrom_name);
441+
nm_sz = std::size(chrom_name);
424442
const std::int32_t chr_id = get_chrom_id(name_to_id, chrom_name);
425443

426444
if (add_missing_chroms)
@@ -438,10 +456,12 @@ process_sites(const bool verbose, const bool add_missing_chroms,
438456
}
439457
else {
440458
std::uint32_t pos_step = 0, n_meth = 0, n_unmeth = 0;
459+
// NOLINTBEGIN(*-pointer-arithmetic)
441460
const auto end_line = line.s + line.l;
442461
auto res = std::from_chars(line.s, end_line, pos_step);
443462
res = std::from_chars(res.ptr + 1, end_line, n_meth);
444463
res = std::from_chars(res.ptr + 1, end_line, n_unmeth);
464+
// NOLINTEND(*-pointer-arithmetic)
445465

446466
const auto curr_pos = pos + pos_step;
447467
if (!require_covered && pos + 1 < curr_pos)
@@ -452,9 +472,9 @@ process_sites(const bool verbose, const bool add_missing_chroms,
452472
}
453473
}
454474
if (!require_covered)
455-
write_missing(nm_sz, *ch_itr, pos + 1, size(*ch_itr), buf, out);
475+
write_missing(nm_sz, *ch_itr, pos + 1, std::size(*ch_itr), buf, out);
456476
if (add_missing_chroms)
457-
write_all_sites(verbose, prev_chr_id, size(chroms), names, chroms, buf,
477+
write_all_sites(verbose, prev_chr_id, std::size(chroms), names, chroms, buf,
458478
out);
459479
}
460480

@@ -471,7 +491,8 @@ write_all_cpgs(const bool verbose, const std::uint32_t prev_chr_id,
471491
auto res =
472492
std::copy(std::cbegin(names[i]), std::cend(names[i]), buf.data());
473493
*res = '\t';
474-
write_missing_cpg(size(names[i]), chroms[i], 0u, size(chroms[i]), buf, out);
494+
write_missing_cpg(std::size(names[i]), chroms[i], 0u, std::size(chroms[i]),
495+
buf, out);
475496
}
476497
}
477498

@@ -484,11 +505,11 @@ process_cpg_sites(const bool verbose, const bool add_missing_chroms,
484505
std::vector<std::string> chroms, names;
485506
read_fasta_file_short_names_uppercase(chroms_file, names, chroms);
486507
if (verbose)
487-
std::cerr << "[n chroms in reference: " << chroms.size() << "]" << "\n";
508+
std::cerr << "[n chroms in reference: " << std::size(chroms) << "]" << "\n";
488509

489510
std::unordered_map<std::string, chrom_itr_t> chrom_lookup;
490511
std::unordered_map<std::string, std::int32_t> name_to_id;
491-
std::vector<std::uint64_t> chrom_sizes(size(chroms), 0);
512+
std::vector<std::uint64_t> chrom_sizes(std::size(chroms), 0);
492513
get_lookups(names, chroms, chrom_lookup, name_to_id, chrom_sizes);
493514

494515
if (add_missing_chroms)
@@ -535,12 +556,14 @@ process_cpg_sites(const bool verbose, const bool add_missing_chroms,
535556
continue; // ADS: just skip headers
536557
}
537558

538-
if (!std::isdigit(line.s[0])) { // check if we have a chrom line
559+
// check if we have a chrom line
560+
if (!std::isdigit(line.s[0])) { // NOLINT(*-pointer-arithmetic)
539561
if (!require_covered && pos != std::numeric_limits<std::uint64_t>::max())
540-
write_missing_cpg(nm_sz, *ch_itr, pos + 1, size(*ch_itr), buf, out);
562+
write_missing_cpg(nm_sz, *ch_itr, pos + 1, std::size(*ch_itr), buf,
563+
out);
541564

542565
chrom_name = std::string{line.s};
543-
nm_sz = size(chrom_name);
566+
nm_sz = std::size(chrom_name);
544567
const std::int32_t chr_id = get_chrom_id(name_to_id, chrom_name);
545568

546569
if (add_missing_chroms)
@@ -558,10 +581,12 @@ process_cpg_sites(const bool verbose, const bool add_missing_chroms,
558581
}
559582
else {
560583
std::uint32_t pos_step = 0, n_meth = 0, n_unmeth = 0;
584+
// NOLINTBEGIN(*-pointer-arithmetic)
561585
const auto end_line = line.s + line.l;
562586
auto res = std::from_chars(line.s, end_line, pos_step);
563587
res = std::from_chars(res.ptr + 1, end_line, n_meth);
564588
res = std::from_chars(res.ptr + 1, end_line, n_unmeth);
589+
// NOLINTEND(*-pointer-arithmetic)
565590

566591
const auto curr_pos = pos + pos_step;
567592
if (!require_covered && pos + 1 < curr_pos)
@@ -572,9 +597,10 @@ process_cpg_sites(const bool verbose, const bool add_missing_chroms,
572597
}
573598
}
574599
if (!require_covered)
575-
write_missing_cpg(nm_sz, *ch_itr, pos + 1, size(*ch_itr), buf, out);
600+
write_missing_cpg(nm_sz, *ch_itr, pos + 1, std::size(*ch_itr), buf, out);
576601
if (add_missing_chroms)
577-
write_all_cpgs(verbose, prev_chr_id, size(chroms), names, chroms, buf, out);
602+
write_all_cpgs(verbose, prev_chr_id, std::size(chroms), names, chroms, buf,
603+
out);
578604
}
579605

580606
int
@@ -622,7 +648,7 @@ main_unxcounts(int argc, char *argv[]) { // NOLINT(*-avoid-c-arrays)
622648
std::cerr << opt_parse.option_missing_message() << "\n";
623649
return EXIT_SUCCESS;
624650
}
625-
if (leftover_args.size() != 1) {
651+
if (std::size(leftover_args) != 1) {
626652
std::cerr << opt_parse.help_message() << "\n";
627653
return EXIT_SUCCESS;
628654
}
@@ -653,4 +679,4 @@ main_unxcounts(int argc, char *argv[]) { // NOLINT(*-avoid-c-arrays)
653679
return EXIT_SUCCESS;
654680
}
655681

656-
// NOLINTEND(*-avoid-c-arrays,*-avoid-magic-numbers,*-avoid-non-const-global-variables,*-narrowing-conversions,*-constant-array-index,*-pointer-arithmetic)
682+
// NOLINTEND(*-narrowing-conversions)

0 commit comments

Comments
 (0)