Skip to content

Commit 7fab6b6

Browse files
src/analysis/roimethstat.cpp: reverting to local function for formatting levels counter because the format used by roi has been slightly different from the general format. Also reading the input intervals from either a BED3 or BED6, depending on what the user provides
1 parent 3381c2b commit 7fab6b6

File tree

1 file changed

+54
-7
lines changed

1 file changed

+54
-7
lines changed

src/analysis/roimethstat.cpp

Lines changed: 54 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,9 @@
1515
* more details.
1616
*/
1717

18+
#include "Interval.hpp"
1819
#include "Interval6.hpp"
20+
1921
#include "LevelsCounter.hpp"
2022
#include "MSite.hpp"
2123
#include "OptionParser.hpp"
@@ -41,6 +43,29 @@
4143

4244
// NOLINTBEGIN(*-narrowing-conversions)
4345

46+
[[nodiscard]] static std::string
47+
format_levels_counter_for_roi(const LevelsCounter &lc) {
48+
// ...
49+
// (7) weighted mean methylation
50+
// (8) unweighted mean methylation
51+
// (9) fractional methylation
52+
// (10) number of sites in the region
53+
// (11) number of sites covered at least once
54+
// (12) number of observations in reads indicating methylation
55+
// (13) total number of observations from reads in the region
56+
std::ostringstream oss;
57+
// clang-format off
58+
oss << lc.mean_meth_weighted() << '\t'
59+
<< lc.mean_meth() << '\t'
60+
<< lc.fractional_meth() << '\t'
61+
<< lc.total_sites << '\t'
62+
<< lc.sites_covered << '\t'
63+
<< lc.total_c << '\t'
64+
<< lc.coverage();
65+
// clang-format on
66+
return oss.str();
67+
}
68+
4469
static void
4570
update(LevelsCounter &lc, const xcounts_entry &xse) {
4671
static constexpr auto one_half = 0.5;
@@ -85,7 +110,7 @@ process_chrom(const bool report_more_info, const char level_code,
85110
: (level_code == 'u' ? lc.sites_covered : lc.total_called()));
86111
out << to_string(interval);
87112
if (report_more_info)
88-
out << '\t' << format_levels_counter(lc);
113+
out << '\t' << format_levels_counter_for_roi(lc);
89114
out << '\n';
90115
}
91116
}
@@ -94,7 +119,7 @@ static void
94119
process_chrom(const bool report_more_info,
95120
const std::vector<Interval6> &intervals, std::ostream &out) {
96121
LevelsCounter lc;
97-
const std::string lc_formatted = format_levels_counter(lc);
122+
const std::string lc_formatted = format_levels_counter_for_roi(lc);
98123
for (const auto &r : intervals) {
99124
out << to_string(r);
100125
if (report_more_info)
@@ -109,8 +134,6 @@ process_from_xcounts(const std::uint32_t n_threads, const bool report_more_info,
109134
const std::vector<Interval6> &intervals_in,
110135
std::ostream &out) {
111136
const auto sites_by_chrom = read_xcounts_by_chrom(n_threads, xsym_file);
112-
// const auto intervals = get_Interval6s(intervals_file);
113-
114137
std::vector<std::vector<Interval6>> intervals_by_chrom;
115138
std::string prev_chrom;
116139
for (auto i = 0u; i < std::size(intervals_in); ++i) {
@@ -293,7 +316,7 @@ process_preloaded(
293316
r_scored.score = score;
294317
out << to_string(r_scored);
295318
if (report_more_info)
296-
out << '\t' << format_levels_counter(lc);
319+
out << '\t' << format_levels_counter_for_roi(lc);
297320
out << '\n';
298321
}
299322
}
@@ -362,7 +385,7 @@ process_on_disk(
362385
r.score = score;
363386
out << to_string(r);
364387
if (report_more_info)
365-
out << '\t' << format_levels_counter(lc);
388+
out << '\t' << format_levels_counter_for_roi(lc);
366389
out << '\n';
367390
}
368391
}
@@ -382,6 +405,30 @@ get_bed_columns(const std::string &regions_file) {
382405
return n_columns;
383406
}
384407

408+
static auto
409+
get_intervals(const std::string &filename) -> std::vector<Interval6> {
410+
const auto field_count = [&] {
411+
bamxx::bgzf_file in(filename, "r");
412+
if (!in)
413+
throw std::runtime_error("cannot open file: " + filename);
414+
std::string line;
415+
while (getline(in, line) && line[0] == '#')
416+
;
417+
std::istringstream iss(line);
418+
std::vector<std::string> v(std::istream_iterator<std::string>(iss),
419+
(std::istream_iterator<std::string>()));
420+
return std::size(v);
421+
}();
422+
if (field_count >= 6)
423+
return read_intervals6(filename);
424+
425+
auto without_names = read_intervals(filename);
426+
std::vector<Interval6> intervals;
427+
for (const auto &w : without_names)
428+
intervals.emplace_back(w.chrom, w.start, w.stop, std::string{}, 0, '+');
429+
return intervals;
430+
}
431+
385432
int
386433
main_roimethstat(int argc, char *argv[]) { // NOLINT(*-avoid-c-arrays)
387434
try {
@@ -495,7 +542,7 @@ Columns (beyond the first 6) in the BED format output:
495542
throw std::runtime_error("seems to be a counts file: " + regions_file +
496543
"\ncheck order of input arguments");
497544

498-
auto regions = read_intervals6(regions_file);
545+
auto regions = get_intervals(regions_file);
499546
if (!std::is_sorted(std::begin(regions), std::end(regions))) {
500547
if (sort_data_if_needed) {
501548
if (verbose)

0 commit comments

Comments
 (0)