Skip to content

Commit f57abfe

Browse files
src/utils/xcounts.cpp: adding a flag to force gzip output even if input is not gzip
1 parent 6df5926 commit f57abfe

File tree

1 file changed

+28
-23
lines changed

1 file changed

+28
-23
lines changed

src/utils/xcounts.cpp

Lines changed: 28 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -18,12 +18,12 @@
1818

1919
#include <bamxx.hpp>
2020

21+
#include <charconv>
2122
#include <iostream>
2223
#include <stdexcept>
2324
#include <string>
24-
#include <vector>
25-
#include <charconv>
2625
#include <system_error>
26+
#include <vector>
2727

2828
// from smithlab_cpp
2929
#include "OptionParser.hpp"
@@ -40,13 +40,12 @@ using std::endl;
4040
using std::runtime_error;
4141
using std::string;
4242
using std::to_chars;
43-
using std::vector;
4443
using std::to_string;
44+
using std::vector;
4545

4646
using bamxx::bgzf_file;
4747

48-
49-
template<typename T>
48+
template <typename T>
5049
static inline uint32_t
5150
fill_output_buffer(const uint32_t offset, const MSite &s, T &buf) {
5251
auto buf_end = buf.data() + buf.size();
@@ -59,11 +58,11 @@ fill_output_buffer(const uint32_t offset, const MSite &s, T &buf) {
5958
return std::distance(buf.data(), res.ptr);
6059
}
6160

62-
6361
int
6462
main_xcounts(int argc, const char **argv) {
6563
try {
6664
bool verbose = false;
65+
bool gzip_output = false;
6766
bool require_coverage = false;
6867
size_t n_threads = 1;
6968
string genome_file;
@@ -78,14 +77,17 @@ main_xcounts(int argc, const char **argv) {
7877
"<counts-file> (\"-\" for standard input)", 1);
7978
opt_parse.add_opt("output", 'o', "output file (default is standard out)",
8079
false, outfile);
81-
opt_parse.add_opt("chroms", 'c', "make header from this reference",
82-
false, genome_file);
83-
opt_parse.add_opt("reads", 'r', "ouput only sites with reads",
84-
false, require_coverage);
85-
opt_parse.add_opt("header", 'h', "use this file to generate header",
86-
false, header_file);
80+
opt_parse.add_opt("chroms", 'c', "make header from this reference", false,
81+
genome_file);
82+
opt_parse.add_opt("reads", 'r', "ouput only sites with reads", false,
83+
require_coverage);
84+
opt_parse.add_opt("header", 'h', "use this file to generate header", false,
85+
header_file);
8786
opt_parse.add_opt("threads", 't', "threads for compression (use few)",
8887
false, n_threads);
88+
opt_parse.add_opt("zip", 'z',
89+
"gzip compress output (automatic if input is gzip)",
90+
false, gzip_output);
8991
opt_parse.add_opt("verbose", 'v', "print more run info", false, verbose);
9092
std::vector<string> leftover_args;
9193
opt_parse.parse(argc, argv, leftover_args);
@@ -112,21 +114,22 @@ main_xcounts(int argc, const char **argv) {
112114
vector<string> chrom_names;
113115
vector<uint64_t> chrom_sizes;
114116
if (!genome_file.empty()) {
115-
const int ret =
116-
get_chrom_sizes_for_counts_header(n_threads, genome_file,
117-
chrom_names, chrom_sizes);
117+
const int ret = get_chrom_sizes_for_counts_header(
118+
n_threads, genome_file, chrom_names, chrom_sizes);
118119
if (ret)
119120
throw dnmt_error{"failed to get chrom sizes from: " + genome_file};
120121
}
121122

122123
bamxx::bam_tpool tpool(n_threads);
123124
bgzf_file in(filename, "r");
124-
if (!in) throw dnmt_error{"could not open file: " + filename};
125+
if (!in)
126+
throw dnmt_error{"could not open file: " + filename};
125127

126-
const auto outfile_mode = in.is_compressed() ? "w" : "wu";
128+
const auto outfile_mode = (gzip_output || in.is_compressed()) ? "w" : "wu";
127129

128130
bgzf_file out(outfile, outfile_mode);
129-
if (!out) throw dnmt_error{"error opening output file: " + outfile};
131+
if (!out)
132+
throw dnmt_error{"error opening output file: " + outfile};
130133

131134
if (n_threads > 1) {
132135
if (in.is_bgzf())
@@ -142,7 +145,8 @@ main_xcounts(int argc, const char **argv) {
142145
// use the kstring_t type to more directly use the BGZF file
143146
kstring_t line{0, 0, nullptr};
144147
const int ret = ks_resize(&line, 1024);
145-
if (ret) throw dnmt_error("failed to acquire buffer");
148+
if (ret)
149+
throw dnmt_error("failed to acquire buffer");
146150

147151
vector<char> buf(128);
148152

@@ -154,15 +158,17 @@ main_xcounts(int argc, const char **argv) {
154158
MSite site;
155159
while (status_ok && bamxx::getline(in, line)) {
156160
if (is_counts_header_line(line.s)) {
157-
if (!genome_file.empty() || !header_file.empty()) continue;
161+
if (!genome_file.empty() || !header_file.empty())
162+
continue;
158163
found_header = true;
159164
const string header_line{line.s};
160165
write_counts_header_line(header_line, out);
161166
continue;
162167
}
163168

164169
status_ok = site.initialize(line.s, line.s + line.l);
165-
if (!status_ok || !found_header) break;
170+
if (!status_ok || !found_header)
171+
break;
166172

167173
if (site.chrom != prev_chrom) {
168174
if (verbose)
@@ -183,8 +189,7 @@ main_xcounts(int argc, const char **argv) {
183189
ks_free(&line);
184190

185191
if (!status_ok) {
186-
cerr << "failed converting "
187-
<< filename << " to " << outfile << endl;
192+
cerr << "failed converting " << filename << " to " << outfile << endl;
188193
return EXIT_FAILURE;
189194
}
190195
if (!found_header) {

0 commit comments

Comments
 (0)