Skip to content

Commit 09f9f2e

Browse files
committed
add tag_qc in summary
1 parent 54008ed commit 09f9f2e

File tree

2 files changed

+39
-3
lines changed

2 files changed

+39
-3
lines changed

src/SplitCode.h

Lines changed: 38 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ struct SplitCode {
7676
std::string trim_5_str = "", std::string trim_3_str = "", std::string extract_str = "", bool extract_no_chain = false, std::string barcode_prefix = "",
7777
std::string filter_length_str = "", bool quality_trimming_5 = false, bool quality_trimming_3 = false,
7878
bool quality_trimming_pre = false, bool quality_trimming_naive = false, int quality_trimming_threshold = -1, bool phred64 = false,
79-
bool write_tag_location_information = false, std::vector<size_t> sub_assign_vec = std::vector<size_t>(0), int fake_bc_len_override = 0, int min_delta = -1) {
79+
bool write_tag_location_information = false, std::vector<size_t> sub_assign_vec = std::vector<size_t>(0), int fake_bc_len_override = 0, int min_delta = -1, bool do_qc = false) {
8080
init = false;
8181
extract_seq_names = false;
8282
discard_check = false;
@@ -112,6 +112,7 @@ struct SplitCode {
112112
this->write_tag_location_information = write_tag_location_information;
113113
this->sub_assign_vec = sub_assign_vec;
114114
this->min_delta = min_delta;
115+
this->do_qc = do_qc;
115116
early_termination_maxFindsG = -1;
116117
max_seq_len = 0;
117118
setNFiles(nFiles);
@@ -299,7 +300,25 @@ struct SplitCode {
299300
of << "\t\t" << "\"assign_id_map_size\": " << idmap_getsize() << ",\n";
300301
of << "\t\t" << "\"sub_assign_id_map_size\": " << idmap_getsize(true) << ",\n";
301302
of << "\t\t" << "\"always_assign\": " << always_assign << "\n";
302-
of << "\t" << "}" << "\n";
303+
of << "\t" << "}";
304+
if (do_qc) {
305+
of << ",";
306+
}
307+
of << "\n";
308+
if (do_qc) {
309+
of << "\t" << "\"tag_qc\": " << "[" << "\n";
310+
for (int i = 0; i < qc.size(); i++) {
311+
if (qc[i].size() == 0) continue;
312+
for (int j = 0; j < qc[i].size(); j++) {
313+
of << "\t\t{\"tag\": \"" << names[i] << "\", \"distance\": " << j << ", \"count\": " << qc[i][j] << "}";
314+
if (!(i == qc.size()-1 && j == qc[i].size()-1)) {
315+
of << ",";
316+
}
317+
of << "\n";
318+
}
319+
}
320+
of << "\t" << "]" << "\n";
321+
}
303322
of << "}" << std::endl;
304323
of.close();
305324
}
@@ -805,6 +824,9 @@ struct SplitCode {
805824
}
806825
}
807826
}*/
827+
if (do_qc) {
828+
qc.resize(names.size());
829+
}
808830
init = true;
809831
}
810832

@@ -3113,6 +3135,7 @@ struct SplitCode {
31133135
bool check_group = keep_check_group || discard_check_group;
31143136
auto it_umi_loc = umi_loc_map.begin();
31153137
std::vector<uint32_t> group_v(0);
3138+
std::vector<std::pair<uint32_t,short>> qc_vec;
31163139
if (check_group) {
31173140
group_v.reserve(16);
31183141
}
@@ -3249,6 +3272,9 @@ struct SplitCode {
32493272
name_id_curr = tag.name_id;
32503273
group_curr = tag.group;
32513274
search_after_start = pos+k; // aka end_pos_curr
3275+
if (do_qc) { // Store some QC information
3276+
qc_vec.push_back(std::make_pair(name_id_curr, error));
3277+
}
32523278
if (write_tag_location_information) {
32533279
results.tag_locations.push_back(names[tag.name_id] + ":" + std::to_string(file) + "," + std::to_string(pos) + "-" + std::to_string(pos+k));
32543280
}
@@ -3423,6 +3449,13 @@ struct SplitCode {
34233449
results.discard = true;
34243450
return;
34253451
}
3452+
if (do_qc && !u.empty()) { // Now, store the QC
3453+
for (auto &q : qc_vec) {
3454+
auto& qc_ = qc[q.first];
3455+
qc_.resize(q.second+1, 0);
3456+
qc_[q.second]++;
3457+
}
3458+
}
34263459
}
34273460

34283461
static void modifyRead(std::vector<std::pair<const char*, int>>& seqs, std::vector<std::pair<const char*, int>>& quals, int i, Results& results, bool edit_sub_len = false) {
@@ -3950,6 +3983,9 @@ struct SplitCode {
39503983
std::unordered_map<std::vector<uint32_t>, std::string, VectorHasher> groupmapinv_keep;
39513984
std::unordered_map<std::vector<uint32_t>, int, VectorHasher> groupmapinv_discard;
39523985

3986+
std::vector<std::vector<short>> qc; // outer vector index = tag name id; vector indices = tag edit distance; value = count
3987+
bool do_qc; // Should we do QC (i.e. do tag-level statistics?)
3988+
39533989
std::unordered_map<uint32_t,int> min_finds_map;
39543990
std::unordered_map<uint32_t,int> max_finds_map;
39553991
std::unordered_map<uint32_t,int> min_finds_group_map;

src/main.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1380,7 +1380,7 @@ int main(int argc, char *argv[]) {
13801380
ProgramOptions opt;
13811381
ParseOptions(argc,argv,opt);
13821382
SplitCode sc(opt.nfiles, opt.summary_file, opt.trim_only, opt.disable_n, opt.trim_5_str, opt.trim_3_str, opt.extract_str, opt.extract_no_chain, opt.barcode_prefix, opt.filter_length_str,
1383-
opt.quality_trimming_5, opt.quality_trimming_3, opt.quality_trimming_pre, opt.quality_trimming_naive, opt.quality_trimming_threshold, opt.phred64, opt.write_locations, opt.sub_assign_vec, opt.bclen, opt.min_delta);
1383+
opt.quality_trimming_5, opt.quality_trimming_3, opt.quality_trimming_pre, opt.quality_trimming_naive, opt.quality_trimming_threshold, opt.phred64, opt.write_locations, opt.sub_assign_vec, opt.bclen, opt.min_delta, !opt.summary_file.empty());
13841384
bool checkopts = CheckOptions(opt, sc);
13851385
if (!checkopts) {
13861386
usage();

0 commit comments

Comments
 (0)