@@ -76,7 +76,7 @@ struct SplitCode {
7676 std::string trim_5_str = " " , std::string trim_3_str = " " , std::string extract_str = " " , bool extract_no_chain = false , std::string barcode_prefix = " " ,
7777 std::string filter_length_str = " " , bool quality_trimming_5 = false , bool quality_trimming_3 = false ,
7878 bool quality_trimming_pre = false , bool quality_trimming_naive = false , int quality_trimming_threshold = -1 , bool phred64 = false ,
79- bool write_tag_location_information = false , std::vector<size_t > sub_assign_vec = std::vector<size_t >(0 ), int fake_bc_len_override = 0 , int min_delta = -1 ) {
79+ bool write_tag_location_information = false , std::vector<size_t > sub_assign_vec = std::vector<size_t >(0 ), int fake_bc_len_override = 0 , int min_delta = -1 , bool do_qc = false ) {
8080 init = false ;
8181 extract_seq_names = false ;
8282 discard_check = false ;
@@ -112,6 +112,7 @@ struct SplitCode {
112112 this ->write_tag_location_information = write_tag_location_information;
113113 this ->sub_assign_vec = sub_assign_vec;
114114 this ->min_delta = min_delta;
115+ this ->do_qc = do_qc;
115116 early_termination_maxFindsG = -1 ;
116117 max_seq_len = 0 ;
117118 setNFiles (nFiles);
@@ -299,7 +300,25 @@ struct SplitCode {
299300 of << " \t\t " << " \" assign_id_map_size\" : " << idmap_getsize () << " ,\n " ;
300301 of << " \t\t " << " \" sub_assign_id_map_size\" : " << idmap_getsize (true ) << " ,\n " ;
301302 of << " \t\t " << " \" always_assign\" : " << always_assign << " \n " ;
302- of << " \t " << " }" << " \n " ;
303+ of << " \t " << " }" ;
304+ if (do_qc) {
305+ of << " ," ;
306+ }
307+ of << " \n " ;
308+ if (do_qc) {
309+ of << " \t " << " \" tag_qc\" : " << " [" << " \n " ;
310+ for (int i = 0 ; i < qc.size (); i++) {
311+ if (qc[i].size () == 0 ) continue ;
312+ for (int j = 0 ; j < qc[i].size (); j++) {
313+ of << " \t\t {\" tag\" : \" " << names[i] << " \" , \" distance\" : " << j << " , \" count\" : " << qc[i][j] << " }" ;
314+ if (!(i == qc.size ()-1 && j == qc[i].size ()-1 )) {
315+ of << " ," ;
316+ }
317+ of << " \n " ;
318+ }
319+ }
320+ of << " \t " << " ]" << " \n " ;
321+ }
303322 of << " }" << std::endl;
304323 of.close ();
305324 }
@@ -805,6 +824,9 @@ struct SplitCode {
805824 }
806825 }
807826 }*/
827+ if (do_qc) {
828+ qc.resize (names.size ());
829+ }
808830 init = true ;
809831 }
810832
@@ -3113,6 +3135,7 @@ struct SplitCode {
31133135 bool check_group = keep_check_group || discard_check_group;
31143136 auto it_umi_loc = umi_loc_map.begin ();
31153137 std::vector<uint32_t > group_v (0 );
3138+ std::vector<std::pair<uint32_t ,short >> qc_vec;
31163139 if (check_group) {
31173140 group_v.reserve (16 );
31183141 }
@@ -3249,6 +3272,9 @@ struct SplitCode {
32493272 name_id_curr = tag.name_id ;
32503273 group_curr = tag.group ;
32513274 search_after_start = pos+k; // aka end_pos_curr
3275+ if (do_qc) { // Store some QC information
3276+ qc_vec.push_back (std::make_pair (name_id_curr, error));
3277+ }
32523278 if (write_tag_location_information) {
32533279 results.tag_locations .push_back (names[tag.name_id ] + " :" + std::to_string (file) + " ," + std::to_string (pos) + " -" + std::to_string (pos+k));
32543280 }
@@ -3423,6 +3449,13 @@ struct SplitCode {
34233449 results.discard = true ;
34243450 return ;
34253451 }
3452+ if (do_qc && !u.empty ()) { // Now, store the QC
3453+ for (auto &q : qc_vec) {
3454+ auto & qc_ = qc[q.first ];
3455+ qc_.resize (q.second +1 , 0 );
3456+ qc_[q.second ]++;
3457+ }
3458+ }
34263459 }
34273460
34283461 static void modifyRead (std::vector<std::pair<const char *, int >>& seqs, std::vector<std::pair<const char *, int >>& quals, int i, Results& results, bool edit_sub_len = false ) {
@@ -3950,6 +3983,9 @@ struct SplitCode {
39503983 std::unordered_map<std::vector<uint32_t >, std::string, VectorHasher> groupmapinv_keep;
39513984 std::unordered_map<std::vector<uint32_t >, int , VectorHasher> groupmapinv_discard;
39523985
3986+ std::vector<std::vector<short >> qc; // outer vector index = tag name id; vector indices = tag edit distance; value = count
3987+ bool do_qc; // Should we do QC (i.e. do tag-level statistics?)
3988+
39533989 std::unordered_map<uint32_t ,int > min_finds_map;
39543990 std::unordered_map<uint32_t ,int > max_finds_map;
39553991 std::unordered_map<uint32_t ,int > min_finds_group_map;
0 commit comments