11#ifndef SPLITCODE_H
22#define SPLITCODE_H
33
4- #define SPLITCODE_VERSION " 0.31.1 "
4+ #define SPLITCODE_VERSION " 0.31.2 "
55
66#include < string>
77#include < iostream>
@@ -1145,7 +1145,7 @@ struct SplitCode {
11451145 std::pair<int16_t ,int32_t > location2;
11461146 uint16_t name_id;
11471147 std::string prepend, append;
1148- bool group1, group2, id1_present, id2_present, rev_comp, special_extraction, use_sub, use_read_sequence;
1148+ bool group1, group2, id1_present, id2_present, rev_comp, rev, comp, special_extraction, use_sub, use_read_sequence;
11491149 };
11501150
11511151 struct TrimTagSummary {
@@ -2808,6 +2808,8 @@ struct SplitCode {
28082808 auto & padding_left = umi.padding_left ;
28092809 auto & padding_right = umi.padding_right ;
28102810 auto & rev_comp = umi.rev_comp ;
2811+ auto & rev = umi.rev ;
2812+ auto & comp = umi.comp ;
28112813 auto & special_extraction = umi.special_extraction ;
28122814 auto & use_sub = umi.use_sub ;
28132815 auto & use_read_sequence = umi.use_read_sequence ;
@@ -2820,6 +2822,8 @@ struct SplitCode {
28202822 int16_t file1 = -1 , file2 = -1 ;
28212823 int32_t pos1 = -1 , pos2 = -1 ;
28222824 rev_comp = false ;
2825+ rev = false ;
2826+ comp = false ;
28232827 name1_present = false ;
28242828 name2_present = false ;
28252829 special_extraction = false ;
@@ -2835,8 +2839,16 @@ struct SplitCode {
28352839 return false ; // malformed
28362840 }
28372841 std::string umi_name = s.substr (umi_open+1 ,umi_close-umi_open-1 );
2838- // Find tilde at beginning (denoting reverse complement)
2839- if (umi_name.length () > 1 && umi_name[0 ] == ' ~' ) {
2842+ // Parse a leading modifier on the UMI name: "~r~" = reverse, "~c~" = complement, a lone "~" = reverse complement
2843+ if (umi_name.length () > 3 && umi_name[0 ] == ' ~' && umi_name[1 ] == ' r' && umi_name[2 ] == ' ~' ) {
2844+ umi_name = umi_name.substr (3 );
2845+ rev = true ;
2846+ }
2847+ else if (umi_name.length () > 3 && umi_name[0 ] == ' ~' && umi_name[1 ] == ' c' && umi_name[2 ] == ' ~' ) {
2848+ umi_name = umi_name.substr (3 );
2849+ comp = true ;
2850+ }
2851+ else if (umi_name.length () > 1 && umi_name[0 ] == ' ~' ) {
28402852 umi_name = umi_name.substr (1 );
28412853 rev_comp = true ;
28422854 }
@@ -3227,7 +3239,7 @@ struct SplitCode {
32273239 auto extract_no_chain = this ->extract_no_chain ;
32283240 auto & extract_no_chain_set = this ->extract_no_chain_set ;
32293241 auto & umi_names = this ->umi_names ;
3230- auto revcomp = [](const std::string s) {
3242+ auto revcomp = [](const std::string& s) {
32313243 std::string r (s);
32323244 std::transform (s.rbegin (), s.rend (), r.begin (), [](char c) {
32333245 switch (c) {
@@ -3241,15 +3253,43 @@ struct SplitCode {
32413253 });
32423254 return r;
32433255 };
3244- auto addToUmiData = [extract_no_chain, &extract_no_chain_set, &umi_names, &umi_data, &revcomp](const UMI& u, const std::string& extracted_umi) {
3256+ auto rev_ = [](const std::string &s) {
3257+ return std::string{s.rbegin (), s.rend ()};
3258+ };
3259+ auto comp_ = [](const std::string &s) {
3260+ std::string result (s);
3261+ std::transform (result.begin (), result.end (), result.begin (),
3262+ [](char c) {
3263+ switch (c) {
3264+ case ' A' : return ' T' ;
3265+ case ' T' : return ' A' ;
3266+ case ' C' : return ' G' ;
3267+ case ' G' : return ' C' ;
3268+ default : return ' N' ;
3269+ }
3270+ });
3271+ return result;
3272+ };
3273+ auto addToUmiData = [extract_no_chain, &extract_no_chain_set, &umi_names, &umi_data, &revcomp, &rev_, &comp_](const UMI& u, const std::string& extracted_umi) {
32453274 bool extract_no_chain_ = extract_no_chain;
32463275 if (extract_no_chain_ && !extract_no_chain_set.empty ()) {
32473276 extract_no_chain_ = false ;
32483277 if (extract_no_chain_set.find (umi_names[u.name_id ]) != extract_no_chain_set.end ()) {
32493278 extract_no_chain_ = true ;
32503279 }
32513280 }
3252- umi_data[u.name_id ] += extract_no_chain_ && !umi_data[u.name_id ].empty () ? " " : (!u.rev_comp ? u.prepend +extracted_umi+u.append : u.prepend +revcomp (extracted_umi)+u.append );
3281+ if (!(extract_no_chain_ && !umi_data[u.name_id ].empty ())) {
3282+ if (u.rev_comp ) {
3283+ umi_data[u.name_id ] += u.prepend +revcomp (extracted_umi)+u.append ;
3284+ } else if (u.rev ) {
3285+ umi_data[u.name_id ] += u.prepend +rev_ (extracted_umi)+u.append ;
3286+ } else if (u.comp ) {
3287+ umi_data[u.name_id ] += u.prepend +comp_ (extracted_umi)+u.append ;
3288+ } else {
3289+ umi_data[u.name_id ] += u.prepend +extracted_umi+u.append ;
3290+ }
3291+
3292+ }
32533293 };
32543294
32553295 const auto & umi_vec_name = umi_name_map.find (tag_name_id) != umi_name_map.end () ? umi_name_map[tag_name_id] : std::vector<UMI>(0 );
@@ -3510,7 +3550,7 @@ struct SplitCode {
35103550 auto extract_no_chain = this ->extract_no_chain ;
35113551 auto & extract_no_chain_set = this ->extract_no_chain_set ;
35123552 auto & umi_names = this ->umi_names ;
3513- auto revcomp = [](const std::string s) {
3553+ auto revcomp = [](const std::string& s) {
35143554 std::string r (s);
35153555 std::transform (s.rbegin (), s.rend (), r.begin (), [](char c) {
35163556 switch (c) {
@@ -3524,15 +3564,43 @@ struct SplitCode {
35243564 });
35253565 return r;
35263566 };
3527- auto addToUmiData = [extract_no_chain, &extract_no_chain_set, &umi_names, &umi_data, &revcomp](const UMI& u, const std::string& extracted_umi) {
3567+ auto rev_ = [](const std::string &s) {
3568+ return std::string{s.rbegin (), s.rend ()};
3569+ };
3570+ auto comp_ = [](const std::string &s) {
3571+ std::string result (s);
3572+ std::transform (result.begin (), result.end (), result.begin (),
3573+ [](char c) {
3574+ switch (c) {
3575+ case ' A' : return ' T' ;
3576+ case ' T' : return ' A' ;
3577+ case ' C' : return ' G' ;
3578+ case ' G' : return ' C' ;
3579+ default : return ' N' ;
3580+ }
3581+ });
3582+ return result;
3583+ };
3584+ auto addToUmiData = [extract_no_chain, &extract_no_chain_set, &umi_names, &umi_data, &revcomp, &rev_, &comp_](const UMI& u, const std::string& extracted_umi) {
35283585 bool extract_no_chain_ = extract_no_chain;
35293586 if (extract_no_chain_ && !extract_no_chain_set.empty ()) {
35303587 extract_no_chain_ = false ;
35313588 if (extract_no_chain_set.find (umi_names[u.name_id ]) != extract_no_chain_set.end ()) {
35323589 extract_no_chain_ = true ;
35333590 }
35343591 }
3535- umi_data[u.name_id ] += extract_no_chain_ && !umi_data[u.name_id ].empty () ? " " : (!u.rev_comp ? u.prepend +extracted_umi+u.append : u.prepend +revcomp (extracted_umi)+u.append );
3592+ if (!(extract_no_chain_ && !umi_data[u.name_id ].empty ())) {
3593+ if (u.rev_comp ) {
3594+ umi_data[u.name_id ] += u.prepend +revcomp (extracted_umi)+u.append ;
3595+ } else if (u.rev ) {
3596+ umi_data[u.name_id ] += u.prepend +rev_ (extracted_umi)+u.append ;
3597+ } else if (u.comp ) {
3598+ umi_data[u.name_id ] += u.prepend +comp_ (extracted_umi)+u.append ;
3599+ } else {
3600+ umi_data[u.name_id ] += u.prepend +extracted_umi+u.append ;
3601+ }
3602+
3603+ }
35363604 };
35373605 const auto &u = extract_seq_names_umi;
35383606 auto extract_min_len = u.length_range_start ;
@@ -3547,7 +3615,7 @@ struct SplitCode {
35473615 auto extract_no_chain = this ->extract_no_chain ;
35483616 auto & extract_no_chain_set = this ->extract_no_chain_set ;
35493617 auto & umi_names = this ->umi_names ;
3550- auto revcomp = [](const std::string s) {
3618+ auto revcomp = [](const std::string& s) {
35513619 std::string r (s);
35523620 std::transform (s.rbegin (), s.rend (), r.begin (), [](char c) {
35533621 switch (c) {
@@ -3561,15 +3629,42 @@ struct SplitCode {
35613629 });
35623630 return r;
35633631 };
3564- auto addToUmiData = [extract_no_chain, &extract_no_chain_set, &umi_names, &umi_data, &revcomp](const UMI& u, const std::string& extracted_umi) {
3632+ auto rev_ = [](const std::string &s) {
3633+ return std::string{s.rbegin (), s.rend ()};
3634+ };
3635+ auto comp_ = [](const std::string &s) {
3636+ std::string result (s);
3637+ std::transform (result.begin (), result.end (), result.begin (),
3638+ [](char c) {
3639+ switch (c) {
3640+ case ' A' : return ' T' ;
3641+ case ' T' : return ' A' ;
3642+ case ' C' : return ' G' ;
3643+ case ' G' : return ' C' ;
3644+ default : return ' N' ;
3645+ }
3646+ });
3647+ return result;
3648+ };
3649+ auto addToUmiData = [extract_no_chain, &extract_no_chain_set, &umi_names, &umi_data, &revcomp, &rev_, &comp_](const UMI& u, const std::string& extracted_umi) {
35653650 bool extract_no_chain_ = extract_no_chain;
35663651 if (extract_no_chain_ && !extract_no_chain_set.empty ()) {
35673652 extract_no_chain_ = false ;
35683653 if (extract_no_chain_set.find (umi_names[u.name_id ]) != extract_no_chain_set.end ()) {
35693654 extract_no_chain_ = true ;
35703655 }
35713656 }
3572- umi_data[u.name_id ] += extract_no_chain_ && !umi_data[u.name_id ].empty () ? " " : (!u.rev_comp ? u.prepend +extracted_umi+u.append : u.prepend +revcomp (extracted_umi)+u.append );
3657+ if (!(extract_no_chain_ && !umi_data[u.name_id ].empty ())) {
3658+ if (u.rev_comp ) {
3659+ umi_data[u.name_id ] += u.prepend +revcomp (extracted_umi)+u.append ;
3660+ } else if (u.rev ) {
3661+ umi_data[u.name_id ] += u.prepend +rev_ (extracted_umi)+u.append ;
3662+ } else if (u.comp ) {
3663+ umi_data[u.name_id ] += u.prepend +comp_ (extracted_umi)+u.append ;
3664+ } else {
3665+ umi_data[u.name_id ] += u.prepend +extracted_umi+u.append ;
3666+ }
3667+ }
35733668 };
35743669 int i = 0 ;
35753670 for (auto & pumi : placement_umis) {
0 commit comments