Skip to content

Commit d309cfc

Browse files
committed
version 0.31.2
In addition to extracting reverse-complement sequences via a leading ~, extraction can now produce the complement via a leading ~c~ and the simple reverse via a leading ~r~.
1 parent 195766b commit d309cfc

File tree

1 file changed

+108
-13
lines changed

1 file changed

+108
-13
lines changed

src/SplitCode.h

Lines changed: 108 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#ifndef SPLITCODE_H
22
#define SPLITCODE_H
33

4-
#define SPLITCODE_VERSION "0.31.1"
4+
#define SPLITCODE_VERSION "0.31.2"
55

66
#include <string>
77
#include <iostream>
@@ -1145,7 +1145,7 @@ struct SplitCode {
11451145
std::pair<int16_t,int32_t> location2;
11461146
uint16_t name_id;
11471147
std::string prepend, append;
1148-
bool group1, group2, id1_present, id2_present, rev_comp, special_extraction, use_sub, use_read_sequence;
1148+
bool group1, group2, id1_present, id2_present, rev_comp, rev, comp, special_extraction, use_sub, use_read_sequence;
11491149
};
11501150

11511151
struct TrimTagSummary {
@@ -2808,6 +2808,8 @@ struct SplitCode {
28082808
auto& padding_left = umi.padding_left;
28092809
auto& padding_right = umi.padding_right;
28102810
auto& rev_comp = umi.rev_comp;
2811+
auto& rev = umi.rev;
2812+
auto& comp = umi.comp;
28112813
auto& special_extraction = umi.special_extraction;
28122814
auto& use_sub = umi.use_sub;
28132815
auto& use_read_sequence = umi.use_read_sequence;
@@ -2820,6 +2822,8 @@ struct SplitCode {
28202822
int16_t file1 = -1, file2 = -1;
28212823
int32_t pos1 = -1, pos2 = -1;
28222824
rev_comp = false;
2825+
rev = false;
2826+
comp = false;
28232827
name1_present = false;
28242828
name2_present = false;
28252829
special_extraction = false;
@@ -2835,8 +2839,16 @@ struct SplitCode {
28352839
return false; // malformed
28362840
}
28372841
std::string umi_name = s.substr(umi_open+1,umi_close-umi_open-1);
2838-
// Find tilde at beginning (denoting reverse complement)
2839-
if (umi_name.length() > 1 && umi_name[0] == '~') {
2842+
// Find tilde at beginning (denoting reverse complement, reverse, or complement)
2843+
if (umi_name.length() > 3 && umi_name[0] == '~' && umi_name[1] == 'r' && umi_name[2] == '~') {
2844+
umi_name = umi_name.substr(3);
2845+
rev = true;
2846+
}
2847+
else if (umi_name.length() > 3 && umi_name[0] == '~' && umi_name[1] == 'c' && umi_name[2] == '~') {
2848+
umi_name = umi_name.substr(3);
2849+
comp = true;
2850+
}
2851+
else if (umi_name.length() > 1 && umi_name[0] == '~') {
28402852
umi_name = umi_name.substr(1);
28412853
rev_comp = true;
28422854
}
@@ -3227,7 +3239,7 @@ struct SplitCode {
32273239
auto extract_no_chain = this->extract_no_chain;
32283240
auto& extract_no_chain_set = this->extract_no_chain_set;
32293241
auto& umi_names = this->umi_names;
3230-
auto revcomp = [](const std::string s) {
3242+
auto revcomp = [](const std::string& s) {
32313243
std::string r(s);
32323244
std::transform(s.rbegin(), s.rend(), r.begin(), [](char c) {
32333245
switch(c) {
@@ -3241,15 +3253,43 @@ struct SplitCode {
32413253
});
32423254
return r;
32433255
};
3244-
auto addToUmiData = [extract_no_chain, &extract_no_chain_set, &umi_names, &umi_data, &revcomp](const UMI& u, const std::string& extracted_umi) {
3256+
auto rev_ = [](const std::string &s) {
3257+
return std::string{s.rbegin(), s.rend()};
3258+
};
3259+
auto comp_ = [](const std::string &s) {
3260+
std::string result(s);
3261+
std::transform(result.begin(), result.end(), result.begin(),
3262+
[](char c) {
3263+
switch (c) {
3264+
case 'A': return 'T';
3265+
case 'T': return 'A';
3266+
case 'C': return 'G';
3267+
case 'G': return 'C';
3268+
default: return 'N';
3269+
}
3270+
});
3271+
return result;
3272+
};
3273+
auto addToUmiData = [extract_no_chain, &extract_no_chain_set, &umi_names, &umi_data, &revcomp, &rev_, &comp_](const UMI& u, const std::string& extracted_umi) {
32453274
bool extract_no_chain_ = extract_no_chain;
32463275
if (extract_no_chain_ && !extract_no_chain_set.empty()) {
32473276
extract_no_chain_ = false;
32483277
if (extract_no_chain_set.find(umi_names[u.name_id]) != extract_no_chain_set.end()) {
32493278
extract_no_chain_ = true;
32503279
}
32513280
}
3252-
umi_data[u.name_id] += extract_no_chain_ && !umi_data[u.name_id].empty() ? "" : (!u.rev_comp ? u.prepend+extracted_umi+u.append : u.prepend+revcomp(extracted_umi)+u.append);
3281+
if (!(extract_no_chain_ && !umi_data[u.name_id].empty())) {
3282+
if (u.rev_comp) {
3283+
umi_data[u.name_id] += u.prepend+revcomp(extracted_umi)+u.append;
3284+
} else if (u.rev) {
3285+
umi_data[u.name_id] += u.prepend+rev_(extracted_umi)+u.append;
3286+
} else if (u.comp) {
3287+
umi_data[u.name_id] += u.prepend+comp_(extracted_umi)+u.append;
3288+
} else {
3289+
umi_data[u.name_id] += u.prepend+extracted_umi+u.append;
3290+
}
3291+
3292+
}
32533293
};
32543294

32553295
const auto& umi_vec_name = umi_name_map.find(tag_name_id) != umi_name_map.end() ? umi_name_map[tag_name_id] : std::vector<UMI>(0);
@@ -3510,7 +3550,7 @@ struct SplitCode {
35103550
auto extract_no_chain = this->extract_no_chain;
35113551
auto& extract_no_chain_set = this->extract_no_chain_set;
35123552
auto& umi_names = this->umi_names;
3513-
auto revcomp = [](const std::string s) {
3553+
auto revcomp = [](const std::string& s) {
35143554
std::string r(s);
35153555
std::transform(s.rbegin(), s.rend(), r.begin(), [](char c) {
35163556
switch(c) {
@@ -3524,15 +3564,43 @@ struct SplitCode {
35243564
});
35253565
return r;
35263566
};
3527-
auto addToUmiData = [extract_no_chain, &extract_no_chain_set, &umi_names, &umi_data, &revcomp](const UMI& u, const std::string& extracted_umi) {
3567+
auto rev_ = [](const std::string &s) {
3568+
return std::string{s.rbegin(), s.rend()};
3569+
};
3570+
auto comp_ = [](const std::string &s) {
3571+
std::string result(s);
3572+
std::transform(result.begin(), result.end(), result.begin(),
3573+
[](char c) {
3574+
switch (c) {
3575+
case 'A': return 'T';
3576+
case 'T': return 'A';
3577+
case 'C': return 'G';
3578+
case 'G': return 'C';
3579+
default: return 'N';
3580+
}
3581+
});
3582+
return result;
3583+
};
3584+
auto addToUmiData = [extract_no_chain, &extract_no_chain_set, &umi_names, &umi_data, &revcomp, &rev_, &comp_](const UMI& u, const std::string& extracted_umi) {
35283585
bool extract_no_chain_ = extract_no_chain;
35293586
if (extract_no_chain_ && !extract_no_chain_set.empty()) {
35303587
extract_no_chain_ = false;
35313588
if (extract_no_chain_set.find(umi_names[u.name_id]) != extract_no_chain_set.end()) {
35323589
extract_no_chain_ = true;
35333590
}
35343591
}
3535-
umi_data[u.name_id] += extract_no_chain_ && !umi_data[u.name_id].empty() ? "" : (!u.rev_comp ? u.prepend+extracted_umi+u.append : u.prepend+revcomp(extracted_umi)+u.append);
3592+
if (!(extract_no_chain_ && !umi_data[u.name_id].empty())) {
3593+
if (u.rev_comp) {
3594+
umi_data[u.name_id] += u.prepend+revcomp(extracted_umi)+u.append;
3595+
} else if (u.rev) {
3596+
umi_data[u.name_id] += u.prepend+rev_(extracted_umi)+u.append;
3597+
} else if (u.comp) {
3598+
umi_data[u.name_id] += u.prepend+comp_(extracted_umi)+u.append;
3599+
} else {
3600+
umi_data[u.name_id] += u.prepend+extracted_umi+u.append;
3601+
}
3602+
3603+
}
35363604
};
35373605
const auto &u = extract_seq_names_umi;
35383606
auto extract_min_len = u.length_range_start;
@@ -3547,7 +3615,7 @@ struct SplitCode {
35473615
auto extract_no_chain = this->extract_no_chain;
35483616
auto& extract_no_chain_set = this->extract_no_chain_set;
35493617
auto& umi_names = this->umi_names;
3550-
auto revcomp = [](const std::string s) {
3618+
auto revcomp = [](const std::string& s) {
35513619
std::string r(s);
35523620
std::transform(s.rbegin(), s.rend(), r.begin(), [](char c) {
35533621
switch(c) {
@@ -3561,15 +3629,42 @@ struct SplitCode {
35613629
});
35623630
return r;
35633631
};
3564-
auto addToUmiData = [extract_no_chain, &extract_no_chain_set, &umi_names, &umi_data, &revcomp](const UMI& u, const std::string& extracted_umi) {
3632+
auto rev_ = [](const std::string &s) {
3633+
return std::string{s.rbegin(), s.rend()};
3634+
};
3635+
auto comp_ = [](const std::string &s) {
3636+
std::string result(s);
3637+
std::transform(result.begin(), result.end(), result.begin(),
3638+
[](char c) {
3639+
switch (c) {
3640+
case 'A': return 'T';
3641+
case 'T': return 'A';
3642+
case 'C': return 'G';
3643+
case 'G': return 'C';
3644+
default: return 'N';
3645+
}
3646+
});
3647+
return result;
3648+
};
3649+
auto addToUmiData = [extract_no_chain, &extract_no_chain_set, &umi_names, &umi_data, &revcomp, &rev_, &comp_](const UMI& u, const std::string& extracted_umi) {
35653650
bool extract_no_chain_ = extract_no_chain;
35663651
if (extract_no_chain_ && !extract_no_chain_set.empty()) {
35673652
extract_no_chain_ = false;
35683653
if (extract_no_chain_set.find(umi_names[u.name_id]) != extract_no_chain_set.end()) {
35693654
extract_no_chain_ = true;
35703655
}
35713656
}
3572-
umi_data[u.name_id] += extract_no_chain_ && !umi_data[u.name_id].empty() ? "" : (!u.rev_comp ? u.prepend+extracted_umi+u.append : u.prepend+revcomp(extracted_umi)+u.append);
3657+
if (!(extract_no_chain_ && !umi_data[u.name_id].empty())) {
3658+
if (u.rev_comp) {
3659+
umi_data[u.name_id] += u.prepend+revcomp(extracted_umi)+u.append;
3660+
} else if (u.rev) {
3661+
umi_data[u.name_id] += u.prepend+rev_(extracted_umi)+u.append;
3662+
} else if (u.comp) {
3663+
umi_data[u.name_id] += u.prepend+comp_(extracted_umi)+u.append;
3664+
} else {
3665+
umi_data[u.name_id] += u.prepend+extracted_umi+u.append;
3666+
}
3667+
}
35733668
};
35743669
int i = 0;
35753670
for (auto & pumi : placement_umis) {

0 commit comments

Comments
 (0)