Skip to content

Commit a674cde

Browse files
committed
Version bump to 0.29.2
Changed 32-bit int to 64-bit int when getting the number of mapped reads, to avoid overflow when displaying progress of how many reads have been mapped. Fixed specifying no-chain in the config file (previously, it was not being recognized). Allowed no-chain to take arguments (a comma-separated list of extraction names, e.g. "@no-chain x,y") specifying what should be chained vs. not chained.
1 parent 0e0804c commit a674cde

File tree

3 files changed

+63
-23
lines changed

3 files changed

+63
-23
lines changed

func_tests/runtests.sh

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -381,5 +381,18 @@ echo "@extract 0:3<x[1]>" >> $test_dir/config_mult_extracts.txt
381381

382382
checkcmdoutput "$splitcode --trim-only -c $test_dir/config_mult_extracts.txt --x-only -p $test_dir/test_bound.fq" aec8f4f4691db99827b80a0f21140848
383383

384+
echo "@no-chain" >> $test_dir/config_mult_extracts.txt
385+
checkcmdoutput "$splitcode --trim-only -c $test_dir/config_mult_extracts.txt --x-only -p $test_dir/test_bound.fq" fad7c4fa3cb392c57255d2a8139d4ba0
386+
387+
388+
echo "@extract 0:0<y[1]>" >> $test_dir/config_mult_extracts.txt
389+
echo "@extract 0:3<y[1]>" >> $test_dir/config_mult_extracts.txt
390+
echo "@no-chain y" >> $test_dir/config_mult_extracts.txt
391+
392+
checkcmdoutput "$splitcode --trim-only -c $test_dir/config_mult_extracts.txt --x-only -p $test_dir/test_bound.fq" 3bbf54b385da42b516117a9b1d0bc25d
393+
394+
echo "@no-chain x,y" >> $test_dir/config_mult_extracts.txt
395+
396+
checkcmdoutput "$splitcode --trim-only -c $test_dir/config_mult_extracts.txt --x-only -p $test_dir/test_bound.fq" 676235664622f341a6fcb6b2035642f2
384397

385398

src/ProcessReads.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ int64_t ProcessReads(MasterProcessor& MP, const ProgramOptions& opt) {
6565
std::cerr << std::endl << "done " << std::endl;
6666
}
6767

68-
int nummapped = MP.sc.getNumMapped();
68+
int64_t nummapped = MP.sc.getNumMapped();
6969

7070
if (MP.verbose) {
7171
std::cerr << "* processed " << pretty_num(numreads) << " reads";
@@ -511,7 +511,7 @@ void ReadProcessor::processBuffer() {
511511

512512
if (numreads > 0 && numreads % 1000000 == 0 && mp.verbose) {
513513
numreads = 0; // reset counter
514-
int nummapped = mp.sc.getNumMapped();
514+
int64_t nummapped = mp.sc.getNumMapped();
515515

516516
std::cerr << '\r' << (mp.numreads/1000000) << "M reads processed";
517517
if (!mp.sc.always_assign) {

src/SplitCode.h

Lines changed: 48 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,14 @@
11
#ifndef SPLITCODE_H
22
#define SPLITCODE_H
33

4-
#define SPLITCODE_VERSION "0.29.1"
4+
#define SPLITCODE_VERSION "0.29.2"
55

66
#include <string>
77
#include <iostream>
88
#include <vector>
99
#include <map>
1010
#include <unordered_map>
11+
#include <unordered_set>
1112
#include <set>
1213
#include <algorithm>
1314
#include <sstream>
@@ -1428,7 +1429,26 @@ struct SplitCode {
14281429
std::string field;
14291430
std::string value;
14301431
ss >> field >> value;
1431-
if (value.empty()) {
1432+
if (field == "@qtrim-5") {
1433+
this->quality_trimming_5 = true;
1434+
} else if (field == "@qtrim-3") {
1435+
this->quality_trimming_3 = true;
1436+
} else if (field == "@qtrim-pre") {
1437+
this->quality_trimming_pre = true;
1438+
} else if (field == "@qtrim-naive") {
1439+
this->quality_trimming_naive = true;
1440+
} else if (field == "@phred64") {
1441+
this->phred64 = true;
1442+
} else if (field == "@no-chain") {
1443+
this->extract_no_chain = true;
1444+
if (!value.empty()) {
1445+
std::stringstream ss(value);
1446+
std::string extract_val;
1447+
while (std::getline(ss, extract_val, ',')) {
1448+
this->extract_no_chain_set.insert(extract_val);
1449+
}
1450+
}
1451+
} else if (value.empty()) {
14321452
std::cerr << "Error: The file \"" << config_file << "\" contains an invalid line starting with @" << std::endl;
14331453
return false;
14341454
}
@@ -1493,18 +1513,6 @@ struct SplitCode {
14931513
std::cerr << "Error: The file \"" << config_file << "\" specifies an invalid value for @qtrim" << std::endl;
14941514
return false;
14951515
}
1496-
} else if (field == "@qtrim-5") {
1497-
this->quality_trimming_5 = true;
1498-
} else if (field == "@qtrim-3") {
1499-
this->quality_trimming_3 = true;
1500-
} else if (field == "@qtrim-pre") {
1501-
this->quality_trimming_pre = true;
1502-
} else if (field == "@qtrim-naive") {
1503-
this->quality_trimming_naive = true;
1504-
} else if (field == "@phred64") {
1505-
this->phred64 = true;
1506-
} else if (field == "@no-chain") {
1507-
this->extract_no_chain = true;
15081516
}
15091517
continue;
15101518
}
@@ -1762,8 +1770,8 @@ struct SplitCode {
17621770
}
17631771
}
17641772

1765-
int getNumMapped() {
1766-
int nummapped = 0;
1773+
int64_t getNumMapped() {
1774+
int64_t nummapped = 0;
17671775
for (auto& n : idcount) {
17681776
nummapped += n;
17691777
}
@@ -2564,6 +2572,8 @@ struct SplitCode {
25642572
void doUMIExtraction(std::string& seq, int pos, int k, int file, int readLength, std::map<int16_t, std::vector<int32_t>>& umi_seen, std::map<int16_t, std::vector<int32_t>>& umi_seen_copy,
25652573
std::vector<std::string>& umi_data, uint32_t tag_name_id, uint32_t tag_group_id, std::pair<int16_t,int32_t> location = std::make_pair(-1,-1), int64_t tag_id_ = -1) {
25662574
auto extract_no_chain = this->extract_no_chain;
2575+
auto& extract_no_chain_set = this->extract_no_chain_set;
2576+
auto& umi_names = this->umi_names;
25672577
auto revcomp = [](const std::string s) {
25682578
std::string r(s);
25692579
std::transform(s.rbegin(), s.rend(), r.begin(), [](char c) {
@@ -2578,8 +2588,15 @@ struct SplitCode {
25782588
});
25792589
return r;
25802590
};
2581-
auto addToUmiData = [extract_no_chain, &umi_data, &revcomp](const UMI& u, const std::string& extracted_umi) {
2582-
umi_data[u.name_id] += extract_no_chain && !umi_data[u.name_id].empty() ? "" : (!u.rev_comp ? extracted_umi : revcomp(extracted_umi));
2591+
auto addToUmiData = [extract_no_chain, &extract_no_chain_set, &umi_names, &umi_data, &revcomp](const UMI& u, const std::string& extracted_umi) {
2592+
bool extract_no_chain_ = extract_no_chain;
2593+
if (extract_no_chain_ && !extract_no_chain_set.empty()) {
2594+
extract_no_chain_ = false;
2595+
if (extract_no_chain_set.find(umi_names[u.name_id]) != extract_no_chain_set.end()) {
2596+
extract_no_chain_ = true;
2597+
}
2598+
}
2599+
umi_data[u.name_id] += extract_no_chain_ && !umi_data[u.name_id].empty() ? "" : (!u.rev_comp ? extracted_umi : revcomp(extracted_umi));
25832600
};
25842601

25852602
const auto& umi_vec_name = umi_name_map.find(tag_name_id) != umi_name_map.end() ? umi_name_map[tag_name_id] : std::vector<UMI>(0);
@@ -2838,6 +2855,8 @@ struct SplitCode {
28382855

28392856
void doUMIExtractionSeqNames(const std::string& identified_tags_seq, std::vector<std::string>& umi_data) {
28402857
auto extract_no_chain = this->extract_no_chain;
2858+
auto& extract_no_chain_set = this->extract_no_chain_set;
2859+
auto& umi_names = this->umi_names;
28412860
auto revcomp = [](const std::string s) {
28422861
std::string r(s);
28432862
std::transform(s.rbegin(), s.rend(), r.begin(), [](char c) {
@@ -2852,8 +2871,15 @@ struct SplitCode {
28522871
});
28532872
return r;
28542873
};
2855-
auto addToUmiData = [extract_no_chain, &umi_data, &revcomp](const UMI& u, const std::string& extracted_umi) {
2856-
umi_data[u.name_id] += extract_no_chain && !umi_data[u.name_id].empty() ? "" : (!u.rev_comp ? extracted_umi : revcomp(extracted_umi));
2874+
auto addToUmiData = [extract_no_chain, &extract_no_chain_set, &umi_names, &umi_data, &revcomp](const UMI& u, const std::string& extracted_umi) {
2875+
bool extract_no_chain_ = extract_no_chain;
2876+
if (extract_no_chain_ && !extract_no_chain_set.empty()) {
2877+
extract_no_chain_ = false;
2878+
if (extract_no_chain_set.find(umi_names[u.name_id]) != extract_no_chain_set.end()) {
2879+
extract_no_chain_ = true;
2880+
}
2881+
}
2882+
umi_data[u.name_id] += extract_no_chain_ && !umi_data[u.name_id].empty() ? "" : (!u.rev_comp ? extracted_umi : revcomp(extracted_umi));
28572883
};
28582884
const auto &u = extract_seq_names_umi;
28592885
auto extract_min_len = u.length_range_start;
@@ -3780,7 +3806,7 @@ struct SplitCode {
37803806
splitcode_u_map__<std::vector<uint16_t>, int, VectorHasher> idmapinv16;
37813807
splitcode_u_map__<std::vector<uint32_t>, int, VectorHasher> subassign_idmapinv;
37823808
splitcode_u_map__<std::vector<uint16_t>, int, VectorHasher> subassign_idmapinv16;
3783-
std::vector<uint32_t> idcount;
3809+
std::vector<int64_t> idcount;
37843810
std::unordered_map<std::vector<uint32_t>, std::string, VectorHasher> idmapinv_keep;
37853811
std::unordered_map<std::vector<uint32_t>, int, VectorHasher> idmapinv_discard;
37863812
std::unordered_map<std::vector<uint32_t>, std::string, VectorHasher> groupmapinv_keep;
@@ -3803,6 +3829,7 @@ struct SplitCode {
38033829
std::string barcode_prefix;
38043830
std::string trim_5_str, trim_3_str;
38053831
std::string extract_str;
3832+
std::unordered_set<std::string> extract_no_chain_set;
38063833
std::string filter_length_str;
38073834
std::vector<std::pair<int,int>> trim_5_3_vec;
38083835
std::vector<std::pair<int,int>> filter_length_vec;

0 commit comments

Comments
 (0)