Skip to content

Commit 0e0804c

Browse files
committed
version 0.29.1: allow shorter final barcodes
1 parent 8f32053 commit 0e0804c

File tree

4 files changed, with 32 additions and 4 deletions.

func_tests/runtests.sh

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -357,6 +357,10 @@ checkcmdoutput "$splitcode --trim-only -b AT,TT --loc-names -d 1 -p --mod-names
357357
checkcmdoutput "$splitcode --trim-only -b AT,TT,GGGGG -i a,a,b --loc-names -d 1 -p --mod-names $test_dir/test_bound.fq" ca97fa58494f66bd3395ac329e5a95cf
358358
checkcmdoutput "$splitcode --trim-only -b AT,TT,GGGGG -i a,a,a --loc-names -d 1 -p --mod-names $test_dir/test_bound.fq" ca97fa58494f66bd3395ac329e5a95cf
359359

360+
# Shorten assigned final barcode
361+
362+
checkcmdoutput "$splitcode -m /dev/null -b AT,TT -d 1 -p --mod-names --bclen=15 --bc-names $test_dir/test_bound.fq" 9e7b7980f24349c5c334e1fc02950af8
363+
360364
# Try them with FASTA file
361365

362366
echo ">read1

src/SplitCode.h

Lines changed: 9 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
#ifndef SPLITCODE_H
22
#define SPLITCODE_H
33

4-
#define SPLITCODE_VERSION "0.29.0"
4+
#define SPLITCODE_VERSION "0.29.1"
55

66
#include <string>
77
#include <iostream>
@@ -66,6 +66,7 @@ struct SplitCode {
6666
summary_n_reads_filtered = 0;
6767
summary_n_reads_filtered_assigned = 0;
6868
max_seq_len = 0;
69+
fake_bc_len_offset = 0;
6970
setNFiles(0);
7071
early_termination_maxFindsG = -1;
7172
}
@@ -74,7 +75,7 @@ struct SplitCode {
7475
std::string trim_5_str = "", std::string trim_3_str = "", std::string extract_str = "", bool extract_no_chain = false, std::string barcode_prefix = "",
7576
std::string filter_length_str = "", bool quality_trimming_5 = false, bool quality_trimming_3 = false,
7677
bool quality_trimming_pre = false, bool quality_trimming_naive = false, int quality_trimming_threshold = -1, bool phred64 = false,
77-
bool write_tag_location_information = false, std::vector<size_t> sub_assign_vec = std::vector<size_t>(0)) {
78+
bool write_tag_location_information = false, std::vector<size_t> sub_assign_vec = std::vector<size_t>(0), int fake_bc_len_override = 0) {
7879
init = false;
7980
extract_seq_names = false;
8081
discard_check = false;
@@ -102,6 +103,10 @@ struct SplitCode {
102103
this->quality_trimming_pre = quality_trimming_pre;
103104
this->quality_trimming_naive = quality_trimming_naive;
104105
this->quality_trimming_threshold = quality_trimming_threshold;
106+
this->fake_bc_len_offset = 0;
107+
if (fake_bc_len_override != 0) {
108+
this->fake_bc_len_offset = fake_bc_len_override-((int)FAKE_BARCODE_LEN);
109+
}
105110
this->phred64 = phred64;
106111
this->write_tag_location_information = write_tag_location_information;
107112
this->sub_assign_vec = sub_assign_vec;
@@ -3630,7 +3635,7 @@ struct SplitCode {
36303635
}
36313636

36323637
int getBarcodeLength() {
3633-
return FAKE_BARCODE_LEN+barcode_prefix.length();
3638+
return (FAKE_BARCODE_LEN+fake_bc_len_offset)+barcode_prefix.length();
36343639
}
36353640

36363641
void setNumReads(size_t num_reads, size_t max_num_reads = 0) {
@@ -3848,6 +3853,7 @@ struct SplitCode {
38483853
int curr_barcode_mapping_i;
38493854
int curr_umi_id_i;
38503855
size_t max_seq_len; // Length of longest tag sequence excluding homopolymers
3856+
int fake_bc_len_offset;
38513857
static const int MAX_K = 32;
38523858
static const size_t FAKE_BARCODE_LEN = 16;
38533859
static const char QUAL = 'K';

src/common.h

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,7 @@ struct ProgramOptions {
1616
int quality_trimming_threshold;
1717
int64_t max_num_reads;
1818
int compress_level;
19+
int bclen;
1920
bool extract_no_chain;
2021
bool output_fasta;
2122
bool no_output;
@@ -92,6 +93,7 @@ struct ProgramOptions {
9293
quality_trimming_threshold(-1),
9394
max_num_reads(0),
9495
compress_level(1),
96+
bclen(0),
9597
extract_no_chain(false),
9698
output_fasta(false),
9799
no_output(false),

src/main.cpp

Lines changed: 17 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -143,6 +143,7 @@ void usage() {
143143
<< "-s, --summary File where summary statistics will be written to" << endl
144144
<< "-h, --help Displays usage information" << endl
145145
<< " --assign Assign reads to a final barcode sequence identifier based on tags present" << endl
146+
<< " --bclen The length of the final barcode sequence identifier (default: 16)" << endl
146147
<< " --inleaved Specifies that input is an interleaved FASTQ file" << endl
147148
<< " --remultiplex Turn on remultiplexing mode" << endl
148149
<< " --version Prints version number" << endl
@@ -259,6 +260,7 @@ void ParseOptions(int argc, char **argv, ProgramOptions& opt) {
259260
{"sam-tags", required_argument, 0, 'M'},
260261
{"sub-assign", required_argument, 0, 'X'},
261262
{"compress", required_argument, 0, 'C'},
263+
{"bclen", required_argument, 0, '9'},
262264
{0,0,0,0}
263265
};
264266

@@ -441,6 +443,10 @@ void ParseOptions(int argc, char **argv, ProgramOptions& opt) {
441443
stringstream(optarg) >> opt.trim_3_str;
442444
break;
443445
}
446+
case '9': {
447+
stringstream(optarg) >> opt.bclen;
448+
break;
449+
}
444450
case 'w': {
445451
stringstream(optarg) >> opt.filter_length_str;
446452
break;
@@ -624,6 +630,16 @@ bool CheckOptions(ProgramOptions& opt, SplitCode& sc) {
624630
<< ", but only " << n << " cores on the machine" << endl;
625631
}
626632
}
633+
if (opt.bclen != 0) {
634+
if (!opt.barcode_prefix.empty()) {
635+
cerr << ERROR_STR << " Cannot specify --prefix with --bclen" << endl;
636+
ret = false;
637+
}
638+
if (opt.bclen >= 32 || opt.bclen < 2) {
639+
cerr << ERROR_STR << " --bclen must have value between 2 and 32 " << endl;
640+
ret = false;
641+
}
642+
}
627643
if (opt.remultiplex && opt.files.size() != 1) {
628644
cerr << ERROR_STR << " A single batch file must be supplied (for remultiplexing)" << endl;
629645
ret = false;
@@ -1297,7 +1313,7 @@ int main(int argc, char *argv[]) {
12971313
ProgramOptions opt;
12981314
ParseOptions(argc,argv,opt);
12991315
SplitCode sc(opt.nfiles, opt.summary_file, opt.trim_only, opt.disable_n, opt.trim_5_str, opt.trim_3_str, opt.extract_str, opt.extract_no_chain, opt.barcode_prefix, opt.filter_length_str,
1300-
opt.quality_trimming_5, opt.quality_trimming_3, opt.quality_trimming_pre, opt.quality_trimming_naive, opt.quality_trimming_threshold, opt.phred64, opt.write_locations, opt.sub_assign_vec);
1316+
opt.quality_trimming_5, opt.quality_trimming_3, opt.quality_trimming_pre, opt.quality_trimming_naive, opt.quality_trimming_threshold, opt.phred64, opt.write_locations, opt.sub_assign_vec, opt.bclen);
13011317
bool checkopts = CheckOptions(opt, sc);
13021318
if (!checkopts) {
13031319
usage();

0 commit comments

Comments
 (0)