Skip to content

Commit e5cebe4

Browse files
committed
enable simplified mode if processing big FASTQ for many mutations
1 parent b6bfd92 commit e5cebe4

File tree

5 files changed

+54
-3
lines changed

5 files changed

+54
-3
lines changed

src/mutation.cpp

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,7 @@ Match* Mutation::searchInRead(Read* r, char* simplifiedBuf, int distanceReq, int
110110
}
111111

112112
vector<Mutation> Mutation::parseCsv(string filename) {
113+
int num = 0;
113114
if(GlobalSettings::verbose) {
114115
cerr << "Parsing target mutations from CSV file: " << filename << endl;
115116
}
@@ -160,7 +161,8 @@ vector<Mutation> Mutation::parseCsv(string filename) {
160161
else {
161162
mutations.push_back(mut);
162163
if(GlobalSettings::verbose) {
163-
cerr <<name<<" "<<left<<" "<<center<<" "<<right<< " "<<chr <<endl;
164+
num++;
165+
cerr <<num<<", "<<name<<" "<<left<<" "<<center<<" "<<right<< " "<<chr <<endl;
164166
}
165167
}
166168
}
@@ -172,6 +174,7 @@ vector<Mutation> Mutation::parseCsv(string filename) {
172174
}
173175

174176
vector<Mutation> Mutation::parseBuiltIn() {
177+
int num = 0;
175178
if(GlobalSettings::verbose) {
176179
cerr << "Using built-in target mutations" << endl;
177180
}
@@ -198,7 +201,8 @@ vector<Mutation> Mutation::parseBuiltIn() {
198201
Mutation mut(name, left, center, right, chr);
199202
mutations.push_back(mut);
200203
if(GlobalSettings::verbose) {
201-
cerr <<name<<" "<<left<<" "<<center<<" "<<right<< " "<<chr <<endl;
204+
num++;
205+
cerr <<num<<", "<<name<<" "<<left<<" "<<center<<" "<<right<< " "<<chr <<endl;
202206
}
203207
}
204208
if(mutations.size() <= 0){
@@ -208,6 +212,7 @@ vector<Mutation> Mutation::parseBuiltIn() {
208212
}
209213

210214
vector<Mutation> Mutation::parseVcf(string vcfFile, string refFile) {
215+
int num = 0;
211216
if(GlobalSettings::verbose) {
212217
cerr << "Parsing target mutations from VCF file: " << vcfFile << endl;
213218
cerr << "With reference genome: " << refFile << endl;
@@ -272,7 +277,8 @@ vector<Mutation> Mutation::parseVcf(string vcfFile, string refFile) {
272277
mut.setSmallIndel(true);
273278
mutations.push_back(mut);
274279
if(GlobalSettings::verbose) {
275-
cerr <<name<<" "<<left<<" "<<center<<" "<<right<< " "<<chrom <<endl;
280+
num++;
281+
cerr <<num<<", "<<name<<" "<<left<<" "<<center<<" "<<right<< " "<<chrom <<endl;
276282
}
277283
}
278284
if(mutations.size() <= 0){

src/mutscan.cpp

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
#include "sescanner.h"
66
#include "pescanner.h"
77
#include "util.h"
8+
#include "globalsettings.h"
89

910
MutScan::MutScan(string mutationFile, string refFile, string read1File, string read2File, string html, int threadNum){
1011
mRead1File = read1File;
@@ -28,6 +29,35 @@ bool MutScan::scan(){
2829
}
2930
}
3031

32+
/**
 * Decide whether simplified mode should be switched on automatically, and
 * record the decision through GlobalSettings::setSimplifiedMode().
 *
 * Simplified mode is enabled only when BOTH conditions hold:
 *   - at least 10,000 mutations are being scanned, and
 *   - the estimated uncompressed FASTQ input is larger than 50 GiB.
 * In every other case simplified mode is explicitly disabled.
 *
 * The input size is an estimate derived from the on-disk size of r1file:
 * it is doubled when r2file is non-empty, and multiplied by 3 when r1file
 * ends with ".gz" (assumed compression rate of gzipped FASTQ).
 *
 * @param r1file       path of the read1 FASTQ file (plain or .gz)
 * @param r2file       path of the read2 FASTQ file, or "" if there is none
 * @param mutationNum  number of target mutations to scan
 */
void MutScan::evaluateSimplifiedMode(string r1file, string r2file, int mutationNum) {
    const int mutationNumThreshold = 10000;
    // long long is at least 64 bits on every platform; plain long is only
    // 32 bits on some (e.g. 64-bit Windows), where 50 GiB would overflow
    const long long fastqBytesThreshold = 50LL * 1024LL * 1024LL * 1024LL;
    const long long gzCompressionRate = 3;

    if(mutationNum < mutationNumThreshold) {
        GlobalSettings::setSimplifiedMode(false);
        return ;
    }

    // use another ifstream to not affect current reader;
    // binary mode so that the end position is a byte count
    long long bytes = 0;
    ifstream is(r1file, ifstream::in | ifstream::binary);
    if(is) {
        is.seekg(0, is.end);
        const long long endPos = is.tellg();
        // tellg() reports -1 on failure; treat that as an unknown (zero) size
        if(endPos > 0)
            bytes = endPos;
    }

    // paired-end input: assume read2 is about as large as read1
    if(r2file != "")
        bytes *= 2;

    // here we consider gz file for FASTQ has a compression rate of 3
    if(ends_with(r1file, ".gz"))
        bytes *= gzCompressionRate;

    // enable simplified mode for over 50G FASTQ + 10,000 mutations
    if(bytes > fastqBytesThreshold) {
        if(GlobalSettings::verbose)
            cerr << "Simplified mode is enabled automatically..."<<endl;
        GlobalSettings::setSimplifiedMode(true);
    }
    else {
        GlobalSettings::setSimplifiedMode(false);
    }
}
60+
3161
bool MutScan::scanPairEnd(){
3262
FastqReader reader1(mRead1File);
3363
FastqReader reader2(mRead2File);

src/mutscan.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ class MutScan{
1515
bool scan();
1616
void textReport(vector<Mutation>& mutationList, vector<Match*> *mutationMatches);
1717
void htmlReport(vector<Mutation>& mutationList, vector<Match*> *mutationMatches);
18+
static void evaluateSimplifiedMode(string r1file, string r2file, int mutationNum);
1819

1920
private:
2021
bool scanPairEnd();

src/pescanner.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#include <memory.h>
1010
#include "util.h"
1111
#include "globalsettings.h"
12+
#include "mutscan.h"
1213

1314
PairEndScanner::PairEndScanner(string mutationFile, string refFile, string read1File, string read2File, string html, int threadNum){
1415
mRead1File = read1File;
@@ -48,6 +49,12 @@ bool PairEndScanner::scan(){
4849
else
4950
mutationList = Mutation::parseBuiltIn();
5051

52+
if(GlobalSettings::verbose)
53+
cerr << "Scanning "<< mutationList.size() << " mutations..."<<endl;
54+
55+
if(GlobalSettings::simplifiedModeToEvaluate)
56+
MutScan::evaluateSimplifiedMode(mRead1File, mRead2File, mutationList.size());
57+
5158
if(!GlobalSettings::legacyMode && mRollingHash==NULL){
5259
mRollingHash = new RollingHash();
5360
mRollingHash->initMutations(mutationList);

src/sescanner.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#include <memory.h>
1010
#include "util.h"
1111
#include "globalsettings.h"
12+
#include "mutscan.h"
1213

1314
SingleEndScanner::SingleEndScanner(string mutationFile, string refFile, string read1File, string html, int threadNum){
1415
mRead1File = read1File;
@@ -46,6 +47,12 @@ bool SingleEndScanner::scan(){
4647
else
4748
mutationList = Mutation::parseBuiltIn();
4849

50+
if(GlobalSettings::verbose)
51+
cerr << "Scanning "<< mutationList.size() << " mutations..."<<endl;
52+
53+
if(GlobalSettings::simplifiedModeToEvaluate)
54+
MutScan::evaluateSimplifiedMode(mRead1File, mRead2File, mutationList.size());
55+
4956
if(!GlobalSettings::legacyMode && mRollingHash==NULL){
5057
mRollingHash = new RollingHash();
5158
mRollingHash->initMutations(mutationList);

0 commit comments

Comments
 (0)