Skip to content

Commit 800ae6d

Browse files
authored
Merge pull request #66 from calkan/master
CRAM support added w/samtools. Multithreaded computation is added for…
2 parents b0281f2 + fba734b commit 800ae6d

File tree

6 files changed

+100
-16
lines changed

6 files changed

+100
-16
lines changed

src/GenomeCopyNumber.cpp

Lines changed: 43 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ GenomeCopyNumber::GenomeCopyNumber(void)
3333
sex_="";
3434
SeekingSubc_ = false;
3535
isMappUsed_=false;
36-
totalNumberOfPairs_=0;
36+
totalNumberOfPairs_=0;
3737
normalNumberOfPairs_=0;
3838
}
3939

@@ -53,7 +53,7 @@ void GenomeCopyNumber::readCopyNumber(std::string const& mateFileName ,std::stri
5353
if ((inputFormat.compare("pileup")==0 || inputFormat.compare("SAMtools pileup")==0)) {
5454
readNumber = getReadNumberFromPileup(mateFileName);
5555
} else {
56-
readNumber = getLineNumber(mateFileName, pathToSamtools_, pathToSambamba_, SambambaThreads_);
56+
readNumber = getLineNumber(mateFileName, pathToReference_, pathToSamtools_, pathToSambamba_, SambambaThreads_);
5757
}
5858
cout << "\t read number:\t" << readNumber << "\n";
5959
cout << "\t coefficientOfVariation:\t" << coefficientOfVariation << "\n";
@@ -273,7 +273,7 @@ void GenomeCopyNumber::fillMyHash(std::string const& mateFileName ,std::string c
273273
long normalCount = 0;
274274
int bin = 0;
275275

276-
cout << "..Starting reading "<< mateFileName << "\n";
276+
cout << "..[genomecopynumber] Starting reading "<< mateFileName << "\n";
277277
//if (mateFileName.substr(mateFileName.size()-1,3).compare(".gz")==0) {
278278
// igzstream in( mateFileName );
279279
// if ( ! in.good()) {
@@ -310,24 +310,55 @@ void GenomeCopyNumber::fillMyHash(std::string const& mateFileName ,std::string c
310310

311311
string myInputFormat=inputFormat_str;
312312
if (mateFileName.substr(mateFileName.size()-3,3).compare(".gz")!=0) {
313+
if (mateFileName.substr(mateFileName.size()-4,4).compare(".bam")==0) {
313314
if (pathToSambamba_ != "")
314315
{
315316
command = pathToSambamba_ + " view -t " + SambambaThreads_ + " " + mateFileName;
316317
myInputFormat="sam"; //will try to use existing sambamba
317-
cout << "..sambamba should be installed to be able to read BAM files; will use the following command for sambamba: "<<pathToSambamba_ + " view -t " + SambambaThreads_ + " " + mateFileName << "\n";
318+
//cout << "..sambamba should be installed to be able to read BAM files; will use the following command for sambamba: "<<pathToSambamba_ + " view -t " + SambambaThreads_ + " " + mateFileName << "\n";
319+
cout << "..sambamba should be installed to be able to read BAM files; will use the following command for sambamba: "<< command << "\n";
318320
}
319321
else
320322
{
321-
command = pathToSamtools_ + " view "+mateFileName;
323+
command = pathToSamtools_ + " view -@ " + SambambaThreads_ + " " +mateFileName;
322324
myInputFormat="sam"; //will try to use existing samtools
323-
cout << "..samtools should be installed to be able to read BAM files; will use the following command for samtools: "<<pathToSamtools_ + " view "+mateFileName<<"\n";
325+
//cout << "..samtools should be installed to be able to read BAM files; will use the following command for samtools: "<<pathToSamtools_ + " view "+mateFileName<<"\n";
326+
cout << "..samtools should be installed to be able to read BAM files; will use the following command for samtools: "<< command <<"\n";
324327
}
328+
}
329+
else if (mateFileName.substr(mateFileName.size()-5,5).compare(".cram")==0) {
330+
if (pathToReference_ == "")
331+
{
332+
cout << "..Reference FASTA file (config.fastaFile) should be given to be able to read CRAM files\n";
333+
exit (1);
334+
}
335+
if (pathToSambamba_ != "")
336+
{
337+
/* calkan - sambamba is using a very old version of htslib. CRAM support is buggy */
338+
cout << "Sambamba has a bug in reading CRAM files. Please use samtools instead.\n";
339+
exit (1);
340+
/*
341+
command = pathToSambamba_ + " view -t " + SambambaThreads_ + " -C -T " + pathToReference_ + " " + mateFileName;
342+
myInputFormat="sam"; //will try to use existing sambamba
343+
// cout << "..sambamba should be installed to be able to read BAM files; will use the following command for sambamba: "<<pathToSambamba_ + " view -t " + SambambaThreads_ + " " + mateFileName << "\n";
344+
cout << "..sambamba should be installed to be able to read CRAM files; will use the following command for sambamba: "<< command << "\n";
345+
*/
346+
}
347+
else
348+
{
349+
command = pathToSamtools_ + " view -T "+ pathToReference_ + " -@ " + SambambaThreads_ + " " +mateFileName;
350+
myInputFormat="sam"; //will try to use existing samtools
351+
//cout << "..samtools should be installed to be able to read CRAM files; will use the following command for samtools: "<<pathToSamtools_ + " view "+mateFileName<<"\n";
352+
cout << "..samtools should be installed to be able to read CRAM files; will use the following command for samtools: "<< command << "\n";
353+
}
354+
}
325355
}
326356
else {
327357
command = "gzip -cd "+mateFileName;
328358
}
329359

330360
inputFormat = getInputFormat(myInputFormat);
361+
331362
stream =
332363
#if defined(_WIN32)
333364
_popen(command.c_str(), "r");
@@ -4261,8 +4292,13 @@ int GenomeCopyNumber::focusOnCapture (std::string const& captureFile) {
42614292
return int(minRegion);
42624293
}
42634294

4264-
void GenomeCopyNumber::setSamtools(std::string const& pathToSamtools) {
4295+
void GenomeCopyNumber::setSamtools(std::string const& pathToSamtools, std::string const& SambambaThreads) {
42654296
pathToSamtools_=pathToSamtools;
4297+
SambambaThreads_ = SambambaThreads;
4298+
}
4299+
4300+
void GenomeCopyNumber::setReference(std::string const& pathToReference) {
4301+
pathToReference_=pathToReference;
42664302
}
42674303

42684304
void GenomeCopyNumber::setSambamba(std::string const& pathToSambamba, std::string const& SambambaThreads) {

src/GenomeCopyNumber.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -118,8 +118,9 @@ class GenomeCopyNumber
118118
void setBAFtrue();
119119
void setNormalContamination(float normalContamination) ;
120120
void setAllNormal ();
121-
void setSamtools(std::string const& pathToSamtools);
121+
void setSamtools(std::string const& pathToSamtools, std::string const& SambambaThreads_);
122122
void setSambamba(std::string const& pathToSambamba, std::string const& SambambaThreads_);
123+
void setReference(std::string const& pathToReference);
123124
bool ifHasBAF();
124125
void setSex(std::string sex);
125126
void setSeekSubclones(bool seekSubclones);
@@ -162,6 +163,7 @@ class GenomeCopyNumber
162163
std::string sex_;
163164
std::string pathToSamtools_;
164165
std::string pathToSambamba_;
166+
std::string pathToReference_;
165167
std::string SambambaThreads_;
166168
};
167169
#endif

src/freec

-8.22 MB
Binary file not shown.

src/main.cpp

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -253,6 +253,9 @@ int main(int argc, char *argv[])
253253

254254
string pathToSambamba = (std::string)cf.Value("general","sambamba","");
255255
string SambambaThreads = "";
256+
257+
SambambaThreads = (std::string)cf.Value("general","maxThreads",""); /* initialize SambambaThreads using maxThreads -- calkan */
258+
256259
if (pathToSambamba != "") {
257260
SambambaThreads = (std::string)cf.Value("general","SambambaThreads","");
258261
if (SambambaThreads == "") {
@@ -262,7 +265,7 @@ int main(int argc, char *argv[])
262265
}
263266
}
264267

265-
bool has_window = cf.hasValue("general","window");
268+
bool has_window = cf.hasValue("general","window");
266269
int window = (int)cf.Value("general","window",NA);
267270
bool ifTargeted = cf.hasValue("target","captureRegions");
268271

@@ -792,16 +795,18 @@ int main(int argc, char *argv[])
792795
}
793796

794797
GenomeCopyNumber sampleCopyNumber;
795-
sampleCopyNumber.setSamtools(pathToSamtools);
798+
sampleCopyNumber.setSamtools(pathToSamtools, SambambaThreads);
796799
sampleCopyNumber.setSambamba(pathToSambamba, SambambaThreads);
800+
sampleCopyNumber.setReference(fastaFile);
797801
sampleCopyNumber.setWESanalysis(WESanalysis);
798802
sampleCopyNumber.setmakingPileup(makingPileup);
799803

800804
sampleCopyNumber.setIfLogged(logLogNorm);
801805

802806
GenomeCopyNumber controlCopyNumber;
803-
controlCopyNumber.setSamtools(pathToSamtools);
807+
controlCopyNumber.setSamtools(pathToSamtools, SambambaThreads);
804808
controlCopyNumber.setSambamba(pathToSambamba, SambambaThreads);
809+
controlCopyNumber.setReference(fastaFile);
805810
controlCopyNumber.setWESanalysis(WESanalysis);
806811
controlCopyNumber.setmakingPileup(makingPileup);
807812
controlCopyNumber.setIfLogged(logLogNorm);

src/myFunc.cpp

Lines changed: 45 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -352,7 +352,7 @@ unsigned long sum(const std::vector<int>& data) {
352352
return sum;
353353
}
354354

355-
long getLineNumber(std::string const& fileName, const std::string& pathToSamtools, const std::string& pathToSambamba, const std::string& SambambaThreads) {
355+
long getLineNumber(std::string const& fileName, std::string const& refFileName, const std::string& pathToSamtools, const std::string& pathToSambamba, const std::string& SambambaThreads) {
356356
string line ;
357357
long count = 0;
358358
FILE *stream;
@@ -374,7 +374,8 @@ long getLineNumber(std::string const& fileName, const std::string& pathToSamtool
374374
#else
375375
pclose(stream);
376376
#endif
377-
} else if (fileName.substr(fileName.size()-4,4).compare(".bam")==0) {
377+
}
378+
else if (fileName.substr(fileName.size()-4,4).compare(".bam")==0) {
378379
string command = "";
379380
if (pathToSambamba != "")
380381
{
@@ -384,7 +385,7 @@ long getLineNumber(std::string const& fileName, const std::string& pathToSamtool
384385
}
385386
else
386387
{
387-
command = pathToSamtools + " view "+fileName;
388+
command = pathToSamtools + " view -@ "+ SambambaThreads + " " + fileName;
388389
//myInputFormat="sam"; //will try to use existing samtools
389390
cout << "..samtools should be installed to be able to read BAM files\n";
390391
}
@@ -403,7 +404,47 @@ long getLineNumber(std::string const& fileName, const std::string& pathToSamtool
403404
#else
404405
pclose(stream);
405406
#endif
406-
}else {
407+
}
408+
else if (fileName.substr(fileName.size()-5,5).compare(".cram")==0) {
409+
string command = "";
410+
if (refFileName == "")
411+
{
412+
cout << "..Reference FASTA file (config.fastaFile) should be given to be able to read CRAM files\n";
413+
exit (1);
414+
}
415+
if (pathToSambamba != "")
416+
{
417+
/* calkan - sambamba is using a very old version of htslib. CRAM support is buggy */
418+
cout << "Sambamba has a bug in reading CRAM files. Please use samtools instead.\n";
419+
exit (1);
420+
/*
421+
command = pathToSambamba + " view -t " + SambambaThreads + " -C -T " + refFileName + " " +fileName;
422+
//myInputFormat="sam"; //will try to use existing samtools
423+
cout << "..sambamba should be installed to be able to read CRAM files\n"; */
424+
}
425+
else
426+
{
427+
command = pathToSamtools + " view -T "+ refFileName + " -@" + SambambaThreads + " "+ fileName;
428+
//myInputFormat="sam"; //will try to use existing samtools
429+
cout << "..samtools should be installed to be able to read CRAM files\n";
430+
}
431+
stream =
432+
#if defined(_WIN32)
433+
_popen(command.c_str(), "r");
434+
#else
435+
popen(command.c_str(), "r");
436+
#endif
437+
438+
while ( fgets(buffer, MAX_BUFFER, stream) != NULL ) {
439+
count++;
440+
}
441+
#if defined(_WIN32)
442+
_pclose(stream);
443+
#else
444+
pclose(stream);
445+
#endif
446+
}
447+
else {
407448
ifstream file(fileName.c_str()) ;
408449
while( getline( file, line ) ) count++ ;
409450
file.close();

src/myFunc.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ float get_iqr(const std::vector<float>& data);
8585
void readFileWithGenomeInfo(const std::string &chrLenFileName, std::vector<std::string>& names, std::vector<int>& lengths);
8686
void readChrNamesInBed(const std::string &targetBed, std::vector<std::string>&names_bed);
8787
unsigned long sum(const std::vector<int>& data);
88-
long getLineNumber(std::string const& file, const std::string& pathToSamtools, const std::string& pathToSambamba, const std::string& SambambaThreads);
88+
long getLineNumber(std::string const& file, std::string const& refFileName, const std::string& pathToSamtools, const std::string& pathToSambamba, const std::string& SambambaThreads);
8989
long getReadNumberFromPileup(std::string const& file);
9090
int factorial (int num);
9191
int get_max_index(const std::vector<float>& data);

0 commit comments

Comments
 (0)