Skip to content

Commit a45206e

Browse files
committed
Fixing bugs: BAF fit
1 parent fea52d3 commit a45206e

File tree

9 files changed

+61
-31
lines changed

9 files changed

+61
-31
lines changed

src/ChrCopyNumber.h

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,9 +27,11 @@ class ChrCopyNumber
2727

2828
void addBAFinfo(SNPinGenome & snpingenome,int indexSNP);
2929

30+
void fillInRatio(bool islog);
3031
void calculateRatio(ChrCopyNumber control, float normalizationConst) ;
3132
void recalculateRatio (float constant);
32-
void recalculateRatioWithContam(float contamination, float normGenytype);
33+
void recalculateLogRatio (float constant) ;
34+
void recalculateRatioWithContam(float contamination, float normGenytype, bool isLogged);
3335
void recalculateRatio(ChrCopyNumber control);
3436
void calculateRatio(ChrCopyNumber control, double a0, double a1);
3537
void calculateRatio(ChrCopyNumber control, const double * a, const int degree);
@@ -39,7 +41,7 @@ class ChrCopyNumber
3941
double calculateXiSum(int ploidy, std::map <float,float> &sds, std::map <float,float> &meds);
4042
double calculateXiSum(int ploidy, std::map <float,float> &sds);
4143
void calculateCopyNumberMedian(); //create median profiles using 'bpfinal_' and store them in medianProfile_, info about medians themselves is stored in medianValues_ and about SD in sd_, lengths of fragments in bpLengths_
42-
void calculateCopyNumberMedian(int ploidy, int minCNAlength, bool noisyData, bool CompleteGenomicsData); //create median profiles as calculateCopyNumberMedian(), but merges close regions (roundByPloidy(median))
44+
void calculateCopyNumberMedian(int ploidy, int minCNAlength, bool noisyData, bool CompleteGenomicsData, bool isLogged); //create median profiles as calculateCopyNumberMedian(), but merges close regions (roundByPloidy(median))
4345
void recalcFlanksForIndeces (int i, int j);
4446
void recalcFlanks(int telo_centromeric_flanks, int minNumberOfWindows); //merge short notNA-segments around NA-segments
4547

@@ -52,7 +54,7 @@ class ChrCopyNumber
5254

5355
void deleteFlanks(int telo_centromeric_flanks);
5456
void deleteFragment(int i) ;
55-
57+
int removeLargeExons(float threshold);
5658

5759
std::string getGeneNameAtBin(int i);
5860
float getValueAt(int i);

src/GenomeCopyNumber.h

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,13 +35,15 @@ class GenomeCopyNumber
3535
int processRead(InputFormat inputFormat, MateOrientation matesOrientation, const char* line_buffer, int& prevInd, std::string targetBed = "", std::string mateFileName = "");
3636
int processReadWithBowtie(std::string const& inputFormat, std::string const& matesOrientation,std::string const line,std::string const line2);
3737
int focusOnCapture (std::string const& captureFile);
38+
float removeLargeExons(float iqrToKeep);
3839
void initCopyNumber(std::string const& chrLenFileName, int windowSize , int step, std::string targetBed);
3940
void finishCopyNumber(long normalCount);
4041
void addBAFinfo(SNPinGenome & snpingenome);
4142
void removeLowReadCountWindows(GenomeCopyNumber & controlCopyNumber, int RCThresh);
4243
void removeLowReadCountWindowsFromControl (int RCThresh);
4344

44-
int calculateRatio( GenomeCopyNumber & controlCopyNumber, int degree, bool intercept,bool logLogNorm) ;
45+
int fillInRatio();
46+
int calculateRatio( GenomeCopyNumber & controlCopyNumber, int degree, bool intercept) ;
4547
void calculateRatioUsingCG( GenomeCopyNumber & controlCopyNumber) ;
4648
void calculateRatioUsingCG_Regression( GenomeCopyNumber & controlCopyNumber) ;
4749
float calculateNormalizationConstant(GenomeCopyNumber & controlCopyNumber);
@@ -123,7 +125,9 @@ class GenomeCopyNumber
123125

124126
int findWinNumber(int position, std::string myName, std::string const& matefile);
125127
void setWESanalysis(bool WESgiven);
126-
void setmakingPileup(bool makingPileup_given);
128+
void setmakingPileup(bool makingPileup_given);
129+
void setIfLogged(bool);
130+
127131
double Percentage_GenomeExplained(int &);
128132
long double calculateRSS(int ploidy);
129133
bool isMappUsed();
@@ -136,6 +140,8 @@ class GenomeCopyNumber
136140
bool makingPileup;
137141
bool SeekingSubc_;
138142
bool isMappUsed_;
143+
bool isRatioLogged_;
144+
139145
void fillMyHash(std::string const& mateFileName , std::string const& inputFormat, std::string const& matesOrientation, int windowSize, int step, std::string targetBed = "");
140146
int windowSize_;
141147
int step_;

src/SNPinGenome.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -189,8 +189,9 @@ void SNPinGenome::readMateFile(std::string const& mateFile, std::string const& i
189189

190190
void SNPinGenome::readMateFile(std::string const& mateFile, std::string const& inputFormat, int minimalTotalLetterCountPerPosition, int minimalQualityPerPosition, GenomeCopyNumber& genomeCopyNumber, std::string const& chrLenFileName, int windowSize, int step, std::string targetBed) {
191191
// must perform partly GenomeCopyNumber::readCopyNumber line #114, all but fillMyHash
192-
genomeCopyNumber.initCopyNumber(chrLenFileName, windowSize, step, targetBed);
193-
assignValues(mateFile, inputFormat, minimalTotalLetterCountPerPosition,minimalQualityPerPosition, &genomeCopyNumber);
192+
genomeCopyNumber.initCopyNumber(chrLenFileName, windowSize, step, targetBed);
193+
assignValues(mateFile, inputFormat, minimalTotalLetterCountPerPosition,minimalQualityPerPosition, &genomeCopyNumber);
194+
194195
pileup_read = true;
195196
}
196197

src/SNPinGenome.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ struct SNPinGenomePerformArgWrapper : public ThreadArg {
6666
int minCNAlength;
6767
const char* what;
6868

69-
SNPinGenomePerformArgWrapper(SNPinGenome& snpingenome, std::string const& mateFile, const std::string& inputFormat, int minimalTotalLetterCountPerPosition, int minimalQualityPerPosition, bool noisyData, bool CompleteGenomicsData, GenomeCopyNumber& genomeCopyNumber, double breakPointThreshold, int breakPointType, int minCNAlength, const char* what) : snpingenome(snpingenome), mateFile(mateFile), inputFormat(inputFormat), minimalTotalLetterCountPerPosition(minimalTotalLetterCountPerPosition), minimalQualityPerPosition(minimalQualityPerPosition), noisyData(noisyData),genomeCopyNumber(genomeCopyNumber), breakPointThreshold(breakPointThreshold), breakPointType(breakPointType), minCNAlength(minCNAlength), what(what) { }
69+
SNPinGenomePerformArgWrapper(SNPinGenome& snpingenome, std::string const& mateFile, const std::string& inputFormat, int minimalTotalLetterCountPerPosition, int minimalQualityPerPosition, bool noisyData, bool CompleteGenomicsData, GenomeCopyNumber& genomeCopyNumber, double breakPointThreshold, int breakPointType, int minCNAlength, const char* what) : snpingenome(snpingenome), mateFile(mateFile), inputFormat(inputFormat), minimalTotalLetterCountPerPosition(minimalTotalLetterCountPerPosition), minimalQualityPerPosition(minimalQualityPerPosition), noisyData(noisyData),CompleteGenomicsData(CompleteGenomicsData),genomeCopyNumber(genomeCopyNumber), breakPointThreshold(breakPointThreshold), breakPointType(breakPointType), minCNAlength(minCNAlength), what(what) { }
7070
};
7171

7272
extern void* SNPinGenome_perform_wrapper(void *arg);

src/SVfinder.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ int runWithDefinedPloidy(int ploidy, GenomeCopyNumber & sampleCopyNumber, Genome
7070
int degree,int intercept,bool logLogNorm,float minExpectedGC,float maxExpectedGC,float knownContamination,float breakPointThreshold,int breakPointType,int minCNAlength,
7171
int teloCentroFlanks, std::vector<double> & RSS, std::vector<double> &percentage_GenExpl,bool contaminationAdjustment,std::vector<double> &contamination,
7272
ThreadPool * thrPool,ThreadPoolManager * thrPoolManager,std::string makePileup, float seekSubclones,
73-
std::string myName,std::vector<int> &unexplainedChromosomes, bool CompleteGenomicsData) ;
73+
std::string myName,std::vector<int> &unexplainedChromosomes, bool CompleteGenomicsData,bool normalization) ;
7474

7575

7676
#endif //SVFINDER_H

src/freec

19.1 KB
Binary file not shown.

src/main.cpp

Lines changed: 41 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -352,44 +352,52 @@ int main(int argc, char *argv[])
352352
bool is_sample_pileup = (sample_inputFormat.compare("pileup") == 0 || sample_inputFormat.compare("SAMtools pileup") == 0);
353353
bool is_control_pileup = (control_inputFormat.compare("pileup") == 0 || control_inputFormat.compare("SAMtools pileup") == 0);
354354

355-
356355
bool has_BAF = cf.hasValue("BAF","SNPfile");
357356
std::string makePileup = (std::string)cf.Value("BAF","makePileup", "false");
358357
std::string fastaFile = (std::string)cf.Value("BAF","fastaFile", "false");
358+
std::string miniPileupFileSample = (std::string)cf.Value("sample","miniPileup", "false");
359+
std::string miniPileupFileControl = (std::string)cf.Value("sample","miniPileup", "false");
360+
361+
bool isHasMiniPileUPsample = (miniPileupFileSample=="false")?0:1;
362+
bool isHasMiniPileUPcontrol = (miniPileupFileControl=="false")?0:1;
359363

360-
if (makePileup != "false" && fastaFile=="false") {
364+
if (makePileup != "false" && fastaFile=="false" && (!isHasMiniPileUPsample || isControlIsPresent && !isHasMiniPileUPcontrol)) {
361365
cerr << "To create a usable .pileup file from .BAM you need to provide a fasta file for the whole genome with option \"fastaFile\""<<endl;
362366
cerr << "If you only want copy number profiles (no genotypes), then remove or comment all the lines in the group of parameters [BAF]"<<endl;
363367

364368
exit(0);
365369
}
366370

367-
if (makePileup != "false")
371+
if (isHasMiniPileUPsample) {
372+
has_BAF = false;
373+
}
374+
375+
if (makePileup != "false" && ((!isControlIsPresent)&&(!isHasMiniPileUPsample) || isControlIsPresent&&(!isHasMiniPileUPcontrol||miniPileupFileSample=="false" )))
368376
{
369377
cout << "FREEC will create a pileup to compute BAF profile! \n";
370378
cout << "...File with SNPs : " << makePileup << "\n";
371379
has_BAF = false;
372380
}
373381

374-
if (has_BAF && makePileup == "false" && !has_sample_MateFile) {
382+
if (has_BAF && makePileup == "false" && !has_sample_MateFile && !isHasMiniPileUPsample) {
375383
cerr << "ERROR: you need to provide a 'mateFile' for the [sample] (in SAMtools pileup format) to be able to calculate BAF profiles with options [BAF] or to provide a BED/VCF file with SNP positions (option \"makePileup\")\n";
376384
exit (0);
377385
}
378386

379-
if (has_BAF && !has_control_MateFile && isControlIsPresent && makePileup == "false") {
387+
if (has_BAF && !has_control_MateFile && isControlIsPresent && makePileup == "false" && !isHasMiniPileUPcontrol) {
380388
cerr << "ERROR: you need to provide a 'mateFile' for the [control] (in SAMtools pileup format) to be able to calculate BAF profiles with options [BAF] and detect somatic CNAs and LOH\n";
381389
cerr << "..Otherwise, you may not to use the control data at all. Just comment or delete 'mateCopyNumberFile' in the [control] group of parameters\n";
382390
exit (0);
383391
}
384392

385-
if (!is_sample_pileup && has_BAF && makePileup == "false") {
393+
if (!is_sample_pileup && has_BAF && makePileup == "false" && !isHasMiniPileUPsample) {
386394
cerr << "Error: to calculate BAF values, you need to provide mateFile in SAMtools pileup format\n Or you can set 'makePileup' parameter true by providing a path to a VCF file with SNP positions\n";
387395
cout << "..since you mateFile is not in SAMtools pileup format, the BAF values will not be calculated\n";
388396
has_BAF=false;
389397
}
390398
string SNPinfoFile = std::string(cf.Value("BAF","SNPfile",""));
391399

392-
if (makePileup != "false" && SNPinfoFile=="") {
400+
if (makePileup != "false" && SNPinfoFile=="" || isHasMiniPileUPsample&& SNPinfoFile=="") {
393401
if (makePileup.substr(makePileup.size()-3,3)=="vcf" || makePileup.substr(makePileup.size()-6,6)=="vcf.gz") {
394402
SNPinfoFile=makePileup;
395403
} else {
@@ -574,9 +582,10 @@ int main(int argc, char *argv[])
574582
cout << "..break-point type set to "<<breakPointType<<"\n";
575583

576584
bool noisyData = (bool)cf.Value("general","noisyData", "false");
585+
577586
if ((!noisyData) && ifTargeted && has_BAF) {
578587
cout << "Warning: consider using '[general] noisyData=true' if you expect to have highly nonuniform coverage along the genome\n";
579-
} else if (noisyData && !has_BAF && makePileup=="false"){
588+
} else if (noisyData && !has_BAF && makePileup=="false" && !isHasMiniPileUPsample){
580589
cout << "Warning: Parameter '[general] noisyData=true' will not have effect since FREEC won't use BAF information to correct predicted copy numbers\n";
581590
}else if (noisyData && !ifTargeted ){
582591
cout << "Warning: I would not recommend using '[general] noisyData=true' for whole genome data; you can miss some real CNAs in this case\n";
@@ -701,19 +710,30 @@ int main(int argc, char *argv[])
701710
string controlPileup;
702711
string samplePileup;
703712

704-
if (makePileup != "false") {
705-
cout << "Creating Pileup file to compute BAF profile...\n";
706-
minipileup.makepileup(sampleCopyNumber, controlCopyNumber, sample_MateFile, control_MateFile, myName, makePileup, sample_MateFile,
707-
sample_inputFormat, sample_mateOrientation, pathToSamtools, pathToSambamba, SambambaThreads, chrLenFile, controlName, targetBed, pathToBedtools, fastaFile, minimalQualityPerPosition);
708-
cout << "... -> Done!\n";
713+
if (makePileup != "false" || isHasMiniPileUPcontrol || isHasMiniPileUPsample) {
714+
if (!isHasMiniPileUPsample || (isControlIsPresent && !isHasMiniPileUPcontrol)) {
715+
cout << "Creating Pileup file to compute BAF profile...\n";
716+
minipileup.makepileup(sampleCopyNumber, controlCopyNumber, sample_MateFile, control_MateFile, myName, makePileup, sample_MateFile,
717+
sample_inputFormat, sample_mateOrientation, pathToSamtools, pathToSambamba, SambambaThreads, chrLenFile, controlName, targetBed, pathToBedtools, fastaFile, minimalQualityPerPosition);
718+
cout << "... -> Done!\n";
719+
}
720+
709721
GenomeCopyNumberReadMateFileArgWrapper* readMateFileArg;
710722
cout << "..will use SNP positions from "<< SNPinfoFile << " to calculate BAF profiles\n";
711723

712724
thrPool = thrPoolManager->newThreadPool("GenomeCopyNumber_readMateFile");
713725
snpingenome.readSNPs(SNPinfoFile);
714726

715-
controlPileup = controlName + "_minipileup" +".pileup";
716-
samplePileup = myName + "_minipileup" +".pileup";
727+
if (!isHasMiniPileUPsample || (isControlIsPresent && !isHasMiniPileUPcontrol)) {
728+
controlPileup = controlName + "_minipileup" +".pileup";
729+
samplePileup = myName + "_minipileup" +".pileup";
730+
}
731+
if (isHasMiniPileUPsample) {
732+
samplePileup = miniPileupFileSample;
733+
}
734+
if (isControlIsPresent && isHasMiniPileUPcontrol) {
735+
controlPileup = miniPileupFileControl;
736+
}
717737

718738
if (is_sample_pileup && !has_sample_mateCopyNumberFile && has_window)
719739
{
@@ -992,7 +1012,7 @@ int main(int argc, char *argv[])
9921012
}
9931013

9941014
double breakPointThreshold_BAF=1;
995-
if (has_BAF || makePileup != "false") {
1015+
if (has_BAF || makePileup != "false" || isHasMiniPileUPsample) {
9961016
breakPointThreshold_BAF = 0.8;
9971017
if (ifTargeted)
9981018
breakPointThreshold_BAF = 1.6;
@@ -1004,7 +1024,7 @@ int main(int argc, char *argv[])
10041024

10051025
SNPinGenomePerformArgWrapper* snpArg;
10061026

1007-
if (makePileup == "false")
1027+
if (makePileup == "false" && !isHasMiniPileUPsample)
10081028
{
10091029
snpArg = new SNPinGenomePerformArgWrapper(snpingenome, sample_MateFile, sample_inputFormat, minimalTotalLetterCountPerPosition,minimalQualityPerPosition, noisyData,CompleteGenomicsData,sampleCopyNumber, breakPointThreshold_BAF, breakPointType, minCNAlength, "Sample");
10101030
thrPool->addThread(SNPinGenome_perform_wrapper, snpArg);
@@ -1016,15 +1036,15 @@ int main(int argc, char *argv[])
10161036
}
10171037
//the same for the control sample:
10181038
if (isControlIsPresent) {
1019-
if (makePileup == "false")
1039+
if (makePileup == "false" && !isHasMiniPileUPcontrol)
10201040
{
10211041
snpArg = new SNPinGenomePerformArgWrapper(snpingenomeControl, control_MateFile, control_inputFormat, minimalTotalLetterCountPerPosition,minimalQualityPerPosition, noisyData, CompleteGenomicsData,controlCopyNumber, breakPointThreshold_BAF, breakPointType, minCNAlength, "Control");
10221042
thrPool->addThread(SNPinGenome_perform_wrapper, snpArg);
10231043
}
10241044
else
1025-
{
1026-
//snpArg = new SNPinGenomePerformArgWrapper(snpingenomeControl, control_MateFile,"pileup", minimalTotalLetterCountPerPosition,minimalQualityPerPosition, noisyData, controlCopyNumber, breakPointThreshold_BAF, breakPointType, minCNAlength, "Control");
1027-
//thrPool->addThread(SNPinGenome_perform_wrapper, snpArg);
1045+
{// the two lines below were commented for some unknown reason..
1046+
snpArg = new SNPinGenomePerformArgWrapper(snpingenomeControl, controlPileup,"pileup", minimalTotalLetterCountPerPosition,minimalQualityPerPosition, noisyData, CompleteGenomicsData, controlCopyNumber, breakPointThreshold_BAF, breakPointType, minCNAlength, "Control");
1047+
thrPool->addThread(SNPinGenome_perform_wrapper, snpArg);
10281048
}
10291049
}
10301050

src/myFunc.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@ char* getLine(char* buffer, int buffer_size, FILE* stream, std::string& line);
7777
float get_sd (const std::vector<float>& data, float mean);
7878
float get_median(const std::vector<float>& data) ;
7979
float get_median(const std::vector<float>& data, int start, int end) ;
80+
float get_medianNotNA(const std::vector<float> & myvector) ;
8081
float get_mean(const std::vector<float>& data) ;
8182
float get_weighted_mean(const std::vector<float>& data, const std::vector<float>& weights) ;
8283
float get_sum(const std::vector<float>& data) ;

src/version.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
#define VERSION_H
44

55
const double VERSION_OFFSET = 3;
6-
const double FREEC_VERSION = 10.5;
6+
const double FREEC_VERSION = 10.6;
77
const double CONTROL_FREEC_VERSION = FREEC_VERSION - VERSION_OFFSET;
88

99
#endif

0 commit comments

Comments
 (0)