@@ -401,9 +401,9 @@ void GenomeCopyNumber::recalculateRatio (float contamination) {
401401 for ( it=chrCopyNumber_.begin () ; it != chrCopyNumber_.end (); it++ ) {
402402 if (sex_.compare (" XY" )==0 && (it->getChromosome ().find (" X" )!=string::npos || it->getChromosome ().find (" Y" )!=string::npos)) {
403403 // should take into account that normally one has only one copy of X and Y..
404- it->recalculateRatioWithContam (contamination,0.5 );
404+ it->recalculateRatioWithContam (contamination,0.5 , isRatioLogged_ );
405405 } else
406- it->recalculateRatioWithContam (contamination,1 );
406+ it->recalculateRatioWithContam (contamination,1 , isRatioLogged_ );
407407 }
408408
409409}
@@ -1101,19 +1101,21 @@ long double GenomeCopyNumber::calculateRSS(int ploidy)
11011101 int index = findIndex (chrNumber);
11021102 int length = chrCopyNumber_[index].getLength ();
11031103 for (int i = 0 ; i< length; i++) {
1104- float observed = 0 ;
1105- float expected = 0 ;
1106- {
1107- observed = chrCopyNumber_[index].getRatioAtBin (i);
1108- expected = observed;
1104+ float observed = chrCopyNumber_[index].getRatioAtBin (i);
1105+ if (observed!=NA) {
1106+ if (isRatioLogged_) {
1107+ observed=pow (2 ,observed);
1108+ }
1109+ float expected = observed;
11091110 if (chrCopyNumber_[index].isMedianCalculated ()) {
11101111 expected = chrCopyNumber_[index].getMedianProfileAtI (i);
11111112 if (chrCopyNumber_[index].isSmoothed ())
11121113 expected = chrCopyNumber_[index].getSmoothedProfileAtI (i);
1113- }
1114+
11141115 }
1115- observedvalues.push_back (observed);
1116- expectedvalues.push_back (expected);
1116+ observedvalues.push_back (observed);
1117+ expectedvalues.push_back (expected);
1118+ }
11171119 }
11181120 }
11191121
@@ -1126,7 +1128,12 @@ long double GenomeCopyNumber::calculateRSS(int ploidy)
11261128 RSS = RSS + (long double )pow (diff,2 );
11271129 }
11281130 }
1129- return RSS;
1131+ if (observedvalues.size ()==0 ) {
1132+ return 0 ;
1133+ }
1134+ double normRSS = (RSS/observedvalues.size ());
1135+ observedvalues.clear ();expectedvalues.clear ();
1136+ return normRSS;
11301137}
11311138
11321139
@@ -1223,7 +1230,35 @@ void GenomeCopyNumber::calculateRatioUsingCG_Regression( GenomeCopyNumber & cont
12231230 // XXX
12241231}
12251232
1226- int GenomeCopyNumber::calculateRatio ( GenomeCopyNumber & controlCopyNumber, int degree, bool intercept,bool logLogNorm) {
1233+ int GenomeCopyNumber::fillInRatio () {
1234+ vector <float > countValues;
1235+ vector<ChrCopyNumber>::iterator it;
1236+
1237+ for ( it=chrCopyNumber_.begin () ; it != chrCopyNumber_.end (); it++ ) {
1238+ if ((sex_.compare (" XY" )==0 ) && (it->getChromosome ().find (" X" )!=string::npos || it->getChromosome ().find (" Y" )!=string::npos)) {
1239+ // should take into account that normally one has only one copy of X and Y..
1240+ it->fillInRatio (isRatioLogged_);
1241+ } else {
1242+ it->fillInRatio (isRatioLogged_);
1243+ for (int i = 0 ; i< it->getLength (); i++) {
1244+ if (it->getValueAt (i)>0 ) {
1245+ countValues.push_back (it->getRatioAtBin (i));
1246+ }
1247+ }
1248+ }
1249+ }
1250+ float median=get_medianNotNA (countValues);
1251+ for ( it=chrCopyNumber_.begin () ; it != chrCopyNumber_.end (); it++ ) {
1252+ if (!isRatioLogged_) {
1253+ it->recalculateRatio (median);
1254+ } else {
1255+ it->recalculateLogRatio (median);
1256+ }
1257+ }
1258+ return 1 ;
1259+ }
1260+
1261+ int GenomeCopyNumber::calculateRatio ( GenomeCopyNumber & controlCopyNumber, int degree, bool intercept) {
12271262
12281263
12291264 int maximalNumberOfIterations = 300 ;
@@ -1232,7 +1267,7 @@ int GenomeCopyNumber::calculateRatio( GenomeCopyNumber & controlCopyNumber, int
12321267
12331268 int successfulFit = 0 ;
12341269
1235- if (logLogNorm ) {
1270+ if (isRatioLogged_ ) {
12361271 intercept=1 ; degree=1 ;// because it is loglogscale
12371272 vector <float > y; // y ~ a0x+a1
12381273 vector <float > x;
@@ -2920,6 +2955,8 @@ void GenomeCopyNumber::printRatioBedGraph(std::string const& chr, std::ofstream
29202955
29212956 for (int i = 0 ; i< length; i++) {
29222957 value=chrCopyNumber_[index].getRatioAtBin (i);
2958+ if (isRatioLogged_ && value!=NA)
2959+ value=pow (2 ,value);
29232960 position=chrCopyNumber_[index].getCoordinateAtBin (i);
29242961 float valueToPrint;
29252962 if (chrCopyNumber_[index].isSmoothed ())
@@ -2962,8 +2999,12 @@ void GenomeCopyNumber::printRatio(std::string const& chr, std::ofstream & file,
29622999 int length = chrCopyNumber_[index].getLength ();
29633000 // cout <<length<<" == "<<chrCopyNumber_[index].getValues().size() <<"\n";
29643001 for (int i = 0 ; i< length; i++) {
2965- if (printNA || chrCopyNumber_[index].getRatioAtBin (i)!=NA) {// process this this window
2966- file << chrNumber <<" \t " <<chrCopyNumber_[index].getCoordinateAtBin (i)+1 <<" \t " <<chrCopyNumber_[index].getRatioAtBin (i) ;
3002+ float ratioToPrint = chrCopyNumber_[index].getRatioAtBin (i);
3003+ if (isRatioLogged_ && ratioToPrint!= NA) {
3004+ ratioToPrint=pow (2 ,ratioToPrint);
3005+ }
3006+ if (printNA || ratioToPrint!=NA) {// process this this window
3007+ file << chrNumber <<" \t " <<chrCopyNumber_[index].getCoordinateAtBin (i)+1 <<" \t " <<ratioToPrint ;
29673008 if (chrCopyNumber_[index].isMedianCalculated ()) {
29683009 file << " \t " <<chrCopyNumber_[index].getMedianProfileAtI (i) ;
29693010 float valueToPrint;
@@ -3127,7 +3168,7 @@ void GenomeCopyNumber::calculateCopyNumberMedians (int minCNAlength, bool noisyD
31273168 vector<ChrCopyNumber>::iterator it;
31283169 for ( it=chrCopyNumber_.begin () ; it != chrCopyNumber_.end (); it++ ) {
31293170 cout << " ..calculating medians for " << it->getChromosome ()<< " \n " ;
3130- it->calculateCopyNumberMedian (ploidy_, minCNAlength, noisyData, CompleteGenomicsData);
3171+ it->calculateCopyNumberMedian (ploidy_, minCNAlength, noisyData, CompleteGenomicsData, isRatioLogged_ );
31313172 }
31323173}
31333174
@@ -3186,6 +3227,8 @@ void GenomeCopyNumber::calculateSDAndMed(int ploidy, map <float,float> &sds,map
31863227 med = it->getMedianProfileAtI (i);
31873228 float level = round_by_ploidy (med, ploidy);
31883229 value = it->getRatioAtBin (i);
3230+ if (isRatioLogged_&& value!=NA)
3231+ value=pow (2 ,value);
31893232 if (value != NA) {
31903233 if (mymap.count (level) == 0 ) {
31913234 vector <float > a;
@@ -3223,6 +3266,8 @@ void GenomeCopyNumber::calculateSDs(int ploidy, map <float,float> &sds) {
32233266 med = it->getMedianProfileAtI (i);
32243267 float level = round_by_ploidy (med, ploidy);
32253268 value = it->getRatioAtBin (i);
3269+ if (isRatioLogged_&& value!=NA)
3270+ value=pow (2 ,value);
32263271 if (value != NA) {
32273272 if (mymap.count (level) == 0 ) {
32283273 vector <float > a;
@@ -3267,6 +3312,8 @@ float GenomeCopyNumber::calculateVarianceForNormalCopy(int ploidy) { //geting th
32673312 med = it->getMedianProfileAtI (i);
32683313 if ((med>lowBoundary)&&(med < highBoundary)) {
32693314 value = it->getRatioAtBin (i);
3315+ if (isRatioLogged_&& value!=NA)
3316+ value=pow (2 ,value);
32703317 if (value != NA) {
32713318 myfile << value-1 << " \n " ;
32723319 variance += (value-1 )*(value-1 );
@@ -3319,19 +3366,23 @@ float GenomeCopyNumber::evaluateContamination () {
33193366 int length = chrCopyNumber_[index].getLength ();
33203367 for (int i = 0 ; i< length; i++) {
33213368 float observed = chrCopyNumber_[index].getRatioAtBin (i);
3322- float expected = observed;
3323- if (chrCopyNumber_[index].isMedianCalculated ()) {
3324- expected = chrCopyNumber_[index].getMedianProfileAtI (i) ;
3325- if (chrCopyNumber_[index].isSmoothed () && WESanalysis == false )
3326- expected = chrCopyNumber_[index].getSmoothedProfileAtI (i);
3327- }
3328- if (!(expected == 1 || expected <= 0 || expected >= 2 || observed > 3 || observed <= 0 )
3329- && (((1 >observed)&&(1 >expected))||((1 <observed)&&(1 <expected)))) {// should it be something related to ploidy_ and not 2
3330- float p = (observed-expected)/(observed-expected+2 /ploidy_*(1 -observed));
3331- if (p>-0.5 && p<1.5 ) {
3332- values.push_back (p);
3333- weights.push_back (chrCopyNumber_[index].getFragmentLengths_notNA_At (i));
3334- }
3369+ if (observed!=NA) {
3370+ if (isRatioLogged_)
3371+ observed=pow (2 ,observed);
3372+ float expected = observed;
3373+ if (chrCopyNumber_[index].isMedianCalculated ()) {
3374+ expected = chrCopyNumber_[index].getMedianProfileAtI (i) ;
3375+ if (chrCopyNumber_[index].isSmoothed () && WESanalysis == false )
3376+ expected = chrCopyNumber_[index].getSmoothedProfileAtI (i);
3377+ }
3378+ if (!(expected == 1 || expected <= 0 || expected >= 2 || observed > 3 || observed <= 0 )
3379+ && (((1 >observed)&&(1 >expected))||((1 <observed)&&(1 <expected)))) {// should it be something related to ploidy_ and not 2
3380+ float p = (observed-expected)/(observed-expected+2 /ploidy_*(1 -observed));
3381+ if (p>-0.5 && p<1.5 ) {
3382+ values.push_back (p);
3383+ weights.push_back (chrCopyNumber_[index].getFragmentLengths_notNA_At (i));
3384+ }
3385+ }
33353386 }
33363387 }
33373388 }
@@ -3373,6 +3424,8 @@ float GenomeCopyNumber::evaluateContaminationwithLR () {
33733424 for (int i = 0 ; i< length; i++) {
33743425 float observed = chrCopyNumber_[index].getRatioAtBin (i);
33753426 if (observed!=NA) {
3427+ if (isRatioLogged_)
3428+ observed=pow (2 ,observed);
33763429 float expected = observed;
33773430 if (chrCopyNumber_[index].isMedianCalculated ()) {
33783431 expected = round_by_ploidy (chrCopyNumber_[index].getMedianProfileAtI (i),ploidy_) ;
@@ -4075,7 +4128,26 @@ int GenomeCopyNumber::processReadWithBowtie(std::string const& inputFormat, std:
40754128 }
40764129 return 0 ;
40774130}
4131+ float GenomeCopyNumber::removeLargeExons (float iqrToKeep) {
4132+ float maxLength = 0 ;
4133+ vector<ChrCopyNumber>::iterator it;
4134+ int totalNumberExons=0 ;
4135+ float numberOfRemovedExons = 0 ;
4136+ vector <float > exonLengths;
4137+
4138+ for ( it=chrCopyNumber_.begin () ; it != chrCopyNumber_.end (); it++ ) {
4139+ for (int i=0 ; i < it->getLength (); i++) {
4140+ exonLengths.push_back (it->getEndAtBin (i)-it->getCoordinateAtBin (i));
4141+ totalNumberExons++;
4142+ }
4143+ }
4144+ maxLength=get_iqr (exonLengths)/2 *iqrToKeep+get_median (exonLengths);
40784145
4146+ for ( it=chrCopyNumber_.begin () ; it != chrCopyNumber_.end (); it++ ) {
4147+ numberOfRemovedExons+=it->removeLargeExons (maxLength);
4148+ }
4149+ return numberOfRemovedExons/totalNumberExons;
4150+ }
40794151int GenomeCopyNumber::focusOnCapture (std::string const & captureFile) {
40804152 ifstream file (captureFile.c_str ());
40814153 string line;
@@ -4191,6 +4263,10 @@ void GenomeCopyNumber::setWESanalysis(bool WESgiven)
41914263WESanalysis = WESgiven;
41924264}
41934265
4266+ void GenomeCopyNumber::setIfLogged (bool isRatioLogged) {
4267+ isRatioLogged_=isRatioLogged;
4268+ }
4269+
41944270void GenomeCopyNumber::setmakingPileup (bool makingPileup_given)
41954271{
41964272makingPileup = makingPileup_given;
0 commit comments