Skip to content

Commit 3dd77b1

Browse files
committed
splitting savePileup() for treat/ctrl
1 parent e6e7310 commit 3dd77b1

File tree

1 file changed

+156
-71
lines changed

1 file changed

+156
-71
lines changed

Genrich.c

Lines changed: 156 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -126,47 +126,6 @@ char* getLine(char* line, int size, File in, bool gz) {
126126
return fgets(line, size, in.f);
127127
}
128128

129-
130-
/*** BED printing ***/
131-
/* void printPileup()
132-
* Controls printing of pileup values and p- and q-values
133-
* for each Chrom.
134-
*
135-
void printPileup(File log, bool gzOut, Chrom** chrom,
136-
int chromLen, float pqvalue, bool qvalOpt) {
137-
138-
// loop through chroms
139-
for (int i = 0; i < chromLen; i++) {
140-
Chrom* chr = chrom[i];
141-
if (chr->skip)
142-
continue;
143-
144-
// print output, with intervals defined by chr->pval
145-
int start = 0;
146-
int j = 0, k = 0;
147-
for (int m = 0; m < chr->pvalLen; m++) {
148-
bool sig = false;
149-
if ( (qvalOpt && chr->qval->cov[m] > pqvalue)
150-
|| (! qvalOpt && chr->pval->cov[m] > pqvalue) )
151-
sig = true; // interval reaches significance
152-
printInterval(log, gzOut, chr->name,
153-
start, chr->pval->end[m],
154-
chr->treat->cov[j], chr->ctrl->cov[k],
155-
chr->pval->cov[m],
156-
qvalOpt ? chr->qval->cov[m] : -1.0f, sig);
157-
if (chr->ctrl->end[k] < chr->treat->end[j])
158-
k++;
159-
else {
160-
if (chr->ctrl->end[k] == chr->treat->end[j])
161-
k++;
162-
j++;
163-
}
164-
start = chr->pval->end[m];
165-
}
166-
}
167-
}
168-
*/
169-
170129
/*** Quicksort ***/
171130
// adapted from https://www.geeksforgeeks.org/quick-sort/
172131

@@ -424,7 +383,7 @@ void printInterval(File out, bool gzOut, char* name,
424383
name, start, end, treatVal, ctrlVal, pval);
425384
if (qval != -1.0f)
426385
gzprintf(out.gzf, "\t%.5f", qval);
427-
gzprintf(out.gzf, "\t%s\n", sig ? "*" : "");
386+
gzprintf(out.gzf, "%s\n", sig ? "\t*" : "");
428387
} else {
429388
fprintf(out.f, "%s\t%d\t%d\t%.5f\t%.5f\t%.5f",
430389
name, start, end, treatVal, ctrlVal, pval);
@@ -434,6 +393,9 @@ void printInterval(File out, bool gzOut, char* name,
434393
}
435394
}
436395

396+
/* void printPeak()
397+
* Print BED interval for a peak.
398+
*/
437399
void printPeak(File out, bool gzOut, char* name, int start,
438400
int end, int count, float val) {
439401
if (gzOut)
@@ -536,8 +498,14 @@ void findPeaks(File out, File log, bool logOpt, bool gzOut,
536498
float pqvalue, bool qvalOpt, int minLen, int maxGap,
537499
bool verbose) {
538500

539-
if (verbose)
540-
fprintf(stderr, "Total genome length: %ldbp\n", genomeLen);
501+
if (verbose) {
502+
fprintf(stderr, "Peak-calling parameters:\n");
503+
fprintf(stderr, " Genome length: %ldbp\n", genomeLen);
504+
fprintf(stderr, " Significance threshold: -log(%c) > %.3f\n",
505+
qvalOpt ? 'q' : 'p', pqvalue);
506+
fprintf(stderr, " Max. gap between sites: %dbp\n", maxGap);
507+
fprintf(stderr, " Min. peak length: %dbp\n", minLen);
508+
}
541509

542510
// compute p- and q-values
543511
int pLen = 0;
@@ -547,9 +515,6 @@ void findPeaks(File out, File log, bool logOpt, bool gzOut,
547515
saveQval(chrom, chromLen, genomeLen, p, pLen);
548516

549517
// identify peaks
550-
if (verbose)
551-
fprintf(stderr, "Significance threshold: -log(%c) > %.3f\n",
552-
qvalOpt ? 'q' : 'p', pqvalue);
553518
int count = callPeaks(out, log, logOpt, gzOut, chrom,
554519
chromLen, pqvalue, qvalOpt, minLen, maxGap);
555520
if (verbose)
@@ -564,22 +529,24 @@ void findPeaks(File out, File log, bool logOpt, bool gzOut,
564529
* lengths divided by total genome length.
565530
*/
566531
float calcLambda(Chrom** chrom, int chromLen,
567-
unsigned long fragLen, unsigned long* genomeLen) {
532+
double fragLen, unsigned long* genomeLen) {
568533
for (int i = 0; i < chromLen; i++)
569534
if (!chrom[i]->skip)
570535
*genomeLen += chrom[i]->len;
571536
if (! *genomeLen)
572537
exit(error("", ERRGEN));
573-
return fragLen / (double) *genomeLen;
538+
//printf("fraglen = %f; genomeLen = %ld\n", fragLen, *genomeLen);
539+
return fragLen / *genomeLen;
574540
}
575541

576542
/* void savePileupNoCtrl()
577543
* When no control is available, save the control
578544
* pileup as the background lambda value.
579545
*/
580546
void savePileupNoCtrl(Chrom** chrom, int chromLen,
581-
unsigned long fragLen, unsigned long* genomeLen) {
582-
float lambda = calcLambda(chrom, chromLen, fragLen, genomeLen);
547+
double fragLen, unsigned long* genomeLen) {
548+
float lambda = calcLambda(chrom, chromLen, fragLen,
549+
genomeLen);
583550
for (int i = 0; i < chromLen; i++) {
584551
Chrom* chr = chrom[i];
585552
if (chr->skip)
@@ -588,22 +555,138 @@ void savePileupNoCtrl(Chrom** chrom, int chromLen,
588555
}
589556
}
590557

591-
/* unsigned long savePileup()
558+
/* void savePileupCtrl()
559+
* Save pileup values for control sample(s) from
560+
* 'diff' arrays and background lambda value.
561+
*/
562+
void savePileupCtrl(Chrom** chrom, int chromLen,
563+
double fragLen, unsigned long* genomeLen) {
564+
565+
// calculate background lambda value
566+
float lambda = calcLambda(chrom, chromLen, fragLen,
567+
genomeLen);
568+
569+
// create pileup for each chrom
570+
for (int i = 0; i < chromLen; i++) {
571+
Chrom* chr = chrom[i];
572+
if (chr->skip)
573+
continue;
574+
575+
// if no read coverage, save constant pileup of 0
576+
if (chr->diff == NULL) {
577+
saveConst(&chr->ctrl, &chr->ctrlLen, chr->len, 0.0f);
578+
continue;
579+
}
580+
581+
// determine number of pileup intervals
582+
int num = 1;
583+
for (int j = 1; j < chr->len; j++)
584+
if (chr->diff[j] != 0.0f)
585+
num++;
586+
587+
// create pileup arrays
588+
chr->ctrl = (Pileup*) memalloc(sizeof(Pileup));
589+
chr->ctrl->end = (unsigned int*) memalloc(num * sizeof(unsigned int));
590+
chr->ctrl->cov = (float*) memalloc(num * sizeof(float));
591+
chr->ctrlLen = num;
592+
593+
// save pileup values
594+
int pos = 0; // position in pileup arrays
595+
float val = chr->diff[0]; // pileup value
596+
int j;
597+
for (j = 1; j < chr->len; j++)
598+
if (chr->diff[j] != 0.0f) {
599+
chr->ctrl->end[pos] = j;
600+
chr->ctrl->cov[pos] = MAX(val, lambda);
601+
602+
// update pileup value
603+
val += chr->diff[j];
604+
pos++;
605+
}
606+
607+
// save final interval
608+
chr->ctrl->end[pos] = j;
609+
chr->ctrl->cov[pos] = MAX(val, lambda);
610+
}
611+
612+
}
613+
614+
/* double savePileupTreat()
615+
* Save pileup values for treatment sample(s) from
616+
* 'diff' arrays.
617+
* Return total length of all fragments (weighted).
618+
*/
619+
double savePileupTreat(Chrom ** chrom, int chromLen) {
620+
621+
// create pileup for each chrom
622+
double fragLen = 0.0; // weighted fragment length
623+
for (int i = 0; i < chromLen; i++) {
624+
Chrom* chr = chrom[i];
625+
if (chr->skip)
626+
continue;
627+
628+
// if no read coverage, save constant pileup of 0
629+
if (chr->diff == NULL) {
630+
saveConst(&chr->treat, &chr->treatLen, chr->len, 0.0f);
631+
continue;
632+
}
633+
634+
// determine number of pileup intervals
635+
int num = 1;
636+
for (int j = 1; j < chr->len; j++)
637+
if (chr->diff[j] != 0.0f)
638+
num++;
639+
640+
// create pileup arrays
641+
chr->treat = (Pileup*) memalloc(sizeof(Pileup));
642+
chr->treat->end = (unsigned int*) memalloc(num * sizeof(unsigned int));
643+
chr->treat->cov = (float*) memalloc(num * sizeof(float));
644+
chr->treatLen = num;
645+
646+
// save pileup values
647+
int pos = 0; // position in pileup arrays
648+
float val = chr->diff[0]; // pileup value
649+
int start = 0; // beginning coordinate of interval
650+
int j;
651+
for (j = 1; j < chr->len; j++)
652+
if (chr->diff[j] != 0.0f) {
653+
chr->treat->end[pos] = j;
654+
chr->treat->cov[pos] = val;
655+
656+
// keep track of fragment length (weighted by val)
657+
fragLen += (j - start) * val;
658+
start = j;
659+
660+
// update pileup value
661+
val += chr->diff[j];
662+
pos++;
663+
}
664+
665+
// save final interval
666+
chr->treat->end[pos] = j;
667+
chr->treat->cov[pos] = val;
668+
fragLen += (j - start) * val;
669+
}
670+
671+
return fragLen;
672+
}
673+
674+
/* void savePileup()
592675
* Save pileup values for each Chrom from 'diff' arrays
593676
* (and background lambda value for control sample).
594677
* Return total length of all fragments (weighted).
595-
*/
596-
unsigned long savePileup(Chrom** chrom, int chromLen,
597-
unsigned long fragLen, unsigned long* genomeLen,
678+
*
679+
void savePileup(Chrom** chrom, int chromLen,
680+
double* fragLen, unsigned long* genomeLen,
598681
bool ctrl) {
599682
600683
// calculate background lambda value
601684
float lambda = 0.0f;
602685
if (ctrl)
603-
lambda = calcLambda(chrom, chromLen, fragLen, genomeLen);
686+
lambda = calcLambda(chrom, chromLen, *fragLen, genomeLen);
604687
605688
// create pileup for each chrom
606-
fragLen = 0;
689+
*fragLen = 0.0; // weighted fragment length
607690
for (int i = 0; i < chromLen; i++) {
608691
Chrom* chr = chrom[i];
609692
if (chr->skip)
@@ -642,7 +725,7 @@ unsigned long savePileup(Chrom** chrom, int chromLen,
642725
(*p)->cov[pos] = MAX(val, lambda);
643726
644727
// keep track of fragment length
645-
fragLen += (j - start) * val;
728+
*fragLen += (j - start) * val;
646729
start = j;
647730
648731
// update pileup value
@@ -653,14 +736,13 @@ unsigned long savePileup(Chrom** chrom, int chromLen,
653736
// save final interval
654737
(*p)->end[pos] = j;
655738
(*p)->cov[pos] = MAX(val, lambda);
656-
fragLen += (j - start) * val;
739+
*fragLen += (j - start) * val;
657740
}
658741
659-
return fragLen;
660742
}
743+
*/
661744

662-
663-
/*** Convert alignments to intervals/pileups ***/
745+
/*** Convert alignments to intervals ***/
664746

665747
/* int saveInterval()
666748
* Save BED interval for a read/fragment.
@@ -1676,11 +1758,13 @@ void runProgram(char* inFile, char* ctrlFile, char* outFile,
16761758
Chrom** chrom = NULL; // array of reference sequences
16771759
Read* dummy = (Read*) memalloc(sizeof(Read)); // head of linked list
16781760
dummy->next = NULL; // for paired alns
1679-
unsigned long fragLen = 0; // total length of treatment fragments
1680-
unsigned long genomeLen = 0; // total length of genome
1761+
double fragLen = 0.0; // total weighted length of treatment fragments
1762+
unsigned long genomeLen = 0; // total length of genome
16811763

16821764
// loop through input files (treatment and control)
16831765
for (int i = 0; i < 2; i++) {
1766+
1767+
// get treat/ctrl file name(s)
16841768
char* file = (i ? ctrlFile : inFile);
16851769
if (file == NULL) {
16861770
if (verbose)
@@ -1692,7 +1776,7 @@ void runProgram(char* inFile, char* ctrlFile, char* outFile,
16921776
char* end;
16931777
char* filename = strtok_r(file, COM, &end);
16941778
while (filename) {
1695-
// open input files
1779+
// open input file
16961780
File in;
16971781
bool gz = openRead(filename, &in);
16981782
bool bam = checkBAM(in, gz);
@@ -1701,10 +1785,10 @@ void runProgram(char* inFile, char* ctrlFile, char* outFile,
17011785
if (verbose)
17021786
fprintf(stderr, "Processing %s file: %s\n",
17031787
i ? "control" : "treatment", filename);
1704-
unsigned long totalLen = 0; // total length of paired reads
17051788
int unmapped = 0, paired = 0, single = 0, orphan = 0,
17061789
pairedPr = 0, singlePr = 0, supp = 0, skipped = 0,
17071790
lowMapQ = 0; // counting variables
1791+
unsigned long totalLen = 0; // total length of paired reads
17081792
Read** unpaired = NULL; // for unpaired alns with
17091793
int readLen = 0, readMem = 0; // average-extension option
17101794
int count;
@@ -1736,14 +1820,16 @@ void runProgram(char* inFile, char* ctrlFile, char* outFile,
17361820
}
17371821

17381822
// save pileup values
1739-
fragLen = savePileup(chrom, chromLen, fragLen, &genomeLen, i);
1740-
1741-
// reset 'diff' array for each Chrom
1742-
if (! i)
1823+
if (i)
1824+
savePileupCtrl(chrom, chromLen, fragLen, &genomeLen);
1825+
else {
1826+
fragLen = savePileupTreat(chrom, chromLen);
1827+
// reset 'diff' array for each Chrom
17431828
for (int j = 0; j < chromLen; j++)
17441829
if (chrom[j]->diff != NULL)
17451830
for (int k = 0; k < chrom[j]->len + 1; k++)
17461831
chrom[j]->diff[k] = 0.0f;
1832+
}
17471833
}
17481834

17491835
// open output files
@@ -1755,7 +1841,6 @@ void runProgram(char* inFile, char* ctrlFile, char* outFile,
17551841
chrom, chromLen, pqvalue, qvalOpt, minLen, maxGap,
17561842
verbose);
17571843

1758-
17591844
// free memory
17601845
if (xcount) {
17611846
for (int i = 0; i < xcount; i++)

0 commit comments

Comments
 (0)