Skip to content

Commit 9352925

Browse files
committed
New version (0.6.0). Removed emcncf2 and added udef option for genome build.
1 parent d002c51 commit 9352925

File tree

8 files changed

+48
-26
lines changed

8 files changed

+48
-26
lines changed

DESCRIPTION

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,12 @@ Package: facets
22
Type: Package
33
Title: Cellular Fraction and Copy Numbers from Tumor Sequencing
44
Version: 0.6.0
5-
Date: 2018-02-28
5+
Date: 2018-12-14
66
Author: Venkatraman E. Seshan and Ronglai Shen
77
Maintainer: Venkatraman E. Seshan <seshanv@mskcc.org>
88
Description: Algorithm to implement Fraction and Allelic Copy number
99
Estimate from Tumor/normal Sequencing.
1010
License: GPL (>= 2)
11-
Depends: R (>= 2.10), pctGCdata (>= 0.2.0)
11+
Depends: R (>= 3.0.0), pctGCdata (>= 0.3.0)
1212
Remotes: veseshan/pctGCdata
1313
NeedsCompilation: yes

NAMESPACE

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
useDynLib(facets)
22
import(stats,graphics)
33
importFrom("utils", "read.csv")
4-
importFrom("grDevices", "colorRampPalette")
4+
importFrom("grDevices", "colorRampPalette", "rainbow")
55
importFrom("pctGCdata", "getGCpct")
66
export("readSnpMatrix", "preProcSample", "procSample", "emcncf", "emcncf2", "plotSample","logRlogORspider")

R/facets-procreads.R

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,8 @@
11
# heterozygous and keep flags of the SNPs
2-
procSnps <- function(rcmat, ndepth=35, het.thresh=0.25, snp.nbhd=250, gbuild="hg19", unmatched=FALSE, ndepthmax=1000) {
2+
procSnps <- function(rcmat, ndepth=35, het.thresh=0.25, snp.nbhd=250, nX=23, unmatched=FALSE, ndepthmax=1000) {
33
# keep only chromosomes 1-22 & X for humans and 1-19, X for mice
4-
if (gbuild %in% c("hg19", "hg38", "hg18")) {
5-
chromlevels <- c(1:22,"X")
6-
} else {
7-
chromlevels <- c(1:19,"X")
8-
}
4+
# for other genomes (gbuild = udef) nX is number of autosomes plus 1
5+
chromlevels <- c(1:(nX-1),"X")
96
chr.keep <- rcmat$Chromosome %in% chromlevels
107
# keep only snps with normal read depth >= ndepth and < ndepthmax
118
depthN.keep <- (rcmat$NOR.DP >= ndepth) & (rcmat$NOR.DP < ndepthmax)
@@ -46,7 +43,7 @@ scanSnp <- function(maploc, het, nbhd) {
4643
}
4744

4845
# obtain logR and logOR from read counts and GC-correct logR
49-
counts2logROR <- function(mat, gbuild, unmatched=FALSE, f=0.2) {
46+
counts2logROR <- function(mat, gbuild, unmatched=FALSE, ugcpct=NULL, f=0.2) {
5047
out <- mat[mat$keep==1,]
5148
# gc percentage
5249
out$gcpct <- rep(NA_real_, nrow(out))
@@ -57,7 +54,11 @@ counts2logROR <- function(mat, gbuild, unmatched=FALSE, f=0.2) {
5754
ii <- which(out$chrom==i)
5855
# allow for chromosomes with no SNPs i.e. not targeted
5956
if (length(ii) > 0) {
60-
out$gcpct[ii] <- getGCpct(i, out$maploc[ii], gbuild)
57+
if (gbuild == "udef") {
58+
out$gcpct[ii] <- getGCpct(i, out$maploc[ii], gbuild, ugcpct)
59+
} else {
60+
out$gcpct[ii] <- getGCpct(i, out$maploc[ii], gbuild)
61+
}
6162
}
6263
}
6364
##### log-ratio with gc correction and maf log-odds ratio steps

R/facets-wrapper.R

Lines changed: 21 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -22,13 +22,24 @@ readSnpMatrix <- function(filename, skip=0L, err.thresh=Inf, del.thresh=Inf, per
2222
rcmat
2323
}
2424

25-
preProcSample <- function(rcmat, ndepth=35, het.thresh=0.25, snp.nbhd=250, cval=25, deltaCN=0, gbuild=c("hg19", "hg38", "hg18", "mm9", "mm10"), hetscale=TRUE, unmatched=FALSE, ndepthmax=1000) {
25+
preProcSample <- function(rcmat, ndepth=35, het.thresh=0.25, snp.nbhd=250, cval=25, deltaCN=0, gbuild=c("hg19", "hg38", "hg18", "mm9", "mm10", "udef"), ugcpct=NULL, hetscale=TRUE, unmatched=FALSE, ndepthmax=1000) {
2626
gbuild <- match.arg(gbuild)
2727
# integer value for chromosome X depends on the genome
2828
if (gbuild %in% c("hg19", "hg38", "hg18")) nX <- 23
2929
if (gbuild %in% c("mm9", "mm10")) nX <- 20
30-
pmat <- procSnps(rcmat, ndepth, het.thresh, snp.nbhd, gbuild, unmatched, ndepthmax)
31-
dmat <- counts2logROR(pmat[pmat$rCountT>0,], gbuild, unmatched)
30+
if (gbuild == "udef") {
31+
if (missing(ugcpct)) {
32+
stop("GC percent data should be supplied if udef option is used")
33+
} else {
34+
nX <- length(ugcpct)
35+
}
36+
}
37+
pmat <- procSnps(rcmat, ndepth, het.thresh, snp.nbhd, nX, unmatched, ndepthmax)
38+
if (gbuild == "udef") {
39+
dmat <- counts2logROR(pmat[pmat$rCountT>0,], gbuild, unmatched, ugcpct)
40+
} else {
41+
dmat <- counts2logROR(pmat[pmat$rCountT>0,], gbuild, unmatched)
42+
}
3243
tmp <- segsnps(dmat, cval, hetscale, deltaCN)
3344
out <- list(pmat=pmat, gbuild=gbuild, nX=nX)
3445
c(out, tmp)
@@ -49,8 +60,7 @@ procSample <- function(x, cval=150, min.nhet=15, dipLogR=NULL) {
4960
chrs <- x$chromlevels
5061
nchr <- length(chrs)
5162
# get chromlevels from chrs
52-
if (x$gbuild %in% c("hg19", "hg38", "hg18")) chromlevels <- c(1:22,"X")[chrs]
53-
if (x$gbuild %in% c("mm9", "mm10")) chromlevels <- c(1:19,"X")[chrs]
63+
chromlevels <- c(1:(nX-1), "X")[chrs]
5464
# get the segment summary for the fit in seg.tree
5565
nsegs <- 0
5666
for (i in 1:nchr) {
@@ -162,8 +172,8 @@ plotSample <- function(x, emfit=NULL, clustered=FALSE, plot.type=c("em","naive",
162172
if (length(ii)>0) out$lcn[ii] <- 5 + log10(out$lcn[ii])
163173
plot(c(0,length(jseg$cnlr)), c(0,max(out$tcn)), type="n", ylab="copy number (nv)", xaxt="n")
164174
abline(v=chrbdry, lwd=0.25)
165-
segments(segstart, out$tcn, segend, out$tcn, lwd=1.75, col=1)
166175
segments(segstart, out$lcn, segend, out$lcn, lwd=1.75, col=2)
176+
segments(segstart, out$tcn, segend, out$tcn, lwd=1.75, col=1)
167177
# add the cf
168178
plot(c(0,length(jseg$cnlr)), 0:1, type="n", ylab="", xaxt="n", yaxt="n")
169179
mtext("cf-nv", side=2, at=0.5, line=0.3, las=2, cex=0.75)
@@ -178,8 +188,8 @@ plotSample <- function(x, emfit=NULL, clustered=FALSE, plot.type=c("em","naive",
178188
if (length(ii)>0) out$lcn.em[ii] <- 5 + log10(out$lcn.em[ii])
179189
plot(c(0,length(jseg$cnlr)), c(0,max(out$tcn.em)), type="n", ylab="copy number (em)", xaxt="n")
180190
abline(v=chrbdry, lwd=0.25)
181-
segments(segstart, out$tcn.em, segend, out$tcn.em, lwd=1.75, col=1)
182191
segments(segstart, out$lcn.em, segend, out$lcn.em, lwd=1.75, col=2)
192+
segments(segstart, out$tcn.em, segend, out$tcn.em, lwd=1.75, col=1)
183193
# add the cf
184194
plot(c(0,length(jseg$cnlr)), 0:1, type="n", ylab="", xaxt="n", yaxt="n")
185195
mtext("cf-em", side=2, at=0.5, line=0.2, las=2, cex=0.75)
@@ -215,7 +225,7 @@ logRlogORspider <- function(cncf, dipLogR=0, nfrac=0.005) {
215225
}
216226
}
217227

218-
plot(c(-0.95, 1.8), c(0, 5), type="n", xlab="Expected(logR - dipLogR)", ylab=" Expected(|logOR|)")
228+
plot(c(-0.95, 1.8), c(0, 5.2), type="n", xlab="Expected(logR - dipLogR)", ylab=" Expected(|logOR|)")
219229
l <- 1; i <-1; j <-0
220230
linecols <- c("black","cyan3","green3","blue")
221231
lines(logCNR[,l], logACR[,l], lty=1, col=j+1, lwd=1.25)
@@ -232,5 +242,7 @@ logRlogORspider <- function(cncf, dipLogR=0, nfrac=0.005) {
232242
nhets <- sum(cncf$nhet)
233243
ii <- cncf$num.mark > nfrac*nsnps & cncf$nhet > nfrac*nhets
234244
cex <- 0.3 + 2.7*(cncf$num.mark[ii]/sum(0.1*cncf$num.mark[ii]))
235-
points(cncf$cnlr.median[ii] - dipLogR, sqrt(abs(cncf$mafR[ii])), cex=cex, col="magenta4", lwd=1.5)
245+
chrcol <- rainbow(24)
246+
points(cncf$cnlr.median[ii] - dipLogR, sqrt(abs(cncf$mafR[ii])), cex=cex, pch=10, col=chrcol[cncf$chrom[ii]], lwd=1.5)
247+
legend(-1, 5.25, paste("chr", c(1:22, "X"), sep=""), ncol=4, pch=10, col=chrcol[1:23], cex=0.65)
236248
}

inst/ChangeLog

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
1-
02/28/2018: v0.6.0
1+
12/13/2018: v0.6.0
22

33
o Removed emcncf2 and marked as defunct
4+
o Changed the order of tcn-lcn lines being drawn to address (0,0) masking
5+
o Added udef option for gbuild to enable analyzing other genomes (say
6+
dog) with user supplied GC percentage data
47

58
02/28/2018: v0.5.14
69

man/facets-internal.Rd

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,9 @@
2222
}
2323
\usage{
2424
jointsegsummary(jointseg)
25-
procSnps(rcmat, ndepth=35, het.thresh=0.25, snp.nbhd=250, gbuild="hg19",
25+
procSnps(rcmat, ndepth=35, het.thresh=0.25, snp.nbhd=250, nX=23,
2626
unmatched=FALSE, ndepthmax=1000)
27-
counts2logROR(mat, gbuild, unmatched=FALSE, f=0.2)
27+
counts2logROR(mat, gbuild, unmatched=FALSE, ugcpct = NULL, f=0.2)
2828
scanSnp(maploc, het, nbhd)
2929
fit.cpt.tree(genomdat, edgelim=10, cval=25, hscl=1, delta=0)
3030
prune.cpt.tree(seg.tree, cval=25)

man/preProcSample.Rd

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@
66
}
77
\usage{
88
preProcSample(rcmat, ndepth=35, het.thresh=0.25, snp.nbhd=250, cval=25,
9-
deltaCN=0, gbuild=c("hg19", "hg38", "hg18", "mm9", "mm10"),
10-
hetscale=TRUE, unmatched=FALSE, ndepthmax=1000)
9+
deltaCN=0, gbuild=c("hg19", "hg38", "hg18", "mm9", "mm10", "udef"),
10+
ugcpct = NULL, hetscale=TRUE, unmatched=FALSE, ndepthmax=1000)
1111
}
1212
\arguments{
1313
\item{rcmat}{data frame with 6 required columns: \code{Chrom},
@@ -21,7 +21,13 @@
2121
\item{gbuild}{genome build used for the alignment of the genome.
2222
Default value is human genome build hg19. Other possibilities are
2323
hg38 & hg18 for human and mm9 & mm10 for mouse. Chromosomes used for
24-
analysis are \code{1-22, X} for humans and \code{1-19} for mouse.}
24+
analysis are \code{1-22, X} for humans and \code{1-19, X} for mouse.
25+
Option udef can be used to analyze other genomes.}
26+
\item{ugcpct}{If udef is chosen for gbuild then appropriate GC
27+
percentage data should be provided through this option. This is a
28+
list of numeric vectors that give the GC percentage for windows of
29+
width 1000 bases in steps of 100 i.e. 1-1000, 101-1100 etc. for the
30+
autosomes and the X chromosome.}
2531
\item{hetscale}{logical variable to indicate if logOR should get more
2632
weight in the test statistics for segmentation and clustering. Usually
2733
only 10\% of snps are hets and hetscale gives the logOR contribution

vignettes/FACETS.pdf

-1.12 KB
Binary file not shown.

0 commit comments

Comments
 (0)