Skip to content

Commit 71036b9

Browse files
author
Tallulah Andrews
committed
merged dev
2 parents 9baa923 + a644da3 commit 71036b9

File tree

10 files changed

+192
-29
lines changed

10 files changed

+192
-29
lines changed

DESCRIPTION

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
Package: M3Drop
2-
Version: 0.99.1
3-
Date: 2016-02-10
2+
Version: 0.99.2
3+
Date: 2016-04-17
44
Title: Michaelis-Menten Modelling of Dropouts
55
Author: Tallulah Andrews <tallulandrews@gmail.com>
66
Maintainer: Tallulah Andrews <tallulandrews@gmail.com>

R/Brennecke_implementation.R

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
Brennecke_getVariableGenes <- function(expr_mat, spikes=NA, suppress.plot=FALSE, fdr=0.1, minBiolDisp=0.5) {
33
#require(statmod)
44

5-
rowVars <- function(x) { unlist(apply(x,1,var))}
5+
rowVars <- function(x) { unlist(apply(x,1,var, na.rm=T))}
66

77
colGenes = "black"
88
colSp = "blue"
@@ -22,23 +22,23 @@ Brennecke_getVariableGenes <- function(expr_mat, spikes=NA, suppress.plot=FALSE,
2222
countsGenes = fullCountTable;
2323
}
2424

25-
meansSp = rowMeans(countsSp)
25+
meansSp = rowMeans(countsSp, na.rm=T)
2626
varsSp = rowVars(countsSp)
2727
cv2Sp = varsSp/meansSp^2
28-
meansGenes = rowMeans(countsGenes)
28+
meansGenes = rowMeans(countsGenes, na.rm=T)
2929
varsGenes = rowVars(countsGenes)
3030
cv2Genes = varsGenes/meansGenes^2
3131
# Fit Model
3232
minMeanForFit <- unname( quantile( meansSp[ which( cv2Sp > 0.3 ) ], 0.80))
3333
useForFit <- meansSp >= minMeanForFit
34-
if (sum(useForFit) < 50) {
34+
if (sum(useForFit, na.rm=T) < 20) {
3535
warning("Too few spike-ins exceed minMeanForFit, recomputing using all genes.")
3636
meansAll = c(meansGenes, meansSp)
3737
cv2All = c(cv2Genes,cv2Sp)
3838
minMeanForFit <- unname( quantile( meansAll[ which( cv2All > 0.3 ) ], 0.80))
3939
useForFit <- meansSp >= minMeanForFit
4040
}
41-
if (sum(useForFit) < 50) {warning(paste("Only", sum(useForFit), "spike-ins to be used in fitting, may result in poor fit."))}
41+
if (sum(useForFit, na.rm=T) < 30) {warning(paste("Only", sum(useForFit), "spike-ins to be used in fitting, may result in poor fit."))}
4242
fit <- glmgam.fit( cbind( a0 = 1, a1tilde = 1/meansSp[useForFit] ), cv2Sp[useForFit] )
4343
a0 <- unname( fit$coefficients["a0"] )
4444
a1 <- unname( fit$coefficients["a1tilde"])

R/Curve_fitting.R

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ bg__fit_MM <- function (p,s) {
66
# Kerr = exp(Kcoeff+Kerr)-exp(Kcoeff)
77
# predicted = fitted(fit)
88
# krt=summary(fit)$parameters[1,1]
9-
# return(list(K=krt,Kerr=Kerr,predictions=predicted, model=c("MMenton",paste("Krt =",round(krt,digits=3))),SSr=round(sum((residuals(fit))^2)),SAr=round(sum(abs(residuals(fit))))))
9+
# return(list(K=krt,Kerr=Kerr,predictions=predicted, model=c("MMenten",paste("Krt =",round(krt,digits=3))),SSr=round(sum((residuals(fit))^2)),SAr=round(sum(abs(residuals(fit))))))
1010
if (length(p) != length(s)) {
1111
stop(print("Error: p and s not same length. Cannot fit Michaelis-Menten."))
1212
}
@@ -30,7 +30,7 @@ bg__fit_MM <- function (p,s) {
3030
Kerr = fit@coef[2]
3131
predicted = 1-(s/(krt+s))
3232
residuals = p-predicted
33-
return(list(K=krt,Kerr=Kerr,fitted_err = res_err,predictions=predicted, model=c("MMenton",paste("Krt =",round(krt,digits=3))),SSr=round(sum((residuals)^2)),SAr=round(sum(abs(residuals)))))
33+
return(list(K=krt,Kerr=Kerr,fitted_err = res_err,predictions=predicted, model=c("MMenten",paste("K =",round(krt,digits=3))),SSr=round(sum((residuals)^2)),SAr=round(sum(abs(residuals)))))
3434

3535
}
3636
bg__fit_logistic <- function(p,s) {
@@ -98,6 +98,6 @@ M3Drop_Dropout_Models <- function(expr_mat, xlim=NA, suppress.plot=FALSE) {
9898
sizeloc = bg__add_model_to_plot(SCDE, BasePlot, lty=2, lwd=2.5, col="magenta3",legend_loc = c(sizeloc$rect$left+sizeloc$rect$w,sizeloc$rect$top-sizeloc$rect$h-0.05));
9999
sizeloc = bg__add_model_to_plot(ZIFA, BasePlot, lty=3, lwd=2.5, col="red",legend_loc = c(sizeloc$rect$left+sizeloc$rect$w,sizeloc$rect$top-sizeloc$rect$h-0.05));
100100
}
101-
invisible(list(MMfit = MM, LogiFit = SCDE, ExpoFit = ZIFA));
101+
invisible(list(MMFit = MM, LogiFit = SCDE, ExpoFit = ZIFA));
102102
}
103103

R/Extremes.R

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ bg__test_DE_K_equiv <- function (expr_mat, fit=NA) {
2222
# Use the fact that errors of proportions are well define by converting S to proportion detected equivalents?
2323
hidden__test_DE_P_equiv <- function (expr_mat, fit=NA) {
2424
gene_info = bg__calc_variables(expr_mat);
25-
if (is.na(fit)) {
25+
if (is.na(fit)[1]) {
2626
fit = bg__fit_MM(gene_info$p, gene_info$s);
2727
}
2828
p_obs = gene_info$p;
@@ -78,7 +78,7 @@ hidden__test_DE_S_equiv <- function (expr_mat, fit=NA, method="propagate") {
7878

7979
bg__get_extreme_residuals <- function (expr_mat, fit=NA, v_threshold=c(0.05,0.95), percent = NA, fdr_threshold = 0.1, direction="right", suppress.plot = FALSE) {
8080
gene_info = bg__calc_variables(expr_mat);
81-
if (is.na(fit)) {
81+
if (is.na(fit)[1]) {
8282
fit = bg__fit_MM(gene_info$p, gene_info$s);
8383
}
8484
res = bg__horizontal_residuals_MM_log10(fit$K, gene_info$p, gene_info$s)

R/Normalization.R

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,11 @@
22
hidden__UQ <- function(x){quantile(x[x>0],0.75)};
33

44
bg__filter_cells <- function(expr_mat,labels=NA, suppress.plot=FALSE, min_detected_genes=NA) {
5-
num_detected = colSums(expr_mat > 0);
5+
num_detected = colSums(expr_mat > 0, na.rm=T);
66
if (!is.na(min_detected_genes)) {
77
low_quality = num_detected < min_detected_genes;
88
} else {
9-
num_zero = colSums(expr_mat == 0);
9+
num_zero = colSums(expr_mat == 0, na.rm=T);
1010
cell_zero = num_zero;
1111
mu = mean(cell_zero);
1212
sigma = sd(cell_zero);

R/Plotting_fxns.R

Lines changed: 56 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ bg__dropout_plot_base <- function (expr_mat, xlim = NA, suppress.plot=FALSE) {
1717
if (!suppress.plot) {
1818
par(fg="black")
1919
if (!(sum(is.na(xlim)))) {
20-
plot(xes,gene_info$p, main="", ylab="Dropout Proportion", xlab="log(expression)", col = dens.col,pch=16, xlim=xlim, ylim=c(0,1))
20+
plot(xes,gene_info$p, main="", ylab="Dropout Proportion", xlab="log10(expression)", col = dens.col,pch=16, xlim=xlim, ylim=c(0,1))
2121
} else {
2222
plot(xes,gene_info$p, main="", ylab="Dropout Proportion", xlab="log(expression)", col = dens.col,pch=16)
2323
}
@@ -55,10 +55,10 @@ bg__expression_heatmap <- function (genes, expr_mat, cell_labels=NA, gene_labels
5555
if(!is.numeric(genes)) {
5656
new_genes = match(genes, rownames(expr_mat));
5757
nomatch = sum(is.na(new_genes));
58-
if (nomatch > 0) {warning(paste(nomatch, " genes could not be matched to data, they will not be included in the heatmap."));}
58+
if (nomatch > 0) {warning(paste("Warning: ",nomatch, " genes could not be matched to data, they will not be included in the heatmap."));}
5959
genes = new_genes[!is.na(new_genes)];
6060
}
61-
if (length(genes) < 1) {warning("No genes for heatmap.");return();}
61+
if (length(genes) < 1) {stop("Error: No genes for heatmap.");return();}
6262
# Plot heatmap of expression
6363
heatcolours <- rev(brewer.pal(11,"RdBu"))
6464
col_breaks = c(-100,seq(-2,2,length=10),100)
@@ -71,7 +71,7 @@ bg__expression_heatmap <- function (genes, expr_mat, cell_labels=NA, gene_labels
7171
if (!is.na(key_genes[1])) {
7272
rownames(heat_data)[rownames(expr_mat[genes,]) %in% key_genes] = rownames(expr_mat[genes,])[rownames(expr_mat[genes,]) %in% key_genes];
7373
}
74-
colnames(heat_data) = rep("", length(heat_data[1,]));
74+
colnames(heat_data) = 1:length(colnames(heat_data));
7575
if (!is.na(key_cells[1])) {
7676
colnames(heat_data)[colnames(expr_mat[genes,]) %in% key_cells] = colnames(expr_mat[genes,])[colnames(expr_mat[genes,]) %in% key_cells];
7777
}
@@ -98,7 +98,11 @@ bg__expression_heatmap <- function (genes, expr_mat, cell_labels=NA, gene_labels
9898
lmat=rbind(c(6,0,5),c(0,0,2),c(4,1,3))
9999

100100

101-
heatmap_output = suppressWarnings(heatmap.2(heat_data, ColSideColors = ColColors, RowSideColors = RowColors, col=heatcolours, breaks=col_breaks, scale="row",symbreaks=T, trace="none", dendrogram="column", key=FALSE, Rowv=TRUE, Colv=TRUE,lwid=lwid, lhei=lhei,lmat=lmat, hclustfun=function(x){hclust(x,method="ward.D2")}))
101+
if (dim(heat_data)[1] < 10000) {
102+
heatmap_output = suppressWarnings(heatmap.2(heat_data, ColSideColors = ColColors, RowSideColors = RowColors, col=heatcolours, breaks=col_breaks, scale="row",symbreaks=T, trace="none", dendrogram="column", key=FALSE, Rowv=TRUE, Colv=TRUE,lwid=lwid, lhei=lhei,lmat=lmat, hclustfun=function(x){hclust(x,method="ward.D2")}))
103+
} else {
104+
heatmap_output = suppressWarnings(heatmap.2(heat_data, ColSideColors = ColColors, RowSideColors = RowColors, col=heatcolours, breaks=col_breaks, scale="row",symbreaks=T, trace="none", dendrogram="column", key=FALSE, Rowv=FALSE, Colv=TRUE,lwid=lwid, lhei=lhei,lmat=lmat, hclustfun=function(x){hclust(x,method="ward.D2")}))
105+
}
102106
# Custom key
103107
par(fig = c(0, 1/(5.2),4/(5.2), 1), mar=c(4,1,1,1), new=TRUE)
104108
scale01 <- function(x, low = min(x), high = max(x)) {
@@ -149,8 +153,54 @@ M3Drop_Expression_Heatmap <- function(genes, expr_mat, cell_labels=NA, interesti
149153
if (is.numeric(key_cells) | is.logical(key_cells)) {
150154
key_cells = rownames(expr_mat)[key_cells];
151155
}
156+
if (is.factor(genes)) {
157+
genes = as.character(genes);
158+
}
159+
if (!is.vector(genes)) {
160+
stop("Error: genes must be a vector.")
161+
}
152162
heatmap_output = bg__expression_heatmap(genes, expr_mat, cell_labels=cell_labels, gene_labels=as.numeric(gene_labels), key_genes=as.character(key_genes), key_cells=key_cells);
153163
invisible(heatmap_output);
154164
}
155165

156-
M3Drop_Get_Heatmap_Cell_Clusters <- function (heatmap_output, k) {cutree(as.hclust(heatmap_output$colDendrogram), k=k)}
166+
M3Drop_Get_Heatmap_Cell_Clusters <- function (heatmap_output, k) {
167+
tryCatch(
168+
returned_val <- cutree(as.hclust(heatmap_output$colDendrogram), k=k),
169+
warning=function(w) {print(w)},
170+
error=function(e){
171+
print(e);
172+
print("Dendrogram may have flat branches, trying again");
173+
returned_val <-hidden_get_clusters(heatmap_output,k)
174+
}
175+
)
176+
return(returned_val);
177+
}
178+
179+
hidden_get_clusters<- function(heatout, k){
180+
dendro=heatout$colDendrogram
181+
curr_k = 1;
182+
dendro_list = list(dendro)
183+
dendro_heights = attr(dendro, "height")
184+
while( curr_k < k ){
185+
to_split = which(dendro_heights == max(dendro_heights))
186+
to_split_dendro = dendro_list[[to_split]]
187+
to_split_height = dendro_heights[to_split]
188+
189+
children = as.list(to_split_dendro)
190+
for (i in 1:length(children)) {
191+
dendro_heights = c(dendro_heights,attr(children[[i]],"height"))
192+
dendro_list[[length(dendro_list)+1]] <- children[[i]]
193+
}
194+
# Remove to split
195+
dendro_list[to_split] = NULL
196+
dendro_heights = dendro_heights[-to_split]
197+
curr_k = curr_k-1+length(children)
198+
}
199+
# Make group vector
200+
names_orig_order = labels(dendro)[order(heatout$colInd)]
201+
groups = rep(0, times=length(names_orig_order))
202+
for (i in 1:length(dendro_list)) {
203+
groups[names_orig_order %in% labels(dendro_list[[i]])] = i
204+
}
205+
return(groups);
206+
}

R/basics.R

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
bg__calc_variables <- function(expr_mat) {
22
# Calc variables
3+
if (sum(expr_mat < 0) >0) {stop("Expression matrix contains negative values! M3Drop requires an expression matrix that is not log-transformed.")}
34
p = apply(expr_mat,1,function(x){y = x[!is.na(x)]; sum(y==0)/length(y)});
45
s = rowMeans(expr_mat, na.rm=T);
56
s_stderr = unlist(apply(expr_mat,1,sd))/sqrt(length(expr_mat[1,]));
@@ -15,7 +16,6 @@ hidden__invert_MM <- function (K, p) {K*(1-p)/(p)}
1516
bg__horizontal_residuals_MM_log10 <- function (K, p, s) {log(s)/log(10) - log(hidden__invert_MM(K,p))/log(10)}
1617

1718
hidden_getAUC <- function(gene, labels) {
18-
require("ROCR")
1919
ranked=rank(gene);
2020
ms = aggregate(ranked~unlist(labels),FUN=mean); #Get average score for each cluster
2121
posgroup = as.character(unlist(ms[which(ms[,2]==max(ms[,2])),1])); #Get cluster with highest average score
@@ -25,8 +25,8 @@ hidden_getAUC <- function(gene, labels) {
2525
truth = labels == posgroup
2626

2727
#Make predictions & get auc using RCOR package.
28-
pred=prediction(ranked,as.numeric(truth))
29-
val = unlist(performance(pred,"auc")@y.values)
28+
pred=ROCR::prediction(ranked,as.numeric(truth))
29+
val = unlist(ROCR::performance(pred,"auc")@y.values)
3030
pval = wilcox.test(gene[truth],gene[!truth])$p.value
3131
if (!exists("pval")) {pval=NA}
3232

@@ -44,6 +44,6 @@ M3Drop_getmarkers <- function(expr_mat, labels) {
4444
auc_df[,1] = as.numeric(as.character(auc_df[,1]))
4545
auc_df[,3] = as.numeric(as.character(auc_df[,3]))
4646
auc_df = auc_df[auc_df[,1] > 0,]
47+
auc_df = auc_df[order(-auc_df$AUC),]
4748
return(auc_df);
48-
4949
}

man/M3D_Test_Shift.Rd

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,13 +11,16 @@
1111
\item{expr_mat}{a numeric matrix of expression values, columns = samples, rows = genes.}
1212
\item{genes_to_test}{vector of gene names to test.}
1313
\item{name}{string used to title the plot.}
14-
\item{background}{vector of gene names to test against.}
14+
\item{background}{vector of gene names to test against. (default = all genes)}
1515
\item{suppress.plot}{logical, whether to the fitted Michaelis-Menten curve and highlight the given set of genes to test.}
1616
}
17-
\details{Fits a Michaelis-Menten function to the dropout-rate of the provided data, then tests whether a given set of genes is significantly shifted left or right of the curve. Horizontal residuals are calculated as : \deqn{\log_{10} S - \log_{10} \frac{K*(1-P)}{P}}{log_10(S) - log_10( (K * (1 - P)) / P )}. Uses a Wilcox rank-sum test/Mann-Whitney U test to compare the residuals for the given genes to the residuals for all genes.
17+
\details{Fits a Michaelis-Menten function to the dropout-rate of the provided data, then tests whether a given set of genes (eg. pseudogenes) is significantly shifted left or right of the curve. Horizontal residuals are calculated as : \deqn{\log_{10} S - \log_{10} \frac{K*(1-P)}{P}}{log_10(S) - log_10( (K * (1 - P)) / P )}. Uses a Wilcox rank-sum test/Mann-Whitney U test to compare the residuals for the given genes to the residuals for all genes.
1818
}
1919
\value{
20-
A one row dataframe with columns: sample (sample median horizontal residual), pop (population median horizontal residual), p.value
20+
A one row dataframe with columns:
21+
sample (median horizontal residual of genes in the test set),
22+
pop (median horizontal residual of genes in the background set),
23+
p.value
2124
}
2225
\examples{
2326
library(M3DExampleData)

man/bg__horizontal_residuals_MM_log10.Rd

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
residuals = bg__horizontal_residuals_MM_log10(K=9, p=gene_info$p, s=gene_info$s)
2424
}
2525
\seealso{
26-
\code{\link{M3D_Test_Shift}}
27-
\code{\link{M3D_Get_Extremes}}
26+
\code{\link{M3Drop_Test_Shift}}
27+
\code{\link{M3Drop_Get_Extremes}}
2828
}
2929
\keyword{residuals}

0 commit comments

Comments
 (0)