Skip to content

Commit a4580ae

Browse files
author
Lin Yang
committed
add the deseq2 clustering heatmap
1 parent 4355a95 commit a4580ae

File tree

4 files changed

+19418
-3
lines changed

4 files changed

+19418
-3
lines changed

modules/differential_expression/differential_expression_cohort.snakefile

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ rule deseq2_differential_genes:
5757
treatment = treatment,
5858
control = control,
5959
meta = config["metasheet"],
60+
pgenes = "static/deseq2/hg_pcoding.csv",
6061
path = "set +eu;source activate %s" % config['stat_root']
6162
message:
6263
"Running DESeq2 on the samples"
@@ -66,7 +67,7 @@ rule deseq2_differential_genes:
6667
shell:
6768
"{params.path}; Rscript src/differentialexpr/DESeq2.R --input {params.filelist} --type salmon \
6869
--batch {params.batch} --meta {params.meta} --tx2gene {params.tx_annot} \
69-
--condition {params.condition} --treatment {params.treatment} --control {params.control} --outpath {params.out_path}"
70+
--condition {params.condition} --pcoding {params.pgenes} --treatment {params.treatment} --control {params.control} --outpath {params.out_path}"
7071

7172

7273

src/differentialexpr/DESeq2.R

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,9 @@ option_list = list(
2424
make_option(c("-r", "--treatment"), type="character", default="./",
2525
help="Treatment", metavar="character"),
2626
make_option(c("-c", "--control"), type="character", default="./",
27-
help="Control", metavar="character")
27+
help="Control", metavar="character"),
28+
make_option(c("-p", "--pcoding"), type="character", default="./",
29+
help="proding coding gene list", metavar="character")
2830
)
2931

3032

@@ -130,9 +132,10 @@ print(class(dds))
130132
print (paste("Comparing ",opt$treatment , " VS ", opt$control, sep = ""))
131133
res <- results(dds, contrast = c("Condition",c(opt$treatment,opt$control)))
132134

135+
clustering_heatmap(dds, res, opt$pcoding, opt$outpath, opt$treatment, opt$control)
133136

134137
res_final <- as.data.frame(res)
135138
res_final$Gene_name <- rownames(res_final)
136139
res_final <- res_final[c(7, 1:6)]
137140
res_final$`-log10(padj)` <- -log10(res_final$padj)
138-
write.table(res_final,file = paste(opt$outpath,opt$condition,'_',opt$treatment,'_vs_',opt$control,'_DESeq2.txt',sep = ""), quote = FALSE,sep = "\t")
141+
write.table(res_final,file = paste(opt$outpath,opt$condition,'_',opt$treatment,'_vs_',opt$control,'_DESeq2.txt',sep = ""), quote = FALSE,sep = "\t")
Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
library(vegan)
2+
library(pheatmap)
3+
4+
clustering_heatmap <- function(dds, res, pgenes, outpath, treatment, control) {
5+
6+
print("loding the human protein coding genes...")
7+
pgenes <- read.csv(pgenes)
8+
deseq_result <- res
9+
deseq_result <- as.data.frame(deseq_result)
10+
11+
deseq_result <- deseq_result[rownames(deseq_result) %in% pgenes[[1]],]
12+
deseq_result <- na.omit(deseq_result)
13+
14+
#deseq_result <-deseq_result[order(as.numeric(deseq_result[,"stat"])),]
15+
16+
#get the normalized count matrix
17+
cdata <- as.data.frame(counts(dds, normalized = TRUE))
18+
cdata <- cdata[rownames(cdata) %in% rownames(deseq_result),]
19+
20+
#cluster the sample based on phenotype
21+
coldata <- as.data.frame(colData(dds))
22+
coldata <- coldata[order(coldata$Condition, decreasing = TRUE),]
23+
24+
cdata <- cdata[rownames(coldata)]
25+
26+
print("Calculating the expression density...")
27+
div <- diversity(cdata, index = "invsimpson")
28+
29+
cdata <- cbind(cdata, div)
30+
31+
deseq_result <- merge(deseq_result, cdata[c("div")], by = 0, all = FALSE)
32+
33+
#calculate the median of diversity for up-regulated genes and down-regulated genes
34+
up_div <- median(deseq_result[deseq_result$log2FoldChange > 0 & deseq_result$pvalue <= 0.05,]$div)
35+
down_div <- median(deseq_result[deseq_result$log2FoldChange < 0 & deseq_result$pvalue <= 0.05,]$div)
36+
37+
#rank the genes
38+
deseq_result <-deseq_result[order(as.numeric(deseq_result[,"stat"])),]
39+
40+
down_50 <- deseq_result[deseq_result$div > down_div,]$Row.names[1:50]
41+
up_50 <- tail(deseq_result[deseq_result$div > up_div,], 50)$Row.names
42+
43+
# #cluster the sample based on phenotype
44+
# coldata <- as.data.frame(colData(dds))
45+
# coldata <- coldata[order(coldata$Condition, decreasing = TRUE),]
46+
47+
48+
df <- cdata[c(down_50, up_50),]
49+
#df <- df[rownames(coldata)]
50+
51+
df <- df[-ncol(df)]
52+
#calculate the z-score across the samples
53+
a <- t(scale(t(df)))
54+
55+
pdf(paste0(outpath,'heatmap_',treatment,'_vs_',control,'.pdf'), width = 5, height = 20)
56+
pheatmap(a, cluster_cols = FALSE, cluster_rows = FALSE, annotation = coldata["Condition"])
57+
dev.off()
58+
}
59+
#deseq_result <- res
60+
#deseq_result <- as.data.frame(deseq_result)
61+
#
62+
#deseq_result <- deseq_result[rownames(deseq_result) %in% pgenes$symbol,]
63+
#deseq_result <- na.omit(deseq_result)
64+
#
65+
##deseq_result <-deseq_result[order(as.numeric(deseq_result[,"stat"])),]
66+
#
67+
##get the normalized count matrix
68+
#cdata <- as.data.frame(counts(dds, normalized = TRUE))
69+
#cdata <- cdata[rownames(cdata) %in% rownames(deseq_result),]
70+
#
71+
##cluster the sample based on phenotype
72+
#coldata <- as.data.frame(colData(dds))
73+
#coldata <- coldata[order(coldata$Condition, decreasing = TRUE),]
74+
#
75+
#cdata <- cdata[rownames(coldata)]
76+
#
77+
#div <- diversity(cdata, index = "invsimpson")
78+
#
79+
#cdata <- cbind(cdata, div)
80+
#
81+
#deseq_result <- merge(deseq_result, cdata[c("div")], by = 0, all = FALSE)
82+
#
83+
##calculate the median of diversity for up-regulated genes and down-regulated genes
84+
#up_div <- median(deseq_result[deseq_result$log2FoldChange > 0 & deseq_result$pvalue <= 0.05,]$div)
85+
#down_div <- median(deseq_result[deseq_result$log2FoldChange < 0 & deseq_result$pvalue <= 0.05,]$div)
86+
#
87+
##rank the genes
88+
#deseq_result <-deseq_result[order(as.numeric(deseq_result[,"stat"])),]
89+
#
90+
#down_50 <- deseq_result[deseq_result$div > down_div,]$Row.names[1:50]
91+
#up_50 <- tail(deseq_result[deseq_result$div > up_div,], 50)$Row.names
92+
#
93+
## #cluster the sample based on phenotype
94+
## coldata <- as.data.frame(colData(dds))
95+
## coldata <- coldata[order(coldata$Condition, decreasing = TRUE),]
96+
#
97+
#
98+
#df <- cdata[c(down_50, up_50),]
99+
##df <- df[rownames(coldata)]
100+
#
101+
#df <- df[-ncol(df)]
102+
##calculate the z-score across the samples
103+
#a <- t(scale(t(df)))
104+
#
105+
#pdf("/Users/linyang/Documents/Rplot08.pdf", width = 5, height = 20)
106+
#pheatmap(a, cluster_cols = FALSE, cluster_rows = FALSE, annotation = coldata["Condition"])
107+
#dev.off()
108+
#

0 commit comments

Comments
 (0)