Skip to content

Commit 1e9d253

Browse files
author
Ubuntu
committed
added new fusion boxplot
1 parent 6c35a55 commit 1e9d253

File tree

1 file changed

+93
-11
lines changed

1 file changed

+93
-11
lines changed

src/mutation/fusion_plot.R

Lines changed: 93 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@ suppressMessages(library(optparse))
88
suppressMessages(library(data.table))
99
suppressMessages(library(ggplot2))
1010
suppressMessages(library(tidyverse))
11+
suppressMessages(library(reshape))
12+
suppressMessages(library(stringr))
1113

1214

1315
option_list = list(
@@ -127,20 +129,26 @@ colnames(pradafusion) <- c("Gene1", "Gene2", "Transcript1_Len", "Transcript2_Len
127129
###process the file for plotting
128130
pradafusion<-na.omit(pradafusion)
129131
pradafusion$BitScore <- log(pradafusion$BitScore, 10)
132+
130133
pradafusion<-pradafusion%>%
131134
unite(FusionName, Gene1, Gene2, sep="--")
132135
highlight_genes <- pradafusion %>%
133136
filter((Evalue < 0.05 & Align_Len > 1000) | (Evalue <0.05 & BitScore > 2))
134137

135138

136-
png(paste(outdir, pheno, "_prada_homology.png", sep = ""), width = 800, height = 700)
139+
png(paste(outdir, pheno, "_prada_homology.png", sep = ""), width = 1600, height = 1400, res = 300)
137140
pradafusion %>%
138141
ggplot(aes(x=BitScore,y=Evalue,size=Align_Len)) +
139142
geom_point(alpha=0.3) +
140143
geom_point(data=highlight_genes,
141144
aes(x=BitScore,y=Evalue),
142145
color="#e6550d", alpha=0.6)+
143-
labs(x ="log10(BitScore)", y = "Evalue") + theme_bw()
146+
geom_label_repel(data = highlight_genes, aes(label = FusionName), size = 1.5) +
147+
labs(x ="log10(BitScore)", y = "Evalue") + theme_bw() +
148+
theme(axis.text=element_text(angle=0,size=8,face = "bold",hjust=0.5),
149+
axis.title = element_text(size = 8,face = "bold"),
150+
legend.text = element_text(size = 8),
151+
legend.title = element_text(size = 8))
144152
dev.off()
145153

146154
#remove the homogous fusion gene pair
@@ -154,33 +162,107 @@ write.table(merge.fusion.df, paste(outdir,pheno,"_fusion_gene_table.txt", sep =
154162

155163

156164
### extract expression of oncogene,tsg and protein kinase genes
157-
png(paste(outdir, pheno,"_fusion_gene_plot.png", sep = ""), width = 1600, height = 1400)
165+
png(paste(outdir, pheno,"_fusion_gene_plot.png", sep = ""), width = 1600, height = 1400, res = 300)
158166
#pdf(paste(outdir, pheno,"_fusion_gene_plot.pdf", sep = ""), width = 8, height = 7)
159167
ggplot(merge.fusion.df, aes(x=Phenotype, y=log10(Expression+1))) +
160168
geom_violin(trim=TRUE) +
161-
geom_jitter(aes(color = Type),shape=16, size = 4#, position=position_jitter(0.15)
169+
geom_jitter(aes(color = Type),shape=16#, size = 4#, position=position_jitter(0.15)
162170
)+
163171
theme_bw()+
164172
scale_colour_manual(name = "Type", values = c(Oncogene="#a6bddb",Tumor_Suppressor="#99d8c9", Both="#4DAF4A",Cancer_Driver="grey"))+
165173
new_scale_color() +
166174
geom_label_repel(data = subset(merge.fusion.df, Type != "Others"),
167175
aes(label = Gene, fill = Type), #alpha = 0.5,#fill = "white",
168-
fontface = 'bold',label.size = 0.15, family = "serif"
176+
fontface = 'bold',label.size = 0.15, family = "serif",size = 2.5
169177
)+
170178
scale_fill_manual(values = c(Oncogene="#a6bddb",Tumor_Suppressor="#99d8c9", Both="#4DAF4A",Cancer_Driver="grey"),guide = FALSE)+
171179
scale_colour_manual(name = "Target", values = c(left_target="#636363",right_target="#e6550d"))+
172180
scale_x_discrete(name ="", labels=c("APre" = "Pre")) +
173-
theme(axis.text.x=element_text(angle=0,size=20,face = "bold",hjust=0.5),
174-
axis.text.y=element_text(size=20,face = "bold",hjust=1),
181+
theme(axis.text.x=element_text(angle=0,size=8,face = "bold",hjust=0.5),
182+
axis.text.y=element_text(size=8,face = "bold",hjust=1),
175183
axis.title.x = element_blank(),
176-
axis.title.y = element_text(size = 20,face = "bold"),
177-
legend.text = element_text(size = 20),
178-
legend.title = element_text(size = 20),
179-
strip.text = element_text(size = 20, face = "bold"),
184+
axis.title.y = element_text(size = 8,face = "bold"),
185+
legend.text = element_text(size = 8),
186+
legend.title = element_text(size = 8),
187+
strip.text = element_text(size = 8, face = "bold"),
180188
legend.position = "right")
181189
dev.off()
182190

183191

184192

185193

194+
##fusion box plot
195+
type <- c("Oncogene", "Tumor_Suppressor", "Cancer_Driver", "Others")
196+
197+
ta <- merge.fusion.df
198+
199+
#preprocess the data
200+
processed_ta <- NULL
201+
for (i in unique(ta$Phenotype)) {
202+
tmp_ta <- ta[ta$Phenotype == i,]
203+
tmp_ta_others <- tmp_ta[tmp_ta$Type == "Others",]
204+
tmp_ta_sig <- tmp_ta[tmp_ta$Type != "Others",]
205+
206+
#remove the duplicates for others pairs, and calculating their average expression
207+
tmp_ta_others <- tmp_ta_others %>% group_by(Gene) %>% mutate(Expression = mean(Expression))
208+
tmp_ta_others <- tmp_ta_others[!duplicated(tmp_ta_others$Gene),]
209+
tmp_ta_others <- tmp_ta_others %>% group_by(Gene) %>% mutate(ID=1:n())
210+
211+
#group multiple hits for signifcant gene pairs
212+
tmp_ta_sig <- tmp_ta_sig %>% group_by(Gene) %>% mutate(ID=1:n())
213+
214+
tmp_ta <- rbind(tmp_ta_others, tmp_ta_sig)
215+
216+
processed_ta <- rbind(processed_ta, tmp_ta)
217+
}
218+
219+
#re-order the figure followed: Oncogenes, Tumor suppressor, Cancer driver, Others
220+
reorder_ta <- NULL
221+
for (t in type) {
222+
tmp <- processed_ta[processed_ta$Type ==t,]
223+
tmp <- tmp[order(tmp$Expression, decreasing = TRUE),]
224+
reorder_ta <- rbind(reorder_ta, tmp)
225+
}
226+
227+
228+
reorder_ta$Gene <- factor(reorder_ta$Gene, levels = rev(unique(reorder_ta$Gene)))
229+
reorder_ta <- reorder_ta[reorder_ta$Expression != 0,]
230+
231+
232+
####labeling
233+
label <- reorder_ta[reorder_ta$Pos != "Others",]
234+
test <- str_split_fixed(label$Gene, "--", 2)
235+
label$left <- test[,1]
236+
label$right <- test[,2]
237+
label$Pos <- ifelse(label$Pos == "left_target", label$left, ifelse(label$Pos == "right_target", label$right, "both"))
238+
label <- label[colnames(reorder_ta)]
239+
label <- label[!duplicated(label$Gene),]
240+
###
241+
242+
243+
###define the color
244+
color_ta <- data.frame(terms = type, color = c("#e34a33", "#a6bddb", "#fc9272", "#bdbdbd"))
245+
iterms <- as.character(color_ta[color_ta$terms %in% unique(reorder_ta$Type),]$terms)
246+
color <- as.character(color_ta[color_ta$terms %in% iterms,]$color)
247+
248+
png(paste(outdir, "fusion_boxplot.png", sep = ""), width = 2000, height = 1400, res = 300)
249+
p <- ggplot(reorder_ta, aes(x = Gene, y = Expression, fill = Type, group=factor(ID))) +
250+
geom_bar(stat = "identity",position=position_dodge(), color = "#636363") +
251+
facet_grid(Phenotype ~ ., scales = "free_y", space = "free_y") + #, scales = "free_y", space = "free_y") +
252+
#facet_wrap(~Phenotype, ncol = 1, strip.position="right", scales = "free_y") + coord_flip() + theme_bw() +
253+
geom_text(data = label, aes(label=Pos), position = position_dodge(.9), hjust = -0.1, size = 2) +
254+
coord_flip() + theme_bw() +
255+
scale_fill_manual(breaks = iterms, values = color) +
256+
labs(x = "Gene Pairs") +
257+
theme(axis.text.x= element_text(size=8),
258+
axis.text.y = element_text(size=6),
259+
axis.title = element_text(size = 8,face = "bold"),
260+
legend.title = element_text(size = 8),
261+
legend.text = element_text(size = 6),
262+
strip.text = element_text(size = 8, face = "bold"))
263+
p + expand_limits(y = c(min(reorder_ta$Expression), max(reorder_ta$Expression)*1.1))
264+
dev.off()
265+
266+
267+
186268

0 commit comments

Comments
 (0)