@@ -8,6 +8,8 @@ suppressMessages(library(optparse))
8
8
suppressMessages(library(data.table ))
9
9
suppressMessages(library(ggplot2 ))
10
10
suppressMessages(library(tidyverse ))
11
+ suppressMessages(library(reshape ))
12
+ suppressMessages(library(stringr ))
11
13
12
14
13
15
option_list = list (
@@ -127,20 +129,26 @@ colnames(pradafusion) <- c("Gene1", "Gene2", "Transcript1_Len", "Transcript2_Len
127
129
# ##process the file for plotting
128
130
pradafusion <- na.omit(pradafusion )
129
131
pradafusion $ BitScore <- log(pradafusion $ BitScore , 10 )
132
+
130
133
pradafusion <- pradafusion %> %
131
134
unite(FusionName , Gene1 , Gene2 , sep = " --" )
132
135
highlight_genes <- pradafusion %> %
133
136
filter((Evalue < 0.05 & Align_Len > 1000 ) | (Evalue < 0.05 & BitScore > 2 ))
134
137
135
138
136
- png(paste(outdir , pheno , " _prada_homology.png" , sep = " " ), width = 800 , height = 700 )
139
+ png(paste(outdir , pheno , " _prada_homology.png" , sep = " " ), width = 1600 , height = 1400 , res = 300 )
137
140
pradafusion %> %
138
141
ggplot(aes(x = BitScore ,y = Evalue ,size = Align_Len )) +
139
142
geom_point(alpha = 0.3 ) +
140
143
geom_point(data = highlight_genes ,
141
144
aes(x = BitScore ,y = Evalue ),
142
145
color = " #e6550d" , alpha = 0.6 )+
143
- labs(x = " log10(BitScore)" , y = " Evalue" ) + theme_bw()
146
+ geom_label_repel(data = highlight_genes , aes(label = FusionName ), size = 1.5 ) +
147
+ labs(x = " log10(BitScore)" , y = " Evalue" ) + theme_bw() +
148
+ theme(axis.text = element_text(angle = 0 ,size = 8 ,face = " bold" ,hjust = 0.5 ),
149
+ axis.title = element_text(size = 8 ,face = " bold" ),
150
+ legend.text = element_text(size = 8 ),
151
+ legend.title = element_text(size = 8 ))
144
152
dev.off()
145
153
146
154
# remove the homogous fusion gene pair
@@ -154,33 +162,107 @@ write.table(merge.fusion.df, paste(outdir,pheno,"_fusion_gene_table.txt", sep =
154
162
155
163
156
164
# ## extract expression of oncogene,tsg and protein kinase genes
157
- png(paste(outdir , pheno ," _fusion_gene_plot.png" , sep = " " ), width = 1600 , height = 1400 )
165
+ png(paste(outdir , pheno ," _fusion_gene_plot.png" , sep = " " ), width = 1600 , height = 1400 , res = 300 )
158
166
# pdf(paste(outdir, pheno,"_fusion_gene_plot.pdf", sep = ""), width = 8, height = 7)
159
167
ggplot(merge.fusion.df , aes(x = Phenotype , y = log10(Expression + 1 ))) +
160
168
geom_violin(trim = TRUE ) +
161
- geom_jitter(aes(color = Type ),shape = 16 , size = 4 # , position=position_jitter(0.15)
169
+ geom_jitter(aes(color = Type ),shape = 16 # , size = 4#, position=position_jitter(0.15)
162
170
)+
163
171
theme_bw()+
164
172
scale_colour_manual(name = " Type" , values = c(Oncogene = " #a6bddb" ,Tumor_Suppressor = " #99d8c9" , Both = " #4DAF4A" ,Cancer_Driver = " grey" ))+
165
173
new_scale_color() +
166
174
geom_label_repel(data = subset(merge.fusion.df , Type != " Others" ),
167
175
aes(label = Gene , fill = Type ), # alpha = 0.5,#fill = "white",
168
- fontface = ' bold' ,label.size = 0.15 , family = " serif"
176
+ fontface = ' bold' ,label.size = 0.15 , family = " serif" , size = 2.5
169
177
)+
170
178
scale_fill_manual(values = c(Oncogene = " #a6bddb" ,Tumor_Suppressor = " #99d8c9" , Both = " #4DAF4A" ,Cancer_Driver = " grey" ),guide = FALSE )+
171
179
scale_colour_manual(name = " Target" , values = c(left_target = " #636363" ,right_target = " #e6550d" ))+
172
180
scale_x_discrete(name = " " , labels = c(" APre" = " Pre" )) +
173
- theme(axis.text.x = element_text(angle = 0 ,size = 20 ,face = " bold" ,hjust = 0.5 ),
174
- axis.text.y = element_text(size = 20 ,face = " bold" ,hjust = 1 ),
181
+ theme(axis.text.x = element_text(angle = 0 ,size = 8 ,face = " bold" ,hjust = 0.5 ),
182
+ axis.text.y = element_text(size = 8 ,face = " bold" ,hjust = 1 ),
175
183
axis.title.x = element_blank(),
176
- axis.title.y = element_text(size = 20 ,face = " bold" ),
177
- legend.text = element_text(size = 20 ),
178
- legend.title = element_text(size = 20 ),
179
- strip.text = element_text(size = 20 , face = " bold" ),
184
+ axis.title.y = element_text(size = 8 ,face = " bold" ),
185
+ legend.text = element_text(size = 8 ),
186
+ legend.title = element_text(size = 8 ),
187
+ strip.text = element_text(size = 8 , face = " bold" ),
180
188
legend.position = " right" )
181
189
dev.off()
182
190
183
191
184
192
185
193
194
+ # #fusion box plot
195
+ type <- c(" Oncogene" , " Tumor_Suppressor" , " Cancer_Driver" , " Others" )
196
+
197
+ ta <- merge.fusion.df
198
+
199
+ # preprocess the data
200
+ processed_ta <- NULL
201
+ for (i in unique(ta $ Phenotype )) {
202
+ tmp_ta <- ta [ta $ Phenotype == i ,]
203
+ tmp_ta_others <- tmp_ta [tmp_ta $ Type == " Others" ,]
204
+ tmp_ta_sig <- tmp_ta [tmp_ta $ Type != " Others" ,]
205
+
206
+ # remove the duplicates for others pairs, and calculating their average expression
207
+ tmp_ta_others <- tmp_ta_others %> % group_by(Gene ) %> % mutate(Expression = mean(Expression ))
208
+ tmp_ta_others <- tmp_ta_others [! duplicated(tmp_ta_others $ Gene ),]
209
+ tmp_ta_others <- tmp_ta_others %> % group_by(Gene ) %> % mutate(ID = 1 : n())
210
+
211
+ # group multiple hits for signifcant gene pairs
212
+ tmp_ta_sig <- tmp_ta_sig %> % group_by(Gene ) %> % mutate(ID = 1 : n())
213
+
214
+ tmp_ta <- rbind(tmp_ta_others , tmp_ta_sig )
215
+
216
+ processed_ta <- rbind(processed_ta , tmp_ta )
217
+ }
218
+
219
+ # re-order the figure followed: Oncogenes, Tumor suppressor, Cancer driver, Others
220
+ reorder_ta <- NULL
221
+ for (t in type ) {
222
+ tmp <- processed_ta [processed_ta $ Type == t ,]
223
+ tmp <- tmp [order(tmp $ Expression , decreasing = TRUE ),]
224
+ reorder_ta <- rbind(reorder_ta , tmp )
225
+ }
226
+
227
+
228
+ reorder_ta $ Gene <- factor (reorder_ta $ Gene , levels = rev(unique(reorder_ta $ Gene )))
229
+ reorder_ta <- reorder_ta [reorder_ta $ Expression != 0 ,]
230
+
231
+
232
+ # ###labeling
233
+ label <- reorder_ta [reorder_ta $ Pos != " Others" ,]
234
+ test <- str_split_fixed(label $ Gene , " --" , 2 )
235
+ label $ left <- test [,1 ]
236
+ label $ right <- test [,2 ]
237
+ label $ Pos <- ifelse(label $ Pos == " left_target" , label $ left , ifelse(label $ Pos == " right_target" , label $ right , " both" ))
238
+ label <- label [colnames(reorder_ta )]
239
+ label <- label [! duplicated(label $ Gene ),]
240
+ # ##
241
+
242
+
243
+ # ##define the color
244
+ color_ta <- data.frame (terms = type , color = c(" #e34a33" , " #a6bddb" , " #fc9272" , " #bdbdbd" ))
245
+ iterms <- as.character(color_ta [color_ta $ terms %in% unique(reorder_ta $ Type ),]$ terms )
246
+ color <- as.character(color_ta [color_ta $ terms %in% iterms ,]$ color )
247
+
248
+ png(paste(outdir , " fusion_boxplot.png" , sep = " " ), width = 2000 , height = 1400 , res = 300 )
249
+ p <- ggplot(reorder_ta , aes(x = Gene , y = Expression , fill = Type , group = factor (ID ))) +
250
+ geom_bar(stat = " identity" ,position = position_dodge(), color = " #636363" ) +
251
+ facet_grid(Phenotype ~ . , scales = " free_y" , space = " free_y" ) + # , scales = "free_y", space = "free_y") +
252
+ # facet_wrap(~Phenotype, ncol = 1, strip.position="right", scales = "free_y") + coord_flip() + theme_bw() +
253
+ geom_text(data = label , aes(label = Pos ), position = position_dodge(.9 ), hjust = - 0.1 , size = 2 ) +
254
+ coord_flip() + theme_bw() +
255
+ scale_fill_manual(breaks = iterms , values = color ) +
256
+ labs(x = " Gene Pairs" ) +
257
+ theme(axis.text.x = element_text(size = 8 ),
258
+ axis.text.y = element_text(size = 6 ),
259
+ axis.title = element_text(size = 8 ,face = " bold" ),
260
+ legend.title = element_text(size = 8 ),
261
+ legend.text = element_text(size = 6 ),
262
+ strip.text = element_text(size = 8 , face = " bold" ))
263
+ p + expand_limits(y = c(min(reorder_ta $ Expression ), max(reorder_ta $ Expression )* 1.1 ))
264
+ dev.off()
265
+
266
+
267
+
186
268
0 commit comments