@@ -164,3 +164,206 @@ vocalno_plot <- function(diff_data = NULL,
164164 }
165165
166166}
167+
168+
169+
170+
#' Create ECDF Plot with Optional Boxplot Insert
#'
#' Draws an empirical cumulative distribution function (ECDF) plot of
#' log2-transformed values for RNA-seq, ribosome profiling, or translation
#' efficiency (TE) data. It can optionally embed a boxplot of the same values
#' and always returns the Wilcoxon test results between samples.
#'
#' @param normed_data A list of data frames. The element that is read depends
#'   on \code{type}:
#'   \itemize{
#'     \item \code{tpm.rna}: TPM values for RNA-seq data (\code{type = "rna"})
#'     \item \code{tpm.ribo}: TPM values for ribosome profiling data
#'       (\code{type = "ribo"})
#'     \item \code{te}: translation efficiency values (\code{type = "te"})
#'   }
#'   The selected element must contain the columns \code{gene_id},
#'   \code{gene_name} and \code{gene_biotype}; every other column is treated
#'   as a sample.
#' @param type Character string specifying the data type. One of
#'   \code{"rna"}, \code{"ribo"}, or \code{"te"}. When left at its default
#'   the first choice, \code{"rna"}, is used.
#' @param samples_selected Character vector of sample names to include in the
#'   plot. If \code{NULL}, all sample columns are used. Names that are not
#'   present in the data are ignored with a warning.
#' @param ref_group Character string forwarded to
#'   \code{ggpubr::compare_means(ref.group = )}. Must be one of the selected
#'   samples or \code{".all."}. If \code{NULL}, no reference group is passed.
#' @param colors Named character vector of colors for each sample. If
#'   \code{NULL}, default ggplot2 colors are used.
#' @param ecdf_xlim Numeric vector of length 2 specifying x-axis limits for
#'   the ECDF plot. If \code{NULL}, automatic limits are used.
#' @param box_ylim Numeric vector of length 2 specifying y-axis limits for the
#'   boxplot insert. If \code{NULL}, automatic limits are used.
#' @param insert_box Logical indicating whether to insert a boxplot. Default
#'   is \code{FALSE}.
#' @param x_pos Numeric x position of the boxplot insert, passed to
#'   \code{cowplot::draw_plot(x = )}. Default is 0.45.
#' @param y_pos Numeric y position of the boxplot insert, passed to
#'   \code{cowplot::draw_plot(y = )}. Default is 0.4.
#' @param width Numeric width of the boxplot insert, passed to
#'   \code{cowplot::draw_plot(width = )}. Default is 0.2.
#' @param height Numeric height of the boxplot insert, passed to
#'   \code{cowplot::draw_plot(height = )}. Default is 0.65.
#'
#' @return A list containing:
#'   \itemize{
#'     \item \code{plot}: the ECDF plot, or the ECDF plot with the boxplot
#'       drawn on top of it when \code{insert_box = TRUE}
#'     \item \code{statistics}: the result of
#'       \code{ggpubr::compare_means()} using \code{method = "wilcox.test"}
#'   }
#'
#' @details
#' The function performs the following steps:
#' \enumerate{
#'   \item Validates the inputs and selects the data frame matching
#'     \code{type}
#'   \item Converts the data from wide to long format
#'   \item Drops rows containing \code{NA} and rows with infinite values
#'   \item Keeps only the selected samples
#'   \item Runs Wilcoxon tests on the untransformed values
#'   \item Creates an ECDF plot of the log2-transformed values
#'   \item Optionally adds a boxplot insert using cowplot
#' }
#' Axis labels follow \code{type}. Note that log2 of a non-positive value is
#' not finite, so such values cannot be placed on the log2 axes.
#'
#' @note
#' This function requires the following packages:
#' \itemize{
#'   \item \code{ggplot2}
#'   \item \code{tidyr}
#'   \item \code{dplyr}
#'   \item \code{ggpubr}
#'   \item \code{cowplot} (only if \code{insert_box = TRUE})
#' }
#'
#' @examples
#' \dontrun{
#' # Basic ECDF plot for TE data
#' result <- ecdf_plot(normed_data = my_data,
#'                     type = "te",
#'                     samples_selected = c("sample1", "sample2", "sample3"),
#'                     ref_group = "sample1")
#'
#' # ECDF plot with boxplot insert and custom colors
#' result <- ecdf_plot(normed_data = my_data,
#'                     type = "te",
#'                     samples_selected = c("sample1", "sample2", "sample3"),
#'                     ref_group = "sample1",
#'                     colors = c("sample1" = "blue", "sample2" = "red",
#'                                "sample3" = "green"),
#'                     insert_box = TRUE,
#'                     x_pos = 0.5, y_pos = 0.3)
#'
#' # Access the plot and statistics
#' print(result$plot)
#' print(result$statistics)
#' }
#'
#' @seealso
#' \code{\link[ggplot2]{stat_ecdf}}, \code{\link[ggpubr]{compare_means}},
#' \code{\link[cowplot]{ggdraw}}
#'
#' @export
ecdf_plot <- function(normed_data = NULL,
                      type = c("rna", "ribo", "te"),
                      samples_selected = NULL,
                      ref_group = NULL,
                      colors = NULL,
                      ecdf_xlim = NULL,
                      box_ylim = NULL,
                      insert_box = FALSE,
                      x_pos = 0.45, y_pos = 0.4,
                      width = 0.2, height = 0.65) {
  # ===========================================================================
  # check input
  # Collapse `type` to one valid string. Comparing the length-3 default with
  # `==` inside `if ()` is an error in current R versions.
  type <- match.arg(type)

  if (!is.list(normed_data)) {
    stop("`normed_data` must be a list of data frames ",
         "(tpm.rna, tpm.ribo, te).",
         call. = FALSE)
  }

  element <- switch(type,
                    rna = "tpm.rna",
                    ribo = "tpm.ribo",
                    te = "te")
  df <- normed_data[[element]]
  if (!is.data.frame(df)) {
    stop("normed_data$", element, " is missing or is not a data frame.",
         call. = FALSE)
  }

  id_cols <- c("gene_id", "gene_name", "gene_biotype")
  missing_cols <- setdiff(id_cols, colnames(df))
  if (length(missing_cols) > 0) {
    stop("normed_data$", element, " lacks required column(s): ",
         paste(missing_cols, collapse = ", "),
         call. = FALSE)
  }

  # Every non-annotation column is a sample; NULL means "use all of them".
  sample_cols <- setdiff(colnames(df), id_cols)
  if (is.null(samples_selected)) {
    samples_selected <- sample_cols
  }
  unknown_samples <- setdiff(samples_selected, sample_cols)
  if (length(unknown_samples) > 0) {
    warning("Ignoring samples not found in the data: ",
            paste(unknown_samples, collapse = ", "),
            call. = FALSE)
  }

  if (!is.null(ref_group)) {
    # ".all." is a special reference value understood by compare_means().
    valid_refs <- c(intersect(samples_selected, sample_cols), ".all.")
    if (length(ref_group) != 1 || !ref_group %in% valid_refs) {
      stop("`ref_group` must be one of the selected samples or \".all.\".",
           call. = FALSE)
    }
  }

  # Fail early on missing optional packages, before any work is done.
  if (!requireNamespace("ggpubr", quietly = TRUE)) {
    stop("Package 'ggpubr' is required. Please install it.")
  }
  if (isTRUE(insert_box) && !requireNamespace("cowplot", quietly = TRUE)) {
    stop("Package 'cowplot' is required. Please install it.")
  }

  # ===========================================================================
  # wide format to long
  df.long <- tidyr::pivot_longer(df,
                                 cols = -c(gene_id, gene_name, gene_biotype),
                                 names_to = "sample",
                                 values_to = "value")
  df.long <- stats::na.omit(df.long)
  df.long <- dplyr::filter(df.long, !is.infinite(value))

  # filter samples
  df.long <- df.long[df.long$sample %in% samples_selected, , drop = FALSE]
  if (nrow(df.long) == 0) {
    stop("No data left after removing NA/infinite values and ",
         "filtering samples.",
         call. = FALSE)
  }

  # ===========================================================================
  # statistics
  stcs <- ggpubr::compare_means(data = df.long,
                                formula = value ~ sample,
                                ref.group = ref_group,
                                method = "wilcox.test")

  # ===========================================================================
  # plot
  # Optional layers stay NULL when unset; adding NULL to a ggplot is a no-op.
  if (is.null(colors)) {
    cols <- NULL
    cols2 <- NULL
  } else {
    cols <- scale_color_manual(values = colors, name = "")
    cols2 <- scale_fill_manual(values = colors, name = "")
  }

  if (is.null(ecdf_xlim)) {
    xlims <- NULL
  } else {
    xlims <- xlim(ecdf_xlim)
  }

  if (is.null(box_ylim)) {
    ylims <- NULL
  } else {
    ylims <- ylim(box_ylim)
  }

  # Axis titles follow the data type instead of always saying "TE".
  axis_labels <- switch(
    type,
    rna = c(ecdf = "log2(RNA TPM)", box = "log2(TPM)"),
    ribo = c(ecdf = "log2(Ribo TPM)", box = "log2(TPM)"),
    te = c(ecdf = "log2(Translation efficiency)", box = "log2(TE)")
  )

  ecdf <-
    ggplot(df.long) +
    stat_ecdf(aes(x = log2(value), color = sample), linewidth = 0.3) +
    theme_bw() +
    theme(panel.grid = element_blank(),
          aspect.ratio = 1,
          strip.text = element_text(face = "bold", size = rel(1)),
          axis.text = element_text(colour = "black")) +
    cols + xlims +
    ylab("Cumulative Fraction") +
    xlab(axis_labels[["ecdf"]])

  if (!isTRUE(insert_box)) {
    return(list(plot = ecdf, statistics = stcs))
  }

  # ===========================================================================
  # insert boxplot
  ecdf.box <-
    ggplot(df.long) +
    geom_boxplot(aes(x = sample, y = log2(value), fill = sample),
                 width = 0.6, notch = TRUE,
                 outliers = FALSE, show.legend = FALSE) +
    theme_bw() +
    theme(panel.grid = element_blank(),
          axis.text = element_blank(),
          plot.background = element_blank(),
          axis.ticks = element_blank(),
          strip.text = element_text(face = "bold", size = rel(1))) +
    cols2 + ylims +
    xlab("") + ylab(axis_labels[["box"]])

  # Draw the boxplot on top of the ECDF canvas at the requested position.
  p <- cowplot::ggdraw(ecdf) +
    cowplot::draw_plot(plot = ecdf.box,
                       x = x_pos, y = y_pos,
                       width = width, height = height,
                       vjust = 0.5, hjust = 0)

  list(plot = p, statistics = stcs)
}
0 commit comments