|
| 1 | +#### libraries #### |
| 2 | +library(data.table) |
| 3 | +library(tidyverse) |
| 4 | +library(patchwork) |
| 5 | +library(ggplot2) |
| 6 | +library(ggnewscale) |
| 7 | +# for interactive plotting |
| 8 | +library(plotly) |
| 9 | +library(htmlwidgets) |
| 10 | +# set plot base size |
| 11 | +theme_set(theme_minimal(base_size = 6)) |
| 12 | + |
| 13 | +#### configs #### |
| 14 | +# input |
| 15 | +sample_annotation_path <- snakemake@input[["sample_annotation"]] |
| 16 | +sample_annotation_w_QC_path <- snakemake@input[["sample_annotation_w_QC"]] |
| 17 | + |
| 18 | +# output |
| 19 | +sample_annotation_plot_path <- snakemake@output[["sample_annotation_plot"]] |
| 20 | +sample_annotation_html_path <- snakemake@output[["sample_annotation_html"]] |
| 21 | + |
| 22 | +#### load & prepare data #### |
| 23 | +# load data |
| 24 | +sample_annotation <- data.frame(fread(file.path(sample_annotation_path), header=TRUE), row.names=1) |
| 25 | +anno <- data.frame(fread(file.path(sample_annotation_w_QC_path), header=TRUE), row.names=1) |
| 26 | + |
| 27 | +# determine QC (pipeline provided) columns |
| 28 | +qc_cols <- setdiff(names(anno), names(sample_annotation)) |
| 29 | + |
| 30 | +# determine metadata (user provided) columns by removing non-numeric columns that are unique for each row (e.g., bam_file) |
| 31 | +sample_annotation <- sample_annotation %>% select(where(\(.x){ n <- dplyr::n_distinct(na.omit(.x)); (is.numeric(.x) || n < nrow(sample_annotation)) })) |
| 32 | +meta_cols <- names(sample_annotation) |
| 33 | + |
| 34 | +# drop columns with no variation (e.g., read_type) |
| 35 | +has_var <- function(x) length(unique(na.omit(x))) > 1 |
| 36 | +qc_cols <- keep(qc_cols, ~ has_var(anno[[.x]])) |
| 37 | +meta_cols <- keep(meta_cols, ~ has_var(anno[[.x]])) |
| 38 | + |
| 39 | +# collapse "duplicates" (ie redundant columns) only among non-numeric |
| 40 | +num_cols <- meta_cols[vapply(anno[meta_cols], is.numeric, logical(1))] |
| 41 | +cat_cols <- setdiff(meta_cols, num_cols) |
| 42 | +sig <- vapply(cat_cols, |
| 43 | + \(col) paste(match(anno[[col]], unique(anno[[col]])), collapse = "|"), |
| 44 | + character(1)) |
| 45 | +meta_cols <- c(cat_cols[!duplicated(sig)], num_cols) |
| 46 | + |
| 47 | +#### Z-score & cluster QC data #### |
| 48 | +qc_mat <- anno |> select(all_of(qc_cols)) |> scale() |> as.matrix() |
| 49 | + |
| 50 | +row_ord <- hclust(dist(qc_mat))$order |
| 51 | +col_ord <- hclust(dist(t(qc_mat)))$order |
| 52 | + |
| 53 | +qc_long <- as_tibble(qc_mat[row_ord, col_ord], rownames = "sample") |> |
| 54 | + pivot_longer(-sample, names_to = "metric", values_to = "z") |
| 55 | + |
| 56 | +#### prepare metadata for plotting #### |
| 57 | +meta_long <- anno[row_ord, meta_cols] %>% |
| 58 | + mutate(sample = rownames(anno)[row_ord]) %>% |
| 59 | + mutate(across(-sample, as.character)) %>% |
| 60 | + pivot_longer(-sample, names_to = "meta", values_to = "value") %>% |
| 61 | + group_by(meta) %>% |
| 62 | + mutate(num_val = suppressWarnings(as.numeric(value)), |
| 63 | + type = if (all(!is.na(num_val))) "numeric" else "factor", |
| 64 | + col = if (type[1] == "numeric") { |
| 65 | + scales::col_numeric("plasma", |
| 66 | + domain = range(num_val, na.rm = TRUE))(num_val) |
| 67 | + } else { |
| 68 | + pal <- scales::hue_pal(l = 65)(n_distinct(value)) |
| 69 | + setNames(pal, sort(unique(value)))[value] |
| 70 | + } |
| 71 | + ) %>% |
| 72 | + ungroup() %>% |
| 73 | + select(-num_val) |
| 74 | + |
| 75 | +#### embed ALL metadata into a tooltip string of interactive plot #### |
| 76 | +meta_txt <- anno[row_ord, meta_cols] %>% mutate(across(everything(), as.character)) |
| 77 | +meta_txt <- pmap_chr(meta_txt, \(...) { |
| 78 | + vals <- c(...) |
| 79 | + paste(paste(names(vals), vals, sep = ": "), collapse = "<br>") |
| 80 | + }) |
| 81 | +names(meta_txt) <- rownames(anno)[row_ord] |
| 82 | + |
| 83 | +#### plot heatmaps #### |
| 84 | + |
| 85 | +#### QC heatmap #### |
| 86 | + |
| 87 | +# add (un-scaled) metric values (raw) for tooltip of interactive plot |
| 88 | +qc_raw_long <- anno[row_ord, qc_cols] %>% |
| 89 | + mutate(sample = rownames(anno)[row_ord]) %>% |
| 90 | + pivot_longer(-sample, names_to = "metric", values_to = "raw") |
| 91 | + |
| 92 | +qc_long <- qc_long %>% left_join(qc_raw_long, by = c("sample", "metric")) |
| 93 | + |
| 94 | +# keep the clustered order in the plot and add hover-tooltip |
| 95 | +qc_long <- qc_long %>% |
| 96 | + mutate(sample = factor(sample, levels = rownames(qc_mat)[row_ord]), # row-order |
| 97 | + metric = factor(metric, levels = colnames(qc_mat)[col_ord]), # col-order |
| 98 | + hover = paste0("Sample: ", sample, |
| 99 | + "<br>Metric: ", metric, |
| 100 | + "<br>Value: ", signif(raw, 4), |
| 101 | + "<br>", meta_txt[as.character(sample)]) |
| 102 | + ) |
| 103 | + |
| 104 | +# plot |
| 105 | +p_qc <- ggplot(qc_long, aes(x = metric, |
| 106 | + y = sample, |
| 107 | + fill = z, |
| 108 | + text = hover)) + |
| 109 | + geom_tile() + |
| 110 | + scale_x_discrete(limits = colnames(qc_mat)[col_ord]) + # enforce col order |
| 111 | + scale_y_discrete(limits = rownames(qc_mat)[row_ord]) + # enforce row order |
| 112 | + scale_fill_gradient2(low = "blue", mid = "white", high = "red", name = "z-score", |
| 113 | + guide = guide_colourbar(barheight = 2, # thinner |
| 114 | + barwidth = 0.15)) + |
| 115 | + labs(x = NULL, y = NULL, title = "QC metrics (scaled)") + |
| 116 | + theme(axis.text.x = element_text(angle = 45, hjust = 1), |
| 117 | + panel.grid = element_blank()) |
| 118 | + |
| 119 | +#### metadata heatmap as "annotation" #### |
| 120 | +p_meta <- NULL |
| 121 | +if(length(meta_cols) > 0){ |
| 122 | + # order columns (x) exactly like the QC heatmap |
| 123 | + meta_long <- meta_long %>% mutate(sample = factor(sample, levels = rownames(qc_mat)[row_ord])) # row-order |
| 124 | + meta_levels <- unique(meta_long$meta) |
| 125 | + meta_long <- meta_long %>% mutate(meta = factor(meta, levels = meta_levels)) |
| 126 | + |
| 127 | + p_meta <- ggplot() + |
| 128 | + scale_y_discrete(limits = levels(qc_long$sample)) + |
| 129 | + scale_x_discrete(limits = meta_levels) + |
| 130 | + labs(x = NULL, y = NULL, title = "Metadata") + |
| 131 | + theme(axis.text.x = element_text(angle = 45, hjust = 1), |
| 132 | + axis.text.y = element_blank(), |
| 133 | + axis.title = element_blank(), |
| 134 | + panel.grid = element_blank()) |
| 135 | + |
| 136 | + for (v in meta_levels) { |
| 137 | + dat <- dplyr::filter(meta_long, meta == v) |
| 138 | + |
| 139 | + p_meta <- p_meta + ggnewscale::new_scale_fill() # reset “fill” for this column |
| 140 | + |
| 141 | + if (dat$type[1] == "numeric") { # continuous legend |
| 142 | + p_meta <- p_meta + |
| 143 | + geom_tile(data = dat, aes(x = meta, y = sample, fill = as.numeric(value)), colour = "grey60", linewidth = 0.1) + |
| 144 | + scale_fill_viridis_c(name = v, option = "plasma", |
| 145 | + guide = guide_colourbar(barheight = 2, # thinner |
| 146 | + barwidth = 0.15)) |
| 147 | + } else { # categorical legend |
| 148 | + pal <- setNames(dat$col, dat$value) |
| 149 | + p_meta <- p_meta + |
| 150 | + geom_tile(data = dat, aes(x = meta, y = sample, fill = value), colour = "grey60", linewidth = 0.1) + |
| 151 | + scale_fill_manual(values = pal, name = v, |
| 152 | + guide = guide_legend(keywidth = 0.25, # thinner |
| 153 | + keyheight = 0.4)) |
| 154 | + } |
| 155 | + } |
| 156 | +} |
| 157 | + |
| 158 | +#### combine and save plots #### |
| 159 | +p_combined <- if (is.null(p_meta)) p_qc else (p_qc | p_meta) + plot_layout(widths = c(length(qc_cols), length(meta_cols)), guides = "collect") & theme(legend.position = "right") |
| 160 | + |
| 161 | +# determine sizes |
| 162 | +n_rows <- nrow(qc_mat) |
| 163 | +n_cols <- length(qc_cols) + length(meta_cols) |
| 164 | +max_row_label <- max(nchar(rownames(anno))) |
| 165 | +max_col_label <- max(nchar(c(qc_cols, meta_cols))) |
| 166 | + |
| 167 | +height_in <- n_rows * 0.08 + max_col_label * 0.05 + 1 |
| 168 | +width_in <- n_cols * 0.10 + max_row_label * 0.05 + 2 |
| 169 | + |
| 170 | +# options(repr.plot.width = width_in, repr.plot.height = height_in) |
| 171 | +# p_combined |
| 172 | + |
| 173 | +ggsave(sample_annotation_plot_path, plot = p_combined, width = width_in, height = height_in, units = "in", dpi = 300) |
| 174 | + |
| 175 | +#### interactive plot #### |
| 176 | +# determine sizes in pixels |
| 177 | +width_px <- round((length(qc_cols) * 0.10 + max_row_label * 0.05 + 2) * 96) |
| 178 | +height_px <- round(height_in * 96) |
| 179 | + |
| 180 | +p_qc_interactive <- plotly::ggplotly(p_qc, tooltip = "text", width = width_px, height = height_px) |
| 181 | + |
| 182 | +# p_qc_interactive |
| 183 | + |
| 184 | +htmlwidgets::saveWidget(p_qc_interactive, sample_annotation_html_path, selfcontained = TRUE, title = "Sample annotation") |
0 commit comments