|
| 1 | +###### Setup ###### |
| 2 | +library(data.table) |
| 3 | +library(ggplot2) |
| 4 | +source("/opt/Plot_Functions.R") |
| 5 | + |
| 6 | +arguments <- commandArgs(trailingOnly = TRUE) |
| 7 | +distance_file_name <- arguments[[1]] |
| 8 | +shuffled_distances_file_name <- arguments[[2]] |
| 9 | +heterotypic_metadata_file_name <- arguments[[3]] |
| 10 | +sample_metadata_file_name <- arguments[[4]] |
| 11 | +output_folder <- arguments[[5]] |
| 12 | + |
| 13 | +######## Distances ####### |
| 14 | +distances <- fread(distance_file_name) |
| 15 | +samples <- fread(sample_metadata_file_name) |
| 16 | +metadata <- fread(heterotypic_metadata_file_name) |
| 17 | + |
| 18 | +samples[is.na(comparison), comparison := "NA" ] |
| 19 | +suppressWarnings(distances[, color := NULL]) |
| 20 | +suppressWarnings(distances[, comparison := NULL]) |
| 21 | +distances <- merge(distances, samples, by.x = "Metadata_sample_name", by.y = "sample_name") |
| 22 | +distances <- distances[comparison != "NA",] |
| 23 | +distances[, pair := paste0(spatial_analysis_cell_type1, "-", spatial_analysis_cell_type2)] |
| 24 | + |
| 25 | +metadata[, pair := paste0(cell_type1, "-", cell_type2)] |
| 26 | +pairs <- metadata$pair |
| 27 | + |
| 28 | +pair_colors <- metadata[, color] |
| 29 | +names(pair_colors) <- pairs |
| 30 | + |
| 31 | +comps <- sort(unique(samples[!is.na(comparison), comparison])) |
| 32 | +n_categories <- length(comps) |
| 33 | +######## Shuffled Distances ####### |
| 34 | +shuffled <- fread(shuffled_distances_file_name) |
| 35 | + |
| 36 | +suppressWarnings(shuffled[, color := NULL]) |
| 37 | +suppressWarnings(shuffled[, comparison := NULL]) |
| 38 | +shuffled <- merge(shuffled, samples, by.x = "Metadata_sample_name", by.y = "sample_name") |
| 39 | +shuffled <- shuffled[comparison != "NA",] |
| 40 | +shuffled[, pair := paste0(spatial_analysis_cell_type1, "-", spatial_analysis_cell_type2)] |
| 41 | + |
| 42 | +################# Median Distances ######################## |
| 43 | +median_distances_ALL <- unique(distances[, .(median_distance = median(distance)), by = pair]) |
| 44 | +median_distances_COMPARISON <- unique(distances[, .(median_distance = median(distance)), by = c("pair", "comparison")]) |
| 45 | + |
| 46 | +median_shuffled_ALL <- unique(shuffled[, .(median_distance = median(distance)), by = c("pair", "permutation")]) |
| 47 | +median_shuffled_COMPARISON <- unique(shuffled[, .(median_distance = median(distance)), by = c("pair", "comparison", "permutation")]) |
| 48 | + |
| 49 | +if (n_categories == 2){ |
| 50 | + median_distances_DIFFERENCE <- dcast(median_distances_COMPARISON, pair ~ comparison, value.var = "median_distance") |
| 51 | + median_distances_DIFFERENCE <- median_distances_DIFFERENCE[, .(pair, difference = get(comps[[1]]) - get(comps[[2]]))] |
| 52 | + median_shuffled_DIFFERENCE <- dcast(median_shuffled_COMPARISON, pair + permutation ~ comparison, value.var = "median_distance") |
| 53 | + median_shuffled_DIFFERENCE <- median_shuffled_DIFFERENCE[, .(pair, difference = get(comps[[1]]) - get(comps[[2]])), by = permutation] |
| 54 | +} |
| 55 | + |
| 56 | +######################### Permutation tests ######################### |
| 57 | +for (my_comp in comps){ |
| 58 | + for(my_pair in pairs){ |
| 59 | + median_distances_COMPARISON[pair == my_pair & comparison == my_comp, pval := |
| 60 | + mean(abs(median_shuffled_COMPARISON[pair == my_pair & comparison == my_comp, median_distance]) > abs(median_distance))] |
| 61 | + } |
| 62 | +} |
| 63 | +median_distances_COMPARISON[, fdr := p.adjust(pval, "BH"), by = comparison] |
| 64 | + |
| 65 | +for(my_pair in pairs){ |
| 66 | + median_distances_ALL[pair == my_pair, |
| 67 | + pval := mean(abs(median_shuffled_ALL[pair == my_pair, median_distance]) > abs(median_distance))] |
| 68 | +} |
| 69 | +median_distances_ALL[, fdr := p.adjust(pval, "BH")] |
| 70 | + |
| 71 | +if (length(comps) == 2){ |
| 72 | + for(my_pair in pairs){ |
| 73 | + median_distances_DIFFERENCE[pair == my_pair, |
| 74 | + pval := mean(abs(median_shuffled_DIFFERENCE[pair == my_pair, difference]) > abs(difference))] |
| 75 | + } |
| 76 | + median_distances_DIFFERENCE[, fdr := p.adjust(pval, "BH")] |
| 77 | +} |
| 78 | + |
| 79 | +fwrite(x = median_distances_ALL, file = paste0(output_folder,"/median_distances_ALL.csv")) |
| 80 | +fwrite(x = median_shuffled_ALL, file = paste0(output_folder,"/median_shuffled_ALL.csv")) |
| 81 | +fwrite(x = median_distances_COMPARISON, file = paste0(output_folder,"/median_distances_COMPARISON.csv")) |
| 82 | +fwrite(x = median_shuffled_COMPARISON, file = paste0(output_folder,"/median_shuffled_COMPARISON.csv")) |
| 83 | +if (n_categories == 2){ |
| 84 | + fwrite(x = median_distances_DIFFERENCE, file = paste0(output_folder,"/median_distances_DIFFERENCE.csv")) |
| 85 | + fwrite(x = median_shuffled_DIFFERENCE, file = paste0(output_folder,"/median_shuffled_DIFFERENCE.csv")) |
| 86 | +} |
| 87 | + |
| 88 | +################### Permutation Plots ######################## |
| 89 | +for (my_pair in pairs){ |
| 90 | + out_dir <- paste0(output_folder, "/Permutations/", my_pair) |
| 91 | + dir.create(out_dir, recursive = T, showWarnings = F) |
| 92 | + observed_plot_data <- median_distances_ALL[pair == my_pair, .(pair, median_distance, fdr)] |
| 93 | + random_plot_data <- median_shuffled_ALL[pair == my_pair, .(pair, permutation, median_distance)] |
| 94 | + my_plot <- permutation_density(random_plot_data, observed_plot_data, "median_distance", my_pair, |
| 95 | + "Median Distance", "Permutations", pair_colors[[my_pair]]) |
| 96 | + pdf_plotter(filename = paste0(out_dir, "/", my_pair, "-all-heterotypic_permutations.pdf"), plot = my_plot) |
| 97 | + if(n_categories >= 1){ |
| 98 | + my_plots <- lapply(comps, function(my_comp){ |
| 99 | + observed_plot_data <- median_distances_COMPARISON[comparison == my_comp & pair == my_pair, |
| 100 | + .(comparison, pair, median_distance, fdr)] |
| 101 | + random_plot_data <- median_shuffled_COMPARISON[comparison == my_comp & pair == my_pair, |
| 102 | + .(comparison, pair, permutation, median_distance)] |
| 103 | + permutation_density(random_plot_data, observed_plot_data, "median_distance", paste0(my_comp, " ", my_pair), |
| 104 | + "Median Distance", "Permutations", pair_colors[[my_pair]]) |
| 105 | + }) |
| 106 | + multi_pdf_plotter(filename = paste0(out_dir, "/", my_pair, "-category-heterotypic_permutations.pdf"), |
| 107 | + plots = my_plots, n_row = 2, n_col = 1) |
| 108 | + if(n_categories == 2){ |
| 109 | + observed_plot_data <- median_distances_DIFFERENCE[pair == my_pair, .(pair, difference, fdr)] |
| 110 | + random_plot_data <- median_shuffled_DIFFERENCE[pair == my_pair, .(pair, permutation, difference)] |
| 111 | + my_plot <- permutation_density(random_plot_data, observed_plot_data, "difference", my_pair, |
| 112 | + "Median Distance Difference", "Permutations", pair_colors[[my_pair]]) |
| 113 | + pdf_plotter(filename = paste0(out_dir, "/", my_pair, "-difference-heterotypic_permutations.pdf"), plot = my_plot) |
| 114 | + } |
| 115 | + } |
| 116 | +} |
0 commit comments