Skip to content

Commit 3f230f2

Browse files
committed
Add permutation test and visualisation for heterotypic interactions
1 parent 7d18d94 commit 3f230f2

File tree

8 files changed

+312
-19
lines changed

8 files changed

+312
-19
lines changed

main.nf

Lines changed: 31 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,14 +19,15 @@ include {threshold_expression} from "$script_folder/workflows.nf"
1919

2020
include {analyse_homotypic_interactions} from "$script_folder/workflows.nf"
2121
include {calculate_heterotypic_distances} from "$script_folder/workflows.nf"
22+
include {permute_heterotypic_interactions} from "$script_folder/workflows.nf"
2223

2324
include {visualize_areas} from "$script_folder/workflows.nf"
2425
include {visualize_cell_types} from "$script_folder/workflows.nf"
2526
include {visualize_cell_clusters} from "$script_folder/workflows.nf"
2627
include {visualize_cell_thresholds} from "$script_folder/workflows.nf"
2728
include {visualize_homotypic_interactions} from "$script_folder/workflows.nf"
2829
include {visualize_heterotypic_interactions} from "$script_folder/workflows.nf"
29-
30+
include {visualize_permuted_interactions} from "$script_folder/workflows.nf"
3031

3132
workflow {
3233
sample_names = channel.fromPath(params.sample_metadata_file).splitCsv(header: true).map{row -> row.sample_name}
@@ -169,9 +170,20 @@ workflow {
169170
coord_selecter_map[row.cell_file2]?.get(),
170171
row.cell_type_column2,
171172
row.cell_type2)}
172-
calculate_heterotypic_distances(heterotypic_metadata)
173+
calculate_heterotypic_distances(heterotypic_metadata, params.heterotypic_interactions_metadata)
173174
}
174-
175+
176+
if(!params.skip_permuted_interactions){
177+
if(!params.skip_heterotypic_interactions){
178+
heterotypic_interactions_file = calculate_heterotypic_distances.out.collected_heterotypic_interactions
179+
}
180+
else{
181+
heterotypic_interactions_file = params.heterotypic_interactions_file
182+
}
183+
permute_heterotypic_interactions(params.permutations, heterotypic_interactions_file,
184+
params.heterotypic_interactions_metadata, params.sample_metadata_file)
185+
}
186+
175187
if(!params.skip_visualization){
176188
if(!params.skip_area_visualization){
177189
if(params.skip_area){
@@ -271,5 +283,21 @@ workflow {
271283
visualize_heterotypic_interactions(heterotypic_interactions_file,
272284
params.heterotypic_interactions_metadata, params.sample_metadata_file)
273285
}
286+
if(!params.skip_permuted_visualization){
287+
if(!params.skip_permuted_interactions){
288+
shuffled_interactions_file = permute_heterotypic_interactions.out.permuted_heterotypic_interactions
289+
}
290+
else{
291+
shuffled_interactions_file = params.shuffled_interactions_file
292+
}
293+
if(!params.skip_heterotypic_interactions){
294+
heterotypic_interactions_file = calculate_heterotypic_distances.out.collected_heterotypic_interactions
295+
}
296+
else{
297+
heterotypic_interactions_file = params.heterotypic_interactions_file
298+
}
299+
visualize_permuted_interactions(heterotypic_interactions_file, shuffled_interactions_file,
300+
params.heterotypic_interactions_metadata, params.sample_metadata_file)
301+
}
274302
}
275303
}

scripts/Heterotypic_spatial_plotting.R

Lines changed: 17 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ heterotypic_metadata_file_name <- arguments[[2]]
99
sample_metadata_file_name <- arguments[[3]]
1010
output_folder <- arguments[[4]]
1111

12-
######## Cell data #######
12+
######## Distances #######
1313
distances <- fread(distance_file_name)
1414
samples <- fread(sample_metadata_file_name)
1515
metadata <- fread(heterotypic_metadata_file_name)
@@ -22,26 +22,28 @@ distances <- distances[comparison != "NA",]
2222
distances[, pair := paste0(spatial_analysis_cell_type1, "-", spatial_analysis_cell_type2)]
2323

2424
metadata[, pair := paste0(cell_type1, "-", cell_type2)]
25-
tests <- metadata$pair
25+
pairs <- metadata$pair
2626

27-
test_colors <- metadata[, color]
28-
names(test_colors) <- tests
27+
pair_colors <- metadata[, color]
28+
names(pair_colors) <- pairs
2929

30-
n_categories <- length(unique(samples[comparison != "NA", comparison]))
30+
comps <- sort(unique(samples[!is.na(comparison), comparison]))
31+
n_categories <- length(comps)
3132

32-
for (test in tests){
33-
out_dir <- paste0(output_folder, "/", test)
33+
######## Distance plots #################
34+
for (my_pair in pairs){
35+
out_dir <- paste0(output_folder, "/Distance/", my_pair)
3436
dir.create(out_dir, recursive = T, showWarnings = F)
35-
plot_data <- distances[pair == test ,]
36-
my_plot <- histogram_density(plot_data, "distance", test, "Distance", "Cells", category_col = NULL,
37-
geom = "density", color = test_colors[[test]],fill = test_colors[[test]], alpha = 0.3, log_x = T, log_y = T,
37+
plot_data <- distances[pair == my_pair,]
38+
my_plot <- histogram_density(plot_data, "distance", my_pair, "Distance", "Cells", category_col = NULL,
39+
geom = "density", color = pair_colors[[my_pair]], fill = pair_colors[[my_pair]], alpha = 0.3, log_x = T, log_y = T,
3840
x_breaks = c(1, 10, 100, 500, 1000))
39-
pdf_plotter(filename = paste0(out_dir, "/", test, "-all-heterotypic.pdf"), plot = my_plot)
41+
pdf_plotter(filename = paste0(out_dir, "/", my_pair, "-all-heterotypic.pdf"), plot = my_plot)
4042
if(n_categories >= 1){
41-
my_plot <- histogram_density(plot_data, "distance", test, "Distance", "Cells", category_col = "comparison",
42-
geom = "density", color = "black", fill = "grey", alpha = 0.3, log_x = T, log_y = T,
43-
x_breaks = c(1, 10, 100, 500, 1000))
44-
pdf_plotter(filename = paste0(out_dir, "/", test, "-by_category-heterotypic.pdf"), plot = my_plot)
43+
my_plot <- histogram_density(plot_data, "distance", my_pair, "Distance", "Cells", category_col = "comparison",
44+
geom = "density", color = "black", fill = "grey", alpha = 0.3, log_x = T, log_y = T,
45+
x_breaks = c(1, 10, 100, 500, 1000))
46+
pdf_plotter(filename = paste0(out_dir, "/", my_pair, "-by_category-heterotypic.pdf"), plot = my_plot)
4547
}
4648
}
4749

scripts/Permuted_spatial_plotting.R

Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
###### Setup ######
2+
library(data.table)
3+
library(ggplot2)
4+
source("/opt/Plot_Functions.R")
5+
6+
# Positional command-line arguments, in order:
#   1. CSV of observed heterotypic distances
#   2. CSV of permuted (shuffled) heterotypic distances
#   3. CSV describing the cell-type pairs to analyse (cell_type1, cell_type2, color)
#   4. CSV of sample metadata (sample_name, comparison)
#   5. folder that receives the tables and plots
arguments <- commandArgs(trailingOnly = TRUE)
distance_file_name <- arguments[[1]]
shuffled_distances_file_name <- arguments[[2]]
heterotypic_metadata_file_name <- arguments[[3]]
sample_metadata_file_name <- arguments[[4]]
output_folder <- arguments[[5]]

######## Distances #######
distances <- fread(distance_file_name)
samples <- fread(sample_metadata_file_name)
metadata <- fread(heterotypic_metadata_file_name)

# Samples without a comparison group are tagged with the literal string "NA"
# so that they can be dropped with an ordinary string comparison below.
samples[is.na(comparison), comparison := "NA"]

# Discard any annotation columns already present in the distance table so the
# merge with the sample metadata is the single source for them. Removing a
# column that does not exist only warns, hence the suppressWarnings().
suppressWarnings(distances[, color := NULL])
suppressWarnings(distances[, comparison := NULL])
distances <- merge(distances, samples, by.x = "Metadata_sample_name", by.y = "sample_name")
distances <- distances[comparison != "NA", ]
distances[, pair := paste0(spatial_analysis_cell_type1, "-", spatial_analysis_cell_type2)]

# Pairs to analyse and the colour used to draw each of them.
metadata[, pair := paste0(cell_type1, "-", cell_type2)]
pairs <- metadata$pair

pair_colors <- metadata[, color]
names(pair_colors) <- pairs

# Comparison groups that actually take part in the analysis.
# The missing labels were recoded to the string "NA" above, so is.na() can no
# longer detect them: filtering with !is.na() kept "NA" as a group, inflated
# n_categories and broke the two-group difference test (which then looked for
# a "NA" column that had been filtered out of the distances).
comps <- sort(unique(samples[comparison != "NA", comparison]))
n_categories <- length(comps)
33+
######## Shuffled Distances #######
# The permuted table is annotated exactly like the observed one: stale
# annotation columns are dropped, the sample metadata is merged in, samples
# without a comparison group are removed and the pair label is rebuilt from
# the (shuffled) cell-type columns.
shuffled <- fread(shuffled_distances_file_name)

for (stale_column in c("color", "comparison")) {
  # Removing a column that is absent only warns; that warning is not of interest.
  suppressWarnings(shuffled[, (stale_column) := NULL])
}

shuffled <- merge(
  shuffled, samples,
  by.x = "Metadata_sample_name", by.y = "sample_name"
)
shuffled <- shuffled[comparison != "NA", ]
shuffled[, pair := paste0(spatial_analysis_cell_type1, "-", spatial_analysis_cell_type2)]
41+
42+
################# Median Distances ########################
# Grouped aggregation already returns exactly one row per key combination,
# so no further de-duplication is required.
median_distances_ALL <- distances[, .(median_distance = median(distance)), by = pair]
median_distances_COMPARISON <- distances[, .(median_distance = median(distance)),
                                         by = c("pair", "comparison")]

median_shuffled_ALL <- shuffled[, .(median_distance = median(distance)),
                                by = c("pair", "permutation")]
median_shuffled_COMPARISON <- shuffled[, .(median_distance = median(distance)),
                                       by = c("pair", "comparison", "permutation")]

# With exactly two comparison groups the between-group difference of the
# medians (first group minus second group, in sorted order) is tested as well.
two_groups <- n_categories == 2
if (two_groups) {
  observed_wide <- dcast(median_distances_COMPARISON, pair ~ comparison,
                         value.var = "median_distance")
  median_distances_DIFFERENCE <- observed_wide[
    , .(pair, difference = get(comps[[1]]) - get(comps[[2]]))]

  shuffled_wide <- dcast(median_shuffled_COMPARISON, pair + permutation ~ comparison,
                         value.var = "median_distance")
  median_shuffled_DIFFERENCE <- shuffled_wide[
    , .(pair, difference = get(comps[[1]]) - get(comps[[2]])), by = permutation]
}

######################### Permutation tests #########################
# Empirical p-value: fraction of permuted statistics whose magnitude is
# strictly larger than the magnitude of the observed statistic.
# NOTE(review): this raw fraction can be exactly 0; the commonly recommended
# permutation estimator is (b + 1) / (m + 1) - confirm which one is intended.
exceedance_fraction <- function(null_values, observed_value) {
  mean(abs(null_values) > abs(observed_value))
}

# Per pair and comparison group.
for (my_pair in pairs) {
  for (my_comp in comps) {
    null_medians <- median_shuffled_COMPARISON[pair == my_pair & comparison == my_comp,
                                               median_distance]
    median_distances_COMPARISON[pair == my_pair & comparison == my_comp,
                                pval := exceedance_fraction(null_medians, median_distance)]
  }
}
median_distances_COMPARISON[, fdr := p.adjust(pval, "BH"), by = comparison]

# Per pair, all retained samples pooled.
for (my_pair in pairs) {
  null_medians <- median_shuffled_ALL[pair == my_pair, median_distance]
  median_distances_ALL[pair == my_pair,
                       pval := exceedance_fraction(null_medians, median_distance)]
}
median_distances_ALL[, fdr := p.adjust(pval, "BH")]

# Per pair, difference between the two comparison groups.
if (two_groups) {
  for (my_pair in pairs) {
    null_differences <- median_shuffled_DIFFERENCE[pair == my_pair, difference]
    median_distances_DIFFERENCE[pair == my_pair,
                                pval := exceedance_fraction(null_differences, difference)]
  }
  median_distances_DIFFERENCE[, fdr := p.adjust(pval, "BH")]
}

# Export the observed statistics (with p-values and FDR) and the permuted ones.
fwrite(median_distances_ALL,
       file = paste0(output_folder,"/median_distances_ALL.csv"))
fwrite(median_shuffled_ALL,
       file = paste0(output_folder,"/median_shuffled_ALL.csv"))
fwrite(median_distances_COMPARISON,
       file = paste0(output_folder,"/median_distances_COMPARISON.csv"))
fwrite(median_shuffled_COMPARISON,
       file = paste0(output_folder,"/median_shuffled_COMPARISON.csv"))
if (two_groups) {
  fwrite(median_distances_DIFFERENCE,
         file = paste0(output_folder,"/median_distances_DIFFERENCE.csv"))
  fwrite(median_shuffled_DIFFERENCE,
         file = paste0(output_folder,"/median_shuffled_DIFFERENCE.csv"))
}
87+
88+
################### Permutation Plots ########################
# For every pair: one plot with all samples pooled, one multi-panel file with a
# panel per comparison group and, with exactly two groups, one plot of the
# between-group difference. Everything goes to <output_folder>/Permutations/<pair>.
for (my_pair in pairs) {
  pair_color <- pair_colors[[my_pair]]
  out_dir <- paste0(output_folder, "/Permutations/", my_pair)
  dir.create(out_dir, recursive = TRUE, showWarnings = FALSE)

  # All samples pooled.
  pooled_observed <- median_distances_ALL[pair == my_pair, .(pair, median_distance, fdr)]
  pooled_random <- median_shuffled_ALL[pair == my_pair, .(pair, permutation, median_distance)]
  pooled_plot <- permutation_density(pooled_random, pooled_observed, "median_distance", my_pair,
                                     "Median Distance", "Permutations", pair_color)
  pdf_plotter(filename = paste0(out_dir, "/", my_pair, "-all-heterotypic_permutations.pdf"),
              plot = pooled_plot)

  # Nothing else to draw when there is no comparison group at all.
  if (n_categories < 1) {
    next
  }

  # One panel per comparison group.
  group_plots <- lapply(comps, function(my_comp) {
    group_observed <- median_distances_COMPARISON[comparison == my_comp & pair == my_pair,
                                                  .(comparison, pair, median_distance, fdr)]
    group_random <- median_shuffled_COMPARISON[comparison == my_comp & pair == my_pair,
                                               .(comparison, pair, permutation, median_distance)]
    permutation_density(group_random, group_observed, "median_distance",
                        paste0(my_comp, " ", my_pair),
                        "Median Distance", "Permutations", pair_color)
  })
  multi_pdf_plotter(filename = paste0(out_dir, "/", my_pair, "-category-heterotypic_permutations.pdf"),
                    plots = group_plots, n_row = 2, n_col = 1)

  # Difference between the two comparison groups.
  if (n_categories == 2) {
    difference_observed <- median_distances_DIFFERENCE[pair == my_pair, .(pair, difference, fdr)]
    difference_random <- median_shuffled_DIFFERENCE[pair == my_pair, .(pair, permutation, difference)]
    difference_plot <- permutation_density(difference_random, difference_observed, "difference", my_pair,
                                           "Median Distance Difference", "Permutations", pair_color)
    pdf_plotter(filename = paste0(out_dir, "/", my_pair, "-difference-heterotypic_permutations.pdf"),
                plot = difference_plot)
  }
}

scripts/Plot_Functions.R

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -295,6 +295,28 @@ histogram_density <- function(data, marker_col, plot_title, x_label, y_label,
295295
return(plt)
296296
}
297297

298+
################# Permutation plot ###############################################
299+
# Draw the distribution of a permuted statistic with the observed value overlaid.
#
# random_data    data.table with one row per permutation; must contain `x_col`.
# observed_data  data.table with the observed statistic; must contain `x_col`
#                and an `fdr` column (shown in the subtitle).
# x_col          name (string) of the column holding the statistic in both tables.
# plot_title     plot title.
# x_label        x axis title.
# y_label        y axis title.
# color          colour of the permutation histogram, its outline and its median line.
#
# Returns the ggplot object. Stops with an explicit message when there is no
# finite value to plot (an empty selection or missing values), instead of
# failing inside seq() with an unrelated error.
permutation_density <- function(random_data, observed_data, x_col, plot_title, x_label, y_label, color)
{
  # Work on copies: setnames() modifies by reference and must not rename the
  # columns of the caller's tables.
  random_plot_data <- copy(random_data)
  observed_plot_data <- copy(observed_data)
  setnames(random_plot_data, x_col, "x_col")
  setnames(observed_plot_data, x_col, "x_col")

  x_values <- c(random_plot_data$x_col, observed_plot_data$x_col)
  if (length(x_values) == 0 || !all(is.finite(x_values))) {
    stop("permutation_density: column '", x_col, "' has no data or contains ",
         "non-finite values for plot '", plot_title, "'", call. = FALSE)
  }

  # Unit-spaced breaks across the plotted range. round() rounds halves to the
  # even neighbour (1.5 and 2.5 both become 2), so duplicates are removed.
  my_breaks <- unique(round(seq(min(x_values), max(x_values))))

  plt <- ggplot(random_plot_data) +
    # Permutation distribution: outline plus translucent histogram on the same bins.
    geom_line(aes(x = x_col, y = ..count..), stat = 'bin', bins = 50, colour = color) +
    geom_histogram(aes(x = x_col, y = ..count..), alpha = 0.2, bins = 50, fill = color) +
    # Solid line: median of the permutations; dashed black line: observed value.
    geom_vline(aes(xintercept = median(x_col)), colour = color) +
    geom_vline(data = observed_plot_data, aes(xintercept = median(x_col)), color = "black", linetype = "dashed") +
    scale_x_continuous(name = x_label, breaks = my_breaks) +
    scale_y_continuous(name = y_label) +
    theme_classic() + theme(plot.margin = margin(t = 0, r = 0, b = 0, l = 0,
      unit = "pt"), axis.text = element_text(size = 8), axis.title = element_text(size = 8),
      legend.text = element_text(size = 8), legend.title = element_blank()) +
    # round_format_n() is a formatting helper defined elsewhere in this file.
    labs(title = plot_title, subtitle = paste0("FDR = ", round_format_n(observed_plot_data$fdr)))

  # Explicit return, as in histogram_density(): the value of a bare assignment
  # is returned invisibly.
  return(plt)
}
319+
298320
################## PDF Output ##################
299321
single_pdf_w <- 8.27 / 2 # About 1/6th of an A4 (inches)
300322
single_pdf_h <- 11.69 / 3

scripts/permute_distances.R

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
###### Setup ######
2+
library(data.table)
3+
library(ggplot2)
4+
5+
# Positional command-line arguments, in order:
#   1. number of permutations to generate
#   2. CSV of observed heterotypic distances
#   3. CSV describing the cell-type pairs (cell_type1, cell_type2)
#   4. CSV of sample metadata (sample_name, comparison)
#   5. folder that receives permuted_distances.csv
arguments <- commandArgs(trailingOnly = TRUE)
permutations <- suppressWarnings(as.integer(arguments[[1]]))
# 1:permutations would silently run two permutations for 0 and count
# downwards for negative values, so reject anything that is not >= 1.
if (is.na(permutations) || permutations < 1L) {
  stop("The number of permutations must be a positive integer, got: '",
       arguments[[1]], "'", call. = FALSE)
}
distance_file_name <- arguments[[2]]
heterotypic_metadata_file_name <- arguments[[3]]
sample_metadata_file_name <- arguments[[4]]
output_folder <- arguments[[5]]

######## Cell data #######
distances <- fread(distance_file_name)
samples <- fread(sample_metadata_file_name)
metadata <- fread(heterotypic_metadata_file_name)

# Samples without a comparison group are tagged "NA" and excluded below.
samples[is.na(comparison), comparison := "NA"]
# Removing a column that does not exist only warns; that warning is not of interest.
suppressWarnings(distances[, color := NULL])
suppressWarnings(distances[, comparison := NULL])
distances <- merge(distances, samples, by.x = "Metadata_sample_name", by.y = "sample_name")
distances[, pair := paste0(spatial_analysis_cell_type1, "-", spatial_analysis_cell_type2)]
distances <- distances[comparison != "NA", ]
# The pair metadata is read and labelled but not used any further in this script.
metadata[, pair := paste0(cell_type1, "-", cell_type2)]

################ Shuffled Distances ###########################
shuffled <- copy(distances)
# Drop the sample annotations brought in by the merge; only columns that are
# actually present are removed, so a sample sheet without them raises no warning.
annotation_columns <- intersect(c("color", "comparison"), names(shuffled))
if (length(annotation_columns) > 0) {
  shuffled[, (annotation_columns) := NULL]
}

# Each permutation reshuffles the two cell-type columns independently within
# every sample. Indexing with sample.int() draws the same permutation as
# sample(x) but avoids sample()'s special case for a single numeric value.
# No random seed is set here, so each run produces different permutations.
shuffled_all <- lapply(seq_len(permutations), function(n) {
  my_shuffle <- copy(shuffled)
  my_shuffle[, spatial_analysis_cell_type1 := spatial_analysis_cell_type1[sample.int(.N)],
             by = Metadata_sample_name]
  my_shuffle[, spatial_analysis_cell_type2 := spatial_analysis_cell_type2[sample.int(.N)],
             by = Metadata_sample_name]
  my_shuffle
})
# idcol numbers the permutations 1..permutations in a "permutation" column.
shuffled_all <- rbindlist(shuffled_all, idcol = "permutation")

dir.create(output_folder, recursive = TRUE, showWarnings = FALSE)
fwrite(file = paste0(output_folder, "/permuted_distances.csv"), x = shuffled_all)
41+

scripts/processes.nf

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -762,6 +762,32 @@ process collect_heterotypic_distances {
762762
"""
763763
}
764764

765+
// Generates the permuted heterotypic distance table.
// Runs /opt/permute_distances.R with, in order: the number of permutations,
// the distance file, the interaction metadata file, the sample metadata file
// and "./" as output folder. The container options bind $script_folder to /opt
// and the launch directory to /data.
process permute_heterotypic_distances {
766+
767+
label 'big_memory'
768+
// The emitted CSV is copied to <output_folder>/Heterotypic_interactions.
publishDir "$params.output_folder/Heterotypic_interactions", mode:'copy', overwrite: true
769+
container = 'library://michelebortol/default/simpli_r_bioconductor:cleaned'
770+
containerOptions = "--bind $script_folder:/opt,$workflow.launchDir/:/data"
771+
772+
// Inputs: permutation count (value) followed by the three input files.
input:
773+
val(permutations)
774+
path(distance_file_name)
775+
path(metadata_file_name)
776+
path(sample_file_name)
777+
778+
// Output: the single CSV the R script writes into the task directory.
output:
779+
path("permuted_distances.csv", emit: permuted_heterotypic_interactions)
780+
// stdout and stderr of the R script are redirected to
// permute_heterotypic_distances_log.txt, which is not a declared output.
script:
781+
"""
782+
Rscript --vanilla /opt/permute_distances.R \\
783+
$permutations \\
784+
$distance_file_name \\
785+
$metadata_file_name \\
786+
$sample_file_name \\
787+
./ > permute_heterotypic_distances_log.txt 2>&1
788+
"""
789+
}
790+
765791
process heterotypic_interaction_visualization {
766792

767793
label 'big_memory'
@@ -785,3 +811,29 @@ process heterotypic_interaction_visualization {
785811
./ > heterotypic_plotting_log.txt 2>&1
786812
"""
787813
}
814+
815+
// Plots the permutation-test results for the heterotypic interactions.
// Runs /opt/Permuted_spatial_plotting.R with, in order: the observed distance
// file, the permuted distance file, the interaction metadata file, the sample
// metadata file and "./" as output folder. The container options bind
// $script_folder to /opt and the launch directory to /data.
process permuted_interaction_visualization {
816+
817+
label 'big_memory'
818+
// The emitted PDFs are copied to <output_folder>/Plots/Heterotypic_Interaction_Plots.
publishDir "$params.output_folder/Plots/Heterotypic_Interaction_Plots", mode:'copy', overwrite: true
819+
container = 'library://michelebortol/default/simpli_r_bioconductor:cleaned'
820+
containerOptions = "--bind $script_folder:/opt,$workflow.launchDir/:/data"
821+
822+
// Inputs: observed distances, permuted distances, interaction metadata, sample metadata.
input:
823+
path(distance_file_name)
824+
path(shuffled_distance_file_name)
825+
path(metadata_file_name)
826+
path(sample_file_name)
827+
828+
// Output: every PDF found in a sub-folder of the task directory.
// NOTE(review): the plotting script added in this commit appears to also write
// median_*.csv tables (observed medians with p-values/FDR and permuted medians)
// into "./"; they are not declared here - confirm whether they should be emitted.
output:
829+
path("**/*.pdf", emit: permuted_interaction_plots)
830+
// stdout and stderr of the R script are redirected to
// permuted_plotting_log.txt, which is not a declared output.
script:
831+
"""
832+
Rscript --vanilla /opt/Permuted_spatial_plotting.R \\
833+
$distance_file_name \\
834+
$shuffled_distance_file_name \\
835+
$metadata_file_name \\
836+
$sample_file_name \\
837+
./ > permuted_plotting_log.txt 2>&1
838+
"""
839+
}

0 commit comments

Comments
 (0)