1+ library(tidyverse )
2+ library(forcats )
3+ library(HCAquery )
4+ library(dittoSeq )
5+ library(sccomp )
6+
7+ source(" https://gist.githubusercontent.com/stemangiola/fc67b08101df7d550683a5100106561c/raw/a0853a1a4e8a46baf33bad6268b09001d49faf51/ggplot_theme_multipanel" )
8+
9+ cell_metadata_with_harmonised_annotation = readRDS(" dev/cell_metadata_with_harmonised_annotation.rds" )
10+
11+ data_for_plot_1 =
12+ cell_metadata_with_harmonised_annotation | >
13+
14+ left_join(
15+ get_metadata(" dev/metadata.SQLite" ) | >
16+ select(.cell , is_primary_data.y , name , cell_type , file_id , assay ) | >
17+ as_tibble()
18+ )
19+
20+ # - Number of datasets per tissue
21+ plot_count_dataset =
22+ data_for_plot_1 | >
23+ distinct(file_id , tissue_harmonised ) | >
24+ count(tissue_harmonised , name = " Number of datasets" ) | >
25+ ggplot(aes(fct_reorder(tissue_harmonised , desc(`Number of datasets` )), `Number of datasets` )) +
26+ geom_bar(stat = " identity" ) +
27+ xlab(" Tissue" ) +
28+ ylab(" Number of datasets (log10)" ) +
29+ scale_y_log10() +
30+ theme_multipanel +
31+ theme(axis.text.x = element_text(angle = 90 , hjust = 1 , vjust = 0.5 ))
32+
33+ # - Number of samples per tissue
34+ plot_sample_dataset =
35+ data_for_plot_1 | >
36+ distinct(.sample , tissue_harmonised ) | >
37+ count(tissue_harmonised , name = " Number of samples" ) | >
38+ ggplot(aes(fct_reorder(tissue_harmonised , desc(`Number of samples` )), `Number of samples` )) +
39+ geom_bar(stat = " identity" ) +
40+ xlab(" Tissue" ) +
41+ ylab(" Number of samples (log10)" ) +
42+ scale_y_log10() +
43+ theme_multipanel +
44+ theme(axis.text.x = element_text(angle = 90 , hjust = 1 , vjust = 0.5 ))
45+
46+ # - Histogram of cells per sample
47+ plot_cell_dataset =
48+ data_for_plot_1 | >
49+ count(.sample , assay ) | >
50+ ggplot(aes(n )) +
51+ geom_histogram(aes(fill = assay ), bins = 100 ) +
52+ scale_fill_manual(values = dittoSeq :: dittoColors()) +
53+ xlab(" Number of cells in sample (log10)" ) +
54+ ylab(" Count instances" ) +
55+ scale_x_log10() +
56+ theme_multipanel
57+
58+ # - Immune proportion per tissue
59+ data_for_immune_proportion =
60+ cell_metadata_with_harmonised_annotation | >
61+
62+ left_join(
63+ get_metadata(" dev/metadata.SQLite" ) | >
64+ select(.cell , is_primary_data.y , name , cell_type , file_id ) | >
65+ as_tibble()
66+ ) | >
67+
68+ # # Filter only whole tissue
69+ # filter(
70+ # !name |> str_detect(regex('immune', ignore_case = T)) |
71+ # tissue_harmonised %in% c("blood", "lymph node", "bone") |
72+ # is_primary_data.y == "PRIMARY"
73+ # ) |>
74+
75+ # Filter Immune enriched dataset
76+ filter(file_id != " e756c34a-abe7-4822-9a35-55ef12270247" ) | >
77+ filter(file_id != " ca4a7d56-739b-4e3c-8ecd-28704914cc14" ) | >
78+ filter(file_id != " 59dfc135-19c1-4380-a9e8-958908273756" | tissue_harmonised != " intestine" ) | >
79+
80+ # nest(data = -c(.sample, tissue_harmonised)) |>
81+ # filter(map_int(data, ~ .x |> filter(cell_type_harmonised == "non_immune") |> nrow()) > 0 | tissue_harmonised %in% c("blood", "lymph node", "bone")) |>
82+ # unnest(data) |>
83+
84+ mutate(is_immune = cell_type_harmonised != " non_immune" ) | >
85+
86+ # Fix hematopoietic misclassificsation
87+ mutate(is_immune = if_else(! is_immune & cell_type | > str_detect(" hematopoietic" ), TRUE , is_immune )) | >
88+
89+ # Filter out
90+ filter(! cell_type | > str_detect(" erythrocyte" )) | >
91+ filter(! cell_type | > str_detect(" platelet" ))
92+
93+ data_for_immune_proportion_count =
94+ data_for_immune_proportion | >
95+
96+ # Stats
97+ count(.sample , tissue_harmonised , is_immune , file_id ) | >
98+ with_groups(.sample , ~ .x | > mutate(proportion = n / sum(n ), sum = sum(n ))) | >
99+ filter(is_immune ) | >
100+ with_groups(tissue_harmonised , ~ .x | > mutate( median_proportion = mean(proportion )))
101+
102+ dropLeadingZero <- function (l ){ stringr :: str_replace(l , ' 0(?=.)' , ' ' ) }
103+ S_sqrt <- function (x ){sign(x )* sqrt(abs(x ))}
104+ IS_sqrt <- function (x ){x ^ 2 * sign(x )}
105+ S_sqrt_trans <- function () scales :: trans_new(" S_sqrt" ,S_sqrt ,IS_sqrt )
106+
107+
108+ plot_immune_proportion_dataset =
109+ data_for_immune_proportion_count | >
110+ ggplot(aes(fct_reorder(tissue_harmonised , desc(median_proportion )), proportion )) +
111+ geom_point(aes(size = sum , color = file_id )) +
112+ guides(color = " none" ) +
113+ scale_size(trans = " log10" , range = c(0.1 , 2.5 ), limits = c(1000 , 10000 )) +
114+ scale_color_manual(values = dittoSeq :: dittoColors()) +
115+ scale_y_continuous(trans = S_sqrt_trans(), labels = dropLeadingZero ) +
116+ theme_multipanel +
117+ theme(axis.text.x = element_text(angle = 90 , hjust = 1 , vjust = 0.5 ))
118+
119+ # - scatter plot of abundance vs variability per tissue
120+
121+
122+ # - Confidence class per cell type
123+ # -
124+
125+ # Study annotation
126+ res =
127+ data_for_immune_proportion | >
128+ mutate(is_immune = as.character(is_immune )) | >
129+ filter(tissue_harmonised %in% c(" blood" , " intestine" )) | >
130+ sccomp_glm(
131+ formula_composition = ~ 0 + tissue_harmonised ,
132+ formula_variability = ~ 1 ,
133+ .sample , is_immune ,
134+ check_outliers = FALSE ,
135+ approximate_posterior_inference = FALSE ,
136+ # contrasts = c("typecancer - typehealthy", "typehealthy - typecancer"),
137+ cores = 20 ,
138+ mcmc_seed = 42 , verbose = T
139+ )
140+
141+
142+ res | > plot_summary()
143+
144+ cell_metadata_with_harmonised_annotation | >
145+
146+
147+
148+ mutate(is_immune = cell_type_harmonised != " non_immune" ) | >
149+
150+ # Stats
151+ count(.sample , tissue_harmonised , is_immune ) | >
152+ with_groups(.sample , ~ .x | > mutate(proportion = n / sum(n ))) | >
153+ filter(tissue_harmonised == " heart" & proportion > 0.75 )
154+
0 commit comments