Skip to content
This repository was archived by the owner on Oct 14, 2025. It is now read-only.

Commit 7fd65c9

Browse files
committed
update harmonise annotation
1 parent 73488a1 commit 7fd65c9

File tree

1 file changed

+96
-50
lines changed

1 file changed

+96
-50
lines changed

dev/annotation_harmonise.R

Lines changed: 96 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -476,77 +476,123 @@ curated_annotation =
476476
) |>
477477
select(.cell, cell_type, cell_type_harmonised, confidence_class, cell_annotation_azimuth_l2 = predicted.celltype.l2, cell_annotation_blueprint_singler = blueprint_singler)
478478

479-
curated_annotation |>
479+
480+
481+
# Re-annotate generic cell types (cd4 t, cd8 t, monocytes) into finer subtypes using precomputed per-cell scores
482+
# Per-cell fine label for cells currently harmonised as "cd4 t".
# Reads the `scores` element of dev/reannotate_cd4.rds, keeps three score
# columns, picks the highest-scoring one for each cell and maps it to a short
# harmonised label ("cd4 tem", "cd4 tcm", "cd4 naive").
# Result columns: .cell, cell_type_Monaco, score, cell_type_harmonised.
# NOTE(review): the columns kept are the *CD8* memory/naive scores, which are
# then relabelled as CD4 below - presumably a deliberate proxy; confirm.
reannotate_cd4 <-
483+
readRDS("dev/reannotate_cd4.rds")$scores |>
484+
as_tibble(rownames = ".cell") |>
485+
# Selecting only the score columns drops the `.cell` column created above
select("Central memory CD8 T cells", "Effector memory CD8 T cells" , "Naive CD8 T cells" ) |>
486+
# Cell IDs are taken from the row names of the whole saved object (the file is
# read a second time here), not from the `scores` element
mutate(.cell = rownames(readRDS("dev/reannotate_cd4.rds"))) |>
487+
pivot_longer(
488+
c(`Central memory CD8 T cells`, `Effector memory CD8 T cells` , `Naive CD8 T cells` ),
489+
names_to = "cell_type_Monaco", values_to = "score"
490+
) |>
491+
# Rename the CD8 labels to their CD4 counterparts
mutate(cell_type_Monaco = cell_type_Monaco |> str_replace_all("CD8", "CD4")) |>
492+
# Keep one row per cell: the label with the highest score
with_groups(.cell, ~ .x |> arrange(desc(score)) |> slice(1)) |>
493+
mutate(cell_type_Monaco = case_when(
494+
cell_type_Monaco == "Effector memory CD4 T cells" ~ "cd4 tem",
495+
cell_type_Monaco == "Central memory CD4 T cells" ~ "cd4 tcm",
496+
cell_type_Monaco == "Naive CD4 T cells" ~ "cd4 naive"
497+
)) |>
498+
# Constant column; presumably acts as a join key so that only cells already
# labelled "cd4 t" are re-annotated - verify against the join further down
mutate(cell_type_harmonised = "cd4 t")
499+
500+
# Per-cell fine label for cells currently harmonised as "cd8 t".
# Reads the `scores` element of dev/reannotate_cd8.rds, keeps the three CD8
# memory/naive score columns, picks the highest-scoring one for each cell and
# maps it to a short harmonised label ("cd8 tem", "cd8 tcm", "cd8 naive").
# Result columns: .cell, cell_type_Monaco, score, cell_type_harmonised.
reannotate_cd8 <-
501+
readRDS("dev/reannotate_cd8.rds")$scores |>
502+
as_tibble(rownames = ".cell") |>
503+
# Selecting only the score columns drops the `.cell` column created above
select("Central memory CD8 T cells", "Effector memory CD8 T cells" , "Naive CD8 T cells" ) |>
504+
# Cell IDs are taken from the row names of the whole saved object (the file is
# read a second time here), not from the `scores` element
mutate(.cell = rownames(readRDS("dev/reannotate_cd8.rds"))) |>
505+
pivot_longer(
506+
c(`Central memory CD8 T cells`, `Effector memory CD8 T cells` , `Naive CD8 T cells` ),
507+
names_to = "cell_type_Monaco", values_to = "score"
508+
) |>
509+
# Keep one row per cell: the label with the highest score
with_groups(.cell, ~ .x |> arrange(desc(score)) |> slice(1)) |>
510+
mutate(cell_type_Monaco = case_when(
511+
cell_type_Monaco == "Effector memory CD8 T cells" ~ "cd8 tem",
512+
cell_type_Monaco == "Central memory CD8 T cells" ~ "cd8 tcm",
513+
cell_type_Monaco == "Naive CD8 T cells" ~ "cd8 naive"
514+
)) |>
515+
# Constant column; presumably acts as a join key so that only cells already
# labelled "cd8 t" are re-annotated - verify against the join further down
mutate(cell_type_harmonised = "cd8 t")
516+
517+
# Per-cell fine label for cells currently harmonised as "monocytes".
# Reads the `scores` element of dev/reannotate_monocytes.rds, keeps the
# classical / non-classical monocyte score columns, picks the higher-scoring
# one for each cell and maps it to "cd14 mono" or "cd16 mono".
# Result columns: .cell, cell_type_Monaco, score, cell_type_harmonised.
reannotate_monocytes <-
518+
readRDS("dev/reannotate_monocytes.rds")$scores |>
519+
as_tibble(rownames = ".cell") |>
520+
# Selecting only the score columns drops the `.cell` column created above
select("Non classical monocytes", "Classical monocytes" ) |>
521+
# Cell IDs are taken from the row names of the whole saved object (the file is
# read a second time here), not from the `scores` element
mutate(.cell = rownames(readRDS("dev/reannotate_monocytes.rds"))) |>
522+
pivot_longer(
523+
c(`Non classical monocytes`, `Classical monocytes` ),
524+
names_to = "cell_type_Monaco", values_to = "score"
525+
) |>
526+
# Keep one row per cell: the label with the highest score
with_groups(.cell, ~ .x |> arrange(desc(score)) |> slice(1)) |>
527+
mutate(cell_type_Monaco = case_when(
528+
cell_type_Monaco == "Non classical monocytes" ~ "cd16 mono",
529+
cell_type_Monaco == "Classical monocytes" ~ "cd14 mono"
530+
)) |>
531+
# Constant column; presumably acts as a join key so that only cells already
# labelled "monocytes" are re-annotated - verify against the join further down
mutate(cell_type_harmonised = "monocytes")
532+
533+
library(glue)
534+
535+
536+
# Rebuild `curated_annotation` (overwriting the earlier object of the same
# name): restore the original cell IDs from the metadata, then replace the
# generic harmonised labels with the finer labels from reannotate_cd4,
# reannotate_cd8 and reannotate_monocytes wherever one is available.
curated_annotation =
537+
538+
# Fix cell ID: the previous `curated_annotation$.cell` is matched against
# "{.cell}_{.sample}" built from the metadata, so the joined table carries the
# metadata's own `.cell` and `.sample`
539+
get_metadata() |>
540+
select(.cell, .sample) |>
541+
as_tibble() |>
542+
mutate(.cell_combined = glue("{.cell}_{.sample}")) |>
543+
544+
# Add cell type (cells without a curated annotation are dropped by the inner join)
545+
inner_join(
546+
curated_annotation,
547+
by=c(".cell_combined" = ".cell")
548+
) |>
549+
# No `by` is given, so dplyr joins on every column name the two tables share.
# NOTE(review): from the columns visible here that should be `.cell` and
# `cell_type_harmonised`, i.e. a finer label is attached only when the cell is
# currently labelled "cd4 t" / "cd8 t" / "monocytes" - confirm, and consider
# making `by` explicit
left_join(
550+
reannotate_cd4 |>
551+
bind_rows(reannotate_cd8) |>
552+
bind_rows(reannotate_monocytes)
553+
) |>
554+
# Prefer the finer label when the join found one; otherwise keep the old label
mutate(cell_type_harmonised = if_else(
555+
!is.na(cell_type_Monaco),
556+
cell_type_Monaco,
557+
cell_type_harmonised
558+
)) |>
559+
# Drop the helper columns used only for joining / re-annotation
select(-.cell_combined) |>
560+
select(-cell_type_Monaco, -score)
561+
562+
# Save the curated annotation to dev/curated_annotation.rds.
# The save is wrapped in job::job(); presumably this runs it as a background
# job so the session is not blocked while writing - confirm against the `job`
# package documentation.
job::job({
563+
curated_annotation |>
480564
saveRDS("dev/curated_annotation.rds")
565+
})
566+
481567

482-
cell_metadata_with_harmonised_annotation = curated_annotation |>
568+
# Curated annotation enriched with per-cell metadata (file_id, file_id_db,
# tissue). The left join has no `by`, so it matches on every column name the
# two tables share (`.cell` and `.sample` are selected on the metadata side).
cell_metadata_with_harmonised_annotation =
569+
curated_annotation |>
483570
left_join(
484-
metadata |>
571+
get_metadata() |>
485572
select(.cell, .sample, file_id, file_id_db, tissue) |>
486573
as_tibble()
487574
)
488-
489575
# xx = x |>
490576
# filter(cell_type_harmonised == "monocytes") |>
491577
# get_SingleCellExperiment()
492578

493-
data |>
494-
logNormCounts(assay.type = "X") |>
495-
SingleR(ref = MonacoImmuneData,
496-
labels = MonacoImmuneData$label.fine)
497579

498580

499581
annotated_samples =
500-
x |> filter(!is.na(cell_type_harmonised)) |> distinct( cell_type, .sample, file_id)
582+
cell_metadata_with_harmonised_annotation |> filter(!is.na(cell_type_harmonised)) |> distinct( cell_type, .sample, file_id)
501583

502584
# Cell types that most need attention
503-
x |> anti_join(annotated_samples) |> select(contains("cell_")) |> count(cell_type , cell_type_harmonised ,cell_annotation_azimuth_l2 ,cell_annotation_blueprint_singler) |> arrange(desc(n)) |> print(n=99)
585+
cell_metadata_with_harmonised_annotation |> anti_join(annotated_samples) |> select(contains("cell_")) |> count(cell_type , cell_type_harmonised ,cell_annotation_azimuth_l2 ,cell_annotation_blueprint_singler) |> arrange(desc(n)) |> print(n=99)
504586

505587
# How many samples are missing a harmonised annotation, per original cell type
506-
x |> anti_join(annotated_samples) |> distinct(cell_type, cell_type_harmonised, .sample) |> distinct( cell_type, .sample) |> count(cell_type) |> arrange(desc(n))
588+
cell_metadata_with_harmonised_annotation |> anti_join(annotated_samples) |> distinct(cell_type, cell_type_harmonised, .sample) |> distinct( cell_type, .sample) |> count(cell_type) |> arrange(desc(n))
507589

508590
# Histogram of the number of distinct harmonised cell types annotated per sample
509-
x |> filter(!is.na(cell_type_harmonised)) |> distinct( cell_type_harmonised, .sample) |> count(.sample) |> pull(n) |> hist(breaks=30)
591+
cell_metadata_with_harmonised_annotation |> filter(!is.na(cell_type_harmonised)) |> distinct( cell_type_harmonised, .sample) |> count(.sample) |> pull(n) |> hist(breaks=30)
510592

511593
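# Tissues with few or no immune cell types: number of tissues per harmonised cell type, then number of harmonised cell types per tissue
# NOTE(review): `tissue_harmonised` is not among the columns selected into cell_metadata_with_harmonised_annotation above (only `tissue` is) - confirm it exists before running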
512-
x |> filter(!is.na(cell_type_harmonised)) |> distinct( cell_type_harmonised, tissue_harmonised) |> count(cell_type_harmonised) |> arrange(n) |> print(n=99)
513-
514-
x |> filter(!is.na(cell_type_harmonised)) |> distinct( cell_type_harmonised, tissue_harmonised) |> count(tissue_harmonised) |> arrange(n) |> print(n=99)
594+
cell_metadata_with_harmonised_annotation |> filter(!is.na(cell_type_harmonised)) |> distinct( cell_type_harmonised, tissue_harmonised) |> count(cell_type_harmonised) |> arrange(n) |> print(n=99)
515595

596+
cell_metadata_with_harmonised_annotation |> filter(!is.na(cell_type_harmonised)) |> distinct( cell_type_harmonised, tissue_harmonised) |> count(tissue_harmonised) |> arrange(n) |> print(n=99)
516597

517-
metadata_df |>
518-
distinct(tissue) |>
519-
as_tibble()
520598

521-
522-
x = metadata |>
523-
filter(.cell %in% (
524-
curated_annotation |>
525-
filter(cell_type_harmonised=="monocytes") |>
526-
pull(.cell)
527-
)) |>
528-
unite("file_id_db", c(file_id, cell_type), remove = FALSE) |>
529-
mutate(file_id_db = file_id_db |> md5() |> as.character()) |>
530-
531-
get_SingleCellExperiment("/vast/projects/RCP/human_cell_atlas/splitted_DB2_data")
532-
533-
534-
library(scuttle)
535-
library(celldex)
536-
library(SingleR)
537-
library(BiocParallel)
538-
monocyte_reference =
539-
MonacoImmuneData() |>
540-
541-
542-
543-
annotation <-
544-
x |>
545-
slice(1:10000) |>
546-
logNormCounts(assay.type = "X") |>
547-
SingleR(
548-
ref = MonacoImmuneData,
549-
assay.type.test=1,
550-
labels = MonacoImmuneData$label.fine,
551-
BPPARAM=SnowParam(workers=10)
552-
)

0 commit comments

Comments
 (0)