@@ -476,77 +476,123 @@ curated_annotation =
476476 ) | >
477477 select(.cell , cell_type , cell_type_harmonised , confidence_class , cell_annotation_azimuth_l2 = predicted.celltype.l2 , cell_annotation_blueprint_singler = blueprint_singler )
478478
479- curated_annotation | >
479+
480+
# Reannotation of generic cell types ----

#' Pick the best-scoring Monaco label per cell from a saved SingleR result.
#'
#' The three reannotation tables below share exactly the same pipeline; only
#' the input file, the competing score columns and the label recoding differ.
#'
#' @param path Path to an RDS file. The stored object must expose a `$scores`
#'   matrix, and `rownames()` of the object must give the cell identifiers.
#' @param labels Character vector naming the score columns that compete for
#'   each cell.
#' @param recode Named character vector; names are the score column names,
#'   values are the labels written to `cell_type_Monaco`.
#' @param harmonised Value written to `cell_type_harmonised` for every row.
#' @return A tibble with one row per cell and the columns `.cell`,
#'   `cell_type_Monaco`, `score` and `cell_type_harmonised`.
reannotate_from_scores <- function(path, labels, recode, harmonised) {
  # Read the file once; both the score matrix and the cell names come from it.
  singler_result <- readRDS(path)

  singler_result$scores |>
    as_tibble() |>
    select(all_of(labels)) |>
    mutate(.cell = rownames(singler_result)) |>
    pivot_longer(
      all_of(labels),
      names_to = "cell_type_Monaco", values_to = "score"
    ) |>
    # Keep the single highest-scoring label for each cell
    with_groups(.cell, \(cells) cells |> arrange(desc(score)) |> slice(1)) |>
    mutate(
      # Labels missing from `recode` become NA
      cell_type_Monaco = unname(recode[cell_type_Monaco]),
      cell_type_harmonised = harmonised
    )
}

# CD4 cells are scored against the CD8 memory/naive columns and the winning
# label is reported as its CD4 counterpart.
# NOTE(review): confirm that the CD8 columns are the intended proxy here.
reannotate_cd4 <-
  reannotate_from_scores(
    "dev/reannotate_cd4.rds",
    labels = c(
      "Central memory CD8 T cells",
      "Effector memory CD8 T cells",
      "Naive CD8 T cells"
    ),
    recode = c(
      "Effector memory CD8 T cells" = "cd4 tem",
      "Central memory CD8 T cells" = "cd4 tcm",
      "Naive CD8 T cells" = "cd4 naive"
    ),
    harmonised = "cd4 t"
  )

reannotate_cd8 <-
  reannotate_from_scores(
    "dev/reannotate_cd8.rds",
    labels = c(
      "Central memory CD8 T cells",
      "Effector memory CD8 T cells",
      "Naive CD8 T cells"
    ),
    recode = c(
      "Effector memory CD8 T cells" = "cd8 tem",
      "Central memory CD8 T cells" = "cd8 tcm",
      "Naive CD8 T cells" = "cd8 naive"
    ),
    harmonised = "cd8 t"
  )

reannotate_monocytes <-
  reannotate_from_scores(
    "dev/reannotate_monocytes.rds",
    labels = c("Non classical monocytes", "Classical monocytes"),
    recode = c(
      "Non classical monocytes" = "cd16 mono",
      "Classical monocytes" = "cd14 mono"
    ),
    harmonised = "monocytes"
  )
532+
library(glue)

# Rebuild `curated_annotation` on the metadata cell IDs, then replace the
# generic harmonised labels with the reannotated ones where available.
curated_annotation <-

  # Fix cell ID: the annotation table is keyed on "<.cell>_<.sample>"
  get_metadata() |>
  select(.cell, .sample) |>
  as_tibble() |>
  mutate(.cell_combined = glue("{.cell}_{.sample}")) |>

  # Add cell type
  inner_join(
    curated_annotation,
    by = c(".cell_combined" = ".cell")
  ) |>

  # Attach the reannotation. The keys are spelled out so the join cannot
  # silently change if another shared column appears. A cell only matches
  # when it still carries the generic label the reannotation was made for.
  left_join(
    bind_rows(reannotate_cd4, reannotate_cd8, reannotate_monocytes),
    by = c(".cell", "cell_type_harmonised")
  ) |>

  # Prefer the reannotated label; fall back to the existing one
  mutate(
    cell_type_harmonised = coalesce(cell_type_Monaco, cell_type_harmonised)
  ) |>
  select(-.cell_combined, -cell_type_Monaco, -score)
561+
# Write the curated annotation to disk from inside a job::job() block
job::job({
  saveRDS(curated_annotation, "dev/curated_annotation.rds")
})
566+
481567
# Attach file and tissue metadata to every annotated cell.
# The join keys are explicit so they stay fixed if either table gains
# another shared column.
cell_metadata_with_harmonised_annotation <-
  curated_annotation |>
  left_join(
    get_metadata() |>
      select(.cell, .sample, file_id, file_id_db, tissue) |>
      as_tibble(),
    by = c(".cell", ".sample")
  )
488-
489575# xx = x |>
490576# filter(cell_type_harmonised == "monocytes") |>
491577# get_SingleCellExperiment()
492578
493- data | >
494- logNormCounts(assay.type = " X" ) | >
495- SingleR(ref = MonacoImmuneData ,
496- labels = MonacoImmuneData $ label.fine )
497579
498580
# Distinct (cell_type, .sample, file_id) combinations that contain at least
# one cell with a harmonised annotation
annotated_samples <-
  cell_metadata_with_harmonised_annotation |>
  filter(!is.na(cell_type_harmonised)) |>
  distinct(cell_type, .sample, file_id)
501583
# Cell types that most need attention
cell_metadata_with_harmonised_annotation |>
  anti_join(annotated_samples) |>
  select(contains("cell_")) |>
  count(
    cell_type,
    cell_type_harmonised,
    cell_annotation_azimuth_l2,
    cell_annotation_blueprint_singler,
    sort = TRUE
  ) |>
  print(n = 99)

# How many samples miss annotation
cell_metadata_with_harmonised_annotation |>
  anti_join(annotated_samples) |>
  distinct(cell_type, cell_type_harmonised, .sample) |>
  distinct(cell_type, .sample) |>
  count(cell_type, sort = TRUE)

# Histo of annotation: number of distinct harmonised cell types per sample
cell_metadata_with_harmonised_annotation |>
  filter(!is.na(cell_type_harmonised)) |>
  distinct(cell_type_harmonised, .sample) |>
  count(.sample) |>
  pull(n) |>
  hist(breaks = 30)

# Tissue with no immune
# NOTE(review): `tissue_harmonised` is not among the columns selected into
# this table in the visible code (only `tissue` is) -- confirm it exists.
cell_metadata_with_harmonised_annotation |>
  filter(!is.na(cell_type_harmonised)) |>
  distinct(cell_type_harmonised, tissue_harmonised) |>
  count(cell_type_harmonised) |>
  arrange(n) |>
  print(n = 99)

cell_metadata_with_harmonised_annotation |>
  filter(!is.na(cell_type_harmonised)) |>
  distinct(cell_type_harmonised, tissue_harmonised) |>
  count(tissue_harmonised) |>
  arrange(n) |>
  print(n = 99)
516597
517- metadata_df | >
518- distinct(tissue ) | >
519- as_tibble()
520598
521-
522- x = metadata | >
523- filter(.cell %in% (
524- curated_annotation | >
525- filter(cell_type_harmonised == " monocytes" ) | >
526- pull(.cell )
527- )) | >
528- unite(" file_id_db" , c(file_id , cell_type ), remove = FALSE ) | >
529- mutate(file_id_db = file_id_db | > md5() | > as.character()) | >
530-
531- get_SingleCellExperiment(" /vast/projects/RCP/human_cell_atlas/splitted_DB2_data" )
532-
533-
534- library(scuttle )
535- library(celldex )
536- library(SingleR )
537- library(BiocParallel )
538- monocyte_reference =
539- MonacoImmuneData() | >
540-
541-
542-
543- annotation <-
544- x | >
545- slice(1 : 10000 ) | >
546- logNormCounts(assay.type = " X" ) | >
547- SingleR(
548- ref = MonacoImmuneData ,
549- assay.type.test = 1 ,
550- labels = MonacoImmuneData $ label.fine ,
551- BPPARAM = SnowParam(workers = 10 )
552- )
0 commit comments