@@ -18,7 +18,7 @@ library(glmGamPoi)
1818source(" utility.R" )
1919library(HCAquery )
2020library(BiocParallel )
21-
21+ library( scuttle )
2222
2323# Read arguments
2424args = commandArgs(trailingOnly = TRUE )
@@ -30,72 +30,150 @@ cell_type_df = args[[length(args)-2]]
3030output_dir = args [[length(args )- 1 ]]
3131memory_Mb = args [[length(args )]] | > as.integer()
3232
33- library(future )
34- plan(" multicore" , workers = 4 )
35- options(future.globals.maxSize = (memory_Mb - 10000 ) * 1024 ^ 2 )
33+ # library(future)
34+ # plan("multicore", workers = 4)
35+ # options(future.globals.maxSize = (memory_Mb - 10000) * 1024^2)
36+
37+ # print(file_id_column )
38+ # print(light_data_directory )
39+ # print(file_for_annotation_workflow )
40+ # print(cell_type_df )
41+ # print(output_dir)
3642
37- print(file_id_column )
38- print(light_data_directory )
39- print(file_for_annotation_workflow )
40- print(cell_type_df )
41- print(output_dir )
4243# Create directory
4344output_dir | > dir.create( showWarnings = FALSE , recursive = TRUE )
4445
45- reference_azimuth = readRDS(" reference_azimuth_NEW_UWOT.rds" )
46-
4746metadata_few_columns = readRDS(file_for_annotation_workflow )
4847
49- data =
48+
49+ # SINGLER
50+ blueprint <- BlueprintEncodeData()
51+ MonacoImmuneData = MonacoImmuneData()
52+
53+ print(" Start SingleR" )
54+
55+ data_singler =
5056 sample_files | >
5157 enframe(value = " input_file" ) | >
5258 mutate(file_id = file_id_column ) | >
53- mutate(sce = map2(
59+ mutate(singler = map2(
5460 input_file , file_id ,
5561 ~ {
56- .x | >
62+ print(.x )
63+ sce =
64+ .x | >
5765 loadHDF5SummarizedExperiment() | >
58- # sce[rownames(sce) %in% VariableFeatures(reference_azimuth),] |>
66+ # sce[rownames(sce) %in% VariableFeatures(reference_azimuth),] |>
5967
6068 # Filter Immune
6169 left_join(metadata_few_columns | > filter(file_id == .y ) | > select(- file_id )) | >
6270 left_join(read_csv(cell_type_df )) | >
63- filter(lineage_1 == " immune" )
64- })) | >
71+ filter(lineage_1 == " immune" ) | >
72+
73+ logNormCounts(assay.type = " X" )
74+
75+ # If no cell kill
76+ if (ncol(sce )== 0 )
77+ return (tibble(.cell = character ()))
78+
79+ # If I have negative values
80+ if (( sce @ assays @ data $ X [,1 : min(100 , ncol(sce ))] | > min()) < 0 )
81+ sce @ assays @ data $ X [ sce @ assays @ data $ X < 0 ] <- 0
82+
83+ if (ncol(sce )== 1 ){
84+ sce = cbind(sce , sce )
85+ colnames(sce )[2 ]= " dummy___"
86+ }
87+
88+ left_join(
89+ sce | >
90+ SingleR(
91+ ref = blueprint ,
92+ assay.type.test = 1 ,
93+ labels = blueprint $ label.fine
94+ ) | >
95+ as_tibble(rownames = " .cell" ) | >
96+ nest(blueprint_scores = starts_with(" score" )) | >
97+ select(- one_of(" delta.next" ),- pruned.labels ) | >
98+ rename( blueprint_singler = labels ),
99+
100+ sce | >
101+ SingleR(
102+ ref = MonacoImmuneData ,
103+ assay.type.test = 1 ,
104+ labels = MonacoImmuneData $ label.fine
105+ ) | >
106+ as_tibble(rownames = " .cell" ) | >
107+
108+ nest(monaco_scores = starts_with(" score" )) | >
109+ select(- delta.next ,- pruned.labels ) | >
110+ rename( monaco_singler = labels )
111+ ) | >
112+ filter(.cell != " dummy___" )
113+
114+ })) | >
115+
116+ unnest(singler ) | >
117+ select(- name )
118+
119+ # If not immune cells
120+ if (nrow(data_singler ) == 0 ){
121+
122+ sample_files | >
123+ enframe(value = " file" ) | >
124+ mutate(.sample = file | > basename() | > tools :: file_path_sans_ext()) | >
125+ mutate(
126+ saved = map(.sample , ~ tibble(.cell = character ()) | > saveRDS(glue(" {output_dir}/{.x}.rds" )))
127+ )
65128
66- unnest_single_cell_experiment( sce )
129+ } else if (nrow( data_singler ) < = 30 ){
67130
68- # pull(sce) %>%
69- # do.call(cbind, .)
131+ # If too little immune cells
70132
71- if (ncol(data ) < = 30 ){
72133
73- data | >
74- distinct(file_id , .sample ) | >
134+ data_singler | >
135+ mutate(.sample = input_file | > basename() | > tools :: file_path_sans_ext()) | >
136+ nest(data = - c(.sample , file_id )) | >
75137 mutate(
76138 saved = map(.sample , ~ tibble(.cell = character ()) | > saveRDS(glue(" {output_dir}/{.x}.rds" )))
77- )
139+ )
78140
79- } else {
80141
81- # CHANGE REFERENCE
82- # reference_azimuth=
83- # readRDS("/stornext/Bioinf/data/bioinf-data/Papenfuss_lab/projects/reference_azimuth.rds") |>
84- # RunUMAP(dims = 1:30, spread = 0.5,min.dist = 0.01, n.neighbors = 10L, return.model=TRUE, umap.method = 'uwot')
142+ } else {
85143
144+ print(" Start Seurat" )
86145
87- data_seurat = data
146+ reference_azimuth = readRDS( " reference_azimuth_NEW_UWOT.rds " )
88147
89- # Selectonly interesting genes
90- data_seurat = data_seurat [rownames(data_seurat ) %in% VariableFeatures(reference_azimuth ),]
91148
92- # Convert to Seurat matrix
93- data_seurat @ assays @ data $ X = data_seurat @ assays @ data $ X | > as(" dgCMatrix" )
94- data_seurat =
95- data_seurat | >
149+ data_seurat =
150+ sample_files | >
151+
152+ enframe(value = " input_file" ) | >
153+ mutate(file_id = file_id_column ) | >
154+ mutate(seu = map2(
155+ input_file , file_id ,
156+ ~ {
157+ sce =
158+ .x | >
159+ loadHDF5SummarizedExperiment() | >
160+ # sce[rownames(sce) %in% VariableFeatures(reference_azimuth),] |>
161+
162+ # Filter Immune
163+ left_join(metadata_few_columns | > filter(file_id == .y ) | > select(- file_id )) | >
164+ left_join(read_csv(cell_type_df )) | >
165+ filter(lineage_1 == " immune" ) %> %
166+
167+ . [rownames(. ) %in% VariableFeatures(reference_azimuth ),]
168+
169+ sce @ assays @ data $ X = sce @ assays @ data $ X | > as(" dgCMatrix" )
170+
171+ sce | >
172+ as.Seurat(counts = " X" , data = NULL )
173+ })) | >
174+
175+ unnest_seurat(seu )
96176
97- # Convert
98- as.Seurat(counts = " X" , data = NULL )
99177
100178 # If I have negative values
101179 if ((data_seurat @ assays $ originalexp @ counts | > as.matrix() | > min()) < 0 )
@@ -139,56 +217,18 @@ if(ncol(data) <= 30){
139217 data_seurat
140218 }
141219 ) | >
142- as_tibble()
143-
144- gc()
145-
146- blueprint <- BlueprintEncodeData()
147-
148- library(scuttle )
149-
150- annotation_blueprint <-
151- data | >
152- logNormCounts(assay.type = " X" ) | >
153- SingleR(ref = blueprint , assay.type.test = 1 ,
154- labels = blueprint $ label.fine ,
155- BPPARAM = MulticoreParam(4 )
156- )
157-
158- rm(blueprint )
159- gc()
160-
161- MonacoImmuneData = MonacoImmuneData()
162-
163- annotation_monaco <-
164- data | >
165- logNormCounts(assay.type = " X" ) | >
166- SingleR(ref = MonacoImmuneData , assay.type.test = 1 ,
167- labels = MonacoImmuneData $ label.fine ,
168- BPPARAM = MulticoreParam(4 )
169- )
220+ as_tibble() | >
221+ select(.cell , .sample , one_of(" predicted.celltype.l1" , " predicted.celltype.l2" ), contains(" refUMAP" ))
170222
171- rm(data )
172- gc()
173223
174224 data_seurat | >
175- left_join(
176- annotation_blueprint | >
177- as_tibble(rownames = " .cell" ) | >
178- select(.cell , blueprint_singler = labels )
179- ) | >
180- left_join(
181- annotation_monaco | >
182- as_tibble(rownames = " .cell" ) | >
183- select(.cell , monaco_singler = labels )
184- ) | >
225+
226+ left_join( data_singler , by = join_by(.cell ) ) | >
185227
186228 # Just select essential information
187- as_tibble() | >
188- select(.cell , .sample , one_of(" predicted.celltype.l1" , " predicted.celltype.l2" ), blueprint_singler , monaco_singler , contains(" refUMAP" )) | >
189229
190230 # Save
191- nest(data = - .sample ) | >
231+ nest(data = - c( .sample , file_id ) ) | >
192232 mutate(saved = map2(
193233 data , .sample ,
194234 ~ .x | > saveRDS(glue(" {output_dir}/{.y}.rds" ))
0 commit comments