Skip to content
This repository was archived by the owner on Oct 14, 2025. It is now read-only.

Commit 865461b

Browse files
committed
update pipeline
1 parent 88a7de2 commit 865461b

File tree

3 files changed

+382
-290
lines changed

3 files changed

+382
-290
lines changed

dev/annotate_files.R

Lines changed: 121 additions & 81 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ library(glmGamPoi)
1818
source("utility.R")
1919
library(HCAquery)
2020
library(BiocParallel)
21-
21+
library(scuttle)
2222

2323
# Read arguments
2424
args = commandArgs(trailingOnly=TRUE)
@@ -30,72 +30,150 @@ cell_type_df = args[[length(args)-2]]
3030
output_dir = args[[length(args)-1]]
3131
memory_Mb = args[[length(args)]] |> as.integer()
3232

33-
library(future)
34-
plan("multicore", workers = 4)
35-
options(future.globals.maxSize = (memory_Mb - 10000) * 1024^2)
33+
# library(future)
34+
# plan("multicore", workers = 4)
35+
# options(future.globals.maxSize = (memory_Mb - 10000) * 1024^2)
36+
37+
# print(file_id_column )
38+
# print(light_data_directory )
39+
# print(file_for_annotation_workflow )
40+
# print(cell_type_df )
41+
# print(output_dir)
3642

37-
print(file_id_column )
38-
print(light_data_directory )
39-
print(file_for_annotation_workflow )
40-
print(cell_type_df )
41-
print(output_dir)
4243
# Create directory
4344
output_dir |> dir.create( showWarnings = FALSE, recursive = TRUE)
4445

45-
reference_azimuth = readRDS("reference_azimuth_NEW_UWOT.rds")
46-
4746
metadata_few_columns = readRDS(file_for_annotation_workflow)
4847

49-
data =
48+
49+
# SINGLER
50+
blueprint <- BlueprintEncodeData()
51+
MonacoImmuneData = MonacoImmuneData()
52+
53+
print("Start SingleR")
54+
55+
data_singler =
5056
sample_files |>
5157
enframe(value = "input_file") |>
5258
mutate(file_id = file_id_column) |>
53-
mutate(sce = map2(
59+
mutate(singler = map2(
5460
input_file, file_id,
5561
~ {
56-
.x |>
62+
print(.x)
63+
sce =
64+
.x |>
5765
loadHDF5SummarizedExperiment() |>
58-
#sce[rownames(sce) %in% VariableFeatures(reference_azimuth),] |>
66+
#sce[rownames(sce) %in% VariableFeatures(reference_azimuth),] |>
5967

6068
# Filter Immune
6169
left_join(metadata_few_columns |> filter(file_id == .y) |> select(-file_id)) |>
6270
left_join(read_csv(cell_type_df)) |>
63-
filter(lineage_1 == "immune")
64-
})) |>
71+
filter(lineage_1 == "immune") |>
72+
73+
logNormCounts(assay.type = "X")
74+
75+
# If no cell kill
76+
if(ncol(sce)==0)
77+
return(tibble(.cell = character()))
78+
79+
# If I have negative values
80+
if(( sce@assays@data$X[,1:min(100, ncol(sce))] |> min()) < 0)
81+
sce@assays@data$X[ sce@assays@data$X<0] <- 0
82+
83+
if(ncol(sce)==1){
84+
sce = cbind(sce, sce)
85+
colnames(sce)[2]= "dummy___"
86+
}
87+
88+
left_join(
89+
sce |>
90+
SingleR(
91+
ref = blueprint,
92+
assay.type.test= 1,
93+
labels = blueprint$label.fine
94+
) |>
95+
as_tibble(rownames=".cell") |>
96+
nest(blueprint_scores = starts_with("score")) |>
97+
select(-one_of("delta.next"),- pruned.labels) |>
98+
rename( blueprint_singler = labels),
99+
100+
sce |>
101+
SingleR(
102+
ref = MonacoImmuneData,
103+
assay.type.test= 1,
104+
labels = MonacoImmuneData$label.fine
105+
) |>
106+
as_tibble(rownames=".cell") |>
107+
108+
nest(monaco_scores = starts_with("score")) |>
109+
select(-delta.next,- pruned.labels) |>
110+
rename( monaco_singler = labels)
111+
) |>
112+
filter(.cell!="dummy___")
113+
114+
})) |>
115+
116+
unnest(singler) |>
117+
select(-name)
118+
119+
# If not immune cells
120+
if(nrow(data_singler) == 0){
121+
122+
sample_files |>
123+
enframe(value = "file") |>
124+
mutate(.sample = file |> basename() |> tools::file_path_sans_ext()) |>
125+
mutate(
126+
saved = map(.sample, ~ tibble(.cell = character()) |> saveRDS(glue("{output_dir}/{.x}.rds")))
127+
)
65128

66-
unnest_single_cell_experiment(sce)
129+
} else if(nrow(data_singler) <= 30){
67130

68-
# pull(sce) %>%
69-
# do.call(cbind, .)
131+
# If too little immune cells
70132

71-
if(ncol(data) <= 30){
72133

73-
data |>
74-
distinct(file_id, .sample) |>
134+
data_singler |>
135+
mutate(.sample = input_file |> basename() |> tools::file_path_sans_ext()) |>
136+
nest(data = -c(.sample, file_id)) |>
75137
mutate(
76138
saved = map(.sample, ~ tibble(.cell = character()) |> saveRDS(glue("{output_dir}/{.x}.rds")))
77-
)
139+
)
78140

79-
} else {
80141

81-
# CHANGE REFERENCE
82-
# reference_azimuth=
83-
# readRDS("/stornext/Bioinf/data/bioinf-data/Papenfuss_lab/projects/reference_azimuth.rds") |>
84-
# RunUMAP(dims = 1:30, spread = 0.5,min.dist = 0.01, n.neighbors = 10L, return.model=TRUE, umap.method = 'uwot')
142+
} else{
85143

144+
print("Start Seurat")
86145

87-
data_seurat = data
146+
reference_azimuth = readRDS("reference_azimuth_NEW_UWOT.rds")
88147

89-
# Selectonly interesting genes
90-
data_seurat = data_seurat[rownames(data_seurat) %in% VariableFeatures(reference_azimuth),]
91148

92-
# Convert to Seurat matrix
93-
data_seurat@assays@data$X = data_seurat@assays@data$X |> as("dgCMatrix")
94-
data_seurat =
95-
data_seurat |>
149+
data_seurat =
150+
sample_files |>
151+
152+
enframe(value = "input_file") |>
153+
mutate(file_id = file_id_column) |>
154+
mutate(seu = map2(
155+
input_file, file_id,
156+
~ {
157+
sce =
158+
.x |>
159+
loadHDF5SummarizedExperiment() |>
160+
#sce[rownames(sce) %in% VariableFeatures(reference_azimuth),] |>
161+
162+
# Filter Immune
163+
left_join(metadata_few_columns |> filter(file_id == .y) |> select(-file_id)) |>
164+
left_join(read_csv(cell_type_df)) |>
165+
filter(lineage_1 == "immune") %>%
166+
167+
.[rownames(.) %in% VariableFeatures(reference_azimuth),]
168+
169+
sce@assays@data$X = sce@assays@data$X |> as("dgCMatrix")
170+
171+
sce |>
172+
as.Seurat(counts = "X", data = NULL)
173+
})) |>
174+
175+
unnest_seurat(seu)
96176

97-
# Convert
98-
as.Seurat(counts = "X", data = NULL)
99177

100178
# If I have negative values
101179
if((data_seurat@assays$originalexp@counts |> as.matrix() |> min()) < 0)
@@ -139,56 +217,18 @@ if(ncol(data) <= 30){
139217
data_seurat
140218
}
141219
) |>
142-
as_tibble()
143-
144-
gc()
145-
146-
blueprint <- BlueprintEncodeData()
147-
148-
library(scuttle)
149-
150-
annotation_blueprint <-
151-
data |>
152-
logNormCounts(assay.type = "X") |>
153-
SingleR(ref = blueprint, assay.type.test=1,
154-
labels = blueprint$label.fine,
155-
BPPARAM=MulticoreParam(4)
156-
)
157-
158-
rm(blueprint)
159-
gc()
160-
161-
MonacoImmuneData = MonacoImmuneData()
162-
163-
annotation_monaco <-
164-
data |>
165-
logNormCounts(assay.type = "X") |>
166-
SingleR(ref = MonacoImmuneData, assay.type.test=1,
167-
labels = MonacoImmuneData$label.fine,
168-
BPPARAM=MulticoreParam(4)
169-
)
220+
as_tibble() |>
221+
select(.cell, .sample, one_of("predicted.celltype.l1", "predicted.celltype.l2"), contains("refUMAP"))
170222

171-
rm(data)
172-
gc()
173223

174224
data_seurat |>
175-
left_join(
176-
annotation_blueprint |>
177-
as_tibble(rownames=".cell") |>
178-
select(.cell, blueprint_singler = labels)
179-
) |>
180-
left_join(
181-
annotation_monaco |>
182-
as_tibble(rownames=".cell") |>
183-
select(.cell, monaco_singler = labels)
184-
) |>
225+
226+
left_join( data_singler , by = join_by(.cell) ) |>
185227

186228
# Just select essential information
187-
as_tibble() |>
188-
select(.cell, .sample, one_of("predicted.celltype.l1", "predicted.celltype.l2"), blueprint_singler, monaco_singler, contains("refUMAP")) |>
189229

190230
# Save
191-
nest(data = -.sample) |>
231+
nest(data = -c(.sample, file_id)) |>
192232
mutate(saved = map2(
193233
data, .sample,
194234
~ .x |> saveRDS(glue("{output_dir}/{.y}.rds"))

0 commit comments

Comments
 (0)