Skip to content
This repository was archived by the owner on Oct 14, 2025. It is now read-only.

Commit e6dcb2b

Browse files
committed
replace name
1 parent 3d894ee commit e6dcb2b

19 files changed

+1302
-505
lines changed

.gitignore

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@
55
human_cell_atlas.Rproj
66
local_file.rds
77
slurm.status.*
8-
HCAquery.Rproj
8+
CuratedAtlasQuery.Rproj
99
split_files.makeflow
1010
._*
1111
.DS_Store

DESCRIPTION

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
Type: Package
2-
Package: HCAquery
2+
Package: CuratedAtlasQuery
33
Title: Queries the Human Cell Atlas
44
Version: 0.1.0
55
Authors@R: c(person("Stefano", "Mangiola", email = "[email protected]",
@@ -66,6 +66,6 @@ biocViews:
6666
Encoding: UTF-8
6767
RoxygenNote: 7.2.3
6868
LazyDataCompression: xz
69-
URL: https://github.com/stemangiola/HCAquery
70-
BugReports: https://github.com/stemangiola/HCAquery/issues
69+
URL: https://github.com/stemangiola/CuratedAtlasQuery
70+
BugReports: https://github.com/stemangiola/CuratedAtlasQuery/issues
7171
VignetteBuilder: knitr

NEWS.md

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,3 @@
1-
# HCAquery 0.1.0
1+
# CuratedAtlasQuery 0.1.0
22

33
* Added a `NEWS.md` file to track changes to the package.

dev/annotate_files.R

Lines changed: 30 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -16,26 +16,28 @@ library(celldex)
1616
library(SingleR)
1717
library(glmGamPoi)
1818
source("utility.R")
19-
19+
library(HCAquery)
2020

2121

2222

2323
# Read arguments
2424
args = commandArgs(trailingOnly=TRUE)
2525
input_file = args[[1]]
26-
`metadata` = args[[2]]
26+
file_for_annotation_workflow = args[[2]]
2727
cell_type_df = args[[3]]
2828
output_file = args[[4]]
2929

3030
# Create directory
3131
output_file |> dirname() |> dir.create( showWarnings = FALSE, recursive = TRUE)
32+
.sample = basename(input_file) |> tools::file_path_sans_ext()
3233

3334
# Read file_cell_types
3435
data =
3536
loadHDF5SummarizedExperiment(input_file ) |>
37+
mutate(.sample = !! .sample ) |>
3638

3739
# add lineage 1
38-
left_join(readRDS(metadata) |> distinct(.cell, .sample, cell_type)) |>
40+
left_join(readRDS(file_for_annotation_workflow) |> dplyr::select(-one_of("cell_type_harmonised"))) |>
3941
left_join(read_csv(cell_type_df)) |>
4042
filter(lineage_1 == "immune")
4143

@@ -111,26 +113,44 @@ if(ncol(data) <= 30){
111113

112114

113115
blueprint <- BlueprintEncodeData()
114-
#MonacoImmuneData = MonacoImmuneData()
115116

116117
library(scuttle)
117118

118-
annotation <-
119+
annotation_blueprint <-
119120
data |>
120121
logNormCounts(assay.type = "X") |>
121122
SingleR(ref = blueprint, assay.type.test=1,
122123
labels = blueprint$label.fine)
123124

124-
data |>
125+
rm(blueprint)
126+
gc()
127+
128+
MonacoImmuneData = MonacoImmuneData()
129+
130+
annotation_monaco <-
131+
data |>
132+
logNormCounts(assay.type = "X") |>
133+
SingleR(ref = MonacoImmuneData, assay.type.test=1,
134+
labels = MonacoImmuneData$label.fine)
135+
136+
rm(data)
137+
gc()
138+
139+
data_seurat |>
125140
left_join(
126-
annotation |>
141+
annotation_blueprint |>
127142
as_tibble(rownames=".cell") |>
128143
select(.cell, blueprint_singler = first.labels)
129144
) |>
145+
left_join(
146+
annotation_monaco |>
147+
as_tibble(rownames=".cell") |>
148+
select(.cell, monaco_singler = first.labels)
149+
) |>
130150

131-
# Just select essential information
132-
as_tibble() |>
133-
select(.cell, one_of("predicted.celltype.l1", "predicted.celltype.l2"), blueprint_singler, contains("refUMAP")) |>
151+
# Just select essential information
152+
as_tibble() |>
153+
select(.cell, one_of("predicted.celltype.l1", "predicted.celltype.l2"), blueprint_singler, monaco_singler, contains("refUMAP")) |>
134154

135155
# Save
136156
saveRDS(output_file)

dev/annotation_harmonise.R

Lines changed: 189 additions & 225 deletions
Large diffs are not rendered by default.

dev/build_makefile_annotate_files.R

Lines changed: 44 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -16,49 +16,68 @@ library(tidyseurat)
1616
library(celldex)
1717
library(SingleR)
1818
library(glmGamPoi)
19-
source("utility.R")
19+
# source("utility.R")
2020

2121

2222
# # CREATE MAKEFILE
2323
tab = "\t"
24-
root_directory = "/vast/scratch/users/mangiola.s/human_cell_atlas"
25-
annotated_data_directory = glue("{root_directory}/annotated_data")
26-
light_data_directory = glue("{root_directory}/splitted_light_data")
27-
metadata = glue("{root_directory}/metadata.rds")
24+
root_directory = "/vast/projects/RCP/human_cell_atlas"
25+
annotated_data_directory = glue("{root_directory}/annotated_data_0.2")
26+
light_data_directory = glue("{root_directory}/splitted_light_data_0.2")
27+
metadata = glue("{root_directory}/metadata_0.2.rds")
2828
cell_type_df = "metadata_cell_type.csv"
2929

3030
light_file_paths = dir(light_data_directory, full.names = TRUE)
3131
.sample = basename(light_file_paths) |> tools::file_path_sans_ext()
3232
annotated_file_paths = glue("{annotated_data_directory}/{.sample}.rds")
33+
file_for_annotation_workflow = glue("{root_directory}/cell_sample_cell_type_df_for_annotation_workflow.rds")
3334

3435
metadata_df = readRDS(metadata)
3536

37+
metadata_df |> distinct(.cell, .sample, cell_type, cell_type_harmonised) |>
38+
saveRDS(file_for_annotation_workflow)
39+
3640
metadata_df |>
3741
distinct(.sample, file_id) |>
38-
filter(.sample %in% samples_to_use) |>
42+
# filter(.sample %in% samples_to_use) |>
3943
mutate(
4044
input_file_path = glue("{light_data_directory}/{.sample}") |> as.character(),
4145
output_file_path = glue("{annotated_data_directory}/{.sample}.rds" |> as.character())
4246
) |>
4347

44-
mutate(Mb = map_dbl(input_file_path, ~ (file.info(glue("{.x}/se.rds"))$size /1e6) |> as.integer() )) |>
45-
mutate(memory = pmax(Mb * 20 + 20000, 40000)) |>
48+
mutate(Mb = map_dbl(input_file_path, ~
49+
( (file.info(glue("{.x}/se.rds"))$size /1e6) |> as.integer() ) +
50+
( (file.info(glue("{.x}/assays.h5"))$size /1e6) |> as.integer() )
51+
)) |>
52+
mutate(memory = pmax(Mb * 20 + 10000, 20000)) |>
53+
54+
mutate(memory = case_when(
55+
.sample %in%
4656

47-
# mutate(memory = case_when(
57+
c('80507b531a4665ba331ff13f7996f2e0', 'fcbada6e796bb52c7bc3b9cef6532b1e', 'ff612ffdd90b7c4a082ab85e9b54b924', '6d812c89125c54cf915ba8330d74273a', '7a1c50a4cfd567e8559ad46bbd01c1e0', '8bb69081caaf38a3abbb4407589ab5b6', 'b93d5a0d64f34aada9d372ca70f681e8', 'c265cb6e68b8f747bddb10a56fee19c7', '6e9d4cef82b994569d2c98ce75218743', 'a77d2ea6a66417cf8cd63e7e726cf5f0', '078f35a53e9d14b71a4e2b716f557138', '6327cd460c7e5f3b50e8ec07b45389b8', 'a232ee361076b8092ac20118b6dd5aae', 'cb899529c5bf8e6db8cdab2bb24ce0ca', '7cd9204e50d1b12fb1884ab8b9924960', 'ffbfb187f9d7e743a74b03f30d819d34', 'b8eafa88efa0e3c600e36b52a7da3e35', '73ffcdfb290afd536614a159410c6267', 'bb5f7468db4e3110f3399e5c2ab09350', 'b7d54c16cfad21869c8166c410a62e5e', '486eebeda7bdebce5927d7918eb12df0', '25de28cdd79d796cfecb3d3180e1c677', '2bb2ad830713fc3d7d1bfdfddf4ff742', 'b4f6e5212241973692c9cc2f7aeebb3d', 'cb2d45b523a31d3e3a33dab5dc0bd342', '88ca1f6a444a590e695a95e763ce6dc3', '3d2ef9fd4c49b8eef3634cfdd6391ff7',
58+
'bd72dd64cddad9218397a1cad2f26d67', 'f18fba81d42fe306475c95a239b675c4', '7c9a311a483f8e09e855d093a64f1cc9', '71b9c02e332f0ac479e6e7172e6f5888', 'e22d7bb7b5fdf9dc5a559092767f43aa', '828d4e3d3dcd8de36fcf21fdb0871112', 'ca70d83f2e1df93a98499d18230cb4f5', 'ce3dbb49a1d2d56bc86e67f3b5cae390', '622b67c70869c4f501308efaa92f08a7', '0a6eddf091d95fccb1d5b6660f8e19ce', '70a021e3c1128b46eba2acb77dc02a05', 'f3b104a8f147e678859ae13fb1e92e39', '9750e782e1085296a5a350819f78af97', 'de87abf02112258da54326de0dad1f13', 'edb8ccfa9f2156b02546824c89e8b4ac', 'd060d78733d18e6d4a1e91b562bbb83a', '91d9efed34f1b27d595a479d2e50c886', 'd5f9ab4d42dac4898b7fd2dfdb90f7f0', '6ff2e5eb72d1e258edbef4d7fa000307', '41564528ffbab06b4c91010e95d7c172', 'e1825cb9f73f0cbe40b0b3a85aa350d5', 'b2dca115cc815b7c5a072b7ae06da968', '0cffaa5dbc6dc5d4039b5738cc8452eb', '42759990c5b59cdc1df3a8b92f0d7514', 'f578ec86c5856bad34aaa117524e0815', '54ba38ed501bd4c8af57f1ed587a2431', '14973806ff12d5a58612a4a34ab6e859', 'c9609b8b49020d9072240ac9a4411770', '6b68d3a7e2a6e5f444853f04202e7406', '0ab56037d72ed9139e60a4b9a02aaae0', 'ed2cf4b0d9be99a1b92fb0063909ac66', 'c4b0f90b9e961b9c1ab2006d587c6c6b', '4fcfaf21f34ffc5af7b0ce633ab0ab80', 'c83f157deace6e1931d274c8368648c0', '6e57e21b10cd40ef13fe63c22b58b2b0', 'fbc148d5367f63bb52cc70cd4d0ffd68', '30f109f93bff686b8c1f7bb3f48ff8af', 'f6d1dc92da6cca9960875e392cc8b420', 'fd96b94edeea6ab72ccd7d57a4b503d4', '6464c5c5220928b6cc0b38ada7fb191a', '2d0cfac6f34789c4bf9ca33a983c37a0', 'fdc44855792439d966accdbf7df9cac9',
59+
'85275069150b6110ad5db5410f1a89e7', '516350a4f6a2d659b2204c1b0b1ac533', '1175a3cd6b2579e263324b639f12b1b1', 'fd6ca7ea36b6b478f905f091c62583c3', '9fd65311516c59f984276101d0b80318', 'c41cb335fc85785d8eca7513c35ebd2d', '2db63e5254393a64cbddec0a6721afee', '5c3d0504c1226f51ae021e6a190d7704', '680d389dbbe7cadea204672c28e9a449', '5021486f9dbc708f8c33f088f26e3758', '7dcceaa2fb865568f73494f5e2b3aa8f', 'e8fe4460a4af317fc2849c4f2077547a', '1afb07303d1ae1d0b52a5ba7a245075d', 'ef6876826a441cbbb2c7c84ebe85d325', 'a657082b863a036686415659a295a66a', '0adc338efd606eb4691aac68bffb97f8', 'd835a01a7f879b62db0a61b8ab574d78', '79c02d7a10ff735fe64dfc5bf88b8f41', '40fba5da714de22ae3aca0d1ca41a80b', '944b2d55e96bd62f205bd006754d3322', '9843e641df41e746a68c3df99a0da789', 'a245885261b384ce0261082eb7a0b229', 'e74a84462d89b7db4a7d07011c42b03c', 'a77e0fb892c34bdcb561da344a13b8ef', '33d9a38728c6c3c0fde2ff58bf86c44a', '7a9ee4c76bf5f27e1eba348b1d191ea6', '3220304cec9621329939086695c360a4', '256686225636c8e7dfb32c69d8dcfa3c', 'd31eb3a29018429c09baeb78c6edea43', '05e6a4d998c123286beb1dca770a741c', '1f86910976e3c2b983a2823fc427834e', 'd6a43e21614625dddede6dc631fd82f0', '783a3b9af01864747d7de20dc55262f6', '88d0a7ca73b95c483a879d67b57cb436', '3b3ed94e2932f27653e814135f55b2c9', 'cbd9ddf771e2f427d2054d491b47ee57', '93d1f9df324981115727aae376278726', '4ff1f53e149edbd0bae956d5d93943e3', '7cfef32e9318fcff73c2bd2884e3cff2', '0583b837fee41bd285c9f29462566c04', 'b4e8336d9e5e4d6bbf30293c07e001b8', '860fdf1db1b377ee854557afbf5c797d', '22d4b286b892ef8b5138072bd9566cd2', '18631218fc1e8c8b8201f5eebe637eeb', '98d25b1a524d63fac90f670dbd2a99a8', 'a5401d0c2f3a6ec0de28a5bf02845fc7', 'd7056438ec8650c2862f0417903863cd',
60+
'e2830f5e44b5d390e7d7bbf5ba4fc9fe', '0460dc7b199f9265ed173f3ac6ffdd03', '871a9b8c0ec355163d594a4da160bc0c', '1f291c12284a4ae96e154793c811d1d8', 'd2480ab8bce610edee93722824dd7c6c', '6f1aad9a7d84c9ca6a107a6b530175a6', 'bc5f32bc5bf3f46142123f146f1ca136', 'c6906bd160cf94942422974ebf3b57e3', '5572c1c9acaeb29de4764f22cdc7e566', '05aed84debb5078d9aa6c8b20ff62663', '67bb75769570b3a47ee3e83f3998bad2', 'a1d98925e8acd3e7131e677e8718c325', '4ed49a161d65b6dae132877112dbccd9', '6d505bad20d48653f1d095856ef2eb81', 'dd28df166c8bc00f4af59c5687edc5c9', 'a7a375fb168bd68b5eae2a9b69116a4c', 'c7df9c073269959dcea83877c2bea3b7', '8f47d0f5f0a50856b0aebfd0d32c3024', 'a9852fc70bb43641991126f95231051c', '918f1c89b13b74148f21f6b5697c509e', '094b9ba158a3c0728c7d7ace42e0b8f8', '0d5d079fd4fc3743b288fa472e8053d8', 'e525bb6af01cd8659e94df65d6b05cfc', 'a97bf1010beb6c6916316a782cdda57e', '43f3cced1cbbfb710499449703d63d21', 'e98c033fd7a510119bfdb7b48e88f0af', '15e5c907583d76232d7ff970c3a1c7cd', '45160921927eda11f62a7d06129c66ee', '572d4065692d96070e3dcac2868fd206', 'd792a7d0a4d3ce02711f08b5c556913d', '13aed27c8e15d8683a26590275977d14', '896ac37da8f88e33f05ebd5283d4b806', '34b4c632e6befd44d9741d3b5571fa29', '64fa87c0da688c9b4e1722c5ba66b555', '7c5a341956a86aae089954a057c010f9', '1bf6aa8f66fe2383c0fc7e9cb68d328f', '38d2b7349e1f0771f3205725d1f0dbc8', '474fe9670783f8d1a551ff96f0cb5c7a', '4c9f3c2b964ab525ed5b8f68d9ec9d71', 'e9f517c60c53c26fa13b479b5bd4ac43', 'a27097a860f150681d686fc7b8aeb3b4', '8cb6533eeea450f4b2d706c8bb8f3c2c', '58d0f39601731f290778a3b51766ec6d', 'aafb785305edb6bf99bba386dfb7edef', 'b25a5f6f8196daafbe940daf95691e97', '10000a7099938e8483d1cc832aa17b81', '6bfcb65bb93b7690c41bc57ae6f271c0',
61+
'd461b1af552977ce8755d9d15c15308e',
62+
'ac8b2e1209f000a1aabb0919ba08ae73', 'd899b5bc64bc0ea8defa003750ab96e7' ) ~ 40000,
63+
TRUE ~ memory
64+
)) |>
65+
66+
# mutate(memory = case_when(
4867
# output_file_path %in% c(
49-
# "/vast/scratch/users/mangiola.s/human_cell_atlas/annotated_data/305a8bd9b8e529a967feb5f73cc8c4df.rds" ,
50-
# "/vast/scratch/users/mangiola.s/human_cell_atlas/annotated_data/087c2093be040a404c9685af1ecb3c65.rds",
51-
# "/vast/scratch/users/mangiola.s/human_cell_atlas/annotated_data/86a6d20305d912e98318ad4d1d5d1814.rds",
52-
# "/vast/scratch/users/mangiola.s/human_cell_atlas/annotated_data/829b99a569ec9ebb5fdd1b0b29208aaf.rds",
53-
# "/vast/scratch/users/mangiola.s/human_cell_atlas/annotated_data/176f8892a21bec1bd7bdbc4181af75ed.rds",
54-
# "/vast/scratch/users/mangiola.s/human_cell_atlas/annotated_data/23c822334c194bceb576a9ccb1db5929.rds",
55-
# "/vast/scratch/users/mangiola.s/human_cell_atlas/annotated_data/3c20ba18525fb5e0b41cb8ea189b5d33.rds",
56-
# "/vast/scratch/users/mangiola.s/human_cell_atlas/annotated_data/522dde7ab389d65b265d4cd598576f31.rds",
57-
# "/vast/scratch/users/mangiola.s/human_cell_atlas/annotated_data/2cf3bb4ffbb2024a9ca04baec073ae14.rds",
58-
# "/vast/scratch/users/mangiola.s/human_cell_atlas/annotated_data/c8ff7c63b3152a25c338cc279b31ab07.rds",
59-
# "/vast/scratch/users/mangiola.s/human_cell_atlas/annotated_data/5be263dbc1384b3cec21c5d3c580f838.rds",
60-
# "/vast/scratch/users/mangiola.s/human_cell_atlas/annotated_data/024d53b702b1846a476cabe5d691f992.rds",
61-
# "/vast/scratch/users/mangiola.s/human_cell_atlas/annotated_data/9da244f06591fa49e5649c65ed3b0934.rds",
68+
# "/vast/projects/RCP/human_cell_atlas/annotated_data/305a8bd9b8e529a967feb5f73cc8c4df.rds" ,
69+
# "/vast/projects/RCP/human_cell_atlas/annotated_data/087c2093be040a404c9685af1ecb3c65.rds",
70+
# "/vast/projects/RCP/human_cell_atlas/annotated_data/86a6d20305d912e98318ad4d1d5d1814.rds",
71+
# "/vast/projects/RCP/human_cell_atlas/annotated_data/829b99a569ec9ebb5fdd1b0b29208aaf.rds",
72+
# "/vast/projects/RCP/human_cell_atlas/annotated_data/176f8892a21bec1bd7bdbc4181af75ed.rds",
73+
# "/vast/projects/RCP/human_cell_atlas/annotated_data/23c822334c194bceb576a9ccb1db5929.rds",
74+
# "/vast/projects/RCP/human_cell_atlas/annotated_data/3c20ba18525fb5e0b41cb8ea189b5d33.rds",
75+
# "/vast/projects/RCP/human_cell_atlas/annotated_data/522dde7ab389d65b265d4cd598576f31.rds",
76+
# "/vast/projects/RCP/human_cell_atlas/annotated_data/2cf3bb4ffbb2024a9ca04baec073ae14.rds",
77+
# "/vast/projects/RCP/human_cell_atlas/annotated_data/c8ff7c63b3152a25c338cc279b31ab07.rds",
78+
# "/vast/projects/RCP/human_cell_atlas/annotated_data/5be263dbc1384b3cec21c5d3c580f838.rds",
79+
# "/vast/projects/RCP/human_cell_atlas/annotated_data/024d53b702b1846a476cabe5d691f992.rds",
80+
# "/vast/projects/RCP/human_cell_atlas/annotated_data/9da244f06591fa49e5649c65ed3b0934.rds",
6281
# )~ 160000,
6382
# TRUE ~ memory
6483
# )) |>
@@ -67,16 +86,12 @@ metadata_df |>
6786
mutate(commands = pmap(list(output_file_path, input_file_path, memory, rowid, file_id), ~
6887
c(
6988
glue("CATEGORY=light_data{..4}\nMEMORY={..3}\nCORES=1\nWALL_TIME=30000"),
70-
glue("{..1}:{..2} {metadata} {cell_type_df}\n{tab}Rscript annotate_files.R {..2} {metadata} {cell_type_df} {..1}")
89+
glue("{..1}:{..2} {file_for_annotation_workflow} {cell_type_df}\n{tab}Rscript annotate_files.R {..2} {file_for_annotation_workflow} {cell_type_df} {..1}")
7190
)
7291
)) |>
7392
pull(commands) |>
7493
unlist() |>
75-
write_lines(glue("annotate_files.makeflow"))
94+
write_lines(glue("~/PostDoc/HCAquery/dev/annotate_files.makeflow"))
95+
7696

7797

78-
# c(
79-
# glue("CATEGORY=light_data\nMEMORY=30024\nCORES=1\nWALL_TIME=10000"),
80-
# glue("{annotated_file_paths}:{light_file_paths} {metadata} {cell_type_df}\n{tab}Rscript annotate_files.R {light_file_paths} {metadata} {cell_type_df} {annotated_file_paths}")
81-
# ) |>
82-
# write_lines(glue("annotate_files.makeflow"))

0 commit comments

Comments
 (0)