Skip to content
This repository was archived by the owner on Oct 14, 2025. It is now read-only.

Commit 2f66076

Browse files
committed
update dev scripts
1 parent bd4b1d2 commit 2f66076

File tree

4 files changed

+557
-233
lines changed

4 files changed

+557
-233
lines changed

dev/cellxgene_to_metadata.R

Lines changed: 34 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -977,38 +977,38 @@ job::job({
977977

978978
# Perform optimised joins within DuckDB
979979
copy_query <- "
980-
COPY (
981-
SELECT
982-
cell_to_refined_sample_from_Mengyuan.cell_,
983-
cell_to_refined_sample_from_Mengyuan.observation_joinid,
984-
cell_to_refined_sample_from_Mengyuan.dataset_id,
985-
cell_to_refined_sample_from_Mengyuan.sample_id,
986-
cell_to_refined_sample_from_Mengyuan.cell_type,
987-
cell_to_refined_sample_from_Mengyuan.cell_type_ontology_term_id,
988-
sample_metadata.*,
989-
age_days_tbl.age_days,
990-
tissue_grouped.tissue_groups
991-
992-
FROM cell_to_refined_sample_from_Mengyuan
993-
994-
LEFT JOIN cell_ids_for_metadata
995-
ON cell_ids_for_metadata.cell_ = cell_to_refined_sample_from_Mengyuan.cell_
996-
AND cell_ids_for_metadata.observation_joinid = cell_to_refined_sample_from_Mengyuan.observation_joinid
997-
AND cell_ids_for_metadata.dataset_id = cell_to_refined_sample_from_Mengyuan.dataset_id
998-
999-
LEFT JOIN sample_metadata
1000-
ON cell_ids_for_metadata.sample_ = sample_metadata.sample_
1001-
AND cell_ids_for_metadata.donor_id = sample_metadata.donor_id
1002-
AND cell_ids_for_metadata.dataset_id = sample_metadata.dataset_id
1003-
1004-
LEFT JOIN age_days_tbl
1005-
ON age_days_tbl.development_stage = sample_metadata.development_stage
1006-
1007-
LEFT JOIN tissue_grouped
1008-
ON tissue_grouped.tissue = sample_metadata.tissue
1009-
1010-
) TO '/vast/projects/cellxgene_curated/metadata_cellxgenedp_Apr_2024/cell_metadata_new.parquet'
1011-
(FORMAT PARQUET, COMPRESSION 'gzip');
980+
COPY (
981+
SELECT
982+
cell_to_refined_sample_from_Mengyuan.cell_,
983+
cell_to_refined_sample_from_Mengyuan.observation_joinid,
984+
cell_to_refined_sample_from_Mengyuan.dataset_id,
985+
cell_to_refined_sample_from_Mengyuan.sample_id,
986+
cell_to_refined_sample_from_Mengyuan.cell_type,
987+
cell_to_refined_sample_from_Mengyuan.cell_type_ontology_term_id,
988+
sample_metadata.*,
989+
age_days_tbl.age_days,
990+
tissue_grouped.tissue_groups
991+
992+
FROM cell_to_refined_sample_from_Mengyuan
993+
994+
LEFT JOIN cell_ids_for_metadata
995+
ON cell_ids_for_metadata.cell_ = cell_to_refined_sample_from_Mengyuan.cell_
996+
AND cell_ids_for_metadata.observation_joinid = cell_to_refined_sample_from_Mengyuan.observation_joinid
997+
AND cell_ids_for_metadata.dataset_id = cell_to_refined_sample_from_Mengyuan.dataset_id
998+
999+
LEFT JOIN sample_metadata
1000+
ON cell_ids_for_metadata.sample_ = sample_metadata.sample_
1001+
AND cell_ids_for_metadata.donor_id = sample_metadata.donor_id
1002+
AND cell_ids_for_metadata.dataset_id = sample_metadata.dataset_id
1003+
1004+
LEFT JOIN age_days_tbl
1005+
ON age_days_tbl.development_stage = sample_metadata.development_stage
1006+
1007+
LEFT JOIN tissue_grouped
1008+
ON tissue_grouped.tissue = sample_metadata.tissue
1009+
1010+
) TO '/vast/projects/cellxgene_curated/metadata_cellxgenedp_Apr_2024/cell_metadata.parquet'
1011+
(FORMAT PARQUET, COMPRESSION 'gzip');
10121012
"
10131013

10141014
# Execute the final query to write the result to a Parquet file
@@ -1024,7 +1024,7 @@ job::job({
10241024

10251025
cell_metadata = tbl(
10261026
dbConnect(duckdb::duckdb(), dbdir = ":memory:"),
1027-
sql("SELECT * FROM read_parquet('/vast/projects/cellxgene_curated/metadata_cellxgenedp_Apr_2024/cell_metadata_new.parquet')")
1027+
sql("SELECT * FROM read_parquet('/vast/projects/cellxgene_curated/metadata_cellxgenedp_Apr_2024/cell_metadata.parquet')")
10281028
)
10291029

10301030
tissues_grouped = get_tissue_grouped()
@@ -1081,7 +1081,7 @@ dbDisconnect(con, shutdown = TRUE)
10811081
non_immune_harmonisation =
10821082
read_csv("/vast/projects/mangiola_immune_map/PostDoc/CuratedAtlasQueryR/dev/cell_type_harmonisation_non_immune.csv")
10831083

1084-
system("~/bin/rclone copy /vast/projects/mangiola_immune_map/PostDoc/CuratedAtlasQueryR/dev/cell_type_harmonisation_non_immune.csv box_adelaide:/Mangiola_ImmuneAtlas/reannotation_consensus/")
1084+
# system("~/bin/rclone copy /vast/projects/mangiola_immune_map/PostDoc/CuratedAtlasQueryR/dev/cell_type_harmonisation_non_immune.csv box_adelaide:/Mangiola_ImmuneAtlas/reannotation_consensus/")
10851085

10861086

10871087
tbl(

dev/execute_hpcell_on_census_and_defining_data_tranformation.R

Lines changed: 19 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -218,19 +218,19 @@ tar_script({
218218
library(crew.cluster)
219219
tar_option_set(
220220
memory = "transient",
221-
garbage_collection = 100,
221+
garbage_collection = 1000,
222222
storage = "worker",
223223
retrieval = "worker",
224224
error = "continue",
225-
# debug = "dataset_id_sce_b5312463451d7ee3",
225+
debug = "annotation_tbl_light",
226226
cue = tar_cue(mode = "never"),
227227
controller = crew_controller_group(
228228
list(
229229
crew_controller_slurm(
230230
name = "tier_1",
231231
script_lines = "#SBATCH --mem 8G",
232232
slurm_cpus_per_task = 1,
233-
workers = 300,
233+
workers = 500,
234234
tasks_max = 10,
235235
verbose = T,
236236
launch_max = 5
@@ -276,7 +276,8 @@ tar_script({
276276
unnest(blueprint_scores_fine) |>
277277
select(.cell, blueprint_first.labels.fine, monaco_first.labels.fine, any_of("azimuth_predicted.celltype.l2"), monaco_scores_fine, contains("macro"), contains("CD4") ) |>
278278
unnest(monaco_scores_fine) |>
279-
select(.cell, blueprint_first.labels.fine, monaco_first.labels.fine, any_of("azimuth_predicted.celltype.l2"), contains("macro") , contains("CD4"), contains("helper"), contains("Th"))
279+
select(.cell, blueprint_first.labels.fine, monaco_first.labels.fine, any_of("azimuth_predicted.celltype.l2"), contains("macro") , contains("CD4"), contains("helper"), contains("Th")) |>
280+
rename(cell_ = .cell)
280281
}
281282

282283
list(
@@ -313,7 +314,7 @@ job::job({
313314
tar_make(
314315
script = "/vast/scratch/users/mangiola.s/lighten_annotation_tbl_target.R",
315316
store = "/vast/scratch/users/mangiola.s/lighten_annotation_tbl_target",
316-
reporter = "summary"
317+
reporter = "summary", callr_function = NULL
317318
)
318319

319320
})
@@ -323,16 +324,26 @@ library(arrow)
323324
library(dplyr)
324325
library(duckdb)
325326

326-
cell_type_original <- tbl(
327+
# Write annotation light
328+
tar_read(annotation_tbl_light, store = "/vast/scratch/users/mangiola.s/lighten_annotation_tbl_target") |>
329+
rename(
330+
blueprint_first_labels_fine = blueprint_first.labels.fine,
331+
monaco_first_labels_fine = monaco_first.labels.fine,
332+
azimuth_predicted_celltype_l2 = azimuth_predicted.celltype.l2
333+
) |>
334+
write_parquet("/vast/projects/cellxgene_curated/metadata_cellxgenedp_Apr_2024/annotation_tbl_light.parquet")
335+
336+
cell_metadata <- tbl(
327337
dbConnect(duckdb::duckdb(), dbdir = ":memory:"),
328338
sql("SELECT * FROM read_parquet('/vast/projects/cellxgene_curated/metadata_cellxgenedp_Apr_2024/cell_metadata.parquet')")
329339
) |>
330340
mutate(cell_ = paste0(cell_, "___", dataset_id)) |>
331-
select(.cell = cell_, observation_joinid, contains("cell_type"), dataset_id, self_reported_ethnicity, tissue, donor_id, sample_id, is_primary_data, assay)
341+
select(cell_, observation_joinid, contains("cell_type"), dataset_id, self_reported_ethnicity, tissue, donor_id, sample_id, is_primary_data, assay)
342+
332343

333344

334345
tar_read(annotation_tbl_light, store = "/vast/scratch/users/mangiola.s/lighten_annotation_tbl_target") |>
335-
left_join(cell_type_original, copy = TRUE) |>
346+
left_join(cell_metadata, copy = TRUE) |>
336347
write_parquet("/vast/projects/cellxgene_curated/metadata_cellxgenedp_Apr_2024/cell_annotation.parquet")
337348

338349
system("~/bin/rclone copy /vast/projects/cellxgene_curated/metadata_cellxgenedp_Apr_2024/cell_annotation.parquet box_adelaide:/Mangiola_ImmuneAtlas/reannotation_consensus/")

0 commit comments

Comments
 (0)