Skip to content
This repository was archived by the owner on Oct 14, 2025. It is now read-only.

Commit 8b7e5d1

Browse files
committed
update pipeline, 0.2.2
1 parent ba26a3f commit 8b7e5d1

14 files changed

+583
-235
lines changed

dev/DB2_files.R

Lines changed: 140 additions & 66 deletions
Large diffs are not rendered by default.

dev/DB_files.R

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,13 +10,13 @@ library(scMerge)
1010
library(glue)
1111
library(DelayedArray)
1212
library(HDF5Array)
13-
library(HCAquery)
13+
# library(CuratedAtlasQueryR)
1414
library(openssl)
1515

1616

1717
# CREATE MAKEFILE
1818
tab = "\t"
19-
root_directory = "/vast/scratch/users/mangiola.s/human_cell_atlas"
19+
root_directory = "/vast/projects/cellxgene_curated"
2020
splitted_light_data_directory = "/vast/projects/RCP/human_cell_atlas/splitted_light_data" #glue("{root_directory}/splitted_light_data")
2121
DB_data_directory = glue("{root_directory}/splitted_DB_data")
2222
gene_names = glue("{root_directory}/gene_names.rds")

dev/annotate_files.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ library(celldex)
1616
library(SingleR)
1717
library(glmGamPoi)
1818
source("utility.R")
19-
library(HCAquery)
19+
library(CuratedAtlasQueryR)
2020
library(BiocParallel)
2121
library(scuttle)
2222

dev/annotation_harmonise.R

Lines changed: 373 additions & 30 deletions
Large diffs are not rendered by default.

dev/build_makefile_annotate_files.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,7 @@ metadata_df |>
127127
)) |>
128128
pull(commands) |>
129129
unlist() |>
130-
write_lines(glue("~/PostDoc/HCAquery/dev/annotate_files.makeflow"))
130+
write_lines(glue("~/PostDoc/CuratedAtlasQueryR/dev/annotate_files.makeflow"))
131131

132132

133133

dev/get_gene_names.R

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3,23 +3,29 @@ library(SingleCellExperiment)
33
library(tidyverse)
44
library(purrr)
55
library(glue)
6-
library(HCAquery)
6+
library(CuratedAtlasQueryR)
7+
library(HDF5Array)
8+
9+
library(dbplyr)
10+
library(DBI)
11+
library(duckdb)
12+
713

814
# Read arguments
915
args = commandArgs(trailingOnly=TRUE)
10-
root_directory = "/vast/projects/RCP/human_cell_atlas" # args[[1]]
11-
metadata_sql = glue("{root_directory}/metadata_annotated.sqlite")
16+
metadata_DB = "/vast/projects/cellxgene_curated/metadata_annotated_0.2.3.parquet"
17+
root_directory = "/vast/projects/cellxgene_curated" # args[[1]]
1218
raw_data_directory = glue("{root_directory}/splitted_data_0.2")
1319

14-
15-
1620
samples =
17-
# get_metadata(metadata_sql) |>
18-
readRDS("/vast/projects/RCP/human_cell_atlas/metadata_annotated.rds") |>
19-
distinct(file_id, .sample) |>
21+
duckdb() |>
22+
dbConnect(drv = _, read_only = TRUE) |>
23+
tbl(metadata_DB) |>
24+
distinct(file_id, sample_) |>
25+
as_tibble() |>
2026
group_by(file_id) |>
2127
slice(1) |>
22-
pull(.sample)
28+
pull(sample_)
2329

2430
# Read gene names
2531
dir(raw_data_directory, full.names = TRUE) |>

dev/get_metadata.R

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,7 @@ library(openssl)
1313

1414
# # CREATE MAKEFILE
1515
# tab = "\t"
16-
# root_directory = "/vast/projects/RCP/human_cell_atlas"
17-
# # my_root_directory = "/vast/scratch/users/mangiola.s/human_cell_atlas"
16+
# root_directory = "/vast/projects/cellxgene_curated"
1817
# metadata_directory = glue("{root_directory}/metadata_0.2")
1918
# raw_data_directory = glue("{root_directory}/raw_data")
2019
# files_metadata = glue("{root_directory}/files_metadata.rds")
@@ -54,7 +53,7 @@ library(openssl)
5453
# glue("CATEGORY=merge_metadata\nMEMORY=80024\nCORES=1\nWALL_TIME=10000"),
5554
# glue("{metadata_path}:{paste(output_files_path, collapse = \" \")} {files_metadata}\n{tab}Rscript merge_metadata.R {paste(output_files_path, collapse = \" \")} {files_metadata} {metadata_path}")
5655
# ) |>
57-
# write_lines(glue("~/PostDoc/HCAquery/dev/get_metadata.makeflow"))
56+
# write_lines(glue("~/PostDoc/CuratedAtlasQueryR/dev/get_metadata.makeflow"))
5857

5958
source("utility.R")
6059

dev/light_files.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@ library(HDF5Array)
108108
# )) |>
109109
# pull(commands) |>
110110
# unlist() |>
111-
# write_lines(glue("~/PostDoc/HCAquery/dev/light_files.makeflow"))
111+
# write_lines(glue("~/PostDoc/CuratedAtlasQueryR/dev/light_files.makeflow"))
112112

113113

114114

dev/merge_metadata.R

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,28 @@ common_colnames =
4242

4343
print(common_colnames)
4444

45+
# # # Get the rest of uncommon metadata
46+
# uncommon_metadata =
47+
# input_file_paths |>
48+
# enframe(value = "file") |>
49+
# mutate(metadata_not_harmonised = imap(
50+
# file,
51+
# ~ .x %>%
52+
# readRDS() |>
53+
# select(-one_of(common_colnames), cell_ = .cell, file_id) |> select(cell_, file_id, everything()) |>
54+
# mutate(file_id = file_id |> as.character())
55+
# )) |>
56+
# mutate(file_id = map_chr(metadata_not_harmonised, ~ .x |> distinct(file_id) |> pull(file_id))) |>
57+
# select(-name, -file) |>
58+
# select(file_id, metadata_not_harmonised) |>
59+
# mutate(saved = map2(
60+
# metadata_not_harmonised, file_id,
61+
# ~ .x %>%
62+
# {print(.y); (.)} |>
63+
# saveRDS(glue("/vast/projects/cellxgene_curated/metadata_non_harmonised_0.2/{.y}.rds"), compress = "xz")
64+
# ))
65+
66+
4567
# Get all metadata
4668

4769
metadata =

dev/metadata_cell_type.csv

Lines changed: 18 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -302,10 +302,10 @@ contractile cell,
302302
cord blood hematopoietic stem cell,
303303
cortical cell of adrenal gland,
304304
cultured cell,
305-
DN3 thymocyte,
306-
DN4 thymocyte,
307-
double negative thymocyte,
308-
"double-positive, alpha-beta thymocyte",
305+
DN3 thymocyte,immune
306+
DN4 thymocyte,immune
307+
double negative thymocyte,immune
308+
"double-positive, alpha-beta thymocyte",immune
309309
duodenum glandular cell,
310310
early promyelocyte,
311311
embryonic stem cell,
@@ -316,16 +316,16 @@ enterocyte,
316316
enterocyte of epithelium of large intestine,
317317
enterocyte of epithelium of small intestine,
318318
enteroendocrine cell,
319-
enucleate erythrocyte,
319+
enucleate erythrocyte,immune
320320
enucleated reticulocyte,
321321
epicardial adipocyte,
322322
epidermal cell,
323-
epidermal Langerhans cell,
324-
erythroblast,
325-
erythrocyte,
326-
erythroid lineage cell,
327-
erythroid progenitor cell,
328-
"erythroid progenitor cell, mammalian",
323+
epidermal Langerhans cell,immune
324+
erythroblast,immune
325+
erythrocyte,immune
326+
erythroid lineage cell,immune
327+
erythroid progenitor cell,immune
328+
"erythroid progenitor cell, mammalian",immune
329329
eukaryotic cell,
330330
extravillous trophoblast,
331331
eye photoreceptor cell,
@@ -363,7 +363,7 @@ kidney granular cell,
363363
kidney interstitial cell,
364364
kidney interstitial fibroblast,
365365
Kupffer cell,
366-
Langerhans cell,
366+
Langerhans cell,immune
367367
large intestine goblet cell,
368368
late promyelocyte,
369369
lens fiber cell,
@@ -472,7 +472,7 @@ syncytiotrophoblast cell,
472472
taste receptor cell,
473473
tendon cell,
474474
theca cell,
475-
thymocyte,
475+
thymocyte,immune
476476
thyroid follicular cell,
477477
tongue muscle cell,
478478
tracheal goblet cell,
@@ -492,4 +492,8 @@ vasa recta ascending limb cell,
492492
vasa recta descending limb cell,
493493
vascular leptomeningeal cell,
494494
vascular lymphangioblast,
495-
ventricular cardiac muscle cell,
495+
ventricular cardiac muscle cell,
496+
enterocyte of epithelium proper of ileum,
497+
smooth muscle fiber of ileum ,
498+
ileal goblet cell ,
499+
enteroendocrine cell of small intestine ,

0 commit comments

Comments
 (0)