@@ -16,20 +16,21 @@ library(CuratedAtlasQueryR)
1616library(fs )
1717library(HPCell )
1818library(crew.cluster )
# Directory that `file_name` paths below are built from (`{sample_2}.h5ad`).
directory <- "/vast/scratch/users/shen.m/Census_rerun/split_h5ad_based_on_sample_id/"
sample_anndata <- dir(directory, full.names = TRUE)

# Per-sample download table. `list_length` is renamed to `cell_number`, cast
# to integer, and used to bucket each sample into a size tier.
downloaded_samples_tbl <- read_parquet(
  "/vast/scratch/users/shen.m/Census_rerun/census_samples_to_download_groups.parquet"
)
downloaded_samples_tbl <- downloaded_samples_tbl |>
  rename(cell_number = list_length) |>
  mutate(
    cell_number = cell_number |> as.integer(),
    file_name = glue("{directory}{sample_2}.h5ad") |> as.character(),
    # Tier boundaries are half-open: [0, 500), [500, 1000), [1000, 10000),
    # [10000, Inf). A missing cell_number yields a missing tier.
    tier = case_when(
      cell_number < 500 ~ "tier_1",
      cell_number >= 500 & cell_number < 1000 ~ "tier_2",
      cell_number >= 1000 & cell_number < 10000 ~ "tier_3",
      cell_number >= 10000 ~ "tier_4"
    )
  )
3131
# Root of the metadata pipeline output; its `_targets` store is read below.
result_directory <- "/vast/projects/cellxgene_curated/metadata_cellxgenedp_Apr_2024"

# Read the `metadata_dataset_id_common_sample_columns` target from that store.
sample_meta <- tar_read(
  metadata_dataset_id_common_sample_columns,
  store = glue("{result_directory}/_targets")
)
3435sample_tbl = downloaded_samples_tbl | > left_join(get_metadata() | > select(dataset_id , contains(" norm" )) | >
3536 distinct() | > filter(! is.na(x_normalization )) | >
@@ -108,8 +109,7 @@ sample_tbl <- sample_tbl |> mutate(transformation_function = map(
108109 eval()
109110 ))
110111
# Load the saved HPCell input table from scratch. This reassigns `sample_tbl`,
# replacing whatever value was computed earlier in the script.
sample_tbl <- readRDS("/vast/scratch/users/shen.m/Census_rerun/sample_tbl_input_for_hpcell.rds")
113113
114114# Set the parent directory where the subdirectories will be created
115115# parent_dir <- "~/scratch/Census_rerun/"
@@ -128,13 +128,14 @@ sample_tbl <- readRDS("~/scratch/Census_rerun/sample_tbl_input_for_hpcell.rds")
128128# }
129129
130130# Run 1000 samples per run. Save log and result in the corresponding store
store <- "/vast/projects/mangiola_immune_map/PostDoc/CuratedAtlasQueryR/dev/debug_hpcell/target_store"
# NOTE(review): setwd() changes the working directory for the rest of the
# session. Kept because later steps presumably rely on it; confirm.
setwd(store)

# Take rows 2001-3000 of the input table and keep only the columns used below.
sliced_sample_tbl <- sample_tbl |>
  slice(2001:3000) |>
  select(
    file_name, tier, cell_number, dataset_id,
    sample_2, transformation_function
  )

# Enable sample_names.rds to store sample names for the input
# The old home-scratch prefix is rewritten to the /vast location *before* the
# names are attached: depending on the installed stringr version,
# str_replace() may return an unnamed vector, which would silently drop the
# `sample_2` names. fixed() makes the prefix match literally (no regex `.`).
sample_names <- sliced_sample_tbl |>
  pull(file_name) |>
  str_replace(
    fixed("/home/users/allstaff/shen.m/scratch"),
    "/vast/scratch/users/shen.m"
  ) |>
  set_names(sliced_sample_tbl |> pull(sample_2))
138139
139140sample_names | >
140141 initialise_hpc(
@@ -178,29 +179,17 @@ sample_names |>
178179 )
179180 )
180181
) |>
  # Apply each sample's transformation function; the result is stored under
  # the target name "sce_transformed", which the following steps take as input.
  # (`tranform_assay` is spelled as it is called in the original script.)
  tranform_assay(
    fx = sliced_sample_tbl |> pull(transformation_function),
    target_output = "sce_transformed"
  ) |>
  # The pipe must end the line above: a line that *starts* with `|>` after a
  # completed expression is a parse error and would cut the pipeline here.

  # Remove empty outliers based on RNA count threshold per cell
  remove_empty_DropletUtils(
    target_input = "sce_transformed",
    RNA_feature_threshold = 200
  ) |>

  # Annotation
  annotate_cell_type(
    target_input = "sce_transformed",
    azimuth_reference = "pbmcref"
  )
205194
206195
0 commit comments