Skip to content
This repository was archived by the owner on Oct 14, 2025. It is now read-only.

Commit 2c59f35

Browse files
committed
update test debug
1 parent 2fed3a7 commit 2c59f35

File tree

1 file changed

+14
-25
lines changed

1 file changed

+14
-25
lines changed

dev/execute_hpcell_on_census_and_defining_data_tranformation.R

Lines changed: 14 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -16,20 +16,21 @@ library(CuratedAtlasQueryR)
1616
library(fs)
1717
library(HPCell)
1818
library(crew.cluster)
19-
directory = "/home/users/allstaff/shen.m/scratch/Census_rerun/split_h5ad_based_on_sample_id/"
19+
directory = "/vast/scratch/users/shen.m/Census_rerun/split_h5ad_based_on_sample_id/"
2020
sample_anndata <- dir(glue("{directory}"), full.names = T)
21-
downloaded_samples_tbl <- read_parquet("/home/users/allstaff/shen.m/scratch/Census_rerun/census_samples_to_download_groups.parquet")
21+
downloaded_samples_tbl <- read_parquet("/vast/scratch/users/shen.m/Census_rerun/census_samples_to_download_groups.parquet")
2222
downloaded_samples_tbl <- downloaded_samples_tbl |>
23-
rename(cell_number = list_length) |>
23+
rename(cell_number = list_length) |>
2424
mutate(cell_number = cell_number |> as.integer(),
25-
file_name = glue("{directory}{sample_2}.h5ad") |> as.character(),
25+
file_name = glue("{directory}{sample_2}.h5ad") |> as.character(),
2626
tier = case_when(
2727
cell_number < 500 ~ "tier_1", cell_number >= 500 &
2828
cell_number < 1000 ~ "tier_2", cell_number >= 1000 &
2929
cell_number < 10000 ~ "tier_3", cell_number >= 10000 ~ "tier_4"
3030
))
3131

3232
result_directory = "/vast/projects/cellxgene_curated/metadata_cellxgenedp_Apr_2024"
33+
3334
sample_meta <- tar_read(metadata_dataset_id_common_sample_columns, store = glue("{result_directory}/_targets"))
3435
sample_tbl = downloaded_samples_tbl |> left_join(get_metadata() |> select(dataset_id, contains("norm")) |>
3536
distinct() |> filter(!is.na(x_normalization)) |>
@@ -108,8 +109,7 @@ sample_tbl <- sample_tbl |> mutate(transformation_function = map(
108109
eval()
109110
))
110111

111-
#sample_tbl |> saveRDS("~/scratch/Census_rerun/sample_tbl_input_for_hpcell.rds")
112-
sample_tbl <- readRDS("~/scratch/Census_rerun/sample_tbl_input_for_hpcell.rds")
112+
sample_tbl <- readRDS("/vast/scratch/users/shen.m/Census_rerun/sample_tbl_input_for_hpcell.rds")
113113

114114
# Set the parent directory where the subdirectories will be created
115115
# parent_dir <- "~/scratch/Census_rerun/"
@@ -128,13 +128,14 @@ sample_tbl <- readRDS("~/scratch/Census_rerun/sample_tbl_input_for_hpcell.rds")
128128
# }
129129

130130
# Run 1000 samples per run. Save log and result in the corresponding store
131-
store = "~/scratch/Census_rerun/run3/"
131+
store = "/vast/projects/mangiola_immune_map/PostDoc/CuratedAtlasQueryR/dev/debug_hpcell/target_store"
132132
setwd(glue("{store}"))
133133
sliced_sample_tbl = sample_tbl |> slice(2001:3000) |> select(file_name, tier, cell_number, dataset_id,
134134
sample_2, transformation_function)
135135

136136
# Enable sample_names.rds to store sample names for the input
137137
sample_names <- sliced_sample_tbl |> pull(file_name) |> set_names(sliced_sample_tbl |> pull(sample_2))
138+
sample_names = sample_names |> str_replace("/home/users/allstaff/shen.m/scratch", "/vast/scratch/users/shen.m")
138139

139140
sample_names |>
140141
initialise_hpc(
@@ -178,29 +179,17 @@ sample_names |>
178179
)
179180
)
180181

181-
) |>
182+
) |>
182183
tranform_assay(fx = sliced_sample_tbl |>
183184
pull(transformation_function),
184-
target_output = "sce_transformed") |>
185+
target_output = "sce_transformed")
186+
187+
|>
185188

186189
# Remove empty outliers based on RNA count threshold per cell
187-
remove_empty_threshold(target_input = "sce_transformed", RNA_feature_threshold = 200) |>
188-
189-
# Remove dead cells
190-
remove_dead_scuttle(target_input = "sce_transformed") |>
191-
192-
# Score cell cycle
193-
score_cell_cycle_seurat(target_input = "sce_transformed") |>
194-
195-
# Remove doublets
196-
remove_doublets_scDblFinder(target_input = "sce_transformed") |>
190+
remove_empty_DropletUtils(target_input = "sce_transformed", RNA_feature_threshold = 200) |>
197191

198192
# Annotation
199-
annotate_cell_type(target_input = "sce_transformed", azimuth_reference = "pbmcref") |>
200-
201-
normalise_abundance_seurat_SCT(
202-
factors_to_regress = c("subsets_Mito_percent", "subsets_Ribo_percent", "G2M.Score"),
203-
target_input = "sce_transformed"
204-
)
193+
annotate_cell_type(target_input = "sce_transformed", azimuth_reference = "pbmcref")
205194

206195

0 commit comments

Comments
 (0)