@@ -16,12 +16,13 @@ library(CuratedAtlasQueryR)
1616library(fs )
1717library(HPCell )
1818library(crew.cluster )
19- directory = " ~ /scratch/Census_rerun/split_h5ad_based_on_sample_id/"
19+ directory = " /home/users/allstaff/shen.m /scratch/Census_rerun/split_h5ad_based_on_sample_id/"
2020sample_anndata <- dir(glue(" {directory}" ), full.names = T )
21- downloaded_samples_tbl <- read_parquet(" ~ /scratch/Census_rerun/census_samples_to_download_groups.parquet" )
21+ downloaded_samples_tbl <- read_parquet(" /home/users/allstaff/shen.m /scratch/Census_rerun/census_samples_to_download_groups.parquet" )
2222downloaded_samples_tbl <- downloaded_samples_tbl | >
2323 rename(cell_number = list_length ) | >
24- mutate(file_name = glue(" {directory}{sample_2}.h5ad" ) | > as.character(),
24+ mutate(cell_number = cell_number | > as.integer(),
25+ file_name = glue(" {directory}{sample_2}.h5ad" ) | > as.character(),
2526 tier = case_when(
2627 cell_number < 500 ~ " tier_1" , cell_number > = 500 &
2728 cell_number < 1000 ~ " tier_2" , cell_number > = 1000 &
@@ -36,7 +37,7 @@ sample_tbl = downloaded_samples_tbl |> left_join(get_metadata() |> select(datase
3637
3738
3839sample_tbl <- sample_tbl | > left_join(sample_meta , by = " dataset_id" ) | > distinct(file_name , tier , cell_number , dataset_id , sample_2 ,
39- observation_joinid , x_normalization , x_approximate_distribution ) | >
40+ x_normalization , x_approximate_distribution ) | >
4041 mutate(transform_method = case_when(str_like(x_normalization , " C%" ) ~ " log" ,
4142 x_normalization == " none" ~ " log" ,
4243 x_normalization == " normalized" ~ " log" ,
0 commit comments