@@ -17,14 +17,16 @@ library(HCAquery)
1717options(scipen = 999 )
1818#
1919
20- # # CREATE MAKEFILE
21- # tab = "\t"
22- # root_directory = "/vast/projects/RCP/human_cell_atlas"
23- # raw_data_directory = glue("{root_directory}/raw_data")
24- # splitted_DB2_data_directory = glue("{root_directory}/splitted_DB2_data")
25- # file_cell_types_directory = glue("{root_directory}/file_cell_types")
26- # input_files_path = dir(file_cell_types_directory, full.names = TRUE)
27- # #
20+ # CREATE MAKEFILE: locations and inputs used when generating the rules
21+ tab = "\t" # bare tab, no padding: interpolated as {tab} in front of the recipe in the rule template
22+ root_directory = "/vast/projects/RCP/human_cell_atlas"
23+ raw_data_directory = glue("{root_directory}/raw_data")
24+ splitted_DB2_data_directory = glue("{root_directory}/splitted_DB2_data")
25+ file_cell_types_directory = glue("{root_directory}/file_cell_types")
26+ input_files_path = dir(file_cell_types_directory, full.names = TRUE) # full paths of the entries under file_cell_types_directory
27+ gene_names = glue("{root_directory}/gene_names.rds") # interpolated as {gene_names} in the rule template
28+ #
29+ #
2830# ## metadata = readRDS(metadata_path)
2931#
3032# get_metadata() |>
@@ -67,7 +69,7 @@ options(scipen = 999)
6769# c(
6870# glue("CATEGORY=split_data{..4}\nMEMORY={..3}\nCORES=1\nWALL_TIME=30000"),
6971# glue(
70- # "{..1}:{..2}\n{tab}Rscript DB2_files.R {..2} {..1}"
72+ # "{..1}:{..2}\n{tab}Rscript DB2_files.R {..2} {gene_names} {..1}"
7173# )
7274# )
7375# )) |>
@@ -80,7 +82,8 @@ options(scipen = 999)
8082# Read arguments
8183args = commandArgs(trailingOnly = TRUE )
8284input_file = args [[1 ]]
83- output_file = args [[2 ]]
85+ all_gene_names = args [[2 ]] # second positional argument: path given to readRDS() further down; presumably a vector of gene names (it is setdiff'ed against rownames(sce))
86+ output_file = args [[3 ]] # third positional argument: destination for saveHDF5SummarizedExperiment(); its parent directory is created just below
8487
8588output_file | > dirname() | > dir.create( showWarnings = FALSE , recursive = TRUE )
8689file_id = basename(input_file ) | > tools :: file_path_sans_ext() | > str_split(" ___" ) %> % . [[1 ]] %> % . [1 ]
@@ -157,6 +160,21 @@ colnames(sce) = colnames(X)
157160rm(X )
158161gc()
159162
163+ # Add missing genes: pad sce with one row per gene stored in all_gene_names that sce does not have
164+ missing_genes = readRDS(all_gene_names) |> setdiff(rownames(sce))
165+ if (length(missing_genes) > 0) { # skip entirely when nothing is missing, so no zero-row HDF5 sink is created
166+   # Nothing is ever written to the sink; NOTE(review): relies on the unwritten dataset reading back as zeros - confirm
167+   missing_sink = HDF5RealizationSink(c(length(missing_genes), ncol(sce)), as.sparse = TRUE)
168+   close(missing_sink) # documented sink protocol: close before coercing to DelayedArray
169+   missing_matrix = as(missing_sink, "DelayedArray")
170+   rownames(missing_matrix) = missing_genes
171+   colnames(missing_matrix) = colnames(sce)
172+   # Reuse the column data and internal column data of sce so the two objects describe the same columns
173+   missing_sce = SingleCellExperiment(list(X = missing_matrix), colData = colData(sce))
174+   missing_sce@int_colData = sce@int_colData
175+   # Append the padding rows below the rows already present in sce
176+   sce = sce |> rbind(missing_sce)
177+ }
160178sce | > saveHDF5SummarizedExperiment(output_file , replace = TRUE )
161179
162180
0 commit comments