@@ -126,3 +126,61 @@ update_unharmonised <- function(unharmonised_parquet_dir, ...){
126126 ...
127127 )
128128}
129+
#' Converts a series of HDF5Array-serialized SingleCellExperiments to AnnData
#'
#' Every immediate sub-directory of `src` is loaded with
#' [HDF5Array::loadHDF5SummarizedExperiment()], converted with
#' [zellkonverter::SCE2AnnData()] and written to `dest` as
#' `<sub-directory name>.h5ad`. Sub-directories whose output file already
#' exists are skipped, so an interrupted run can simply be restarted.
#'
#' @param src A character scalar. The path to a directory containing one or more
#'   directories created by [HDF5Array::saveHDF5SummarizedExperiment()].
#' @param dest A character scalar. The path to a directory in which to save the
#'   created anndata files. It is created if it does not exist yet.
#' @keywords internal
#' @return A character vector holding one `.h5ad` path per sub-directory of
#'   `src`, including files that already existed and were therefore skipped.
#' @examples
#' \donttest{
#' dir_to_anndata(
#'     "/vast/projects/cellxgene_curated/splitted_DB2_data_0.2.1",
#'     "/vast/projects/cellxgene_curated/splitted_DB2_anndata_0.2.1"
#' )
#' dir_to_anndata(
#'     "/vast/projects/cellxgene_curated/splitted_DB2_data_scaled_0.2.1",
#'     "/vast/projects/cellxgene_curated/splitted_DB2_anndata_scaled_0.2.1"
#' )
#' }
dir_to_anndata <- function(src, dest) {
    dir.create(dest, showWarnings = FALSE, recursive = TRUE)

    # Quick utility to convert the SCE files into AnnData format for use in
    # Python. `src` and `dest` are passed explicitly through `...` because
    # basilisk does not guarantee that `fun` can see the calling environment.
    basilisk::basiliskRun(
        env = zellkonverter::zellkonverterAnnDataEnv(),
        fun = function(src, dest) {
            # Only the immediate children of `src` are saved experiments; with
            # `recursive = FALSE` `src` itself is not part of the listing.
            sce_dirs <- list.dirs(src, full.names = TRUE, recursive = FALSE)

            purrr::map_chr(sce_dirs, function(sce_dir) {
                cli::cli_alert_info(" Processing {sce_dir}.")
                out_path <- file.path(dest, paste0(basename(sce_dir), ".h5ad"))

                if (file.exists(out_path)) {
                    cli::cli_alert_info(" {out_path} already exists. Skipping")
                    return(out_path)
                }

                sce <- HDF5Array::loadHDF5SummarizedExperiment(sce_dir)
                single_column <- ncol(sce) == 1L
                if (single_column) {
                    # Hack, so that single-column SCEs will convert
                    # correctly
                    cli::cli_alert_info(
                        " {sce_dir} has only 1 column. Duplicating column."
                    )
                    sce <- cbind(sce, sce)
                }

                ad <- zellkonverter::SCE2AnnData(sce)
                if (single_column) {
                    # Remove the duplicate column again. This has to happen on
                    # the AnnData object (modifying `sce` at this point would
                    # not affect what is written). The index is Python-side,
                    # hence 0-based; `copy()` turns the view into a real
                    # object before it is written.
                    ad <- ad$`__getitem__`(0L)$copy()
                }

                # TODO: customize chunking here, when anndata supports it
                # (see https://github.com/scverse/anndata/issues/961)
                ad$write_h5ad(out_path)
                out_path
            }, .progress = " Converting files")
        },
        src = src,
        dest = dest
    )
}
0 commit comments