Skip to content
This repository was archived by the owner on Oct 14, 2025. It is now read-only.

Commit f9c5349

Browse files
authored
Merge pull request #105 from stemangiola/anndata-convert
Add dir_to_anndata function
2 parents 8a94a69 + 38898b5 commit f9c5349

File tree

2 files changed

+58
-10
lines changed

2 files changed

+58
-10
lines changed

DESCRIPTION

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -93,20 +93,10 @@ Imports:
9393
duckdb,
9494
stringr
9595
Suggests:
96-
here,
97-
tidyseurat,
9896
zellkonverter,
99-
scMerge,
100-
DelayedArray,
101-
openssl,
102-
cellxgenedp,
103-
celldex,
104-
SingleR,
10597
rmarkdown,
10698
knitr,
10799
testthat,
108-
tidySingleCellExperiment,
109-
ggplot2,
110100
basilisk,
111101
arrow,
112102
reticulate

R/dev.R

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,3 +126,61 @@ update_unharmonised <- function(unharmonised_parquet_dir, ...){
126126
...
127127
)
128128
}
129+
130+
#' Converts a series of HDF5Array-serialized SingleCellExperiments to AnnData
#'
#' Each immediate sub-directory of `src` is loaded with
#' [HDF5Array::loadHDF5SummarizedExperiment()], converted with
#' [zellkonverter::SCE2AnnData()] and written to `dest` as
#' `<sub-directory name>.h5ad`. Output files that already exist are left
#' untouched, so an interrupted run can simply be restarted.
#' @param src A character scalar. The path to a directory containing one or more
#'   directories created by [HDF5Array::saveHDF5SummarizedExperiment()].
#' @param dest A character scalar. The path to a directory in which to save the
#'   created anndata files. It is created (recursively) if it does not exist.
#' @keywords internal
#' @return A character vector with the path of the anndata file corresponding
#'   to each input directory, including files that already existed and were
#'   therefore skipped.
#' @examples
#' \dontrun{
#' # These paths only exist on the maintainers' cluster
#' dir_to_anndata(
#'   "/vast/projects/cellxgene_curated/splitted_DB2_data_0.2.1",
#'   "/vast/projects/cellxgene_curated/splitted_DB2_anndata_0.2.1"
#' )
#' dir_to_anndata(
#'   "/vast/projects/cellxgene_curated/splitted_DB2_data_scaled_0.2.1",
#'   "/vast/projects/cellxgene_curated/splitted_DB2_anndata_scaled_0.2.1"
#' )
#' }
dir_to_anndata <- function(src, dest) {
  dir.create(dest, showWarnings = FALSE, recursive = TRUE)

  # Quick utility to convert the SCE files into AnnData format for use in
  # Python. `src` and `dest` are handed to the worker function through `...`
  # instead of being captured from this frame, because basilisk may evaluate
  # `fun` in a separate R process.
  basilisk::basiliskRun(
    env = zellkonverter::zellkonverterAnnDataEnv(),
    fun = function(src, dest) {
      # Only the immediate children of `src` are saved experiments; a
      # non-recursive listing also excludes `src` itself.
      list.dirs(src, full.names = TRUE, recursive = FALSE) |>
        purrr::map_chr(function(sce_dir) {
          cli::cli_alert_info("Processing {sce_dir}.")
          out_path <- file.path(dest, paste0(basename(sce_dir), ".h5ad"))

          if (file.exists(out_path)) {
            cli::cli_alert_info("{out_path} already exists. Skipping")
            return(out_path)
          }

          sce <- HDF5Array::loadHDF5SummarizedExperiment(sce_dir)
          single_column <- ncol(sce) == 1L
          if (single_column) {
            # Hack, so that single-column SCEs will convert correctly
            cli::cli_alert_info(
              "{sce_dir} has only 1 column. Duplicating column."
            )
            sce <- cbind(sce, sce)
          }

          ad <- zellkonverter::SCE2AnnData(sce)
          if (single_column) {
            # Remove the duplicate column again. This has to happen on the
            # AnnData object (cells are its first axis), not on `sce`, and by
            # position because both copies share the same cell name.
            # `copy()` turns the resulting view into a standalone object.
            ad <- ad$`__getitem__`(0L)$copy()
          }

          # TODO: customize chunking here, when anndata supports it
          # (see https://github.com/scverse/anndata/issues/961)
          ad$write_h5ad(out_path)
          out_path
        }, .progress = "Converting files")
    },
    src = src,
    dest = dest
  )
}

0 commit comments

Comments
 (0)