Skip to content
This repository was archived by the owner on Oct 14, 2025. It is now read-only.

Commit ba26a3f

Browse files
committed
add H5Seurat to Anndata
1 parent 52baec1 commit ba26a3f

File tree

1 file changed

+77
-8
lines changed

1 file changed

+77
-8
lines changed

R/dev.R

Lines changed: 77 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -128,33 +128,33 @@ update_unharmonised <- function(unharmonised_parquet_dir, ...){
128128
}
129129

130130
#' Converts a series of HDF5Array-serialized SingleCellExperiments to AnnData
131-
#' @param src A character scalar. The path to a directory containing one or more
131+
#' @param input_directory A character scalar. The path to a directory containing one or more
132132
#' directories created by [HDF5Array::saveHDF5SummarizedExperiment()].
133-
#' @param dest A character scalar. The path to a directory in which to save the
133+
#' @param output_directory A character scalar. The path to a directory in which to save the
134134
#' created anndata files.
135135
#' @keywords internal
136136
#' @return A character vector of the newly-created anndata files
137137
#' @examples
138138
#' \donttest{
139-
#' dir_to_anndata(
139+
#' hdf5_to_anndata(
140140
#' "/vast/projects/cellxgene_curated/splitted_DB2_data_0.2.1",
141141
#' "/vast/projects/cellxgene_curated/splitted_DB2_anndata_0.2.1"
142142
#' )
143-
#' dir_to_anndata(
143+
#' hdf5_to_anndata(
144144
#' "/vast/projects/cellxgene_curated/splitted_DB2_data_scaled_0.2.1",
145145
#' "/vast/projects/cellxgene_curated/splitted_DB2_anndata_scaled_0.2.1"
146146
#' )
147147
#' }
148-
dir_to_anndata = function(src, dest){
149-
dir.create(dest, showWarnings = FALSE)
148+
hdf5_to_anndata = function(input_directory, output_directory){
149+
dir.create(output_directory, showWarnings = FALSE)
150150
# This is a quick utility script to convert the SCE files into AnnData format for use in Python
151151
basilisk::basiliskRun(fun = function(sce) {
152-
list.dirs(src)[-1] |>
152+
list.dirs(input_directory)[-1] |>
153153
purrr::map_chr(function(sce_dir){
154154
cli::cli_alert_info("Processing {sce_dir}.")
155155
prefix <- basename(sce_dir)
156156
out_path <- glue::glue("{prefix}.h5ad") |>
157-
file.path(dest, name=_)
157+
file.path(output_directory, name=_)
158158

159159
if (file.exists(out_path)) {
160160
cli::cli_alert_info("{out_path} already exists. Skipping")
@@ -184,3 +184,72 @@ dir_to_anndata = function(src, dest){
184184
}, .progress = "Converting files")
185185
}, env = zellkonverter::zellkonverterAnnDataEnv())
186186
}
187+
188+
189+
#' Converts a series of H5Seurat-serialized Seurat objects to AnnData
#'
#' Every entry of `input_directory` is loaded with `LoadH5Seurat()`, converted
#' to a `SingleCellExperiment` and written to `output_directory` as
#' `<file name>.h5ad`. Outputs that already exist are skipped.
#'
#' @param input_directory A character scalar. The path to a directory
#'   containing one or more files created by [SeuratDisk::SaveH5Seurat()].
#' @param output_directory A character scalar. The path to a directory in
#'   which to save the created anndata files. It is created (recursively) if
#'   it does not exist.
#' @param assays Passed on as the `assays` argument of `LoadH5Seurat()`.
#'   Defaults to `"RNA"`.
#' @keywords internal
#' @return A character vector of anndata file paths, one per entry of
#'   `input_directory`, including entries skipped because their output
#'   already existed.
#' @examples
#' \donttest{
#' h5seurat_to_anndata(
#'     "/vast/projects/cellxgene_curated/splitted_DB2_data_0.2.1",
#'     "/vast/projects/cellxgene_curated/splitted_DB2_anndata_0.2.1"
#' )
#' h5seurat_to_anndata(
#'     "/vast/projects/cellxgene_curated/splitted_DB2_data_scaled_0.2.1",
#'     "/vast/projects/cellxgene_curated/splitted_DB2_anndata_scaled_0.2.1"
#' )
#' }
h5seurat_to_anndata <- function(input_directory, output_directory, assays = "RNA") {

    # LoadH5Seurat() is called unqualified below, so SeuratDisk has to be
    # attached by the caller before this function is used.
    if (!"SeuratDisk" %in% (.packages())) {
        stop(
            "CuratedCellAtlas says: please manually load the SeuratDisk package first. Execute `library(SeuratDisk)`",
            call. = FALSE
        )
    }

    # recursive = TRUE so that a nested output path does not fail later,
    # at write time.
    dir.create(output_directory, showWarnings = FALSE, recursive = TRUE)

    # Convert the H5Seurat files into AnnData format for use in Python.
    # The work runs inside zellkonverter's AnnData Python environment.
    basilisk::basiliskRun(fun = function() {
        dir(input_directory, full.names = TRUE) |>
            purrr::map_chr(function(seurat_file) {
                cli::cli_alert_info("Processing {seurat_file}.")
                out_path <- file.path(
                    output_directory,
                    paste0(basename(seurat_file), ".h5ad")
                )

                if (file.exists(out_path)) {
                    cli::cli_alert_info("{out_path} already exists. Skipping")
                    return(out_path)
                }

                sce <-
                    LoadH5Seurat(seurat_file, assays = assays) |>
                    Seurat::as.SingleCellExperiment()

                single_column <- ncol(sce) == 1L
                if (single_column) {
                    # Hack, so that single-column SCEs will convert
                    # correctly
                    cli::cli_alert_info(
                        "{seurat_file} has only 1 column. Duplicating column."
                    )
                    sce <- cbind(sce, sce)
                }

                ad <- zellkonverter::SCE2AnnData(sce)
                if (single_column) {
                    # Remove the duplicate column again. This has to be done
                    # on the AnnData object (cells are its observations);
                    # modifying `sce` at this point would not affect `ad`.
                    # copy() materialises the one-observation view before it
                    # is written.
                    ad <- ad$`__getitem__`(0L)$copy()
                }

                # TODO: customize chunking here, when anndata supports it
                # (see https://github.com/scverse/anndata/issues/961)
                ad$write_h5ad(out_path)
                out_path
            }, .progress = "Converting files")
    }, env = zellkonverter::zellkonverterAnnDataEnv())
}
255+

0 commit comments

Comments
 (0)