@@ -128,33 +128,33 @@ update_unharmonised <- function(unharmonised_parquet_dir, ...){
128128}
129129
130130# ' Converts a series of HDF5Array-serialized SingleCellExperiments to AnnData
131- # ' @param src A character scalar. The path to a directory containing one or more
131+ # ' @param input_directory A character scalar. The path to a directory containing one or more
132132# ' directories created by [HDF5Array::saveHDF5SummarizedExperiment()].
133- # ' @param dest A character scalar. The path to a directory in which to save the
133+ # ' @param output_directory A character scalar. The path to a directory in which to save the
134134# ' created anndata files.
135135# ' @keywords internal
136136# ' @return A character vector of the newly-created anndata files
137137# ' @examples
138138# ' \donttest{
139- # ' dir_to_anndata (
139+ # ' hdf5_to_anndata (
140140# ' "/vast/projects/cellxgene_curated/splitted_DB2_data_0.2.1",
141141# ' "/vast/projects/cellxgene_curated/splitted_DB2_anndata_0.2.1"
142142# ' )
143- # ' dir_to_anndata (
143+ # ' hdf5_to_anndata (
144144# ' "/vast/projects/cellxgene_curated/splitted_DB2_data_scaled_0.2.1",
145145# ' "/vast/projects/cellxgene_curated/splitted_DB2_anndata_scaled_0.2.1"
146146# ' )
147147# ' }
148- dir_to_anndata <- function (src , dest ){
149- dir.create(dest , showWarnings = FALSE )
148+ hdf5_to_anndata = function (input_directory , output_directory ){
149+ dir.create(output_directory , showWarnings = FALSE )
150150 # This is a quick utility script to convert the SCE files into AnnData format for use in Pythonlist.files("/vast/projects/RCP/human_cell_atlas/splitted_DB2_data", full.names = FALSE) |> purrr::walk(function(dir){
151151 basilisk :: basiliskRun(fun = function (sce ) {
152- list.dirs(src )[- 1 ] | >
152+ list.dirs(input_directory )[- 1 ] | >
153153 purrr :: map_chr(function (sce_dir ){
154154 cli :: cli_alert_info(" Processing {sce_dir}." )
155155 prefix <- basename(sce_dir )
156156 out_path <- glue :: glue(" {prefix}.h5ad" ) | >
157- file.path(dest , name = _)
157+ file.path(output_directory , name = _)
158158
159159 if (file.exists(out_path )) {
160160 cli :: cli_alert_info(" {out_path} already exists. Skipping" )
@@ -185,6 +185,73 @@ dir_to_anndata <- function(src, dest){
185185 }, env = zellkonverter :: zellkonverterAnnDataEnv())
186186}
187187
#' Converts a series of H5-serialized Seurat objects to AnnData
#'
#' Each entry of `input_directory` is loaded with `LoadH5Seurat()`, converted
#' to a SingleCellExperiment, converted to AnnData and written to
#' `output_directory` as `<entry name>.h5ad`. Entries whose output file already
#' exists are skipped. The SeuratDisk package must be attached by the caller
#' beforehand.
#' @param input_directory A character scalar. The path to a directory containing
#'   one or more files created by [SeuratDisk::SaveH5Seurat()].
#' @param output_directory A character scalar. The path to a directory in which
#'   to save the created anndata files. It is created if it does not exist.
#' @param assays Forwarded to `LoadH5Seurat()` as its `assays` argument.
#'   Defaults to `"RNA"`.
#' @keywords internal
#' @return A character vector with one anndata file path per entry of
#'   `input_directory`, including paths that already existed and were skipped.
#' @examples
#' \donttest{
#' h5seurat_to_anndata(
#'     "/vast/projects/cellxgene_curated/splitted_DB2_data_0.2.1",
#'     "/vast/projects/cellxgene_curated/splitted_DB2_anndata_0.2.1"
#' )
#' h5seurat_to_anndata(
#'     "/vast/projects/cellxgene_curated/splitted_DB2_data_scaled_0.2.1",
#'     "/vast/projects/cellxgene_curated/splitted_DB2_anndata_scaled_0.2.1"
#' )
#' }
h5seurat_to_anndata <- function(input_directory, output_directory, assays = "RNA") {

    # LoadH5Seurat() is called unqualified below, so SeuratDisk has to be
    # attached by the caller.
    if (!"SeuratDisk" %in% (.packages())) {
        stop("CuratedCellAtlas says: please manually load the SeuratDisk package first. Execute `library(SeuratDisk)`")
    }

    dir.create(output_directory, showWarnings = FALSE)

    # Run the conversion inside zellkonverter's AnnData Python environment.
    basilisk::basiliskRun(fun = function() {
        dir(input_directory, full.names = TRUE) |>
            purrr::map_chr(function(seurat_file) {
                cli::cli_alert_info("Processing {seurat_file}.")

                # basename() keeps the input extension, so the output is named
                # "<input file name>.h5ad".
                prefix <- basename(seurat_file)
                out_path <- glue::glue("{prefix}.h5ad") |>
                    file.path(output_directory, name = _)

                if (file.exists(out_path)) {
                    cli::cli_alert_info("{out_path} already exists. Skipping")
                    return(out_path)
                }

                sce <-
                    LoadH5Seurat(seurat_file, assays = assays) |>
                    Seurat::as.SingleCellExperiment()

                single_column <- length(colnames(sce)) == 1
                if (single_column) {
                    # Hack, so that single-column SCEs will convert correctly
                    cli::cli_alert_info(
                        "{seurat_file} has only 1 column. Duplicating column."
                    )
                    sce <- cbind(sce, sce)
                }

                ad <- zellkonverter::SCE2AnnData(sce)
                if (single_column) {
                    # Remove the duplicate again. This must be done on the
                    # AnnData object: changing `sce` after the conversion never
                    # reaches the file written below. Index 0 keeps only the
                    # first observation; copy() turns the view into a
                    # standalone object before writing.
                    ad <- ad$`__getitem__`(0L)$copy()
                }

                # TODO: customize chunking here, when anndata supports it
                # (see https://github.com/scverse/anndata/issues/961)
                ad$write_h5ad(out_path)

                out_path
            }, .progress = "Converting files")
    }, env = zellkonverter::zellkonverterAnnDataEnv())
}
254+
188255# ' Makes a "downsampled" metadata file that only contains the minimal data
189256# ' needed to run the vignette.
190257# ' @param output Character scalar. Path to the output file.
@@ -234,3 +301,4 @@ downsample_metadata <- function(output = "sample_meta.parquet"){
234301
235302 NULL
236303}
304+
0 commit comments