@@ -128,33 +128,33 @@ update_unharmonised <- function(unharmonised_parquet_dir, ...){
128128}
129129
130130# ' Converts a series of HDF5Array-serialized SingleCellExperiments to AnnData
131- # ' @param src A character scalar. The path to a directory containing one or more
131+ # ' @param input_directory A character scalar. The path to a directory containing one or more
132132# ' directories created by [HDF5Array::saveHDF5SummarizedExperiment()].
133- # ' @param dest A character scalar. The path to a directory in which to save the
133+ # ' @param output_directory A character scalar. The path to a directory in which to save the
134134# ' created anndata files.
135135# ' @keywords internal
136136# ' @return A character vector of the newly-created anndata files
137137# ' @examples
138138# ' \donttest{
139- # ' dir_to_anndata (
139+ # ' hdf5_to_anndata (
140140# ' "/vast/projects/cellxgene_curated/splitted_DB2_data_0.2.1",
141141# ' "/vast/projects/cellxgene_curated/splitted_DB2_anndata_0.2.1"
142142# ' )
143- # ' dir_to_anndata (
143+ # ' hdf5_to_anndata (
144144# ' "/vast/projects/cellxgene_curated/splitted_DB2_data_scaled_0.2.1",
145145# ' "/vast/projects/cellxgene_curated/splitted_DB2_anndata_scaled_0.2.1"
146146# ' )
147147# ' }
148- dir_to_anndata = function (src , dest ){
149- dir.create(dest , showWarnings = FALSE )
148+ hdf5_to_anndata = function (input_directory , output_directory ){
149+ dir.create(output_directory , showWarnings = FALSE )
150150 # This is a quick utility script to convert the SCE files into AnnData format for use in Pythonlist.files("/vast/projects/RCP/human_cell_atlas/splitted_DB2_data", full.names = FALSE) |> purrr::walk(function(dir){
151151 basilisk :: basiliskRun(fun = function (sce ) {
152- list.dirs(src )[- 1 ] | >
152+ list.dirs(input_directory )[- 1 ] | >
153153 purrr :: map_chr(function (sce_dir ){
154154 cli :: cli_alert_info(" Processing {sce_dir}." )
155155 prefix <- basename(sce_dir )
156156 out_path <- glue :: glue(" {prefix}.h5ad" ) | >
157- file.path(dest , name = _)
157+ file.path(output_directory , name = _)
158158
159159 if (file.exists(out_path )) {
160160 cli :: cli_alert_info(" {out_path} already exists. Skipping" )
@@ -184,3 +184,72 @@ dir_to_anndata = function(src, dest){
184184 }, .progress = " Converting files" )
185185 }, env = zellkonverter :: zellkonverterAnnDataEnv())
186186}
187+
188+
#' Converts a series of H5-serialized Seurat objects to AnnData
#'
#' Every entry listed in `input_directory` is loaded with
#' `SeuratDisk::LoadH5Seurat()`, converted to a SingleCellExperiment with
#' `Seurat::as.SingleCellExperiment()`, converted to AnnData with
#' `zellkonverter::SCE2AnnData()` and written to `output_directory` as
#' `<entry name>.h5ad`. Entries whose output file already exists are skipped.
#'
#' @param input_directory A character scalar. The path to a directory
#'   containing one or more files created by [SeuratDisk::SaveH5Seurat()].
#' @param output_directory A character scalar. The path to a directory in
#'   which to save the created anndata files. It is created if missing.
#' @param assays Forwarded to the `assays` argument of
#'   `SeuratDisk::LoadH5Seurat()`. Defaults to `"RNA"`.
#' @keywords internal
#' @return A character vector with one anndata file path per entry of
#'   `input_directory`, including entries that were skipped because the
#'   output file already existed.
#' @examples
#' \donttest{
#' h5seurat_to_anndata(
#'     "/vast/projects/cellxgene_curated/splitted_DB2_data_0.2.1",
#'     "/vast/projects/cellxgene_curated/splitted_DB2_anndata_0.2.1"
#' )
#' h5seurat_to_anndata(
#'     "/vast/projects/cellxgene_curated/splitted_DB2_data_scaled_0.2.1",
#'     "/vast/projects/cellxgene_curated/splitted_DB2_anndata_scaled_0.2.1"
#' )
#' }
h5seurat_to_anndata <- function(input_directory,
                                output_directory,
                                assays = "RNA") {
  # The caller must have attached SeuratDisk; fail early, before touching the
  # file system, if it is not on the search path.
  if (!"SeuratDisk" %in% (.packages())) {
    stop(
      " CuratedCellAtlas says: please manually load the SeuratDisk package first. Execute `library(SeuratDisk)`",
      call. = FALSE
    )
  }

  dir.create(output_directory, showWarnings = FALSE)

  # Convert every H5Seurat file into AnnData format for use in Python. The
  # worker receives its inputs as explicit arguments instead of relying on
  # the enclosing environment.
  basilisk::basiliskRun(
    fun = function(input_directory, output_directory, assays) {
      dir(input_directory, full.names = TRUE) |>
        purrr::map_chr(function(seurat_file) {
          cli::cli_alert_info(" Processing {seurat_file}.")
          prefix <- basename(seurat_file)
          out_path <- file.path(
            output_directory,
            glue::glue("{prefix}.h5ad")
          )

          if (file.exists(out_path)) {
            cli::cli_alert_info(" {out_path} already exists. Skipping")
          } else {
            sce <-
              SeuratDisk::LoadH5Seurat(seurat_file, assays = assays) |>
              Seurat::as.SingleCellExperiment()

            single_column <- length(colnames(sce)) == 1
            if (single_column) {
              # Hack, so that single-column SCEs will convert correctly
              cli::cli_alert_info(
                " {seurat_file} has only 1 column. Duplicating column."
              )
              sce <- cbind(sce, sce)
            }

            ad <- zellkonverter::SCE2AnnData(sce)

            if (single_column) {
              # Remove the duplicate again: keep only the first observation
              # (cell) of the AnnData object. Python indexing is 0-based, and
              # copy() turns the resulting view into a standalone object.
              ad <- ad$`__getitem__`(0L)$copy()
            }

            # TODO: customize chunking here, when anndata supports it
            # (see https://github.com/scverse/anndata/issues/961)
            ad$write_h5ad(out_path)
          }
          out_path
        }, .progress = " Converting files")
    },
    input_directory = input_directory,
    output_directory = output_directory,
    assays = assays,
    env = zellkonverter::zellkonverterAnnDataEnv()
  )
}
255+
0 commit comments