@@ -145,7 +145,7 @@ update_unharmonised <- function(unharmonised_parquet_dir, ...){
145145# ' "/vast/projects/cellxgene_curated/splitted_DB2_anndata_scaled_0.2.1"
146146# ' )
147147# ' }
148- hdf5_to_anndata = function (input_directory , output_directory ){
148+ hdf5_to_anndata <- function (input_directory , output_directory ){
149149 dir.create(output_directory , showWarnings = FALSE )
150150 # This is a quick utility script to convert the SCE files into AnnData format for use in Python. Stale code fragment left in this comment: list.files("/vast/projects/RCP/human_cell_atlas/splitted_DB2_data", full.names = FALSE) |> purrr::walk(function(dir){
151151 basilisk :: basiliskRun(fun = function (sce ) {
@@ -185,72 +185,72 @@ hdf5_to_anndata = function(input_directory, output_directory){
185185 }, env = zellkonverter :: zellkonverterAnnDataEnv())
186186}
187187
188- # ' Converts a series of H5-serialized Seurat to AnnData
189- # ' @param input_directory A character scalar. The path to a directory containing one or more
190- # ' directories created by [SeuratDisk::SaveH5Seurat()].
191- # ' @param output_directory A character scalar. The path to a directory in which to save the
192- # ' created anndata files.
193- # ' @keywords internal
194- # ' @return A character vector of the newly-created anndata files
195- # ' @examples
196- # ' \donttest{
197- # ' h5seurat_to_anndata(
198- # ' "/vast/projects/cellxgene_curated/splitted_DB2_data_0.2.1",
199- # ' "/vast/projects/cellxgene_curated/splitted_DB2_anndata_0.2.1"
200- # ' )
201- # ' h5seurat_to_anndata(
202- # ' "/vast/projects/cellxgene_curated/splitted_DB2_data_scaled_0.2.1",
203- # ' "/vast/projects/cellxgene_curated/splitted_DB2_anndata_scaled_0.2.1"
204- # ' )
205- # ' }
206- h5seurat_to_anndata = function ( input_directory , output_directory , assays = " RNA " ){
# Purpose: for every entry listed in `input_directory`, load it with
# LoadH5Seurat(), convert it to a SingleCellExperiment, convert that to
# AnnData with zellkonverter, and write `<basename>.h5ad` into
# `output_directory`.
#
# Arguments:
#   input_directory  - directory whose entries are each passed to LoadH5Seurat().
#   output_directory - directory that receives the .h5ad files; created if
#                      missing (dir.create() warnings are suppressed).
#   assays           - forwarded unchanged as the `assays` argument of
#                      LoadH5Seurat().
#
# Result: the inner purrr::map_chr() yields one output path per input entry,
# including entries that were skipped because the output already existed.
# Presumably this vector is what basiliskRun() hands back to the caller --
# TODO confirm against the basilisk documentation.
207-
208- # Check if package is loaded
# LoadH5Seurat() is called below without a namespace prefix, so it is only
# found when SeuratDisk is attached; this guard fails early otherwise.
209- if ( ! " SeuratDisk " %in% (.packages()))
210- stop( " CuratedCellAtlas says: please manually load the SeuratDisk package first. Execute `library(SeuratDisk)` " )
211-
212-
213- dir.create( output_directory , showWarnings = FALSE )
214- # This is a quick utility script to convert the SCE files into AnnData format for use in Python. Stale code fragment left in this comment: list.files("/vast/projects/RCP/human_cell_atlas/splitted_DB2_data", full.names = FALSE) |> purrr::walk(function(dir){
# NOTE(review): the anonymous function declares a `sce` parameter, but the
# basiliskRun() call below passes only `fun` and `env`; `sce` is never read
# before being assigned inside the loop, so the parameter looks unused.
215- basilisk :: basiliskRun( fun = function (sce ) {
216- dir( input_directory , full.names = TRUE ) | >
217- purrr :: map_chr( function ( seurat_file ){
218- cli :: cli_alert_info( " Processing { seurat_file}. " )
219- prefix <- basename( seurat_file )
# Output file name is the input entry's basename with ".h5ad" appended,
# placed inside `output_directory`.
220- out_path <- glue :: glue( " {prefix}.h5ad " ) | >
221- file.path( output_directory , name = _)
222-
# Existing outputs are not regenerated; only an informational message is shown.
223- if (file.exists( out_path )) {
224- cli :: cli_alert_info( " {out_path} already exists. Skipping " )
225- }
226- else {
227- sce <-
228- LoadH5Seurat(seurat_file , assays = assays ) | >
229- Seurat :: as.SingleCellExperiment()
230-
231- single_column <- length(colnames(sce )) == 1
232- if (single_column ){
233- # Hack, so that single-column SCEs will convert
234- # correctly
235- cli :: cli_alert_info(
236- " {seurat_file} has only 1 column. Duplicating column."
237- )
238- sce <- cbind(sce , sce )
# Redundant: `single_column` is already TRUE inside this branch.
239- single_column <- TRUE
240- }
241- ad <- zellkonverter :: SCE2AnnData(sce )
242- if (single_column ){
243- # Remove the duplicate column
# NOTE(review): this assigns to `sce`, but `ad` was already created from
# `sce` above and `ad` is what gets written below. As written, the
# duplicated column appears to remain in the saved .h5ad -- confirm and
# apply the de-duplication to `ad` instead.
244- sce $ X <- sce $ X [1 ]
245- }
246- # TODO: customize chunking here, when anndata supports it
247- # (see https://github.com/scverse/anndata/issues/961)
248- ad $ write_h5ad(out_path )
249- }
# Returned for both the skipped and the freshly written case.
250- out_path
251- }, .progress = " Converting files" )
252- }, env = zellkonverter :: zellkonverterAnnDataEnv())
253- }
188+ # Converts a series of H5-serialized Seurat to AnnData
189+ # @param input_directory A character scalar. The path to a directory containing one or more
190+ # directories created by [SeuratDisk::SaveH5Seurat()].
191+ # @param output_directory A character scalar. The path to a directory in which to save the
192+ # created anndata files.
193+ # @keywords internal
194+ # @return A character vector of the newly-created anndata files
195+ # @noRd
196+ # @examples
197+ # \donttest{
198+ # h5seurat_to_anndata(
199+ # "/vast/projects/cellxgene_curated/splitted_DB2_data_0.2.1",
200+ # "/vast/projects/cellxgene_curated/splitted_DB2_anndata_0.2.1"
201+ # )
202+ # h5seurat_to_anndata(
203+ # "/vast/projects/cellxgene_curated/splitted_DB2_data_scaled_0.2.1",
204+ # "/vast/projects/cellxgene_curated/splitted_DB2_anndata_scaled_0.2.1"
205+ # )
206+ # }
207+ # h5seurat_to_anndata <- function(input_directory, output_directory, assays = "RNA"){
208+ #
209+ # # Check if package is loaded
210+ # if(!"SeuratDisk" %in% (.packages()))
211+ # stop("CuratedCellAtlas says: please manually load the SeuratDisk package first. Execute `library(SeuratDisk)`")
212+ #
213+ #
214+ # dir.create(output_directory, showWarnings = FALSE)
215+ # # This is a quick utility script to convert the SCE files into AnnData format for use in Pythonlist.files("/vast/projects/RCP/human_cell_atlas/splitted_DB2_data", full.names = FALSE) |> purrr::walk( function(dir) {
216+ # basilisk::basiliskRun(fun = function(sce) {
217+ # dir(input_directory, full.names = TRUE) |>
218+ # purrr::map_chr(function( seurat_file){
219+ # cli::cli_alert_info("Processing { seurat_file}." )
220+ # prefix <- basename(seurat_file)
221+ # out_path <- glue::glue("{prefix}.h5ad") |>
222+ # file.path(output_directory, name=_)
223+ #
224+ # if (file.exists(out_path)) {
225+ # cli::cli_alert_info("{out_path} already exists. Skipping")
226+ # }
227+ # else {
228+ # sce <- SeuratDisk:: LoadH5Seurat(seurat_file, assays = assays) |>
229+ # Seurat::as.SingleCellExperiment()
230+ #
231+ # single_column <- length(colnames(sce)) == 1
232+ # if (single_column){
233+ # # Hack, so that single-column SCEs will convert
234+ # # correctly
235+ # cli::cli_alert_info(
236+ # "{seurat_file} has only 1 column. Duplicating column."
237+ # )
238+ # sce <- cbind(sce, sce)
239+ # single_column <- TRUE
240+ # }
241+ # ad <- zellkonverter::SCE2AnnData(sce)
242+ # if (single_column){
243+ # # Remove the duplicate column
244+ # sce$X <- sce$X[1]
245+ # }
246+ # # TODO: customize chunking here, when anndata supports it
247+ # # (see https://github.com/scverse/anndata/issues/961)
248+ # ad$write_h5ad(out_path)
249+ # }
250+ # out_path
251+ # }, .progress = "Converting files")
252+ # }, env = zellkonverter::zellkonverterAnnDataEnv())
253+ # }
254254
255255# ' Makes a "downsampled" metadata file that only contains the minimal data
256256# ' needed to run the vignette.
0 commit comments