@@ -11,15 +11,19 @@ assay_map <- c(
1111)
1212
1313REMOTE_URL <- " https://swift.rc.nectar.org.au/v1/AUTH_06d6e008e3e642da99d806ba3ea629c5/harmonised-human-atlas"
14+ COUNTS_VERSION <- " 0.2"
1415
15- # ' Given a data frame of HCA metadata, returns a SingleCellExperiment object
16- # ' corresponding to the samples in that data frame
16+ # ' Gets a SingleCellExperiment from curated metadata
17+ # '
18+ # ' Given a data frame of Curated Atlas metadata obtained from [get_metadata()],
19+ # ' returns a [`SingleCellExperiment::SingleCellExperiment-class`] object corresponding to the samples in that
20+ # ' data frame.
1721# '
1822# ' @param data A data frame containing, at minimum, a `.sample` column, which
1923# ' corresponds to a single cell sample ID, plus the `_cell` and `file_id_db`
2024# ' columns. This can be obtained from the [get_metadata()] function.
21- # ' @param assays A character vector whose elements must be either "counts" and/or
22- # ' "cpm", representing the corresponding assay(s) you want to request.
25+ # ' @param assays A character vector whose elements must be "counts" and/or
26+ # ' "cpm", representing the corresponding assay(s) you want to request.
2327# ' @param repository A character vector of length one. If provided, it should be
2428# ' an HTTP URL pointing to the location where the single cell data is stored.
2529# ' @param cache_directory An optional character vector of length one. If
@@ -51,7 +55,7 @@ REMOTE_URL <- "https://swift.rc.nectar.org.au/v1/AUTH_06d6e008e3e642da99d806ba3e
5155# '
5256# ' @export
5357# '
54- # '
58+ # '
5559get_SingleCellExperiment <- function (
5660 data ,
5761 assays = c(" counts" , " cpm" ),
@@ -79,9 +83,10 @@ get_SingleCellExperiment <- function(
7983 cli_alert_info(" Realising metadata." )
8084 raw_data <- collect(data )
8185 inherits(raw_data , " tbl" ) | > assert_that()
82- has_name(raw_data , c(" .cell " , " file_id_db" )) | > assert_that()
86+ has_name(raw_data , c(" _cell " , " file_id_db" )) | > assert_that()
8387
84- cache_directory | > dir.create(showWarnings = FALSE )
88+ versioned_cache_directory = file.path(cache_directory , COUNTS_VERSION )
89+ versioned_cache_directory | > dir.create(showWarnings = FALSE , recursive = TRUE )
8590
8691 subdirs <- assay_map [assays ]
8792
@@ -100,7 +105,7 @@ get_SingleCellExperiment <- function(
100105 as.character() | >
101106 sync_assay_files(
102107 url = parsed_repo ,
103- cache_dir = cache_directory ,
108+ cache_dir = versioned_cache_directory ,
104109 files = _,
105110 subdirs = subdirs
106111 )
@@ -111,7 +116,7 @@ get_SingleCellExperiment <- function(
111116 imap(function (current_subdir , current_assay ) {
112117 # Build up an SCE for each assay
113118 dir_prefix <- file.path(
114- cache_directory ,
119+ versioned_cache_directory ,
115120 current_subdir
116121 )
117122
@@ -172,14 +177,14 @@ group_to_sce <- function(i, df, dir_prefix, features) {
172177 sce <- loadHDF5SummarizedExperiment(sce_path )
173178 # The cells we select here are those that are both available in the SCE
174179 # object, and requested for this particular file
175- cells <- colnames(sce ) | > intersect(df $ .cell )
180+ cells <- colnames(sce ) | > intersect(df $ `_cell` )
176181 # We need to make the cell names globally unique, which we can guarantee
177182 # by adding a suffix that is derived from file_id_db, which is the grouping
178183 # variable
179184 new_cellnames <- paste0(cells , " _" , i )
180185 new_coldata <- df | >
181- mutate(original_cell_id = .data $ .cell , .cell = new_cellnames ) | >
182- column_to_rownames(" .cell " ) | >
186+ mutate(original_cell_id = .data $ `_cell` , `_cell` = new_cellnames ) | >
187+ column_to_rownames(" _cell " ) | >
183188 as(" DataFrame" )
184189
185190 features | >
@@ -333,10 +338,12 @@ get_seurat <- function(...) {
333338 get_SingleCellExperiment(... ) | > as.Seurat(data = NULL )
334339}
335340
341+ # ' Gets the Curated Atlas metadata as a data frame.
342+ # '
336343# ' Downloads a parquet database of the Human Cell Atlas metadata to a local
337344# ' cache, and then opens it as a data frame. It can then be filtered and
338345# ' passed into [get_SingleCellExperiment()]
339- # ' to obtain a [`SingleCellExperiment`](SingleCellExperiment ::SingleCellExperiment-class)
346+ # ' to obtain a [`SingleCellExperiment::SingleCellExperiment-class`] object.
340347# '
341348# ' @param remote_url Optional character vector of length 1. An HTTP URL pointing
342349# ' to the location of the parquet database.
@@ -364,10 +371,10 @@ get_seurat <- function(...) {
364371# ' @importFrom httr progress
365372# ' @importFrom cli cli_alert_info
366373get_metadata <- function (
367- remote_url = " https://object-store.rc.nectar.org.au/v1/AUTH_06d6e008e3e642da99d806ba3ea629c5/metadata-sqlite /metadata.parquet" ,
374+ remote_url = " https://object-store.rc.nectar.org.au/v1/AUTH_06d6e008e3e642da99d806ba3ea629c5/metadata/metadata.0.2.2 .parquet" ,
368375 cache_directory = get_default_cache_dir()
369376) {
370- db_path <- file.path(cache_directory , " metadata.parquet" )
377+ db_path <- file.path(cache_directory , " metadata.0.2.2. parquet" )
371378 sync_remote_file(
372379 remote_url ,
373380 db_path ,
0 commit comments