1- # These are hacks to force the above packages to be loaded, and also to
1+ # Functions that relate to downloading count data into SingleCellExperiments
2+
3+ # We need to load utils now so it can be used at the top level
4+ # ' @include utils.R
5+ # This is a hack to force Seurat packages to be loaded, and also to
26# satisfy R CMD check. We don't need to attach them at all.
3- # ' @import dbplyr
4- # ' @import Seurat
7+ # ' @importFrom Seurat as.SingleCellExperiment
58NULL
69
710# Maps user provided assay names to their corresponding paths in the repository
@@ -10,22 +13,28 @@ assay_map <- c(
1013 cpm = " cpm"
1114)
1215
13- REMOTE_URL <- " https://swift.rc.nectar.org.au/v1/AUTH_06d6e008e3e642da99d806ba3ea629c5/harmonised-human-atlas"
16+ # ' Base URL pointing to the count data
17+ COUNTS_URL <- single_line_str(
18+ " https://swift.rc.nectar.org.au/v1/
19+ AUTH_06d6e008e3e642da99d806ba3ea629c5/harmonised-human-atlas"
20+ )
21+ # ' Current version of the counts. This will be incremented when a newer
22+ # ' version is released
1423COUNTS_VERSION <- " 0.2"
1524
1625# ' Gets a SingleCellExperiment from curated metadata
1726# '
1827# ' Given a data frame of Curated Atlas metadata obtained from [get_metadata()],
19- # ' returns a [`SingleCellExperiment::SingleCellExperiment-class`] object corresponding to the samples in that
20- # ' data frame
28+ # ' returns a [`SingleCellExperiment::SingleCellExperiment-class`] object
29+ # ' corresponding to the samples in that data frame
2130# '
2231# ' @param data A data frame containing, at minimum, a `sample_` column, which
2332# ' corresponds to a single cell sample ID. This can be obtained from the
2433# ' [get_metadata()] function.
2534# ' @param assays A character vector whose elements must be either "counts"
26- # ' and/or "cpm", representing the corresponding assay(s) you want to request.
27- # ' By default only the count assay is downloaded. If you are interested in comparing a limited amount of genes,
28- # ' the "cpm" assay is more appropriate.
35+ # ' and/or "cpm", representing the corresponding assay(s) you want to request.
36+ # ' By default only the count assay is downloaded. If you are interested in
37+ # ' comparing a limited amount of genes, the "cpm" assay is more appropriate.
2938# ' @param repository A character vector of length one. If provided, it should be
3039# ' an HTTP URL pointing to the location where the single cell data is stored.
3140# ' @param cache_directory An optional character vector of length one. If
@@ -54,15 +63,12 @@ COUNTS_VERSION <- "0.2"
5463# ' @importFrom rlang .data
5564# ' @importFrom stats setNames
5665# ' @importFrom S4Vectors DataFrame
57- # '
5866# ' @export
59- # '
60- # '
6167get_SingleCellExperiment <- function (
6268 data ,
6369 assays = " counts" ,
6470 cache_directory = get_default_cache_dir(),
65- repository = REMOTE_URL ,
71+ repository = COUNTS_URL ,
6672 features = NULL
6773) {
6874 # Parameter validation
@@ -88,7 +94,10 @@ get_SingleCellExperiment <- function(
8894 has_name(raw_data , c(" cell_" , " file_id_db" )) | > assert_that()
8995
9096 versioned_cache_directory <- file.path(cache_directory , COUNTS_VERSION )
91- versioned_cache_directory | > dir.create(showWarnings = FALSE , recursive = TRUE )
97+ versioned_cache_directory | > dir.create(
98+ showWarnings = FALSE ,
99+ recursive = TRUE
100+ )
92101
93102 subdirs <- assay_map [assays ]
94103
@@ -150,7 +159,8 @@ get_SingleCellExperiment <- function(
150159# ' Converts a data frame into a single SCE
151160# ' @param i Suffix to be added to the column names, to make them unique
152161# ' @param df The data frame to be converted
153- # ' @param dir_prefix The path to the single cell experiment, minus the final segment
162+ # ' @param dir_prefix The path to the single cell experiment, minus the final
163+ # ' segment
154164# ' @param features The list of genes/rows of interest
155165# ' @return A SingleCellExperiment object
156166# ' @importFrom dplyr mutate filter
@@ -160,6 +170,7 @@ get_SingleCellExperiment <- function(
160170# ' @importFrom utils head
161171# ' @importFrom cli cli_alert_warning cli_abort
162172# ' @importFrom glue glue
173+ # ' @importFrom stringr str_replace_all
163174# ' @noRd
164175group_to_sce <- function (i , df , dir_prefix , features ) {
165176 sce_path <- df $ file_id_db | >
@@ -183,8 +194,12 @@ group_to_sce <- function(i, df, dir_prefix, features) {
183194 cells <- colnames(sce ) | > intersect(df $ cell_ )
184195
185196 if (length(cells ) < nrow(df )){
186- cli_alert_warning(" Some cells were filtered out because of extremely low counts. The number of cells in the SingleCellExperiment will be less than the number of cells you have selected from the metadata." )
187- df = filter(df , .data $ cell_ %in% cells )
197+ str_replace_all(
198+ " Some cells were filtered out because of extremely low counts. The
199+ number of cells in the SingleCellExperiment will be less than the
200+ number of cells you have selected from the metadata."
201+ )
202+ df <- filter(df , .data $ cell_ %in% cells )
188203 }
189204 else if (length(cells ) > nrow(df )){
190205 cli_abort(" This should never happen" )
@@ -195,8 +210,8 @@ group_to_sce <- function(i, df, dir_prefix, features) {
195210
196211 new_coldata <- df | >
197212 # We need to make the cell names globally unique, which we can guarantee
198- # by adding a suffix that is derived from file_id_db, which is the grouping
199- # variable
213+ # by adding a suffix that is derived from file_id_db, which is the
214+ # grouping variable
200215 mutate(original_cell_id = .data $ cell_ , cell_ = glue(" {cell_}_{i}" )) | >
201216 column_to_rownames(" cell_" ) | >
202217 as(" DataFrame" )
@@ -231,14 +246,14 @@ group_to_sce <- function(i, df, dir_prefix, features) {
231246# ' @noRd
232247# '
233248sync_assay_files <- function (
234- url = parse_url(REMOTE_URL ),
249+ url = parse_url(COUNTS_URL ),
235250 cache_dir ,
236251 subdirs ,
237252 files
238253) {
239254 # Find every combination of file name, sample id, and assay, since each
240255 # will be a separate file we need to download
241- files = expand.grid(
256+ files <- expand.grid(
242257 filename = c(" assays.h5" , " se.rds" ),
243258 sample_id = files ,
244259 subdir = subdirs ,
@@ -284,69 +299,3 @@ sync_assay_files <- function(
284299 output_file
285300 }, .progress = list (name = " Downloading files" ))
286301}
287-
288- # ' Synchronises a single remote file with a local path
289- # ' @importFrom httr write_disk GET stop_for_status
290- # ' @importFrom cli cli_abort cli_alert_info
291- # ' @noRd
292- sync_remote_file <- function (full_url , output_file , ... ) {
293- if (! file.exists(output_file )) {
294- output_dir <- dirname(output_file )
295- dir.create(output_dir ,
296- recursive = TRUE ,
297- showWarnings = FALSE
298- )
299- cli_alert_info(" Downloading {full_url} to {output_file}" )
300-
301- tryCatch(
302- GET(full_url , write_disk(output_file ), ... ) | > stop_for_status(),
303- error = function (e ) {
304- # Clean up if we had an error
305- file.remove(output_file )
306- cli_abort(" File {full_url} could not be downloaded. {e}" )
307- }
308- )
309- }
310- }
311-
312- # ' Returns the default cache directory
313- # '
314- # ' @return A length one character vector.
315- # ' @importFrom tools R_user_dir
316- # ' @importFrom utils packageName
317- # ' @noRd
318- # '
319- get_default_cache_dir <- function () {
320- packageName() | >
321- R_user_dir(
322- " cache"
323- ) | >
324- normalizePath() | >
325- suppressWarnings()
326- }
327-
328- # ' @importFrom assertthat assert_that
329- # ' @importFrom methods as
330- # ' @importFrom SeuratObject as.sparse
331- # ' @exportS3Method
332- as.sparse.DelayedMatrix <- function (x ) {
333- # This is glue to ensure the SCE -> Seurat conversion works properly with
334- # DelayedArray types
335- as(x , " dgCMatrix" )
336- }
337-
338- # ' Given a data frame of HCA metadata, returns a Seurat object corresponding to
339- # ' the samples in that data frame
340- # '
341- # ' @inheritDotParams get_SingleCellExperiment
342- # ' @importFrom SeuratObject as.Seurat
343- # ' @export
344- # ' @return A Seurat object containing the same data as a call to
345- # ' get_SingleCellExperiment.
346- # ' @examples
347- # ' meta <- get_metadata() |> head(2)
348- # ' seurat <- get_seurat(meta)
349- # '
350- get_seurat <- function (... ) {
351- get_SingleCellExperiment(... ) | > as.Seurat(data = NULL )
352- }
0 commit comments