@@ -47,29 +47,46 @@ get_SingleCellExperiment <- function(...){
4747
4848# ' Gets a SingleCellExperiment from curated metadata
4949# '
50- # ' @inheritDotParams get_summarized_experiment
50+ # ' @param data A data frame containing, at minimum, `cell_`, `file_id_db` columns,
51+ # ' which correspond to a single cell ID, file subdivision for internal use.
52+ # ' They can be obtained from the [get_metadata()] function.
53+ # ' @inheritDotParams get_data_container
5154# ' @examples
5255# ' meta <- get_metadata() |> head(2)
5356# ' sce <- get_single_cell_experiment(meta)
5457# ' @export
55- get_single_cell_experiment <- function (... ){
56- get_summarized_experiment(... , type = " sce" )
58+ get_single_cell_experiment <- function (data , ... ){
59+ raw_data <- collect(data )
60+ assert_that(
61+ inherits(raw_data , " tbl" ),
62+ has_name(raw_data , c(" cell_" , " file_id_db" ))
63+ )
64+ get_data_container(data , ... , repository = pseudobulk_url , grouping_column = " file_id_db" )
5765}
5866
5967# ' Gets a Pseudobulk from curated metadata
6068# '
61- # ' @inheritDotParams get_summarized_experiment
69+ # ' @param data A data frame containing, at minimum, `cell_`, `file_id`,
70+ # ' `sample_`, `cell_type_harmonised` columns, which correspond to a single cell ID,
71+ # ' file subdivision for internal use, a singlel cell sample ID and harmonised cell type.
72+ # ' They can be obtained from the [get_metadata()] function.
73+ # ' @inheritDotParams get_data_container
6274# ' @examples
6375# ' \dontrun{
6476# ' meta <- get_metadata() |> filter(tissue_harmonised == "lung")
6577# ' pseudobulk <- meta |> get_pseudobulk()
6678# ' }
6779# ' @export
68- get_pseudobulk <- function (... ) {
69- get_summarized_experiment(... , type = " pseudobulk" )
80+ get_pseudobulk <- function (data , ... ) {
81+ raw_data <- collect(data )
82+ assert_that(
83+ inherits(raw_data , " tbl" ),
84+ has_name(raw_data , c(" cell_" , " file_id" , " sample_" , " cell_type_harmonised" ))
85+ )
86+ get_data_container(data , ... , repository = COUNTS_URL , grouping_column = " file_id" )
7087}
7188
72- # ' Gets a Summarized Experiment from curated metadata
89+ # ' Gets data from curated metadata container
7390# '
7491# ' Given a data frame of Curated Atlas metadata obtained from [get_metadata()],
7592# ' returns a [`SummarizedExperiment::SummarizedExperiment-class`] object
@@ -86,10 +103,10 @@ get_pseudobulk <- function(...) {
86103# ' files should be copied.
87104# ' @param repository A character vector of length one. If provided, it should be
88105# ' an HTTP URL pointing to the location where the single cell data is stored.
106+ # ' @param grouping_column A character vector of metadata column for grouping. "file_id_db" for
107+ # ' for Single Cell Experiment, "file_id" for pseudobulk.
89108# ' @param features An optional character vector of features (ie genes) to return
90109# ' the counts for. By default counts for all features will be returned.
91- # ' @param type A character vector of length one. Either "sce" for Single Cell Experiment,
92- # ' or "pseudobulk".
93110# ' @importFrom dplyr pull filter as_tibble inner_join collect
94111# ' @importFrom tibble column_to_rownames
95112# ' @importFrom purrr reduce map map_int imap keep
@@ -101,17 +118,15 @@ get_pseudobulk <- function(...) {
101118# ' @importFrom cli cli_alert_success cli_alert_info
102119# ' @importFrom rlang .data
103120# ' @importFrom S4Vectors DataFrame
104- get_summarized_experiment <- function (
121+ get_data_container <- function (
105122 data ,
106123 assays = " counts" ,
107124 cache_directory = get_default_cache_dir(),
108- repository = COUNTS_URL ,
109- features = NULL ,
110- type = " sce"
125+ repository ,
126+ grouping_column ,
127+ features = NULL
128+
111129) {
112- repository <- if (type == " sce" ) COUNTS_URL else if (type == " pseudobulk" ) pseudobulk_url
113- file_id_col <- if (type == " sce" ) " file_id_db" else if (type == " pseudobulk" ) " file_id"
114-
115130 # Parameter validation
116131 assays %in% names(assay_map ) | >
117132 all() | >
@@ -133,11 +148,6 @@ get_summarized_experiment <- function(
133148 # # operations will fail when passed a database connection
134149 cli_alert_info(" Realising metadata." )
135150 raw_data <- collect(data )
136- assert_that(
137- inherits(raw_data , " tbl" ),
138- has_name(raw_data , c(" cell_" , file_id_col ))
139- )
140-
141151 versioned_cache_directory <- cache_directory
142152 versioned_cache_directory | > dir.create(
143153 showWarnings = FALSE ,
@@ -156,7 +166,7 @@ get_summarized_experiment <- function(
156166
157167 files_to_read <-
158168 raw_data | >
159- pull(.data [[file_id_col ]]) | >
169+ pull(.data [[grouping_column ]]) | >
160170 unique() | >
161171 as.character() | >
162172 sync_assay_files(
@@ -175,16 +185,16 @@ get_summarized_experiment <- function(
175185 versioned_cache_directory ,
176186 current_subdir
177187 )
178-
188+
179189 experiment_list <- raw_data | >
180- dplyr :: group_by(.data [[file_id_col ]]) | >
190+ dplyr :: group_by(.data [[grouping_column ]]) | >
181191 dplyr :: summarise(experiments = list (
182- group_to_sme (
192+ group_to_data_container (
183193 dplyr :: cur_group_id(),
184194 dplyr :: cur_data_all(),
185195 dir_prefix ,
186196 features ,
187- type
197+ grouping_column
188198 )
189199 )) | >
190200 dplyr :: pull(experiments )
@@ -199,6 +209,7 @@ get_summarized_experiment <- function(
199209 cli_alert_info(" Compiling Experiment." )
200210 # Combine all the assays
201211 experiment <- experiments [[1 ]]
212+
202213 SummarizedExperiment :: assays(experiment ) <- map(experiments , function (exp ) {
203214 SummarizedExperiment :: assays(exp )[[1 ]]
204215 })
@@ -218,6 +229,7 @@ get_summarized_experiment <- function(
218229# ' @param dir_prefix The path to the single cell experiment, minus the final
219230# ' segment
220231# ' @param features The list of genes/rows of interest
232+ # ' @param grouping_column A character vector of metadata column for grouping
221233# ' @return A `SummarizedExperiment` object
222234# ' @importFrom dplyr mutate filter
223235# ' @importFrom HDF5Array loadHDF5SummarizedExperiment
@@ -228,10 +240,9 @@ get_summarized_experiment <- function(
228240# ' @importFrom glue glue
229241# ' @importFrom stringr str_replace_all
230242# ' @noRd
231- group_to_sme <- function (i , df , dir_prefix , features , type = " sce " ) {
243+ group_to_data_container <- function (i , df , dir_prefix , features , grouping_column ) {
232244 # Set file name based on type
233- file_id_col <- if (type == " sce" ) " file_id_db" else " file_id"
234- experiment_path <- df [[file_id_col ]] | >
245+ experiment_path <- df [[grouping_column ]] | >
235246 head(1 ) | >
236247 file.path(
237248 dir_prefix ,
@@ -253,7 +264,7 @@ group_to_sme <- function(i, df, dir_prefix, features, type = "sce") {
253264 # Fix for https://github.com/tidyverse/dplyr/issues/6746
254265 force(i )
255266
256- if (type == " sce " ) {
267+ if (grouping_column == " file_id_db " ) {
257268 # Process specific to SCE
258269 cells <- colnames(experiment ) | > intersect(df $ cell_ )
259270
@@ -287,7 +298,8 @@ group_to_sme <- function(i, df, dir_prefix, features, type = "sce") {
287298 `colnames<-`(new_coldata $ cell_ ) | >
288299 `colData<-`(value = new_coldata )
289300 }
290- else if (type == " pseudobulk" ) {
301+ else if (grouping_column == " file_id" ) {
302+ # Process specific to Pseudobulk
291303 # remove cell-level annotations
292304 cell_level_anno <- c(" cell_" , " cell_type" , " confidence_class" , " file_id_db" ,
293305 " cell_annotation_blueprint_singler" ,
@@ -297,7 +309,7 @@ group_to_sme <- function(i, df, dir_prefix, features, type = "sce") {
297309 " sample_id_db" )
298310
299311 new_coldata <- df | >
300- select(- dplyr :: all_of(cell_level_anno )) | >
312+ select(- dplyr :: all_of(intersect(names( df ), cell_level_anno ) )) | >
301313 distinct() | >
302314 mutate(
303315 sample_identifier = glue(" {sample_}___{cell_type_harmonised}" ),
0 commit comments