Skip to content
This repository was archived by the owner on Oct 14, 2025. It is now read-only.

Commit 97fd5a9

Browse files
committed
optimise arguments and asserts
1 parent cc82070 commit 97fd5a9

File tree

6 files changed

+77
-58
lines changed

6 files changed

+77
-58
lines changed

R/counts.R

Lines changed: 44 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -47,29 +47,46 @@ get_SingleCellExperiment <- function(...){
4747

4848
#' Gets a SingleCellExperiment from curated metadata
4949
#'
50-
#' @inheritDotParams get_summarized_experiment
50+
#' @param data A data frame containing, at minimum, `cell_`, `file_id_db` columns,
51+
#' which correspond to a single cell ID, file subdivision for internal use.
52+
#' They can be obtained from the [get_metadata()] function.
53+
#' @inheritDotParams get_data_container
5154
#' @examples
5255
#' meta <- get_metadata() |> head(2)
5356
#' sce <- get_single_cell_experiment(meta)
5457
#' @export
55-
get_single_cell_experiment <- function(...){
56-
get_summarized_experiment(..., type = "sce")
58+
get_single_cell_experiment <- function(data, ...){
59+
raw_data <- collect(data)
60+
assert_that(
61+
inherits(raw_data, "tbl"),
62+
has_name(raw_data, c("cell_", "file_id_db"))
63+
)
64+
get_data_container(data, ..., repository = pseudobulk_url, grouping_column = "file_id_db")
5765
}
5866

5967
#' Gets a Pseudobulk from curated metadata
6068
#'
61-
#' @inheritDotParams get_summarized_experiment
69+
#' @param data A data frame containing, at minimum, `cell_`, `file_id`,
70+
#' `sample_`, `cell_type_harmonised` columns, which correspond to a single cell ID,
71+
#' file subdivision for internal use, a singlel cell sample ID and harmonised cell type.
72+
#' They can be obtained from the [get_metadata()] function.
73+
#' @inheritDotParams get_data_container
6274
#' @examples
6375
#' \dontrun{
6476
#' meta <- get_metadata() |> filter(tissue_harmonised == "lung")
6577
#' pseudobulk <- meta |> get_pseudobulk()
6678
#' }
6779
#' @export
68-
get_pseudobulk <- function(...) {
69-
get_summarized_experiment(..., type = "pseudobulk")
80+
get_pseudobulk <- function(data, ...) {
81+
raw_data <- collect(data)
82+
assert_that(
83+
inherits(raw_data, "tbl"),
84+
has_name(raw_data, c("cell_", "file_id", "sample_", "cell_type_harmonised"))
85+
)
86+
get_data_container(data, ..., repository = COUNTS_URL, grouping_column = "file_id")
7087
}
7188

72-
#' Gets a Summarized Experiment from curated metadata
89+
#' Gets data from curated metadata container
7390
#'
7491
#' Given a data frame of Curated Atlas metadata obtained from [get_metadata()],
7592
#' returns a [`SummarizedExperiment::SummarizedExperiment-class`] object
@@ -86,10 +103,10 @@ get_pseudobulk <- function(...) {
86103
#' files should be copied.
87104
#' @param repository A character vector of length one. If provided, it should be
88105
#' an HTTP URL pointing to the location where the single cell data is stored.
106+
#' @param grouping_column A character vector of metadata column for grouping. "file_id_db" for
107+
#' for Single Cell Experiment, "file_id" for pseudobulk.
89108
#' @param features An optional character vector of features (ie genes) to return
90109
#' the counts for. By default counts for all features will be returned.
91-
#' @param type A character vector of length one. Either "sce" for Single Cell Experiment,
92-
#' or "pseudobulk".
93110
#' @importFrom dplyr pull filter as_tibble inner_join collect
94111
#' @importFrom tibble column_to_rownames
95112
#' @importFrom purrr reduce map map_int imap keep
@@ -101,17 +118,15 @@ get_pseudobulk <- function(...) {
101118
#' @importFrom cli cli_alert_success cli_alert_info
102119
#' @importFrom rlang .data
103120
#' @importFrom S4Vectors DataFrame
104-
get_summarized_experiment <- function(
121+
get_data_container <- function(
105122
data,
106123
assays = "counts",
107124
cache_directory = get_default_cache_dir(),
108-
repository = COUNTS_URL,
109-
features = NULL,
110-
type = "sce"
125+
repository,
126+
grouping_column,
127+
features = NULL
128+
111129
) {
112-
repository <- if (type == "sce") COUNTS_URL else if (type == "pseudobulk") pseudobulk_url
113-
file_id_col <- if (type == "sce") "file_id_db" else if (type == "pseudobulk") "file_id"
114-
115130
# Parameter validation
116131
assays %in% names(assay_map) |>
117132
all() |>
@@ -133,11 +148,6 @@ get_summarized_experiment <- function(
133148
## operations will fail when passed a database connection
134149
cli_alert_info("Realising metadata.")
135150
raw_data <- collect(data)
136-
assert_that(
137-
inherits(raw_data, "tbl"),
138-
has_name(raw_data, c("cell_", file_id_col))
139-
)
140-
141151
versioned_cache_directory <- cache_directory
142152
versioned_cache_directory |> dir.create(
143153
showWarnings = FALSE,
@@ -156,7 +166,7 @@ get_summarized_experiment <- function(
156166

157167
files_to_read <-
158168
raw_data |>
159-
pull(.data[[file_id_col]]) |>
169+
pull(.data[[grouping_column]]) |>
160170
unique() |>
161171
as.character() |>
162172
sync_assay_files(
@@ -175,16 +185,16 @@ get_summarized_experiment <- function(
175185
versioned_cache_directory,
176186
current_subdir
177187
)
178-
188+
179189
experiment_list <- raw_data |>
180-
dplyr::group_by(.data[[file_id_col]]) |>
190+
dplyr::group_by(.data[[grouping_column]]) |>
181191
dplyr::summarise(experiments = list(
182-
group_to_sme(
192+
group_to_data_container(
183193
dplyr::cur_group_id(),
184194
dplyr::cur_data_all(),
185195
dir_prefix,
186196
features,
187-
type
197+
grouping_column
188198
)
189199
)) |>
190200
dplyr::pull(experiments)
@@ -199,6 +209,7 @@ get_summarized_experiment <- function(
199209
cli_alert_info("Compiling Experiment.")
200210
# Combine all the assays
201211
experiment <- experiments[[1]]
212+
202213
SummarizedExperiment::assays(experiment) <- map(experiments, function(exp) {
203214
SummarizedExperiment::assays(exp)[[1]]
204215
})
@@ -218,6 +229,7 @@ get_summarized_experiment <- function(
218229
#' @param dir_prefix The path to the single cell experiment, minus the final
219230
#' segment
220231
#' @param features The list of genes/rows of interest
232+
#' @param grouping_column A character vector of metadata column for grouping
221233
#' @return A `SummarizedExperiment` object
222234
#' @importFrom dplyr mutate filter
223235
#' @importFrom HDF5Array loadHDF5SummarizedExperiment
@@ -228,10 +240,9 @@ get_summarized_experiment <- function(
228240
#' @importFrom glue glue
229241
#' @importFrom stringr str_replace_all
230242
#' @noRd
231-
group_to_sme <- function(i, df, dir_prefix, features, type = "sce") {
243+
group_to_data_container <- function(i, df, dir_prefix, features, grouping_column) {
232244
# Set file name based on type
233-
file_id_col <- if (type == "sce") "file_id_db" else "file_id"
234-
experiment_path <- df[[file_id_col]] |>
245+
experiment_path <- df[[grouping_column]] |>
235246
head(1) |>
236247
file.path(
237248
dir_prefix,
@@ -253,7 +264,7 @@ group_to_sme <- function(i, df, dir_prefix, features, type = "sce") {
253264
# Fix for https://github.com/tidyverse/dplyr/issues/6746
254265
force(i)
255266

256-
if (type == "sce") {
267+
if (grouping_column == "file_id_db") {
257268
# Process specific to SCE
258269
cells <- colnames(experiment) |> intersect(df$cell_)
259270

@@ -287,7 +298,8 @@ group_to_sme <- function(i, df, dir_prefix, features, type = "sce") {
287298
`colnames<-`(new_coldata$cell_) |>
288299
`colData<-`(value = new_coldata)
289300
}
290-
else if (type == "pseudobulk") {
301+
else if (grouping_column == "file_id") {
302+
# Process specific to Pseudobulk
291303
# remove cell-level annotations
292304
cell_level_anno <- c("cell_", "cell_type", "confidence_class", "file_id_db",
293305
"cell_annotation_blueprint_singler",
@@ -297,7 +309,7 @@ group_to_sme <- function(i, df, dir_prefix, features, type = "sce") {
297309
"sample_id_db")
298310

299311
new_coldata <- df |>
300-
select(-dplyr::all_of(cell_level_anno)) |>
312+
select(-dplyr::all_of(intersect(names(df), cell_level_anno))) |>
301313
distinct() |>
302314
mutate(
303315
sample_identifier = glue("{sample_}___{cell_type_harmonised}"),

man/get_SingleCellExperiment.Rd

Lines changed: 3 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.
Lines changed: 10 additions & 10 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/get_pseudobulk.Rd

Lines changed: 9 additions & 7 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/get_seurat.Rd

Lines changed: 3 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/get_single_cell_experiment.Rd

Lines changed: 8 additions & 7 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)