Skip to content
This repository was archived by the owner on Oct 14, 2025. It is now read-only.

Commit 07fd0df

Browse files
committed
Preserve metadata when joining SCEs
1 parent 09719b2 commit 07fd0df

File tree

3 files changed

+24
-37
lines changed

3 files changed

+24
-37
lines changed

R/query.R

Lines changed: 13 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -111,53 +111,36 @@ get_SingleCellExperiment <- function(
111111
}
112112

113113
cli_alert_info("Reading files.")
114-
sce <- subdirs |>
114+
sces <- subdirs |>
115115
imap(function(current_subdir, current_assay) {
116+
# Build up an SCE for each assay
116117
dir_prefix = file.path(
117118
cache_directory,
118119
current_subdir
119120
)
120121

121-
# Load each file
122-
sces <- raw_data |>
122+
raw_data |>
123123
dplyr::group_by(file_id_db) |>
124+
# Load each file and attach metadata
124125
dplyr::summarise(sces = list(group_to_sce(
125126
dplyr::cur_group_id(),
126127
dplyr::cur_data_all(),
127128
dir_prefix,
128129
features
129130
))) |>
130131
dplyr::pull(sces) |>
131-
# Drop files with one cell, which cause the error "the DFrame objects to
132-
# combine must have the same column names"
133-
# keep(~ ncol(.) > 1) |>
134132
# Combine each sce by column, since each sce has a different set
135133
# of cells
136-
do.call(cbind, args = _) |>
137-
# We only need the assay, since we ultimately need to combine
138-
# them. We need to use :: here since we already have an assays
139-
# argument
140-
SummarizedExperiment::assays() |>
141-
setNames(current_assay)
142-
}) |>
143-
aside(cli_alert_info("Compiling Single Cell Experiment.")) |>
144-
# Combine the assays into one list
145-
reduce(c) |>
146-
SingleCellExperiment(assays = _)
134+
do.call(cbind, args = _)
135+
})
147136

148-
cli_alert_info("Attaching metadata.")
137+
cli_alert_info("Compiling Single Cell Experiment.")
138+
# Combine all the assays
139+
sce = sces[[1]]
140+
SummarizedExperiment::assays(sce) <- map(sces, function(sce){
141+
SummarizedExperiment::assays(sce)[[1]]
142+
})
149143

150-
colData(sce) <- raw_data |>
151-
# Needed because cell IDs are not unique outside the file_id or
152-
# file_id_db
153-
filter(.data$file_id_db %in% files_to_read) |>
154-
inner_join(
155-
colData(sce) |> as_tibble(rownames = ".cell"),
156-
by = ".cell"
157-
) |>
158-
column_to_rownames(".cell") |>
159-
as("DataFrame")
160-
161144
sce
162145
}
163146

@@ -215,7 +198,7 @@ group_to_sce = function(i, df, dir_prefix, features){
215198
}
216199
) |>
217200
`colnames<-`(new_cellnames) |>
218-
`colData<-`(new_coldata)
201+
`colData<-`(value = new_coldata)
219202
}
220203

221204
#' Synchronises one or more remote assays with a local copy

tests/testthat/test-query.R

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,16 @@
11
library(HCAquery)
22

3+
test_that("get_SingleCellExperiment() correctly handles duplicate cell IDs", {
4+
meta = get_metadata() |> dplyr::filter(.cell == "868417_1") |> dplyr::collect()
5+
sce <- get_SingleCellExperiment(meta)
6+
# Filtering on a single .cell value should still return more than one metadata row, i.e. this cell ID is shared by several cells
7+
nrow(meta) |> expect_gt(1)
8+
# The two cells sharing that ID should come back under distinct column names (the original ID plus a `_1` / `_2` suffix)
9+
colnames(sce) |> expect_equal(c("868417_1_1", "868417_1_2"))
10+
# The metadata should be carried through as column data: one row per cell (2) and 56 columns
11+
SummarizedExperiment::colData(sce) |> dim() |> expect_equal(c(2, 56))
12+
})
13+
314
test_that("get_default_cache_dir() returns the correct directory on Linux", {
415
grepl("linux", version$platform, fixed = TRUE) |>
516
skip_if_not()

tests/testthat/test-working.R

Lines changed: 0 additions & 7 deletions
This file was deleted.

0 commit comments

Comments
 (0)