Skip to content
This repository was archived by the owner on Oct 14, 2025. It is now read-only.

Commit ebf914e

Browse files
authored
Merge pull request #83 from stemangiola/fix-78
Fix 78
2 parents 5103c18 + cc8e38c commit ebf914e

File tree

3 files changed

+68
-19
lines changed

3 files changed

+68
-19
lines changed

NAMESPACE

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ importFrom(assertthat,has_name)
2323
importFrom(cli,cli_abort)
2424
importFrom(cli,cli_alert_info)
2525
importFrom(cli,cli_alert_success)
26+
importFrom(cli,cli_alert_warning)
2627
importFrom(dplyr,as_tibble)
2728
importFrom(dplyr,collect)
2829
importFrom(dplyr,filter)

R/query.R

Lines changed: 30 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ COUNTS_VERSION <- "0.2"
4444
#' @importFrom glue glue
4545
#' @importFrom HDF5Array loadHDF5SummarizedExperiment HDF5RealizationSink
4646
#' loadHDF5SummarizedExperiment
47-
#' @importFrom SingleCellExperiment SingleCellExperiment simplifyToSCE
47+
#' @importFrom SingleCellExperiment SingleCellExperiment combineCols
4848
#' @importFrom SummarizedExperiment colData assayNames<-
4949
#' @importFrom httr parse_url
5050
#' @importFrom assertthat assert_that has_name
@@ -85,7 +85,7 @@ get_SingleCellExperiment <- function(
8585
inherits(raw_data, "tbl") |> assert_that()
8686
has_name(raw_data, c("cell_", "file_id_db")) |> assert_that()
8787

88-
versioned_cache_directory = file.path(cache_directory, COUNTS_VERSION)
88+
versioned_cache_directory <- file.path(cache_directory, COUNTS_VERSION)
8989
versioned_cache_directory |> dir.create(showWarnings = FALSE, recursive = TRUE)
9090

9191
subdirs <- assay_map[assays]
@@ -152,11 +152,13 @@ get_SingleCellExperiment <- function(
152152
#' @param dir_prefix The path to the single cell experiment, minus the final segment
153153
#' @param features The list of genes/rows of interest
154154
#' @return A SingleCellExperiment object
155-
#' @importFrom dplyr mutate
155+
#' @importFrom dplyr mutate filter
156156
#' @importFrom HDF5Array loadHDF5SummarizedExperiment
157157
#' @importFrom SummarizedExperiment colData<-
158158
#' @importFrom tibble column_to_rownames
159159
#' @importFrom utils head
160+
#' @importFrom cli cli_alert_warning cli_abort
161+
#' @importFrom glue glue
160162
#' @noRd
161163
group_to_sce <- function(i, df, dir_prefix, features) {
162164
sce_path <- df$file_id_db |>
@@ -178,26 +180,36 @@ group_to_sce <- function(i, df, dir_prefix, features) {
178180
# The cells we select here are those that are both available in the SCE
179181
# object, and requested for this particular file
180182
cells <- colnames(sce) |> intersect(df$cell_)
181-
# We need to make the cell names globally unique, which we can guarantee
182-
# by adding a suffix that is derived from file_id_db, which is the grouping
183-
# variable
184-
new_cellnames <- paste0(cells, "_", i)
183+
184+
if (length(cells) < nrow(df)){
185+
cli_alert_warning("Some cells were filtered out because of extremely low counts. The number of cells in the SingleCellExperiment will be less than the number of cells you have selected from the metadata.")
186+
df = filter(df, .data$cell_ %in% cells)
187+
}
188+
else if (length(cells) > nrow(df)){
189+
cli_abort("This should never happen")
190+
}
191+
192+
# Fix for https://github.com/tidyverse/dplyr/issues/6746
193+
force(i)
194+
185195
new_coldata <- df |>
186-
mutate(original_cell_id = .data$cell_, cell_ = new_cellnames) |>
196+
# We need to make the cell names globally unique, which we can guarantee
197+
# by adding a suffix that is derived from file_id_db, which is the grouping
198+
# variable
199+
mutate(original_cell_id = .data$cell_, cell_ = glue("{cell_}_{i}")) |>
187200
column_to_rownames("cell_") |>
188201
as("DataFrame")
189-
190-
features |>
191-
is.null() |>
192-
{
193-
`if`
194-
}(
195-
sce[, cells], {
202+
203+
`if`(
204+
is.null(features),
205+
sce[, new_coldata$original_cell_id],
206+
{
196207
# Optionally subset the genes
197208
genes <- rownames(sce) |> intersect(features)
198-
sce[genes, cells]
199-
}) |>
200-
`colnames<-`(new_cellnames) |>
209+
sce[genes, new_coldata$original_cell_id]
210+
}
211+
) |>
212+
`colnames<-`(new_coldata$cell_) |>
201213
`colData<-`(value = new_coldata)
202214
}
203215

tests/testthat/test-query.R

Lines changed: 37 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
library(CuratedAtlasQueryR)
1+
library(dplyr)
22

33
test_that("get_SingleCellExperiment() correctly handles duplicate cell IDs", {
44
meta <- get_metadata() |>
@@ -120,3 +120,39 @@ test_that("get_seurat() returns the appropriate data in Seurat format", {
120120
rownames(seurat)
121121
)
122122
})
123+
124+
test_that("get_SingleCellExperiment() assigns the right cell ID to each cell", {
125+
id = "3214d8f8986c1e33a85be5322f2db4a9"
126+
127+
# Force the file to be cached
128+
get_metadata() |>
129+
filter(file_id_db == id) |>
130+
get_SingleCellExperiment()
131+
132+
# Load the SCE from cache directly
133+
assay_1 = CuratedAtlasQueryR:::get_default_cache_dir() |>
134+
file.path("0.2/original", id) |>
135+
HDF5Array::loadHDF5SummarizedExperiment() |>
136+
assay("X") |>
137+
as.matrix()
138+
139+
# Make a SCE that has the right column names, but reversed
140+
assay_2 =
141+
assay_1 |>
142+
colnames() |>
143+
tibble::tibble(
144+
file_id_db = id,
145+
cell_ = _
146+
) |>
147+
arrange(-row_number()) |>
148+
get_SingleCellExperiment(assays = "counts") |>
149+
assay("counts") |>
150+
as.matrix()
151+
152+
colnames(assay_2) = sub("_1", "", x=colnames(assay_2))
153+
154+
expect_equal(
155+
assay_1,
156+
assay_2[, colnames(assay_1)]
157+
)
158+
})

0 commit comments

Comments
 (0)