Skip to content
This repository was archived by the owner on Oct 14, 2025. It is now read-only.

Commit 02c8a94

Browse files
committed
Merge branch 'master' of github.com:stemangiola/HCAquery
2 parents 7c5b4d8 + 4ee5c7c commit 02c8a94

File tree

7 files changed

+49
-44
lines changed

7 files changed

+49
-44
lines changed

DESCRIPTION

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
Type: Package
22
Package: CuratedAtlasQueryR
33
Title: Queries the Human Cell Atlas
4-
Version: 0.3.1
4+
Version: 0.4.0
55
Authors@R: c(
66
person(
77
"Stefano",
@@ -126,3 +126,4 @@ LazyDataCompression: xz
126126
URL: https://github.com/stemangiola/CuratedAtlasQueryR
127127
BugReports: https://github.com/stemangiola/CuratedAtlasQueryR/issues
128128
VignetteBuilder: knitr
129+
Roxygen: list(markdown = TRUE)

R/query.R

Lines changed: 22 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -11,15 +11,19 @@ assay_map <- c(
1111
)
1212

1313
REMOTE_URL <- "https://swift.rc.nectar.org.au/v1/AUTH_06d6e008e3e642da99d806ba3ea629c5/harmonised-human-atlas"
14+
COUNTS_VERSION <- "0.2"
1415

15-
#' Given a data frame of HCA metadata, returns a SingleCellExperiment object
16-
#' corresponding to the samples in that data frame
16+
#' Gets a SingleCellExperiment from curated metadata
17+
#'
18+
#' Given a data frame of Curated Atlas metadata obtained from [get_metadata()],
19+
#' returns a [`SingleCellExperiment::SingleCellExperiment-class`] object corresponding to the samples in that
20+
#' data frame
1721
#'
1822
#' @param data A data frame containing, at minimum, a `.sample` column, which
1923
#' corresponds to a single cell sample ID. This can be obtained from the
2024
#' [get_metadata()] function.
21-
#' @param assays A character vector whose elements must be either "counts" and/or
22-
#' "cpm", representing the corresponding assay(s) you want to request.
25+
#' @param assays A character vector whose elements must be either "counts"
26+
#' and/or "cpm", representing the corresponding assay(s) you want to request.
2327
#' @param repository A character vector of length one. If provided, it should be
2428
#' an HTTP URL pointing to the location where the single cell data is stored.
2529
#' @param cache_directory An optional character vector of length one. If
@@ -51,7 +55,7 @@ REMOTE_URL <- "https://swift.rc.nectar.org.au/v1/AUTH_06d6e008e3e642da99d806ba3e
5155
#'
5256
#' @export
5357
#'
54-
#'
58+
#'
5559
get_SingleCellExperiment <- function(
5660
data,
5761
assays = c("counts", "cpm"),
@@ -79,9 +83,10 @@ get_SingleCellExperiment <- function(
7983
cli_alert_info("Realising metadata.")
8084
raw_data <- collect(data)
8185
inherits(raw_data, "tbl") |> assert_that()
82-
has_name(raw_data, c(".cell", "file_id_db")) |> assert_that()
86+
has_name(raw_data, c("_cell", "file_id_db")) |> assert_that()
8387

84-
cache_directory |> dir.create(showWarnings = FALSE)
88+
versioned_cache_directory = file.path(cache_directory, COUNTS_VERSION)
89+
versioned_cache_directory |> dir.create(showWarnings = FALSE, recursive = TRUE)
8590

8691
subdirs <- assay_map[assays]
8792

@@ -100,7 +105,7 @@ get_SingleCellExperiment <- function(
100105
as.character() |>
101106
sync_assay_files(
102107
url = parsed_repo,
103-
cache_dir = cache_directory,
108+
cache_dir = versioned_cache_directory,
104109
files = _,
105110
subdirs = subdirs
106111
)
@@ -111,7 +116,7 @@ get_SingleCellExperiment <- function(
111116
imap(function(current_subdir, current_assay) {
112117
# Build up an SCE for each assay
113118
dir_prefix <- file.path(
114-
cache_directory,
119+
versioned_cache_directory,
115120
current_subdir
116121
)
117122

@@ -172,14 +177,14 @@ group_to_sce <- function(i, df, dir_prefix, features) {
172177
sce <- loadHDF5SummarizedExperiment(sce_path)
173178
# The cells we select here are those that are both available in the SCE
174179
# object, and requested for this particular file
175-
cells <- colnames(sce) |> intersect(df$.cell)
180+
cells <- colnames(sce) |> intersect(df$`_cell`)
176181
# We need to make the cell names globally unique, which we can guarantee
177182
# by adding a suffix that is derived from file_id_db, which is the grouping
178183
# variable
179184
new_cellnames <- paste0(cells, "_", i)
180185
new_coldata <- df |>
181-
mutate(original_cell_id = .data$.cell, .cell = new_cellnames) |>
182-
column_to_rownames(".cell") |>
186+
mutate(original_cell_id = .data$`_cell`, `_cell` = new_cellnames) |>
187+
column_to_rownames("_cell") |>
183188
as("DataFrame")
184189

185190
features |>
@@ -333,10 +338,12 @@ get_seurat <- function(...) {
333338
get_SingleCellExperiment(...) |> as.Seurat(data = NULL)
334339
}
335340

341+
#' Gets the Curated Atlas metadata as a data frame.
342+
#'
336343
#' Downloads a parquet database of the Human Cell Atlas metadata to a local
337344
#' cache, and then opens it as a data frame. It can then be filtered and
338345
#' passed into [get_SingleCellExperiment()]
339-
#' to obtain a [`SingleCellExperiment`](SingleCellExperiment::SingleCellExperiment-class)
346+
#' to obtain a [`SingleCellExperiment::SingleCellExperiment-class`]
340347
#'
341348
#' @param remote_url Optional character vector of length 1. An HTTP URL pointing
342349
#' to the location of the parquet database.
@@ -364,10 +371,10 @@ get_seurat <- function(...) {
364371
#' @importFrom httr progress
365372
#' @importFrom cli cli_alert_info
366373
get_metadata <- function(
367-
remote_url = "https://object-store.rc.nectar.org.au/v1/AUTH_06d6e008e3e642da99d806ba3ea629c5/metadata-sqlite/metadata.parquet",
374+
remote_url = "https://object-store.rc.nectar.org.au/v1/AUTH_06d6e008e3e642da99d806ba3ea629c5/metadata/metadata.0.2.2.parquet",
368375
cache_directory = get_default_cache_dir()
369376
) {
370-
db_path <- file.path(cache_directory, "metadata.parquet")
377+
db_path <- file.path(cache_directory, "metadata.0.2.2.parquet")
371378
sync_remote_file(
372379
remote_url,
373380
db_path,

man/get_SingleCellExperiment.Rd

Lines changed: 9 additions & 9 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/get_metadata.Rd

Lines changed: 9 additions & 12 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/get_seurat.Rd

Lines changed: 5 additions & 5 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tests/testthat/test-query.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ library(CuratedAtlasQueryR)
22

33
test_that("get_SingleCellExperiment() correctly handles duplicate cell IDs", {
44
meta <- get_metadata() |>
5-
dplyr::filter(.cell == "868417_1") |>
5+
dplyr::filter(`_cell` == "868417_1") |>
66
dplyr::collect()
77
sce <- get_SingleCellExperiment(meta)
88
# This query should return multiple cells, despite querying only 1 cell ID

vignettes/Introduction.Rmd

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ knitr::include_graphics(c(
3131
"../man/figures/svcf_logo.jpeg",
3232
"../man/figures/czi_logo.png",
3333
"../man/figures/bioconductor_logo.jpg",
34-
"../man/figures/vca_logo.png"
34+
"../man/figures/vca_logo.png"
3535
))
3636
```
3737

0 commit comments

Comments
 (0)