Skip to content
This repository was archived by the owner on Oct 14, 2025. It is now read-only.

Commit 1ab4cd7

Browse files
committed
accepting counts from multiple databases
1 parent 78b8c0f commit 1ab4cd7

File tree

9 files changed

+78
-70
lines changed

9 files changed

+78
-70
lines changed

DESCRIPTION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,7 @@ biocViews:
121121
Transcription,
122122
Transcriptomics
123123
Encoding: UTF-8
124-
RoxygenNote: 7.2.3
124+
RoxygenNote: 7.3.1
125125
LazyDataCompression: xz
126126
URL: https://github.com/stemangiola/CuratedAtlasQueryR
127127
BugReports: https://github.com/stemangiola/CuratedAtlasQueryR/issues

NAMESPACE

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
# Generated by roxygen2: do not edit by hand
22

33
S3method(as.sparse,DelayedMatrix)
4-
export(get_database_url)
54
export(SAMPLE_DATABASE_URL)
65
export(get_SingleCellExperiment)
6+
export(get_database_url)
77
export(get_metadata)
88
export(get_seurat)
99
export(get_single_cell_experiment)
@@ -62,7 +62,6 @@ importFrom(purrr,pmap_chr)
6262
importFrom(purrr,reduce)
6363
importFrom(purrr,set_names)
6464
importFrom(purrr,walk)
65-
importFrom(purrr,walk2)
6665
importFrom(rlang,.data)
6766
importFrom(stats,setNames)
6867
importFrom(stringr,str_remove_all)

R/metadata.R

Lines changed: 37 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -6,32 +6,28 @@ NULL
66
#' Environment that we use to cache the DuckDB connections
77
#' @noRd
88
cache <- rlang::env(
9-
metadata_table = rlang::env()
9+
metadata_table = rlang::env()
1010
)
1111

1212
#' Returns the URLs for the requested metadata files
13+
#' @param databases A character vector specifying the names of the metadata files. Default is c("metadata.0.2.3.parquet", "fibrosis.0.2.3.parquet")
1314
#' @export
14-
#' @return A named character vector whose names are parquet file names, and whose values are URLs
15+
#' @return A character vector of URLs to parquet files to download
1516
#' @examples
16-
#' get_metadata(remote_url = get_database_url("metadata.0.2.3.parquet"))
17-
18-
17+
#' get_database_url("metadata.0.2.3.parquet")
1918
get_database_url <- function(databases = c("metadata.0.2.3.parquet", "fibrosis.0.2.3.parquet")) {
20-
glue::glue(
21-
"https://object-store.rc.nectar.org.au/v1/AUTH_06d6e008e3e642da99d806ba3ea629c5/metadata/{databases}"
22-
) |>
23-
setNames(databases)
19+
glue::glue(
20+
"https://object-store.rc.nectar.org.au/v1/AUTH_06d6e008e3e642da99d806ba3ea629c5/metadata/{databases}")
2421
}
2522

26-
2723
#' URL pointing to the sample metadata file, which is smaller and for test,
2824
#' demonstration, and vignette purposes only
2925
#' @export
3026
#' @return A character scalar consisting of the URL
3127
#' @examples
32-
#' get_metadata(remote_url = SAMPLE_DATABASE_URL)
28+
#' get_metadata(remote_url = SAMPLE_DATABASE_URL, cache_directory = tempdir())
3329
SAMPLE_DATABASE_URL <- single_line_str(
34-
"https://object-store.rc.nectar.org.au/v1/
30+
"https://object-store.rc.nectar.org.au/v1/
3531
AUTH_06d6e008e3e642da99d806ba3ea629c5/metadata/
3632
sample_metadata.0.2.3.parquet"
3733
)
@@ -73,7 +69,7 @@ SAMPLE_DATABASE_URL <- single_line_str(
7369
#' @importFrom httr progress
7470
#' @importFrom cli cli_alert_info hash_sha256
7571
#' @importFrom glue glue
76-
#' @importFrom purrr walk2
72+
#' @importFrom purrr walk
7773
#'
7874
#' @details
7975
#'
@@ -147,35 +143,39 @@ SAMPLE_DATABASE_URL <- single_line_str(
147143
#'
148144
#' get_metadata(cache_directory = path.expand('~'))
149145
#'
150-
151-
152146
get_metadata <- function(
153147
remote_url = get_database_url(),
154148
cache_directory = get_default_cache_dir(),
155149
use_cache = TRUE
156150
) {
157-
hash <- c(remote_url, cache_directory) |> paste0(collapse="") |>
158-
hash_sha256()
159-
cached_connection <- cache$metadata_table[[hash]]
160-
if (!is.null(cached_connection) && isTRUE(use_cache)) {
161-
cached_connection
162-
}
163-
else {
164-
db_path <- file.path(cache_directory, remote_url |> basename())
165-
walk2(remote_url, db_path, function(url, path) {
166-
if (!file.exists(path)) {
167-
report_file_sizes(url)
168-
sync_remote_file(url,
169-
path,
170-
progress(type = "down", con = stderr()))
171-
}
172-
})
173-
174-
table <- duckdb() |>
175-
dbConnect(drv = _, read_only = TRUE) |>
176-
read_parquet(db_path)
177-
cache$metadata_table[[hash]] <- table
178-
table
151+
# Synchronize remote files
152+
walk(remote_url, function(url) {
153+
# Calculate the file path from the URL
154+
path <- file.path(cache_directory, url |> basename())
155+
if (!file.exists(path)) {
156+
report_file_sizes(url)
157+
sync_remote_file(url,
158+
path,
159+
progress(type = "down", con = stderr()))
179160
}
161+
})
162+
all_parquet <- file.path(cache_directory, dir(cache_directory, pattern = ".parquet$"))
163+
# We try to avoid re-reading a set of parquet files
164+
# that is identical to a previous set by hashing the file list
165+
hash <- all_parquet |> paste0(collapse="") |>
166+
hash_sha256()
167+
cached_connection <- cache$metadata_table[[hash]]
168+
169+
if (!is.null(cached_connection) && isTRUE(use_cache)) {
170+
# If the file list is identical, just re-use the database table
171+
cached_connection
172+
}
173+
else {
174+
table <- duckdb() |>
175+
dbConnect(drv = _, read_only = TRUE) |>
176+
read_parquet(path = all_parquet)
177+
cache$metadata_table[[hash]] <- table
178+
table
179+
}
180180
}
181181

man/DATABASE_URL.Rd

Lines changed: 0 additions & 22 deletions
This file was deleted.

man/SAMPLE_DATABASE_URL.Rd

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/get_database_url.Rd

Lines changed: 22 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/get_default_cache_dir.Rd

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/get_metadata.Rd

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

vignettes/Introduction.Rmd

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -50,8 +50,9 @@ find_figure <- function(names){
5050
file.path("man", "figures", names)
5151
}
5252
METADATA_URL = if (params$demo_metadata)
53-
CuratedAtlasQueryR::SAMPLE_DATABASE_URL else
54-
CuratedAtlasQueryR::get_database_url
53+
CuratedAtlasQueryR::SAMPLE_DATABASE_URL else
54+
CuratedAtlasQueryR::get_database_url
55+
5556
```
5657

5758
<!-- badges: start -->
@@ -98,12 +99,20 @@ library(CuratedAtlasQueryR)
9899

99100
### Load the metadata
100101

101-
```{r}
102+
```{r, eval=FALSE}
102103
# Note: in real applications you should use the default value of remote_url
103104
metadata <- get_metadata(remote_url = METADATA_URL)
104105
metadata
105106
```
106107

108+
```{r, echo=FALSE}
109+
# Note: a custom cache is used here ONLY for R CMD check compliance purposes. Users will NOT need to specify a custom cache.
110+
metadata <- get_metadata(remote_url = METADATA_URL, cache_directory = tempdir())
111+
metadata
112+
```
113+
114+
115+
107116
The `metadata` variable can then be re-used for all subsequent queries.
108117

109118
### Explore the tissue

0 commit comments

Comments
 (0)