Skip to content
This repository was archived by the owner on Oct 14, 2025. It is now read-only.

Commit 212be10

Browse files
authored
Merge pull request #102 from stemangiola/fix-60-v2
Download size reporting
2 parents cc066dd + 31692fa commit 212be10

File tree

9 files changed

+74
-24
lines changed

9 files changed

+74
-24
lines changed

NAMESPACE

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,18 +30,17 @@ importFrom(dbplyr,remote_con)
3030
importFrom(dplyr,as_tibble)
3131
importFrom(dplyr,collect)
3232
importFrom(dplyr,filter)
33-
importFrom(dplyr,full_join)
3433
importFrom(dplyr,group_by)
3534
importFrom(dplyr,inner_join)
3635
importFrom(dplyr,mutate)
3736
importFrom(dplyr,pull)
3837
importFrom(dplyr,summarise)
3938
importFrom(dplyr,tbl)
40-
importFrom(dplyr,tibble)
4139
importFrom(dplyr,transmute)
4240
importFrom(duckdb,duckdb)
4341
importFrom(glue,glue)
4442
importFrom(httr,GET)
43+
importFrom(httr,HEAD)
4544
importFrom(httr,modify_url)
4645
importFrom(httr,parse_url)
4746
importFrom(httr,progress)
@@ -51,11 +50,12 @@ importFrom(methods,as)
5150
importFrom(purrr,imap)
5251
importFrom(purrr,keep)
5352
importFrom(purrr,map)
53+
importFrom(purrr,map_chr)
54+
importFrom(purrr,map_dbl)
5455
importFrom(purrr,map_int)
5556
importFrom(purrr,pmap_chr)
5657
importFrom(purrr,reduce)
5758
importFrom(purrr,set_names)
58-
importFrom(purrr,transpose)
5959
importFrom(purrr,walk)
6060
importFrom(rlang,.data)
6161
importFrom(stats,setNames)

R/metadata.R

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,7 @@ get_metadata <- function(
101101
cached_connection
102102
}
103103
else {
104+
report_file_sizes(remote_url)
104105
db_path <- file.path(cache_directory, "metadata.0.2.3.parquet")
105106
sync_remote_file(
106107
remote_url,

R/query.R

Lines changed: 15 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -148,7 +148,6 @@ get_SingleCellExperiment <- function(
148148
}
149149

150150
#' Converts a data frame into a single SCE
151-
#'
152151
#' @param i Suffix to be added to the column names, to make them unique
153152
#' @param df The data frame to be converted
154153
#' @param dir_prefix The path to the single cell experiment, minus the final segment
@@ -216,7 +215,6 @@ group_to_sce <- function(i, df, dir_prefix, features) {
216215
}
217216

218217
#' Synchronises one or more remote assays with a local copy
219-
#'
220218
#' @param url A character vector of length one. The base HTTP URL from which to
221219
#' obtain the files.
222220
#' @param cache_dir A character vector of length one. The local filepath to
@@ -226,14 +224,10 @@ group_to_sce <- function(i, df, dir_prefix, features) {
226224
#' @param files A character vector containing one or more file_id_db entries
227225
#' @returns A character vector consisting of file paths to all the newly
228226
#' downloaded files
229-
#'
230227
#' @return A character vector of files that have been downloaded
231-
#' @importFrom purrr pmap_chr transpose
232-
#' @importFrom httr modify_url GET write_disk stop_for_status parse_url
233-
#' @importFrom dplyr tibble transmute filter full_join
234-
#' @importFrom glue glue
235-
#' @importFrom assertthat assert_that
236-
#' @importFrom cli cli_alert_success cli_alert_info cli_abort
228+
#' @importFrom purrr pmap_chr map_chr
229+
#' @importFrom httr modify_url
230+
#' @importFrom dplyr transmute filter
237231
#' @noRd
238232
#'
239233
sync_assay_files <- function(
@@ -244,7 +238,7 @@ sync_assay_files <- function(
244238
) {
245239
# Find every combination of file name, sample id, and assay, since each
246240
# will be a separate file we need to download
247-
expand.grid(
241+
files = expand.grid(
248242
filename = c("assays.h5", "se.rds"),
249243
sample_id = files,
250244
subdir = subdirs,
@@ -261,7 +255,7 @@ sync_assay_files <- function(
261255
.data$sample_id,
262256
"/",
263257
.data$filename
264-
) |> map(~ modify_url(url, path = .)),
258+
) |> map_chr(~ modify_url(url, path = .)),
265259

266260
# Path to save the file on local disk (and its parent directory)
267261
# We use file.path since the file separator will differ on other OSs
@@ -281,14 +275,19 @@ sync_assay_files <- function(
281275
# proceed with the download if it has. However this is low
282276
# importance as the repository is not likely to change often
283277
!file.exists(.data$output_file)
284-
) |>
285-
pmap_chr(function(full_url, output_dir, output_file) {
286-
sync_remote_file(full_url, output_file)
287-
output_file
288-
}, .progress = list(name = "Downloading files"))
278+
)
279+
280+
report_file_sizes(files$full_url)
281+
282+
pmap_chr(files, function(full_url, output_dir, output_file) {
283+
sync_remote_file(full_url, output_file)
284+
output_file
285+
}, .progress = list(name = "Downloading files"))
289286
}
290287

291288
#' Synchronises a single remote file with a local path
289+
#' @importFrom httr write_disk GET stop_for_status
290+
#' @importFrom cli cli_abort cli_alert_info
292291
#' @noRd
293292
sync_remote_file <- function(full_url, output_file, ...) {
294293
if (!file.exists(output_file)) {

R/unharmonised.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ get_unharmonised_dataset = function(
6767
#' harmonised <- get_metadata() |> dplyr::filter(tissue == "kidney blood vessel")
6868
#' unharmonised <- get_unharmonised_metadata(harmonised)
6969
get_unharmonised_metadata = function(metadata, ...){
70-
args = list(...)
70+
args <- list(...)
7171
metadata |>
7272
collect() |>
7373
group_by(.data$file_id) |>

R/utils.R

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
#' Gets the file size of a number of remote files
#'
#' Sends one HTTP HEAD request per URL and reads the `content-length`
#' response header, so no file contents are downloaded.
#' @param urls A character vector containing URLs
#' @return The file size of each of the files pointed to by the provided URL,
#'   in gigabytes (10^9 bytes), as a double vector. An element is `NA` when
#'   the response for that URL carries no `content-length` header.
#' @importFrom purrr map_dbl
#' @importFrom httr HEAD
#' @keywords internal
url_file_size <- function(urls){
    map_dbl(urls, function(url){
        content_length <- HEAD(url)$headers$`content-length`

        # Without this guard a missing header yields a zero-length result,
        # which makes map_dbl() abort instead of reporting an unknown size
        if (is.null(content_length)) {
            return(NA_real_)
        }

        # as.numeric() rather than as.integer(): byte counts above
        # .Machine$integer.max (roughly 2.1 GB) cannot be represented as an
        # R integer and would be coerced to NA
        as.numeric(content_length) / 10^9
    })
}
15+
16+
#' Reports the combined size of a set of remote files
#'
#' Looks up the size of every URL with `url_file_size()`, adds the sizes
#' together, rounds the total to two decimal places and emits it, along with
#' the number of URLs, as a cli informational alert.
#' @inheritParams url_file_size
#' @importFrom cli cli_alert_info
#' @keywords internal
report_file_sizes <- function(urls){
    individual_sizes <- url_file_size(urls)
    # The name `total_size` is interpolated by the message template below
    total_size <- round(sum(individual_sizes), digits = 2)

    cli_alert_info("Downloading {length(urls)} file{?s}, totalling {total_size} GB")
}

R/zzz.R

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
#' @importFrom purrr walk
#' @importFrom dbplyr remote_con
#' @importFrom DBI dbDisconnect
.onUnload <- function(libname, pkgname){
    # Disconnect the database connection behind every cached table. This
    # should avoid most of the "Connection is garbage-collected" messages
    cached_tables <- as.list(cache$metadata_table)
    walk(cached_tables, function(table){
        dbDisconnect(remote_con(table))
    })
}

README.Rmd

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ knitr::include_graphics(c(
2828
"man/figures/svcf_logo.jpeg",
2929
"man/figures/czi_logo.png",
3030
"man/figures/bioconductor_logo.jpg",
31-
"man/figures/vca_logo.png",
31+
"man/figures/vca_logo.png",
3232
"man/figures/nectar_logo.png"
3333
))
3434
```
@@ -54,7 +54,7 @@ library(CuratedAtlasQueryR)
5454
### Load the metadata
5555

5656
```{r}
57-
metadata = get_metadata()
57+
metadata = get_metadata()
5858
5959
metadata
6060
```

tests/testthat/test-utils.R

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
test_that("url_file_size() returns the correct sizes", {
    # NOTE(review): this queries live NCBI URLs, so it needs network access,
    # and the expected sizes presumably drift whenever the "latest" files
    # are replaced upstream -- confirm whether a pinned URL is available
    genome_urls <- c(
        "https://ftp.ncbi.nlm.nih.gov/refseq/H_sapiens/annotation/GRCh38_latest/refseq_identifiers/GRCh38_latest_genomic.fna.gz",
        "https://ftp.ncbi.nlm.nih.gov/refseq/H_sapiens/annotation/GRCh37_latest/refseq_identifiers/GRCh37_latest_genomic.fna.gz"
    )
    # Expected sizes in gigabytes, in the same order as the URLs above
    expected_gb <- c(0.973, 0.944)

    expect_equal(url_file_size(genome_urls), expected_gb, tolerance = 0.001)
})

vignettes/Introduction.Rmd

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ knitr::include_graphics(c(
3131
"../man/figures/svcf_logo.jpeg",
3232
"../man/figures/czi_logo.png",
3333
"../man/figures/bioconductor_logo.jpg",
34-
"../man/figures/vca_logo.png",
34+
"../man/figures/vca_logo.png",
3535
"../man/figures/nectar_logo.png"
3636
))
3737
```
@@ -92,7 +92,6 @@ metadata |>
9292
### Query raw counts
9393

9494
```{r}
95-
9695
single_cell_counts =
9796
metadata |>
9897
dplyr::filter(

0 commit comments

Comments
 (0)