Skip to content
This repository was archived by the owner on Oct 14, 2025. It is now read-only.

Commit a284ec6

Browse files
committed
Demo metadata, build vignettes on CI
Conflicts: README.Rmd README.md vignettes/Introduction.Rmd
2 parents e60553d + 2d76f58 commit a284ec6

File tree

12 files changed

+375
-1076
lines changed

12 files changed

+375
-1076
lines changed

.github/workflows/check-bioc.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -219,8 +219,8 @@ jobs:
219219
_R_CHECK_CRAN_INCOMING_: false
220220
run: |
221221
rcmdcheck::rcmdcheck(
222-
args = c("--no-build-vignettes", "--no-manual", "--timings"),
223-
build_args = c("--no-manual", "--no-resave-data"),
222+
args = c("--no-manual", "--timings"),
223+
build_args = c("--no-resave-data"),
224224
error_on = "warning",
225225
check_dir = "check"
226226
)

DESCRIPTION

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,8 @@ Suggests:
105105
spelling,
106106
forcats,
107107
ggplot2,
108-
tidySingleCellExperiment
108+
tidySingleCellExperiment,
109+
rprojroot
109110
Biarch: true
110111
biocViews:
111112
AssayDomain,

NAMESPACE

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
# Generated by roxygen2: do not edit by hand
22

33
S3method(as.sparse,DelayedMatrix)
4+
export(DATABASE_URL)
5+
export(SAMPLE_DATABASE_URL)
46
export(get_SingleCellExperiment)
57
export(get_metadata)
68
export(get_seurat)

R/counts.R

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -13,14 +13,14 @@ assay_map <- c(
1313
cpm = "cpm"
1414
)
1515

16-
#' Base URL pointing to the count data
16+
#' Base URL pointing to the count data at the current version
1717
COUNTS_URL <- single_line_str(
18-
"https://swift.rc.nectar.org.au/v1/
19-
AUTH_06d6e008e3e642da99d806ba3ea629c5/harmonised-human-atlas"
18+
"https://object-store.rc.nectar.org.au/v1/
19+
AUTH_06d6e008e3e642da99d806ba3ea629c5/cellxgene-0.2.1-hdf5"
2020
)
2121
#' Current version of the counts. This will be incremented when a newer
2222
#' version is released
23-
COUNTS_VERSION <- "0.2"
23+
COUNTS_VERSION <- "0.2.1"
2424

2525
#' @inherit get_single_cell_experiment
2626
#' @inheritDotParams get_single_cell_experiment
@@ -199,10 +199,10 @@ group_to_sce <- function(i, df, dir_prefix, features) {
199199

200200
file.exists(sce_path) |>
201201
assert_that(
202-
msg = "Your cache does not contain a file you
202+
msg = "Your cache does not contain a file {sce_path} you
203203
attempted to query. Please provide the repository
204204
parameter so that files can be synchronised from the
205-
internet"
205+
internet" |> glue()
206206
)
207207

208208
sce <- loadHDF5SummarizedExperiment(sce_path)

R/dev.R

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -184,3 +184,64 @@ dir_to_anndata <- function(src, dest){
184184
}, .progress = "Converting files")
185185
}, env = zellkonverter::zellkonverterAnnDataEnv())
186186
}
187+
188+
#' Makes a "downsampled" metadata file that only contains the minimal data
#' needed to run the vignette.
#'
#' For each of the three vignette examples, the metadata is filtered with that
#' example's criteria and a small number of matching `file_id_db` values is
#' kept. All metadata rows belonging to any of the kept IDs are then written
#' to a parquet file.
#' @param output Character scalar. Path to the output file.
#' @return `NULL`, invisibly. Called for the side effect of writing `output`.
#' @keywords internal
downsample_metadata <- function(output = "sample_meta.parquet") {
  # `collect()` is called on results derived from this table below, so it is
  # presumably a lazy (database-backed) table -- TODO confirm
  metadata <- get_metadata()

  # Make a table of rows per dataset
  dataset_sizes <- metadata |>
    dplyr::group_by(.data$file_id_db) |>
    dplyr::summarise(n = dplyr::n()) |>
    dplyr::collect()

  # Keeps the first `n_files` file IDs in `dataset_sizes` that are also
  # present in `candidate_ids`.
  # NOTE(review): `dataset_sizes` is not sorted by `n`, so these are the first
  # matching datasets rather than necessarily the smallest ones.
  first_file_ids <- function(candidate_ids, n_files) {
    dataset_sizes |>
      dplyr::filter(.data$file_id_db %in% candidate_ids) |>
      dplyr::slice_head(n = n_files) |>
      dplyr::pull(.data$file_id_db)
  }

  # For each of the 3 examples, we select a small set of file_id_db values
  # that will satisfy the corresponding filters
  example_a_all <- metadata |>
    dplyr::filter(
      .data$ethnicity == "African" &
        stringr::str_like(.data$assay, "%10x%") &
        .data$tissue == "lung parenchyma" &
        stringr::str_like(.data$cell_type, "%CD4%")
    ) |>
    dplyr::pull(.data$file_id_db)
  example_a_minimal <- first_file_ids(example_a_all, 5)

  example_b_all <- metadata |>
    dplyr::filter(.data$cell_type_harmonised == "cd14 mono") |>
    dplyr::pull(.data$file_id_db)
  example_b_minimal <- first_file_ids(example_b_all, 1)

  example_c_all <- metadata |>
    dplyr::filter(.data$cell_type_harmonised == "nk") |>
    dplyr::pull(.data$file_id_db)
  example_c_minimal <- first_file_ids(example_c_all, 1)

  # The final dataset is the union of all the selected file IDs
  minimal_file_ids <- union(example_a_minimal, example_b_minimal) |>
    union(example_c_minimal)

  # Write to the caller-supplied path rather than a hard-coded file name, so
  # that the `output` argument is actually honoured
  metadata |>
    dplyr::filter(.data$file_id_db %in% minimal_file_ids) |>
    dplyr::arrange(.data$file_id_db, .data$sample_) |>
    dplyr::collect() |>
    arrow::write_parquet(output)

  invisible(NULL)
}

R/metadata.R

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,22 @@ cache <- rlang::env(
88
metadata_table = rlang::env()
99
)
1010

11+
#' URL pointing to the full metadata file
12+
#' @export
1113
DATABASE_URL <- single_line_str(
1214
"https://object-store.rc.nectar.org.au/v1/
1315
AUTH_06d6e008e3e642da99d806ba3ea629c5/metadata/metadata.0.2.3.parquet"
1416
)
1517

18+
#' URL pointing to the sample metadata file, which is smaller and for test,
19+
#' demonstration, and vignette purposes only
20+
#' @export
21+
SAMPLE_DATABASE_URL <- single_line_str(
22+
"https://object-store.rc.nectar.org.au/v1/
23+
AUTH_06d6e008e3e642da99d806ba3ea629c5/metadata/
24+
sample_metadata.0.2.3.parquet"
25+
)
26+
1627
#' Gets the Curated Atlas metadata as a data frame.
1728
#'
1829
#' Downloads a parquet database of the Human Cell Atlas metadata to a local
@@ -24,7 +35,7 @@ DATABASE_URL <- single_line_str(
2435
#' to the location of the parquet database.
2536
#' @param cache_directory Optional character vector of length 1. A file path on
2637
#' your local system to a directory (not a file) that will be used to store
27-
#' metadata.parquet
38+
#' `metadata.parquet`
2839
#' @param use_cache Optional logical scalar. If `TRUE` (the default), and this
2940
#' function has been called before with the same parameters, then a cached
3041
#' reference to the table will be returned. If `FALSE`, a new connection will

0 commit comments

Comments
 (0)