|
1 | 1 | # Utility scripts for development purposes, that are not exported to users |
2 | 2 |
|
3 | | -#' Update the metadata database in nectar using a newly created data frame |
4 | | -#' @param metadata The data frame to upload |
5 | | -#' @param version The version for the new metadata as a character scalar, e.g. |
6 | | -#' "0.2.3" |
| 3 | +#' Upload a file to the Nectar object store |
| 4 | +#' @param source A character scalar indicating the local path to the file to |
| 5 | +#' upload |
| 6 | +#' @param container A character scalar indicating the name of the container to |
| 7 | +#' upload to |
| 8 | +#' @param name An optional character scalar indicating the name the file should |
| 9 | +#' have after being uploaded. Defaults to being the basename of the source |
| 10 | +#' file. |
7 | 11 | #' @param credential_id The OpenStack application credential ID as a character |
8 | 12 | #' scalar. This is optional because you can alternatively source a |
9 | 13 | #' `-openrc.sh` file instead of providing it here. |
10 | 14 | #' @param credential_secret The OpenStack application credential secret as a |
11 | 15 | #' character scalar |
12 | | -#' @noRd |
13 | | -#' @example |
14 | | -#' \dontrun{ |
15 | | -#' metadata = CuratedAtlasQueryR::get_metadata() |> head(10) |> dplyr::collect() |
16 | | -#' update_database(metadata, "0.2.3", "rfypdlunhrfopdnkrs", "3q5lw3qntafptdfsrdh-wa4p8h") |
17 | | -#' # Prints "metadata.0.2.3.parquet" if successful |
18 | | -#' } |
19 | | -update_database = function(metadata, version, credential_id = NULL, credential_secret = NULL){ |
20 | | - # These are optional dev packages |
21 | | - rlang::check_installed(c("arrow", "glue", "basilisk")) |
22 | | - |
23 | | - # Create parquet |
24 | | - dir <- tempdir() |
25 | | - parquet_name <- glue::glue("metadata.{version}.parquet") |
26 | | - parquet_path <- file.path(dir, parquet_name) |
27 | | - arrow::write_parquet(metadata, sink=parquet_path) |
28 | | - |
| 16 | +#' @return NULL |
| 17 | +#' @keywords internal |
| 18 | +upload_swift = function(source, container, name = basename(source), credential_id = NULL, credential_secret = NULL){ |
29 | 19 | # Create the basilisk environment |
30 | 20 | swift_env <- basilisk::BasiliskEnvironment( |
31 | 21 | envname="swift-nectar-upload", |
@@ -57,13 +47,54 @@ update_database = function(metadata, version, credential_id = NULL, credential_s |
57 | 47 | "06d6e008e3e642da99d806ba3ea629c5", |
58 | 48 | auth, |
59 | 49 | "upload", |
60 | | - "metadata", |
61 | | - parquet_path, |
| 50 | + container, |
| 51 | + source, |
62 | 52 | "--object-name", |
63 | | - parquet_name |
| 53 | + name |
64 | 54 | ) |
65 | 55 |
|
66 | 56 | # Perform the upload |
67 | 57 | system2(reticulate::py_exe(), args=args) |
68 | 58 | basilisk::basiliskStop(proc) |
| 59 | + |
| 60 | + invisible(NULL) |
| 61 | +} |
| 62 | + |
| 63 | +#' Update the metadata database in Nectar using a newly created data frame |
| 64 | +#' @param metadata The data frame to upload |
| 65 | +#' @param version The version for the new metadata as a character scalar, e.g. |
| 66 | +#' "0.2.3" |
| 67 | +#' @inheritDotParams upload_swift -source -container -name |
| 68 | +#' @examples |
| 69 | +#' \dontrun{ |
| 70 | +#' metadata = CuratedAtlasQueryR::get_metadata() |> head(10) |> dplyr::collect() |
| 71 | +#' update_database(metadata, "0.2.3", credential_id = "ABCDEFGHIJK", credential_secret = "ABCD1234EFGH-5678IJK") |
| 72 | +#' # Prints "metadata.0.2.3.parquet" if successful |
| 73 | +#' } |
| 74 | +#' @keywords internal |
| 75 | +update_database = function(metadata, version, ...){ |
| 76 | + # These are optional dev packages |
| 77 | + rlang::check_installed(c("arrow", "glue", "basilisk")) |
| 78 | + |
| 79 | + dir <- tempdir() |
| 80 | + parquet_name <- glue::glue("metadata.{version}.parquet") |
| 81 | + parquet_path <- file.path(dir, parquet_name) |
| 82 | + arrow::write_parquet(metadata, sink=parquet_path) |
| 83 | + |
| 84 | + upload_swift(parquet_path, container="metadata", name=parquet_name, ...) |
| 85 | +} |
| 86 | + |
| 87 | +#' Update the unharmonised parquet files |
| 88 | +#' @param unharmonised_parquet_dir The path to a directory containing parquet |
| 89 | +#' files, one for each dataset, e.g. |
| 90 | +#' /vast/projects/cellxgene_curated/metadata_non_harmonised_parquet_0.2 |
| 91 | +#' @inheritDotParams upload_swift -source -container -name |
| 92 | +#' @keywords internal |
| 93 | +#' @examples |
| 94 | +#' \dontrun{ |
| 95 | +#' update_unharmonised("/vast/projects/cellxgene_curated/metadata_non_harmonised_parquet_0.2", credential_id = "ABCDEFGHIJK", credential_secret = "ABCD1234EFGH-5678IJK") |
| 96 | +#' } |
| 97 | +update_unharmonised = function(unharmonised_parquet_dir, ...){ |
| 98 | + # name="/" forces it to have no prefix, i.e. to be at the top level in the bucket |
| 99 | + upload_swift(unharmonised_parquet_dir, container="unharmonised_metadata", name="/", ...) |
69 | 100 | } |
0 commit comments