-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathcreate_yaml_for_samples.R
More file actions
55 lines (45 loc) · 1.82 KB
/
create_yaml_for_samples.R
File metadata and controls
55 lines (45 loc) · 1.82 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
library(tidyverse)
library(argparse)
library(tools)
# ---- Command-line interface ----
# Options understood by this script:
#   --data_dir         directory containing the samples' .npy files
#   --sample_metadata  optional CSV file with sample metadata
#   --label_vars       optional metadata column(s) to use as YAML labels
#   --overwrite        flag; when present an existing sample.yaml is replaced
parser <- ArgumentParser(
  description = "Automatically generate sample YAML file for pyspi-distribute."
)
parser$add_argument(
  "--data_dir",
  help = "Directory containing samples' .npy files."
)
parser$add_argument(
  "--sample_metadata",
  help = "OPTIONAL: CSV file containing sample metadata info."
)
parser$add_argument(
  "--label_vars",
  help = "OPTIONAL: columns in metadata to use as labels for YAML."
)
parser$add_argument(
  "--overwrite",
  help = "Should sample.yaml be overwritten if it already exists? Default is F.",
  action = "store_true",
  default = FALSE
)

# Read the supplied options and unpack them into the variables used by the
# rest of the script. Options that were not supplied come back as NULL
# (or FALSE for the --overwrite flag).
args <- parser$parse_args()
data_dir <- args[["data_dir"]]
metadata <- args[["sample_metadata"]]
meta_vars <- args[["label_vars"]]
overwrite <- args[["overwrite"]]
# ---- Build sample.yaml ----
# Writes one YAML entry per .npy file found in `data_dir`:
#   - {file: <path>, name: <sample ID>, dim_order: sp, labels: [<labels>] }
# The sample ID is the file name without its extension. Labels are taken from
# the metadata CSV (matched on its `sampleID` column) when both
# --sample_metadata and --label_vars were supplied; otherwise they are empty.

# --data_dir is not enforced by the argument parser, so fail early with a
# clear message instead of a cryptic error from endsWith(NULL, ...).
if (is.null(data_dir)) {
  stop("--data_dir must be supplied.", call. = FALSE)
}

# Guarantee a trailing slash so paths can be built by plain concatenation.
if (!endsWith(data_dir, "/")) {
  data_dir <- paste0(data_dir, "/")
}

# `pattern` is a regular expression, not a glob: anchor on the literal ".npy"
# extension so that only files ending in ".npy" are picked up.
npy_files <- list.files(data_dir, pattern = "\\.npy$")
yaml_file <- paste0(data_dir, "sample.yaml")
cat("\nYAML output:", yaml_file, "\n")

# Labels are only used when both the metadata file and the columns are given.
use_labels <- !is.null(metadata) && !is.null(meta_vars)

if (!file.exists(yaml_file) || overwrite) {
  cat("\nNow creating sample.yaml\n")

  sample_ids <- tools::file_path_sans_ext(npy_files)
  label_strings <- rep("", length(npy_files))

  if (use_labels) {
    metadata_data <- read.csv(metadata)

    # --label_vars arrives as a single string; accept a comma-separated list
    # so that several metadata columns can be used as labels.
    label_cols <- trimws(strsplit(meta_vars, ",", fixed = TRUE)[[1]])
    label_cols <- label_cols[nzchar(label_cols)]
    if (length(label_cols) == 0) {
      stop("--label_vars does not name any column.", call. = FALSE)
    }

    missing_cols <- setdiff(c("sampleID", label_cols), names(metadata_data))
    if (length(missing_cols) > 0) {
      stop(
        "Column(s) not found in ", metadata, ": ",
        paste(missing_cols, collapse = ", "),
        ". Available columns: ",
        paste(names(metadata_data), collapse = ", "),
        call. = FALSE
      )
    }

    # First metadata row for each sample; NA when the sample is not listed.
    row_idx <- match(sample_ids, metadata_data$sampleID)
    unmatched <- is.na(row_idx)
    if (any(unmatched)) {
      warning(
        "No metadata row found for sample(s): ",
        paste(sample_ids[unmatched], collapse = ", "),
        "; writing empty labels for them.",
        call. = FALSE
      )
    }

    # Convert each label column to character separately so factor columns
    # yield their level names, then join the columns row-wise with commas.
    label_values <- lapply(
      metadata_data[row_idx[!unmatched], label_cols, drop = FALSE],
      as.character
    )
    label_strings[!unmatched] <- do.call(paste, c(label_values, sep = ","))
  }

  yaml_string <- "- {file: %s, name: %s, dim_order: sp, labels: [%s] }\n"
  yaml_lines <- sprintf(
    yaml_string,
    paste0(data_dir, npy_files),
    sample_ids,
    label_strings
  )

  # Write every entry in one call; this creates the file or replaces its
  # previous contents, and yields an empty file when there are no samples.
  write_file(paste(yaml_lines, collapse = ""), yaml_file)
} else {
  cat("\nsample.yaml already exists; re-run with --overwrite to regenerate it.\n")
}