Skip to content

Commit c9a39e3

Browse files
committed
updating read functions, stash
1 parent b469752 commit c9a39e3

File tree

1 file changed

+64
-23
lines changed

1 file changed

+64
-23
lines changed

R/read.R

Lines changed: 64 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -28,19 +28,47 @@ read_cells <- function(drfile, sheet, variables, translate = FALSE, translations
2828
#' Read keyvalue pair formatted data from a spreadsheet
2929
#' @param drfile Path to the data reporting file
3030
#' @param sheet The sheet name
31-
#' @param range The range of the data
31+
#' @param ranges A vector of ranges
3232
#' @param translate Whether to translate long variable names to short variable names
3333
#' @param translations A named vector with long variable names as names and short variable names as values
3434
#' @param atomicclass The name of the class to which the values should be coerced, if possible
35+
#' @description
36+
#' The `atomicclass` argument can be a single class name or a vector of class names.
37+
#' If it is a single class name, all values will be coerced to this class. If it
38+
#' is a vector of class names, the length of the vector must be equal to the number
39+
#' of rows in the keyvalue table or equal to the number of columns in a table type
40+
#' range. In this case, each value will be coerced to the class specified in the
41+
#' corresponding element of the vector or column of the table.
3542
#' @return A named list. Values are coerced to character
3643
#' @noRd
3744
#'
38-
read_keyvalue <- function(drfile, sheet, range, translate = FALSE, translations = NULL, atomicclass = "character", ...) {
39-
keyvalue <- readxl::read_excel(drfile, sheet = sheet, range = range, col_names = c("key", "value"))
45+
read_keyvalue <- function(drfile, sheet, ranges, translate = FALSE, translations = NULL, atomicclass = "character", ...) {
46+
47+
chunks <- lapply(ranges, function(range) {
48+
readxl::read_excel(drfile, sheet = sheet, range = range, col_names = c("key", "value"))
49+
})
50+
51+
keyvalue <- do.call(c, chunks) |>
52+
4053
if (translate) {
4154
keyvalue$key <- long_to_shortnames(keyvalue$key, translations)
4255
}
43-
kvlist <- lapply(keyvalue$value, coerce, atomicclass)
56+
57+
if (length(atomicclass) == 1) {
58+
kvlist <- lapply(keyvalue$value, coerce, atomicclass)
59+
} else {
60+
61+
if (!length(atomicclass) == length(keyvalue)) {
62+
rlang::abort(
63+
glue::glue("The number of atomic classes ({ length(atomicclass) }) must be 1 or equal to the
64+
number of elements ({ length(keyvalue) }) in the keyvalue table.")
65+
)
66+
}
67+
68+
kvlist <- lapply(seq_along(keyvalue$value), function(i) {
69+
keyvalue$value[i] |> coerce(atomicclass[i])
70+
})
71+
}
4472
names(kvlist) <- keyvalue$key
4573
kvlist
4674
}
@@ -50,12 +78,28 @@ read_keyvalue <- function(drfile, sheet, range, translate = FALSE, translations
5078
#' @return A data frame in long format
5179
#' @noRd
5280
#'
53-
read_table <- function(drfile, sheet, range, translate = FALSE, translations = NULL, atomicclass = "character", ...) {
54-
# TODO coerce to atomicclass
55-
tbl <- readxl::read_excel(drfile, sheet = sheet, range = range)
81+
read_table <- function(drfile, sheet, ranges, translate = FALSE, translations = NULL, atomicclass = "character", ...) {
82+
tbl <- lapply(ranges, function(range) {
83+
readxl::read_excel(drfile, sheet = sheet, range = range)}) |>
84+
dplyr::bind_rows()
85+
86+
if (length(atomicclass) == 1) {
87+
for (i in seq_along(tbl)) {
88+
tbl[[i]] <- tbl[[i]] |> coerce(atomicclass)
89+
}
90+
} else {
91+
if (!length(atomicclass) == ncol(tbl)) {
92+
rlang::abort("The number of atomic classes must be 1 or equal to the number of columns in the table.")
93+
}
94+
for (i in seq_along(atomicclass)) {
95+
tbl[[i]] <- tbl[[i]] |> coerce(atomicclass[i])
96+
}
97+
}
98+
5699
if (translate) {
57100
names(tbl) <- long_to_shortnames(names(tbl), translations)
58101
}
102+
59103
tbl
60104
}
61105

@@ -72,6 +116,8 @@ plate_to_df <- function(d) {
72116
var = as.matrix(d[, -1]) |>
73117
as.vector()
74118
)
119+
120+
# TODO: handle plate formats generically
75121
names(newdf) <- c("row", "col", var)
76122
newdf
77123
}
@@ -81,9 +127,13 @@ plate_to_df <- function(d) {
81127
#' @inherit read_keyvalue
82128
#' @return A data frame in long format
83129
#' @noRd
84-
read_key_plate <- function(drfile, sheet, range, translate = FALSE, translations = NULL, atomicclass = "character", ...) {
85-
plate <- readxl::read_excel(drfile, sheet = sheet, range = range)
86-
plate_to_df(plate)
130+
read_key_plate <- function(drfile, sheet, ranges, translate = FALSE, translations = NULL, atomicclass = "character", ...) {
131+
chunks <- lapply(ranges, function(range) {
132+
plate <- readxl::read_excel(drfile, sheet = sheet, range = range) |>
133+
plate_to_df()
134+
})
135+
# TODO: handle vectors of atomicclass
136+
suppressMessages(Reduce(dplyr::full_join, chunks))
87137
}
88138

89139
#' Translation between long and short variable names
@@ -162,23 +212,14 @@ read_data <- function(drfile, guide, checkname = FALSE) {
162212
atomicclass <- if ("atomicclass" %in% names(location)) location$atomicclass else "character"
163213

164214
if (!location$type == "cells") {
165-
chunks <- lapply(location$ranges, function(range) {
166-
read_function(drfile = drfile, sheet = location$sheet, range = range, translate = location$translate,
167-
translations = guide$translations, atomicclass = atomicclass)
168-
})
215+
chunk <- read_function(drfile = drfile, sheet = location$sheet, ranges = location$ranges,
216+
translate = location$translate, translations = guide$translations,
217+
atomicclass = atomicclass)
169218
} else {
170-
chunks <- read_cells(drfile = drfile, sheet = location$sheet, variables = location$variables, translate = location$translate,
219+
chunk <- read_cells(drfile = drfile, sheet = location$sheet, variables = location$variables, translate = location$translate,
171220
translations = guide$translations, atomicclass = atomicclass)
172221
}
173222

174-
chunk <- switch(
175-
location$type,
176-
"keyvalue" = do.call(c, chunks),
177-
"table" = dplyr::bind_rows(chunks),
178-
"platedata" = suppressMessages(Reduce(dplyr::full_join, chunks)),
179-
"cells" = chunks
180-
)
181-
182223
if (!(location$varname %in% names(result[[location$type]]))) {
183224
result[[location$type]][[location$varname]] <- chunk
184225
} else {

0 commit comments

Comments
 (0)