Skip to content

Commit dc9030b

Browse files
committed
Merge branch 'development'
Mends a bug and enables the application of vectors of atomicclasses to the differnt types of data structures.
2 parents bef000c + a78209e commit dc9030b

16 files changed

+711
-340
lines changed

R/read.R

Lines changed: 203 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -9,52 +9,106 @@
99
#' @noRd
1010
#'
1111
read_cells <- function(drfile, sheet, variables, translate = FALSE, translations = NULL, atomicclass = 'character') {
12+
# Process each variable
1213
result <- lapply(variables, function(v) {
13-
dims <- dim(cellranger::as.cell_limits(v$cell))
14-
if (any(dims > 1)) {
14+
# Ensure the cell address points to a single cell
15+
if (any(dim(cellranger::as.cell_limits(v$cell)) > 1)) {
1516
rlang::abort(glue::glue("A cell address should point to a single cell."))
1617
}
17-
x <- suppressMessages(readxl::read_excel(drfile, sheet = sheet, range = v$cell, col_names = FALSE))
18-
if (nrow(x) == 0) {
19-
x <- NA
18+
19+
# Read the cell value
20+
cell_data <- suppressMessages(
21+
readxl::read_excel(drfile, sheet = sheet, range = v$cell, col_names = FALSE)
22+
)
23+
24+
# Handle empty cells
25+
if (nrow(cell_data) == 0) {
26+
NA
2027
} else {
21-
x <- x[[1]][1]
28+
cell_data[[1]][1]
2229
}
2330
})
24-
result <- stats::setNames(result, sapply(variables, function(x) x$name))
31+
32+
# Assign names to the results
33+
result <- stats::setNames(result, sapply(variables, `[[`, "name"))
34+
35+
# Coerce values to the specified atomic class
2536
lapply(result, coerce, atomicclass)
2637
}
2738

2839
#' Read keyvalue pair formatted data from a spreadsheet
2940
#' @param drfile Path to the data reporting file
3041
#' @param sheet The sheet name
31-
#' @param range The range of the data
42+
#' @param ranges A vector of ranges
3243
#' @param translate Whether to translate long variable names to short variable names
3344
#' @param translations A named vector with long variable names as names and short variable names as values
3445
#' @param atomicclass The name of the class to which the values should be coerced, if possible
46+
#' @description
47+
#' The `atomicclass` argument can be a single class name or a vector of class names.
48+
#' If it is a single class name, all values will be coerced to this class. If it
49+
#' is a vector of class names, the length of the vector must be equal to the number
50+
#' of rows in the keyvalue table or equal to the number of columns in a table type
51+
#' range. In this case, each value will be coerced to the class specified in the
52+
#' corresponding element of the vector or column of the table.
3553
#' @return A named list. Values are coerced to character
3654
#' @noRd
3755
#'
38-
read_keyvalue <- function(drfile, sheet, range, translate = FALSE, translations = NULL, atomicclass = "character", ...) {
39-
keyvalue <- readxl::read_excel(drfile, sheet = sheet, range = range, col_names = c("key", "value"))
56+
read_keyvalue <- function(drfile, sheet, ranges, translate = FALSE, translations = NULL, atomicclass = "character", ...) {
57+
# Read and combine key-value pairs from the specified ranges
58+
kvtable <- lapply(ranges, function(range) {
59+
readxl::read_excel(drfile, sheet = sheet, range = range, col_names = c("key", "value"))
60+
}) |> dplyr::bind_rows()
61+
62+
# Translate keys if required
4063
if (translate) {
41-
keyvalue$key <- long_to_shortnames(keyvalue$key, translations)
64+
kvtable$key <- long_to_shortnames(kvtable$key, translations)
4265
}
43-
kvlist <- lapply(keyvalue$value, coerce, atomicclass)
44-
names(kvlist) <- keyvalue$key
45-
kvlist
66+
67+
# Convert values to a list
68+
kvlist <- as.list(kvtable$value)
69+
70+
# Coerce values to the specified atomic class
71+
kvlist <- if (length(atomicclass) == 1) {
72+
lapply(kvlist, coerce, atomicclass)
73+
} else {
74+
if (length(atomicclass) != length(kvlist)) {
75+
rlang::abort(glue::glue(
76+
"The number of atomic classes ({length(atomicclass)}) must be 1 or equal to the number of elements ({length(kvlist)}) in the key-value table."
77+
))
78+
}
79+
mapply(coerce, kvlist, atomicclass, SIMPLIFY = FALSE)
80+
}
81+
82+
# Return a named list with keys and coerced values
83+
setNames(kvlist, kvtable$key)
4684
}
4785

4886
#' Read table formatted data from a spreadsheet
4987
#' @inherit read_keyvalue
5088
#' @return A data frame in long format
5189
#' @noRd
5290
#'
53-
read_table <- function(drfile, sheet, range, translate = FALSE, translations = NULL, atomicclass = "character", ...) {
54-
tbl <- readxl::read_excel(drfile, sheet = sheet, range = range)
91+
read_table <- function(drfile, sheet, ranges, translate = FALSE, translations = NULL, atomicclass = "character", ...) {
92+
# Read and combine data from the specified ranges
93+
tbl <- lapply(ranges, function(range) {
94+
readxl::read_excel(drfile, sheet = sheet, range = range)
95+
}) |> dplyr::bind_rows()
96+
97+
# Coerce columns to the specified atomic class
98+
if (length(atomicclass) == 1) {
99+
tbl[] <- lapply(tbl, coerce, atomicclass)
100+
} else {
101+
if (length(atomicclass) != ncol(tbl)) {
102+
rlang::abort("The number of atomic classes must be 1 or equal to the number of columns in the table.")
103+
}
104+
tbl[] <- Map(coerce, tbl, atomicclass)
105+
}
106+
107+
# Translate column names if required
55108
if (translate) {
56109
names(tbl) <- long_to_shortnames(names(tbl), translations)
57110
}
111+
58112
tbl
59113
}
60114

@@ -71,6 +125,8 @@ plate_to_df <- function(d) {
71125
var = as.matrix(d[, -1]) |>
72126
as.vector()
73127
)
128+
129+
# TODO: handle plate formats generically
74130
names(newdf) <- c("row", "col", var)
75131
newdf
76132
}
@@ -80,9 +136,34 @@ plate_to_df <- function(d) {
80136
#' @inherit read_keyvalue
81137
#' @return A data frame in long format
82138
#' @noRd
83-
read_key_plate <- function(drfile, sheet, range, translate = FALSE, translations = NULL, atomicclass = "character", ...) {
84-
plate <- readxl::read_excel(drfile, sheet = sheet, range = range)
85-
plate_to_df(plate)
139+
read_key_plate <- function(drfile, sheet, ranges, translate = FALSE, translations = NULL, atomicclass = "character", ...) {
140+
# Read and convert each range to a long-format data frame
141+
chunks <- lapply(ranges, function(range) {
142+
plate <- readxl::read_excel(drfile, sheet = sheet, range = range) |>
143+
plate_to_df()
144+
})
145+
146+
# Combine all chunks into a single data frame
147+
combined <- suppressMessages(Reduce(dplyr::full_join, chunks))
148+
149+
# Handle vectors of atomicclass
150+
if (length(atomicclass) == 1) {
151+
combined[] <- lapply(combined, coerce, atomicclass)
152+
} else {
153+
if (length(atomicclass) != ncol(combined)) {
154+
rlang::abort(glue::glue(
155+
"The number of atomic classes ({length(atomicclass)}) must be 1 or equal to the number of columns ({ncol(combined)}) in the combined data frame."
156+
))
157+
}
158+
combined[] <- Map(coerce, combined, atomicclass)
159+
}
160+
161+
# Translate column names if required
162+
if (translate) {
163+
names(combined) <- long_to_shortnames(names(combined), translations)
164+
}
165+
166+
combined
86167
}
87168

88169
#' Translation between long and short variable names
@@ -95,25 +176,34 @@ read_key_plate <- function(drfile, sheet, range, translate = FALSE, translations
95176
#' @return A vector of long or short variable names
96177
#' @export
97178
long_to_shortnames <- function(v, translations) {
179+
# Match long names to their corresponding short names
98180
positions <- match(v, translations$long)
99181
shortnames <- translations$short[positions]
100-
if (any (is.na(positions))) {
182+
183+
# Handle missing translations
184+
missing_indices <- is.na(positions)
185+
if (any(missing_indices)) {
101186
rlang::warn("Missing translations. Using original long names.")
102-
shortnames[is.na(positions)] <- v[is.na(positions)]
187+
shortnames[missing_indices] <- v[missing_indices]
103188
}
189+
104190
shortnames
105191
}
106192

107193
#' @return A vector of long variable names
108194
#' @rdname long_to_shortnames
109195
#' @export
110196
short_to_longnames <- function(v, translations) {
197+
# Match short names to their corresponding long names
111198
positions <- match(v, translations$short)
112199
longnames <- translations$long[positions]
113-
if (any(is.na(positions))) {
114-
rlang::warn("Missing reverse translations. Using short names.")
200+
201+
# Handle missing translations
202+
if (anyNA(positions)) {
203+
rlang::warn("Missing reverse translations. Using original short names.")
115204
longnames[is.na(positions)] <- v[is.na(positions)]
116205
}
206+
117207
longnames
118208
}
119209

@@ -137,87 +227,121 @@ short_to_longnames <- function(v, translations) {
137227
#' @export
138228
#'
139229
read_data <- function(drfile, guide, checkname = FALSE) {
140-
230+
# Load the guide if it's a file path
141231
if (inherits(guide, "character")) {
142-
# If 'guide' is a file path then read the guide
143232
guide <- read_guide(guide)
144-
} else {
145-
if (! inherits(guide, "guide")) {
146-
cl <- class(guide)
147-
rlang::abort(glue::glue("The guide must be a path (character) to a guide file or a reporting template guide object (guide object), not an object of class {cl}."))
148-
}
233+
} else if (!inherits(guide, "guide")) {
234+
rlang::abort(glue::glue(
235+
"The guide must be a path (character) to a guide file or a reporting template guide object (guide object), not an object of class {class(guide)}."
236+
))
149237
}
150238

151239
result <- list()
152240

241+
# Process each location in the guide
153242
for (location in guide$locations) {
154243
read_function <- switch(
155244
location$type,
156245
"keyvalue" = read_keyvalue,
157246
"table" = read_table,
158-
"platedata" = read_key_plate
247+
"platedata" = read_key_plate,
248+
"cells" = read_cells,
249+
rlang::abort(glue::glue("Unsupported location type: {location$type}"))
159250
)
160251

161-
atomicclass <- if ("atomicclass" %in% names(location)) location$atomicclass else "character"
252+
atomicclass <- location$atomicclass %||% "character"
162253

163-
if (!location$type == "cells") {
164-
chunks <- lapply(location$ranges, function(range) {
165-
read_function(drfile = drfile, sheet = location$sheet, range = range, translate = location$translate,
166-
translations = guide$translations, atomicclass = atomicclass)
167-
})
254+
# Read data using the appropriate function
255+
chunk <- if (location$type == "cells") {
256+
read_function(
257+
drfile = drfile,
258+
sheet = location$sheet,
259+
variables = location$variables,
260+
translate = location$translate,
261+
translations = guide$translations,
262+
atomicclass = atomicclass
263+
)
168264
} else {
169-
chunks <- read_cells(drfile = drfile, sheet = location$sheet, variables = location$variables, translate = location$translate,
170-
translations = guide$translations, atomicclass = atomicclass)
265+
read_function(
266+
drfile = drfile,
267+
sheet = location$sheet,
268+
ranges = location$ranges,
269+
translate = location$translate,
270+
translations = guide$translations,
271+
atomicclass = atomicclass
272+
)
171273
}
172274

173-
chunk <- switch(
174-
location$type,
175-
"keyvalue" = do.call(c, chunks),
176-
"table" = dplyr::bind_rows(chunks),
177-
"platedata" = suppressMessages(Reduce(dplyr::full_join, chunks)),
178-
"cells" = chunks
275+
# Combine results
276+
result[[location$type]][[location$varname]] <- combine_results(
277+
result[[location$type]][[location$varname]],
278+
chunk,
279+
location$type
179280
)
281+
}
180282

181-
if (!(location$varname %in% names(result[[location$type]]))) {
182-
result[[location$type]][[location$varname]] <- chunk
183-
} else {
184-
result[[location$type]][[location$varname]] <- switch(
185-
location$type,
186-
"keyvalue" = c(result[[location$type]][[location$varname]], chunk),
187-
"table" = dplyr::bind_rows(result[[location$type]][[location$varname]], chunk),
188-
"platedata" = suppressMessages(dplyr::full_join(result[[location$type]][[location$varname]], chunk)),
189-
"cells" = c(result[[location$type]][[location$varname]], chunk),
190-
)
191-
}
283+
# Validate template version
284+
validate_template_version(result$cells$.template$version, guide)
285+
286+
# Check template name if required
287+
if (checkname && guide$template.name != result$template.metadata$template.name) {
288+
rlang::abort(glue::glue(
289+
"The name of the guide ({guide$template.name}) does not match the name of the excel template ({result$template.metadata$template.name})."
290+
))
192291
}
193292

194-
template.version <- result$cells$.template$version
195-
if (grepl("^\\d+$", template.version)) {
196-
template.version <- paste0(template.version, ".0")
197-
rlang::warn(glue::glue("Incorrectly formatted template version number '{result$cells$.template$version}'. Version
198-
numbers must have a minor number. Will interpret '{result$cells$.template$version}' as '{template.version}'."))
293+
result$.sourcefile <- drfile
294+
result$.guide <- guide
295+
result
296+
}
297+
298+
#' Helper function to combine results based on location type
299+
#' @param existing The existing data
300+
#' @param chunk The new data
301+
#' @param type The location type
302+
#' @noRd
303+
combine_results <- function(existing, chunk, type) {
304+
if (is.null(existing)) {
305+
return(chunk)
199306
}
200-
num.template.version <- package_version(template.version)
201-
num.min.version <- package_version(guide$template.min.version)
202-
if (num.template.version < num.min.version) {
203-
rlang::abort(glue::glue("The guide is incompatible with the template.
204-
The template version should be minimally {guide$template.min.version}, whereas it is {result$template.metadata$template.version}."))
307+
308+
switch(
309+
type,
310+
"keyvalue" = c(existing, chunk),
311+
"table" = dplyr::bind_rows(existing, chunk),
312+
"platedata" = suppressMessages(dplyr::full_join(existing, chunk)),
313+
"cells" = c(existing, chunk),
314+
rlang::abort(glue::glue("Unsupported location type for combining results: {type}"))
315+
)
316+
}
317+
318+
#' Helper function to validate template
319+
#' @param template_version The version of the template
320+
#' @param guide The guide object
321+
#' @noRd
322+
validate_template_version <- function(template_version, guide) {
323+
if (grepl("^\\d+$", template_version)) {
324+
template_version <- paste0(template_version, ".0")
325+
rlang::warn(glue::glue(
326+
"Incorrectly formatted template version number '{template_version}'. Version numbers must have a minor number. Interpreting as '{template_version}'."
327+
))
205328
}
206-
if (!is.null(guide$template.max.version)) {
207-
num.max.version <- package_version(guide$template.max.version)
208-
if (num.max.version < num.template.version) {
209-
rlang::abort(glue::glue("The guide is incompatible with the template.
210-
The template version should be maximally {guide$template.max.version}, whereas it is {result$template.metadata$template.version}."))
211-
}
329+
330+
num_template_version <- package_version(template_version)
331+
num_min_version <- package_version(guide$template.min.version)
332+
333+
if (num_template_version < num_min_version) {
334+
rlang::abort(glue::glue(
335+
"The guide is incompatible with the template. The template version should be at least {guide$template.min.version}, but it is {template_version}."
336+
))
212337
}
213338

214-
if (checkname) {
215-
if (guide$template.name != result$template.metadata$template.name) {
216-
rlang::abort(glue::glue("The name of the guide ({guide$template.name}) does not match the name of the excel template ({result$template.metadata$template.name})."))
339+
if (!is.null(guide$template.max.version)) {
340+
num_max_version <- package_version(guide$template.max.version)
341+
if (num_template_version > num_max_version) {
342+
rlang::abort(glue::glue(
343+
"The guide is incompatible with the template. The template version should be at most {guide$template.max.version}, but it is {template_version}."
344+
))
217345
}
218346
}
219-
220-
result$.sourcefile <- drfile
221-
result$.guide <- guide
222-
result
223347
}

0 commit comments

Comments
 (0)