Skip to content

Commit d92f27c

Browse files
committed
Merge branch 'development'
merging updates from the development branch
2 parents 6271cdd + fd8a7e6 commit d92f27c

16 files changed

+248
-487
lines changed

DESCRIPTION

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ Imports:
2323
glue,
2424
readxl,
2525
rlang,
26+
stats,
2627
stringr,
2728
tibble,
2829
yaml

NAMESPACE

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,3 +11,4 @@ export(short_to_longnames)
1111
export(star_to_number)
1212
export(well_from_rowcol)
1313
importFrom(rlang,.data)
14+
importFrom(stats,setNames)

R/excelDataGuide-package.R

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
#' @keywords internal
2+
"_PACKAGE"
3+
4+
## usethis namespace: start
5+
#' @importFrom stats setNames
6+
## usethis namespace: end
7+
NULL

R/guide.R

Lines changed: 108 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -14,32 +14,43 @@ read_guide <- function(path) {
1414
guide <- yaml::read_yaml(path)
1515
check_guide(guide)
1616

17-
if ('translations' %in% names(guide)) {
18-
17+
if ("translations" %in% names(guide)) {
1918
guide$translations <- dplyr::bind_rows(guide$translations)
2019

21-
# translation long and short names must be unique
22-
if (anyDuplicated(guide$translations$long)) {
23-
dup <- unique(guide$translations$long[duplicated(guide$translations$long)])
24-
rlang::abort("Duplicate keys in long names of the translations: {paste0(dup, collapse=', ')}")
25-
}
26-
27-
if (anyDuplicated(guide$translations$short)) {
28-
dup <- unique(guide$translations$short[duplicated(guide$translations$short)])
29-
rlang::abort("Duplicate keys in short names of the translations: {paste0(dup, collapse=', ')}")
30-
}
20+
# Ensure unique long and short names in translations
21+
validate_unique_names(guide$translations$long, "long")
22+
validate_unique_names(guide$translations$short, "short")
3123

24+
# Ensure reserved 'File path' and '.sourcefile' mapping
3225
if (!(".sourcefile" %in% guide$translations$short)) {
33-
if ('File path' %in% guide$translations$long) {
26+
if ("File path" %in% guide$translations$long) {
3427
rlang::abort("The 'long' variable name 'File path' is reserved for the 'short' name '.sourcefile' exclusively.")
3528
}
36-
guide$translations <- dplyr::bind_rows(guide$translations, list(long='File path', short='.sourcefile'))
29+
guide$translations <- dplyr::bind_rows(
30+
guide$translations,
31+
list(long = "File path", short = ".sourcefile")
32+
)
3733
}
3834
}
3935

4036
structure(guide, class = "guide")
4137
}
4238

39+
#' Helper function to validate unique names in translations
40+
#' @param names_vector A vector of names to check for uniqueness
41+
#' @param name_type The type of names to check (e.g., "long" or "short")
42+
#' @return An error message or nothing
43+
#' @noRd
44+
#'
45+
validate_unique_names <- function(names_vector, name_type) {
46+
if (anyDuplicated(names_vector)) {
47+
duplicates <- unique(names_vector[duplicated(names_vector)])
48+
rlang::abort(glue::glue(
49+
"Duplicate keys in {name_type} names of the translations: {paste0(duplicates, collapse = ', ')}"
50+
))
51+
}
52+
}
53+
4354
#' Abort if the guide does not contain all required elements
4455
#' @param guide A spreadsheet guide object
4556
#' @return An error message or nothing
@@ -48,40 +59,74 @@ read_guide <- function(path) {
4859
check_guide <- function(guide) {
4960
## NOTE: Most of the validation of a guide should be performed using the JSON schema
5061

51-
# TODO: make translations optional
62+
# Ensure translations are optional
63+
if (!"translations" %in% names(guide)) {
64+
guide$translations <- NULL
65+
}
5266

53-
# Conditionally required element plate.format in case we have platedata
54-
types <- unique(sapply(guide$locations, function(x) x$type))
55-
if ('platedata' %in% types) {
56-
if (!('plate.format' %in% names(guide))) {
57-
rlang::abort("The spreadsheet guide must contain the 'plate.format' element when 'platedata' is present in the locations.")
58-
}
59-
if (!(as.character(guide$plate.format) %in% names(.plateformats))) {
60-
rlang::abort(glue::glue("The plate format in the spreadsheet guide is not valid. It must be one of '24', '48', '96' or '384'."))
61-
}
67+
# Validate plate.format if platedata is present
68+
if ("platedata" %in% unique(sapply(guide$locations, `[[`, "type"))) {
69+
validate_plate_format(guide)
6270
}
6371

64-
# Check content of locations
65-
for (i in seq_along(guide$locations)) {
66-
if (guide$locations[[i]]$type == 'platedata') {
67-
for (range in guide$locations[[i]]$ranges) {
68-
check_dim(
69-
range,
70-
required_rows = .plateformats[[as.character(guide$plate.format)]]$rows + 1,
71-
required_cols = .plateformats[[as.character(guide$plate.format)]]$cols + 1
72-
)
73-
}
74-
} else {
75-
if (guide$locations[[i]]$type == 'cells') {
76-
dims <- dim(cellranger::as.cell_limits(guide$locations[[i]]$ranges[1]))
77-
# if (range.dim[range.dim > 1] != length(varnames)) {
78-
# rlang::abort(glue::glue("The length of the range ({range.dim[range.dim > 1]}) differs from the number of variables given ({length(varnames)})."))
79-
# }
80-
}
81-
}
72+
# Validate each location in the guide
73+
lapply(guide$locations, validate_location, guide)
74+
}
75+
76+
#' Helper function to validate plate.format
77+
#' @param guide A spreadsheet guide object
78+
#' @return An error message or nothing
79+
#' @noRd
80+
#'
81+
validate_plate_format <- function(guide) {
82+
if (!"plate.format" %in% names(guide)) {
83+
rlang::abort("The spreadsheet guide must contain the 'plate.format' element when 'platedata' is present in the locations.")
84+
}
85+
if (!(as.character(guide$plate.format) %in% names(.plateformats))) {
86+
rlang::abort(glue::glue("The plate format in the spreadsheet guide is not valid. It must be one of '24', '48', '96', or '384'."))
8287
}
8388
}
8489

90+
#' Helper function to validate a single location
91+
#' @param location A location object from the guide
92+
#' @param guide A spreadsheet guide object
93+
#' @return An error message or nothing
94+
#' @noRd
95+
#'
96+
validate_location <- function(location, guide) {
97+
if (location$type == "platedata") {
98+
validate_platedata_ranges(location$ranges, guide$plate.format)
99+
} else if (location$type == "cells") {
100+
validate_cells(location$ranges)
101+
}
102+
}
103+
104+
#' Helper function to validate platedata ranges
105+
#' @param ranges A list of ranges for platedata
106+
#' @param plate_format The plate format specified in the guide
107+
#' @return An error message or nothing
108+
#' @noRd
109+
#'
110+
validate_platedata_ranges <- function(ranges, plate_format) {
111+
for (range in ranges) {
112+
check_dim(
113+
range,
114+
required_rows = .plateformats[[as.character(plate_format)]]$rows + 1,
115+
required_cols = .plateformats[[as.character(plate_format)]]$cols + 1
116+
)
117+
}
118+
}
119+
120+
#' Helper function to validate cell ranges
121+
#' @param ranges A list of ranges for cells
122+
#' @return An error message or nothing
123+
#' @noRd
124+
#'
125+
validate_cells <- function(ranges) {
126+
dims <- dim(cellranger::as.cell_limits(ranges[1]))
127+
# TODO: add additional validation logic for cells if needed
128+
}
129+
85130
#' Function to check the dimensions of a range
86131
#' @param range A range object
87132
#' @param required_rows The required number of rows
@@ -91,16 +136,32 @@ check_guide <- function(guide) {
91136
#'
92137
check_dim <- function(range, required_rows = NA, required_cols = NA) {
93138
if (is.na(required_rows) && is.na(required_cols)) {
94-
rlang::abort("You must specify at least one of 'required_rows' or 'required_cols'")
139+
rlang::abort("You must specify at least one of 'required_rows' or 'required_cols'.")
95140
}
141+
96142
dims <- dim(cellranger::as.cell_limits(range))
97-
if (all(!is.na(c(required_rows, required_cols))) && any(dims != c(required_rows, required_cols))) {
98-
rlang::abort(glue::glue("The range {range} does not have the required dimensions. Expected: {required_rows} rows and {required_cols} columns"))
143+
144+
# Validate both rows and columns if both are specified
145+
if (!is.na(required_rows) && !is.na(required_cols)) {
146+
if (!all(dims == c(required_rows, required_cols))) {
147+
rlang::abort(glue::glue(
148+
"The range {range} does not have the required dimensions. Expected: {required_rows} rows and {required_cols} columns."
149+
))
150+
}
151+
return() # Exit early if both dimensions are validated
99152
}
153+
154+
# Validate rows if specified
100155
if (!is.na(required_rows) && dims[1] != required_rows) {
101-
rlang::abort(glue::glue("The range {range} does not have the required number of rows. Expected: {required_rows}"))
156+
rlang::abort(glue::glue(
157+
"The range {range} does not have the required number of rows. Expected: {required_rows}."
158+
))
102159
}
160+
161+
# Validate columns if specified
103162
if (!is.na(required_cols) && dims[2] != required_cols) {
104-
rlang::abort(glue::glue("The range {range} does not have the required number of columns. Expected: {required_cols}"))
163+
rlang::abort(glue::glue(
164+
"The range {range} does not have the required number of columns. Expected: {required_cols}."
165+
))
105166
}
106167
}

R/read.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,7 @@ read_table <- function(drfile, sheet, ranges, translate = FALSE, translations =
118118
#' @noRd
119119
#'
120120
plate_to_df <- function(d) {
121+
# TODO: handle plate formats generically
121122
var <- names(d)[1]
122123
newdf <- tibble::tibble(
123124
row = rep(LETTERS[1:8], 12),
@@ -126,7 +127,6 @@ plate_to_df <- function(d) {
126127
as.vector()
127128
)
128129

129-
# TODO: handle plate formats generically
130130
names(newdf) <- c("row", "col", var)
131131
newdf
132132
}

R/utils.R

Lines changed: 47 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,53 @@ has_star <- function(x) {
5757
#' `format(as.POSIXct(x, tz=""), format="%Y-%m-%d")` to get a string with format
5858
#' YYYY-MM-DD.
5959
#'
60-
# TODO: check the previous statements
60+
# TODO: check the previous and following statements
61+
#
62+
# ACCORDING TO Copilot:
63+
# ### **Analysis**:
64+
# 1. **Excel Date Storage**:
65+
# - Excel does **not** store dates as the number of days since January 1, 1970. Instead:
66+
# - Excel stores dates as the number of days since **January 1, 1900** (for Windows systems)
67+
# or **January 1, 1904** (for macOS systems).
68+
# - Excel also incorrectly assumes that 1900 was a leap year, which introduces an offset of
69+
# 1 day for dates before March 1, 1900.
70+
#
71+
# 2. **`read_excel` Behavior**:
72+
# - When using `readxl::read_excel`, Excel dates are typically read as numeric values representing
73+
# the number of days since Excel's epoch (e.g., 1900 or 1904). These values are **not
74+
# automatically converted to POSIXct** by `read_excel`. The user must manually convert them.
75+
#
76+
# 3. **POSIXct Conversion**:
77+
# - The description mentions converting the number to a date using `as.POSIXct(as.integer(x))`.
78+
# However:
79+
# - This assumes that the numeric value `x` is already in seconds since January 1, 1970, which
80+
# is not the case for Excel dates.
81+
# - To convert Excel dates to R's `POSIXct`, you need to account for Excel's epoch (e.g.,
82+
# subtract the appropriate offset for 1900 or 1904).
83+
#
84+
# 4. **Formatting**:
85+
# - The description correctly states that `format(as.POSIXct(x, tz=""), format="%Y-%m-%d")`
86+
# can be used to format a `POSIXct` object as a string in the `YYYY-MM-DD` format.
87+
#
88+
# ### **Corrected Description**:
89+
# Here’s a revised and accurate version of the description:
90+
#
91+
# Excel stores dates as numeric values representing the number of days since
92+
# January 1, 1900 (Windows) or January 1, 1904 (macOS). Note that Excel's 1900
93+
# date system incorrectly assumes 1900 was a leap year, which introduces a
94+
# 1-day offset for dates before March 1, 1900.
95+
#
96+
# When read using `readxl::read_excel`, Excel dates are imported as numeric
97+
# values. To convert these to R's `POSIXct` class, you must account for Excel's
98+
# epoch. For example, subtract 25569 days (the number of days between January 1,
99+
# 1900, and January 1, 1970) and convert to seconds by multiplying by 86400.
100+
#
101+
# Example conversion:
102+
# `as.POSIXct((x - 25569) * 86400, origin = "1970-01-01", tz = "")`
103+
#
104+
# To format the date as a string in `YYYY-MM-DD` format, use:
105+
# `format(as.POSIXct(...), format = "%Y-%m-%d")`.
106+
#
61107
#' @return A vector of the specified atomic class
62108
#' @noRd
63109
coerce <- function(x, atomicclass) {

0 commit comments

Comments
 (0)