Skip to content

Commit 28baae4

Browse files
committed
Merge branch 'master' of https://github.com/explodecomputer/alspac into tidy-inst-copy
2 parents 6d03260 + b5d639f commit 28baae4

File tree

2 files changed

+71
-43
lines changed

2 files changed

+71
-43
lines changed

R/dictionary.r

Lines changed: 65 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -130,47 +130,73 @@ updateDictionaries <- function() {
130130
#' @return Data frame dictionary listing available variables.
131131
createDictionary <- function(datadir="Current", name=NULL, quick=FALSE, sourcesFile = NULL) {
  # Build a variable dictionary from the Stata (.dta) files found under
  # `datadir` (relative to the "alspac_data_dir" option).
  #
  # datadir      one of the two sub-directories accepted by the check below.
  # name         if not NULL, the dictionary is saved under this name via
  #              saveDictionary() and a copy is written to inst/data/.
  # quick        passed through unchanged to processDTA().
  # sourcesFile  CSV of data sources; defaults to the sources.csv shipped
  #              with the package.
  #
  # Returns the dictionary invisibly.
  stopifnot(datadir %in% c("Current", "../DataBuddy/DataRequests/Waiting Room"))

  if (is.null(sourcesFile)) {
    sourcesFile <- system.file("extdata", "sources.csv", package = "alspac")
  }

  # getOption() matches the option name exactly; `options()$...` is subject
  # to partial matching on the options list.
  alspacdir <- getOption("alspac_data_dir")
  datadir <- file.path(alspacdir, datadir)

  files <- list.files(
    datadir,
    pattern = "dta$",
    full.names = TRUE,
    recursive = TRUE,
    ignore.case = TRUE
  )
  if (length(files) == 0) {
    warning("No .dta files found under ", datadir, call. = FALSE)
  }

  # --- Keep only the latest version of each file ---
  # A version is a trailing "_<digits><letter>" on the file name (e.g. _1a,
  # _2b). The extension is stripped case-insensitively because list.files()
  # above also matched it case-insensitively.
  stems <- sub("\\.dta$", "", basename(files), ignore.case = TRUE)
  version_pattern <- "_([0-9]+)([a-zA-Z])$"
  has_version <- grepl(version_pattern, stems)
  base <- sub(version_pattern, "", stems)

  # Files without a recognisable version get num = NA and let = "", so they
  # rank below any versioned file sharing the same base name.
  num <- rep(NA_integer_, length(stems))
  let <- rep("", length(stems))
  capture <- paste0(".*", version_pattern)
  num[has_version] <- as.integer(sub(capture, "\\1", stems[has_version]))
  let[has_version] <- tolower(sub(capture, "\\2", stems[has_version]))

  # Rank within each base: highest number first, then highest letter. The
  # radix method allows a per-key `decreasing` and still places NA last.
  # NOTE(review): grouping is by file name only, so identically named files
  # in different sub-directories are treated as versions of the same file
  # and only one is kept -- confirm this is intended.
  ranking <- order(base, num, let,
                   decreasing = c(FALSE, TRUE, TRUE), method = "radix")
  keep <- ranking[!duplicated(base[ranking])]
  latest_files <- files[sort(keep)]  # preserve list.files() ordering

  loaded <- parallel::mclapply(latest_files, function(file) {
    cat(date(), "loading", file, "\n")
    tryCatch({
      merge(
        processDTA(file, quick),
        createFileTable(file, alspacdir),
        by = "obj"
      )
    }, error = function(e) {
      # A file that fails to load is reported and skipped (NULL entries are
      # dropped by bind_rows below) rather than aborting the whole run.
      warning("Error loading ", file, "\n")
      print(e)
      NULL
    })
  })
  dictionary <- dplyr::bind_rows(loaded)

  dictionary <- dictionary[which(dictionary$counts > 0), ]

  ## add data sources information so that withdrawn consent can be
  ## handled correctly for each variable
  dictionary <- addSourcesToDictionary(dictionary, sourcesFile)

  if (!is.null(name)) {
    saveDictionary(name, dictionary)

    ## Also save a copy in /inst/data/ for devtools::load_all()
    inst_path <- file.path("inst", "data")
    if (!dir.exists(inst_path)) dir.create(inst_path, recursive = TRUE)
    # save(list = name) looks the object up by name, but in this function the
    # dictionary is bound to `dictionary`. Bind it under `name` in a scratch
    # environment so that exactly this object is written.
    copy_env <- new.env(parent = emptyenv())
    assign(name, dictionary, envir = copy_env)
    save(list = name, envir = copy_env,
         file = file.path(inst_path, paste0(name, ".rdata")))
  }

  invisible(dictionary)
}
175201

176202
countCharOccurrences <- function(char, s) {

inst/extdata/sources.csv

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
obj,path,mother_clinic,mother_quest,mother,partner_quest,partner_clinic,partner,child_based,child_completed
22
FOF1_,current/clinic/G0/partner/,FALSE,FALSE,FALSE,FALSE,TRUE,TRUE,FALSE,FALSE
3-
FOM1_,current/clinic/G0/partner/,TRUE,FALSE,TRUE,FALSE,FALSE,FALSE,FALSE,FALSE
4-
FOM2_,current/clinic/G0/partner/,TRUE,FALSE,TRUE,FALSE,FALSE,FALSE,FALSE,FALSE
5-
FOM3_,current/clinic/G0/partner/,TRUE,FALSE,TRUE,FALSE,FALSE,FALSE,FALSE,FALSE
6-
FOM4_,current/clinic/G0/partner/,TRUE,FALSE,TRUE,FALSE,FALSE,FALSE,FALSE,FALSE
3+
F30_G0P_,current/clinic/G0/partner/,FALSE,FALSE,FALSE,FALSE,TRUE,TRUE,FALSE,FALSE
4+
FOM1_,current/clinic/G0/mother/,TRUE,FALSE,TRUE,FALSE,FALSE,FALSE,FALSE,FALSE
5+
FOM2_,current/clinic/G0/mother/,TRUE,FALSE,TRUE,FALSE,FALSE,FALSE,FALSE,FALSE
6+
FOM3_,current/clinic/G0/mother/,TRUE,FALSE,TRUE,FALSE,FALSE,FALSE,FALSE,FALSE
7+
FOM4_,current/clinic/G0/mother/,TRUE,FALSE,TRUE,FALSE,FALSE,FALSE,FALSE,FALSE
8+
F30_G0M_,current/clinic/G0/mother/,TRUE,FALSE,TRUE,FALSE,FALSE,FALSE,FALSE,FALSE
79
cif_,current/clinic/G1/,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,TRUE
810
f07_,current/clinic/G1/,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,TRUE
911
f08_,current/clinic/G1/,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,TRUE

0 commit comments

Comments
 (0)