@@ -130,47 +130,73 @@ updateDictionaries <- function() {
#' @return Data frame dictionary listing available variables.
createDictionary <- function(datadir = "Current", name = NULL, quick = FALSE, sourcesFile = NULL) {
  ## Build a variable dictionary from the Stata (.dta) files found under
  ## `datadir` (resolved relative to the `alspac_data_dir` option).
  ##
  ## datadir     One of "Current" or
  ##             "../DataBuddy/DataRequests/Waiting Room".
  ## name        If not NULL, the dictionary is stored under this name via
  ##             saveDictionary() and a copy is written to
  ##             inst/data/<name>.rdata.
  ## quick       Passed through to processDTA().
  ## sourcesFile Path passed to addSourcesToDictionary(); defaults to the
  ##             "sources.csv" file shipped in the package's extdata folder.
  ##
  ## Returns the dictionary invisibly.
  stopifnot(datadir %in% c("Current", "../DataBuddy/DataRequests/Waiting Room"))

  if (is.null(sourcesFile)) {
    sourcesFile <- system.file("extdata", "sources.csv", package = "alspac")
  }

  alspacdir <- getOption("alspac_data_dir")
  datadir <- file.path(alspacdir, datadir)

  files <- list.files(datadir,
                      pattern = "dta$",
                      full.names = TRUE,
                      recursive = TRUE,
                      ignore.case = TRUE)

  ## File names are assumed to end in a version suffix such as _1a or _2b.
  ## The extension is stripped case-insensitively because list.files() above
  ## also matches it case-insensitively (e.g. ".DTA").
  stems <- sub("\\.dta$", "", basename(files), ignore.case = TRUE)
  version_pattern <- "_[0-9]+[a-zA-Z]$"
  versioned <- grepl(version_pattern, stems)
  base <- sub(version_pattern, "", stems)

  ## Only parse a version where the suffix is actually present; other files
  ## keep NA so that no junk version is derived from an unrelated name part.
  num <- rep(NA_integer_, length(stems))
  let <- rep(NA_character_, length(stems))
  vers <- sub(".*_", "", stems[versioned])
  num[versioned] <- as.integer(sub("[a-zA-Z]$", "", vers))
  ## Lower-case the letter so that e.g. "1C" ranks above "1b".
  let[versioned] <- tolower(sub("^[0-9]+", "", vers))

  file_info <- data.frame(
    file = files,
    base = base,
    num = num,
    let = let,
    stringsAsFactors = FALSE
  )

  ## Keep the latest file per base name (highest number, then highest letter).
  ## NOTE(review): files are grouped by base name only, so files in different
  ## sub-directories that share a base name are treated as versions of one
  ## another -- confirm that this is intended.
  latest_files <- file_info |>
    dplyr::arrange(dplyr::desc(num), dplyr::desc(let)) |>
    dplyr::group_by(base) |>
    dplyr::slice_head(n = 1) |>
    dplyr::ungroup() |>
    dplyr::pull(file)

  dictionary <- parallel::mclapply(latest_files, function(file) {
    cat(date(), "loading", file, "\n")
    tryCatch({
      merge(
        processDTA(file, quick),
        createFileTable(file, alspacdir), by = "obj")
    }, error = function(e) {
      ## A file that fails to load is reported and skipped (NULL entries are
      ## dropped by bind_rows below).
      warning("Error loading ", file, ": ", conditionMessage(e))
      print(e)
      NULL
    })
  }) %>% dplyr::bind_rows()

  dictionary <- dictionary[which(dictionary$counts > 0), ]

  ## add data sources information so that withdrawn consent can be
  ## handled correctly for each variable
  dictionary <- addSourcesToDictionary(dictionary, sourcesFile)

  if (!is.null(name)) {
    saveDictionary(name, dictionary)

    ## Also save a copy in /inst/data/ for devtools::load_all().
    ## The dictionary is bound to `name` in a dedicated environment so that
    ## save() writes exactly this object; no variable called `name` exists
    ## in this function's own frame.
    inst_path <- file.path("inst", "data")
    if (!dir.exists(inst_path)) {
      dir.create(inst_path, recursive = TRUE)
    }
    snapshot <- new.env(parent = emptyenv())
    assign(name, dictionary, envir = snapshot)
    save(list = name,
         envir = snapshot,
         file = file.path(inst_path, paste0(name, ".rdata")))
  }

  invisible(dictionary)
}
175201
176202countCharOccurrences <- function (char , s ) {
0 commit comments