Skip to content

Commit 4f395ab

Browse files
authored
Merge pull request #203 from r-transit/dev/speed-up-key-check
Improve duplicated primary key check
2 parents 18cf9b4 + 40719ac commit 4f395ab

File tree

2 files changed

+21
-19
lines changed

2 files changed

+21
-19
lines changed

DESCRIPTION

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -42,7 +42,7 @@ Suggests:
4242
scales,
4343
lubridate,
4444
leaflet
45-
RoxygenNote: 7.2.2
45+
RoxygenNote: 7.2.3
4646
URL: https://github.com/r-transit/tidytransit
4747
BugReports: https://github.com/r-transit/tidytransit
4848
Roxygen: list(markdown = TRUE)

R/validate_gtfs.R

Lines changed: 20 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -215,24 +215,26 @@ validate_gtfs <- function(gtfs_obj, files = NULL, warnings = TRUE) {
215215
#' Check if primary keys are unique within tables
216216
#' @param gtfs_list list of tables
217217
duplicated_primary_keys = function(gtfs_list) {
218-
vapply(names(gtfs_list), function(tbl_name) {
219-
if(tbl_name %in% names(gtfs_meta)) {
220-
id_fields = gtfs_meta[[tbl_name]]$primary_key
218+
stopifnot(inherits(gtfs_list, "list"))
219+
tbl_has_dupl_keys = rep(FALSE, length(gtfs_list))
220+
names(tbl_has_dupl_keys) <- names(gtfs_list)
221+
222+
for(tbl_name in intersect(names(gtfs_list), names(gtfs_meta))) {
223+
id_fields = gtfs_meta[[tbl_name]]$primary_key
224+
225+
if(all(!is.na(id_fields))) {
226+
if(length(id_fields) == 1 && id_fields == "*") {
227+
id_fields <- colnames(gtfs_list[[tbl_name]])
228+
}
221 229

222-
if(all(!is.na(id_fields))) {
223-
if(length(id_fields) == 1 && id_fields == "*") {
224-
id_fields <- colnames(gtfs_list[[tbl_name]])
225-
}
226-
227-
# required fields have already been checked in validate_gtfs
228-
id_fields = intersect(colnames(gtfs_list[[tbl_name]]), id_fields)
229-
if(length(id_fields) == 0) {
230-
return(FALSE)
231-
}
232-
primary_key_table = as.data.frame(gtfs_list[[tbl_name]])[,id_fields]
233-
return(any(duplicated(primary_key_table)))
230+
# required fields have already been checked in validate_gtfs
231+
id_fields = intersect(colnames(gtfs_list[[tbl_name]]), id_fields)
232+
if(length(id_fields) > 0) {
233+
primary_key_table = as.data.table(gtfs_list[[tbl_name]])[,id_fields, with = FALSE]
234+
primary_key_table_dupl.index = anyDuplicated(primary_key_table)
235+
tbl_has_dupl_keys[tbl_name] <- any(primary_key_table_dupl.index != 0)
234 236
}
235 237
}
236-
return(FALSE)
237-
}, logical(1))
238-
}
238+
}
239+
return(tbl_has_dupl_keys)
240+
}

0 commit comments

Comments
 (0)