Skip to content

Commit 5d0b166

Browse files
hjg
1 parent af1f0a7 commit 5d0b166

File tree

2 files changed

+30
-46
lines changed

2 files changed

+30
-46
lines changed

R/bmerge.R

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,6 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos
7575
condition <- structure(
7676
list(
7777
message = condition_message,
78-
call = NULL,
7978
c_bmerge_x_arg_bare_col_name = names(x)[xcol],
8079
c_bmerge_x_arg_type = x_merge_type,
8180
c_bmerge_i_arg_bare_col_name = names(i)[icol],

R/merge.R

Lines changed: 30 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
merge.data.table = function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FALSE, all.x = all,
22
all.y = all, sort = TRUE, suffixes = c(".x", ".y"), no.dups = TRUE, allow.cartesian=getOption("datatable.allow.cartesian"), incomparables=NULL, ...) {
33

4-
# NO user_x_name / user_y_name here to maintain original variable environment for tests
4+
# NO user_x_name / user_y_name at the top to maintain original variable environment for most of the function
5+
# They will be fetched *only* within the error handler if needed.
56

67
if (!sort %in% c(TRUE, FALSE))
78
stopf("Argument 'sort' should be logical TRUE/FALSE")
@@ -39,33 +40,32 @@ merge.data.table = function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FAL
3940
if (length(by.x) == 0L || !is.character(by.x) || !is.character(by.y))
4041
stopf("A non-empty vector of column names is required for `by.x` and `by.y`.")
4142
if (!all(idx <- by.x %chin% nm_x)) {
42-
stopf("The following columns listed in `%s` are missing from %s: %s", "by.x", "x", brackify(by.x[!idx])) # Original: literal "x"
43+
stopf("The following columns listed in `%s` are missing from %s: %s", "by.x", "x", brackify(by.x[!idx]))
4344
}
4445
if (!all(idx <- by.y %chin% nm_y)) {
45-
stopf("The following columns listed in `%s` are missing from %s: %s", "by.y", "y", brackify(by.y[!idx])) # Original: literal "y"
46+
stopf("The following columns listed in `%s` are missing from %s: %s", "by.y", "y", brackify(by.y[!idx]))
4647
}
4748
by = by.x
4849
names(by) = by.y
4950
} else {
5051
if (is.null(by))
5152
by = intersect(key(x), key(y))
52-
if (!length(by)) # was is.null() before PR#5183 changed to !length()
53+
if (!length(by))
5354
by = key(x)
5455
if (!length(by))
5556
by = intersect(nm_x, nm_y)
5657
if (length(by) == 0L || !is.character(by))
5758
stopf("A non-empty vector of column names for `by` is required.")
5859
if (!all(idx <- by %in% nm_x)) {
59-
stopf("The following columns listed in `%s` are missing from %s: %s", "by", "x", brackify(by[!idx])) # Original: literal "x"
60+
stopf("The following columns listed in `%s` are missing from %s: %s", "by", "x", brackify(by[!idx]))
6061
}
6162
if (!all(idx <- by %in% nm_y)) {
62-
stopf("The following columns listed in `%s` are missing from %s: %s", "by", "y", brackify(by[!idx])) # Original: literal "y"
63+
stopf("The following columns listed in `%s` are missing from %s: %s", "by", "y", brackify(by[!idx]))
6364
}
6465
by = unname(by)
6566
by.x = by.y = by
6667
}
6768

68-
# warn about unused arguments #2587
6969
if (length(list(...))) {
7070
ell = as.list(substitute(list(...)))[-1L]
7171
for (n in setdiff(names(ell), "")) warningf("Unknown argument '%s' has been passed.", n)
@@ -74,8 +74,8 @@ merge.data.table = function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FAL
7474
warningf("Passed %d unknown and unnamed arguments.", unnamed_n)
7575
}
7676

77-
start = setdiff(nm_x, by.x) # Original logic
78-
end = setdiff(nm_y, by.y) # Original logic
77+
start = setdiff(nm_x, by.x)
78+
end = setdiff(nm_y, by.y)
7979
dupnames = intersect(start, end)
8080
if (length(dupnames)) {
8181
start[chmatch(dupnames, start, 0L)] = paste0(dupnames, suffixes[1L])
@@ -86,62 +86,47 @@ merge.data.table = function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FAL
8686
end[chmatch(dupkeyx, end, 0L)] = paste0(dupkeyx, suffixes[2L])
8787
}
8888

89-
# implement incomparables argument #2587
9089
if (!is.null(incomparables)) {
91-
# %fin% to be replaced when #5232 is implemented/closed
9290
"%fin%" = function(x_val, table_val) if (is.character(x_val) && is.character(table_val)) x_val %chin% table_val else x_val %in% table_val
93-
xind = rowSums(x[, lapply(.SD, function(x_col_val) !(x_col_val %fin% incomparables)), .SDcols=by.x]) == length(by.x) # Original by.x usage
94-
yind = rowSums(y[, lapply(.SD, function(y_col_val) !(y_col_val %fin% incomparables)), .SDcols=by.y]) == length(by.y) # Original by.y usage
95-
# subset both so later steps still work
96-
x = x[xind] # Original modification of x
97-
y = y[yind] # Original modification of y
91+
xind = rowSums(x[, lapply(.SD, function(x_col_val) !(x_col_val %fin% incomparables)), .SDcols=by.x]) == length(by.x)
92+
yind = rowSums(y[, lapply(.SD, function(y_col_val) !(y_col_val %fin% incomparables)), .SDcols=by.y]) == length(by.y)
93+
x = x[xind]
94+
y = y[yind]
9895
}
9996

100-
# ----- MINIMAL MODIFICATION: tryCatch around the main join call -----
10197
dt <- tryCatch({
102-
# Original join call, using 'by' as constructed by original logic above
10398
y[x, nomatch=if (all.x) NA else NULL, on=by, allow.cartesian=allow.cartesian]
10499
},
105100
bmerge_incompatible_type_error = function(e) {
106-
# Get table names *locally* only when this specific error occurs.
107-
# These x_arg_name and y_arg_name refer to the 'x' and 'y' arguments of merge.data.table
108-
x_arg_name <- deparse1(substitute(x, env = parent.frame(2))) # Go up to merge.data.table's frame
109-
y_arg_name <- deparse1(substitute(y, env = parent.frame(2))) # Go up to merge.data.table's frame
110-
111-
# Ensure arg names are single, non-empty strings
112-
final_x_arg_name <- if (is.null(x_arg_name) || length(x_arg_name) != 1L || !nzchar(x_arg_name[1L])) "arg 'x'" else x_arg_name[1L]
113-
final_y_arg_name <- if (is.null(y_arg_name) || length(y_arg_name) != 1L || !nzchar(y_arg_name[1L])) "arg 'y'" else y_arg_name[1L]
114-
115-
# Extract column/type details from error object 'e'
116-
# Assumes attributes on 'e' are valid single character strings from R/bmerge.R
117-
# Mapping: merge's 'x' is bmerge's 'i'; merge's 'y' is bmerge's 'x'
118-
col_from_x_arg <- e$c_bmerge_i_arg_bare_col_name
119-
type_from_x_arg <- e$c_bmerge_i_arg_type
120-
col_from_y_arg <- e$c_bmerge_x_arg_bare_col_name
121-
type_from_y_arg <- e$c_bmerge_x_arg_type
101+
# For merge(x=DT1, y=DT2), DT1 (user's 'x') is bmerge's 'i'
102+
# DT2 (user's 'y') is bmerge's 'x'
103+
x_part_col_name <- e$c_bmerge_i_arg_bare_col_name
104+
x_part_type <- e$c_bmerge_i_arg_type
105+
y_part_col_name <- e$c_bmerge_x_arg_bare_col_name
106+
y_part_type <- e$c_bmerge_x_arg_type
122107

123-
# Construct the concise error message
108+
# Use literal "x." and "y." prefixes referring to the arguments of merge()
124109
msg <- sprintf(
125-
"Incompatible join types for merge: table '%s' column '%s' (%s) and table '%s' column '%s' (%s). Factor columns must join to factor or character columns.",
126-
final_x_arg_name, col_from_x_arg, type_from_x_arg,
127-
final_y_arg_name, col_from_y_arg, type_from_y_arg
110+
"Incompatible join types: x.%s (%s) and y.%s (%s). Factor columns must join to factor or character columns.",
111+
x_part_col_name, x_part_type, # Corresponds to merge() argument 'x'
112+
y_part_col_name, y_part_type # Corresponds to merge() argument 'y'
128113
)
129114

130-
stop(errorCondition(message = msg, class = c("datatable_merge_type_error", "data.table_error", "error", "condition"), call = NULL))
115+
# Remove call = NULL to get "Error in merge.data.table(...): " prefix.
116+
stop(errorCondition(message = msg, class = c("datatable_merge_type_error", "data.table_error", "error", "condition")))
131117
}
132118
)
133-
# ----- END MINIMAL MODIFICATION -----
134119

135-
if (all.y && nrow(y)) { # Original logic
120+
if (all.y && nrow(y)) {
136121
missingyidx = y[!x, which=TRUE, on=by, allow.cartesian=allow.cartesian]
137122
if (length(missingyidx)) dt = rbind(dt, y[missingyidx], use.names=FALSE, fill=TRUE, ignore.attr=TRUE)
138123
}
139124

140-
newend = setdiff(nm_y, by.y) # Original logic
141-
setcolorder(dt, c(by.y, setdiff(names(dt), c(by.y, newend)), newend)) # Original logic
142-
setnames(dt, c(by.x, start, end)) # Original logic
125+
newend = setdiff(nm_y, by.y)
126+
setcolorder(dt, c(by.y, setdiff(names(dt), c(by.y, newend)), newend))
127+
setnames(dt, c(by.x, start, end))
143128
if (nrow(dt) > 0L) {
144-
setkeyv(dt, if (sort) by.x else NULL) # Original logic
129+
setkeyv(dt, if (sort) by.x else NULL)
145130
}
146131

147132
resultdupnames = names(dt)[duplicated(names(dt))]

0 commit comments

Comments
 (0)