diff --git a/.ci/.lintr.R b/.ci/.lintr.R index f081763b41..c4f6d0c5e1 100644 --- a/.ci/.lintr.R +++ b/.ci/.lintr.R @@ -40,6 +40,9 @@ linters = c(dt_linters, all_linters( # TODO(lintr#2442): Use this once x[ , j, by] is supported. commas_linter = NULL, commented_code_linter = NULL, + # mostly, we just use stopf() & friends, but ignore this for the + # rare cases we need plain stop (e.g. #7048) + condition_call_linter = NULL, # TODO(linter->3.2.0): Activate this. consecutive_assertion_linter = NULL, cyclocomp_linter = NULL, diff --git a/R/bmerge.R b/R/bmerge.R index dffca5e44f..0331624200 100644 --- a/R/bmerge.R +++ b/R/bmerge.R @@ -84,7 +84,17 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos next } } - stopf("Incompatible join types: %s (%s) and %s (%s). Factor columns must join to factor or character columns.", xname, x_merge_type, iname, i_merge_type) + condition_message = gettextf("Incompatible join types: %s (%s) and %s (%s). Factor columns must join to factor or character columns.", xname, x_merge_type, iname, i_merge_type) + condition = list( + message = condition_message, + call = sys.call(sys.nframe() - 1L), + bmerge_x_arg_col_name = names(x)[xcol], + bmerge_x_arg_type = x_merge_type, + bmerge_i_arg_col_name = names(i)[icol], + bmerge_i_arg_type = i_merge_type + ) + class(condition) = c("dt_bmerge_incompatible_type_error", "error", "condition") + stop(condition) } # we check factors first to cater for the case when trying to do rolling joins on factors if (x_merge_type == i_merge_type) { diff --git a/R/merge.R b/R/merge.R index 4d7245983e..cc15b830bf 100644 --- a/R/merge.R +++ b/R/merge.R @@ -90,15 +90,24 @@ merge.data.table = function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FAL # implement incomparables argument #2587 if (!is.null(incomparables)) { - # %fin% to be replaced when #5232 is implemented/closed - "%fin%" = function(x, table) if (is.character(x) && is.character(table)) x %chin% table else x %in% table - 
xind = rowSums(x[, lapply(.SD, function(x) !(x %fin% incomparables)), .SDcols=by.x]) == length(by) - yind = rowSums(y[, lapply(.SD, function(x) !(x %fin% incomparables)), .SDcols=by.y]) == length(by) - # subset both so later steps still work + "%fin%" = function(x_val, table_val) if (is.character(x_val) && is.character(table_val)) x_val %chin% table_val else x_val %in% table_val + xind = rowSums(x[, lapply(.SD, function(x_col_val) !(x_col_val %fin% incomparables)), .SDcols=by.x]) == length(by.x) + yind = rowSums(y[, lapply(.SD, function(y_col_val) !(y_col_val %fin% incomparables)), .SDcols=by.y]) == length(by.y) x = x[xind] y = y[yind] } - dt = y[x, nomatch=if (all.x) NA else NULL, on=by, allow.cartesian=allow.cartesian] # includes JIS columns (with a i. prefix if conflict with x names) + + dt = tryCatch( + y[x, nomatch=if (all.x) NA else NULL, on=by, allow.cartesian=allow.cartesian], + dt_bmerge_incompatible_type_error = function(e) { + x_part_col_name = paste0("x.", e$bmerge_i_arg_col_name) + x_part_type = e$bmerge_i_arg_type + y_part_col_name = paste0("y.", e$bmerge_x_arg_col_name) + y_part_type = e$bmerge_x_arg_type + + stopf("Incompatible join types: %s (%s) and %s (%s). Factor columns must join to factor or character columns.", x_part_col_name, x_part_type, y_part_col_name, y_part_type) + } + ) if (all.y && nrow(y)) { # If y does not have any rows, no need to proceed # Perhaps not very commonly used, so not a huge deal that the join is redone here. 
diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index c9cfe6244b..c8bb42ae0e 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -21157,3 +21157,8 @@ test(2317.6, DT1[DF1, on='a', .(d = x.a + i.d)]$d, 5) test(2317.7, DT1[DF2, on='a', e := i.e]$e, 5) test(2317.8, DT1[DF2, on='a', e2 := x.a + i.e]$e2, 6) test(2317.9, DT1[DF2, on='a', .(e = x.a + i.e)]$e, 6) + +# Test that incompatible factor join errors are rephrased by merge.data.table (#7048) +DT1=data.table(a=factor('a')) +DT2=data.table(a=1L) +test(2318, merge(DT1, DT2, by = "a"), error = "Incompatible join types: x.a (factor) and y.a (integer). Factor columns must join to factor or character columns.")