11merge.data.table = function (x , y , by = NULL , by.x = NULL , by.y = NULL , all = FALSE , all.x = all ,
22 all.y = all , sort = TRUE , suffixes = c(" .x" , " .y" ), no.dups = TRUE , allow.cartesian = getOption(" datatable.allow.cartesian" ), incomparables = NULL , ... ) {
33
4- # NO user_x_name / user_y_name here to maintain original variable environment for tests
4+ # NO user_x_name / user_y_name at the top to maintain original variable environment for most of the function
5+ # They will be fetched *only* within the error handler if needed.
56
67 if (! sort %in% c(TRUE , FALSE ))
78 stopf(" Argument 'sort' should be logical TRUE/FALSE" )
@@ -39,33 +40,32 @@ merge.data.table = function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FAL
3940 if (length(by.x ) == 0L || ! is.character(by.x ) || ! is.character(by.y ))
4041 stopf(" A non-empty vector of column names is required for `by.x` and `by.y`." )
4142 if (! all(idx <- by.x %chin % nm_x )) {
42- stopf(" The following columns listed in `%s` are missing from %s: %s" , " by.x" , " x" , brackify(by.x [! idx ])) # Original: literal "x"
43+ stopf(" The following columns listed in `%s` are missing from %s: %s" , " by.x" , " x" , brackify(by.x [! idx ]))
4344 }
4445 if (! all(idx <- by.y %chin % nm_y )) {
45- stopf(" The following columns listed in `%s` are missing from %s: %s" , " by.y" , " y" , brackify(by.y [! idx ])) # Original: literal "y"
46+ stopf(" The following columns listed in `%s` are missing from %s: %s" , " by.y" , " y" , brackify(by.y [! idx ]))
4647 }
4748 by = by.x
4849 names(by ) = by.y
4950 } else {
5051 if (is.null(by ))
5152 by = intersect(key(x ), key(y ))
52- if (! length(by )) # was is.null() before PR#5183 changed to !length()
53+ if (! length(by ))
5354 by = key(x )
5455 if (! length(by ))
5556 by = intersect(nm_x , nm_y )
5657 if (length(by ) == 0L || ! is.character(by ))
5758 stopf(" A non-empty vector of column names for `by` is required." )
5859 if (! all(idx <- by %in% nm_x )) {
59- stopf(" The following columns listed in `%s` are missing from %s: %s" , " by" , " x" , brackify(by [! idx ])) # Original: literal "x"
60+ stopf(" The following columns listed in `%s` are missing from %s: %s" , " by" , " x" , brackify(by [! idx ]))
6061 }
6162 if (! all(idx <- by %in% nm_y )) {
62- stopf(" The following columns listed in `%s` are missing from %s: %s" , " by" , " y" , brackify(by [! idx ])) # Original: literal "y"
63+ stopf(" The following columns listed in `%s` are missing from %s: %s" , " by" , " y" , brackify(by [! idx ]))
6364 }
6465 by = unname(by )
6566 by.x = by.y = by
6667 }
6768
68- # warn about unused arguments #2587
6969 if (length(list (... ))) {
7070 ell = as.list(substitute(list (... )))[- 1L ]
7171 for (n in setdiff(names(ell ), " " )) warningf(" Unknown argument '%s' has been passed." , n )
@@ -74,8 +74,8 @@ merge.data.table = function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FAL
7474 warningf(" Passed %d unknown and unnamed arguments." , unnamed_n )
7575 }
7676
77- start = setdiff(nm_x , by.x ) # Original logic
78- end = setdiff(nm_y , by.y ) # Original logic
77+ start = setdiff(nm_x , by.x )
78+ end = setdiff(nm_y , by.y )
7979 dupnames = intersect(start , end )
8080 if (length(dupnames )) {
8181 start [chmatch(dupnames , start , 0L )] = paste0(dupnames , suffixes [1L ])
@@ -86,62 +86,47 @@ merge.data.table = function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FAL
8686 end [chmatch(dupkeyx , end , 0L )] = paste0(dupkeyx , suffixes [2L ])
8787 }
8888
89- # implement incomparables argument #2587
9089 if (! is.null(incomparables )) {
91- # %fin% to be replaced when #5232 is implemented/closed
9290 " %fin%" = function (x_val , table_val ) if (is.character(x_val ) && is.character(table_val )) x_val %chin % table_val else x_val %in% table_val
93- xind = rowSums(x [, lapply(.SD , function (x_col_val ) ! (x_col_val %fin % incomparables )), .SDcols = by.x ]) == length(by.x ) # Original by.x usage
94- yind = rowSums(y [, lapply(.SD , function (y_col_val ) ! (y_col_val %fin % incomparables )), .SDcols = by.y ]) == length(by.y ) # Original by.y usage
95- # subset both so later steps still work
96- x = x [xind ] # Original modification of x
97- y = y [yind ] # Original modification of y
91+ xind = rowSums(x [, lapply(.SD , function (x_col_val ) ! (x_col_val %fin % incomparables )), .SDcols = by.x ]) == length(by.x )
92+ yind = rowSums(y [, lapply(.SD , function (y_col_val ) ! (y_col_val %fin % incomparables )), .SDcols = by.y ]) == length(by.y )
93+ x = x [xind ]
94+ y = y [yind ]
9895 }
9996
100- # ----- MINIMAL MODIFICATION: tryCatch around the main join call -----
10197 dt <- tryCatch({
102- # Original join call, using 'by' as constructed by original logic above
10398 y [x , nomatch = if (all.x ) NA else NULL , on = by , allow.cartesian = allow.cartesian ]
10499 },
105100 bmerge_incompatible_type_error = function (e ) {
106- # Get table names *locally* only when this specific error occurs.
107- # These x_arg_name and y_arg_name refer to the 'x' and 'y' arguments of merge.data.table
108- x_arg_name <- deparse1(substitute(x , env = parent.frame(2 ))) # Go up to merge.data.table's frame
109- y_arg_name <- deparse1(substitute(y , env = parent.frame(2 ))) # Go up to merge.data.table's frame
110-
111- # Ensure arg names are single, non-empty strings
112- final_x_arg_name <- if (is.null(x_arg_name ) || length(x_arg_name ) != 1L || ! nzchar(x_arg_name [1L ])) " arg 'x'" else x_arg_name [1L ]
113- final_y_arg_name <- if (is.null(y_arg_name ) || length(y_arg_name ) != 1L || ! nzchar(y_arg_name [1L ])) " arg 'y'" else y_arg_name [1L ]
114-
115- # Extract column/type details from error object 'e'
116- # Assumes attributes on 'e' are valid single character strings from R/bmerge.R
117- # Mapping: merge's 'x' is bmerge's 'i'; merge's 'y' is bmerge's 'x'
118- col_from_x_arg <- e $ c_bmerge_i_arg_bare_col_name
119- type_from_x_arg <- e $ c_bmerge_i_arg_type
120- col_from_y_arg <- e $ c_bmerge_x_arg_bare_col_name
121- type_from_y_arg <- e $ c_bmerge_x_arg_type
101+ # For merge(x=DT1, y=DT2), DT1 (user's 'x') is bmerge's 'i'
102+ # DT2 (user's 'y') is bmerge's 'x'
103+ x_part_col_name <- e $ c_bmerge_i_arg_bare_col_name
104+ x_part_type <- e $ c_bmerge_i_arg_type
105+ y_part_col_name <- e $ c_bmerge_x_arg_bare_col_name
106+ y_part_type <- e $ c_bmerge_x_arg_type
122107
123- # Construct the concise error message
108+ # Use literal "x." and "y." prefixes referring to the arguments of merge()
124109 msg <- sprintf(
125- " Incompatible join types for merge: table '%s' column '%s' (%s) and table '%s' column '%s' (%s). Factor columns must join to factor or character columns." ,
126- final_x_arg_name , col_from_x_arg , type_from_x_arg ,
127- final_y_arg_name , col_from_y_arg , type_from_y_arg
110+ " Incompatible join types: x.%s (%s) and y.%s (%s). Factor columns must join to factor or character columns." ,
111+ x_part_col_name , x_part_type , # Corresponds to merge() argument 'x'
112+ y_part_col_name , y_part_type # Corresponds to merge() argument 'y'
128113 )
129114
130- stop(errorCondition(message = msg , class = c(" datatable_merge_type_error" , " data.table_error" , " error" , " condition" ), call = NULL ))
115+ # Remove call = NULL to get "Error in merge.data.table(...): " prefix.
116+ stop(errorCondition(message = msg , class = c(" datatable_merge_type_error" , " data.table_error" , " error" , " condition" )))
131117 }
132118 )
133- # ----- END MINIMAL MODIFICATION -----
134119
135- if (all.y && nrow(y )) { # Original logic
120+ if (all.y && nrow(y )) {
136121 missingyidx = y [! x , which = TRUE , on = by , allow.cartesian = allow.cartesian ]
137122 if (length(missingyidx )) dt = rbind(dt , y [missingyidx ], use.names = FALSE , fill = TRUE , ignore.attr = TRUE )
138123 }
139124
140- newend = setdiff(nm_y , by.y ) # Original logic
141- setcolorder(dt , c(by.y , setdiff(names(dt ), c(by.y , newend )), newend )) # Original logic
142- setnames(dt , c(by.x , start , end )) # Original logic
125+ newend = setdiff(nm_y , by.y )
126+ setcolorder(dt , c(by.y , setdiff(names(dt ), c(by.y , newend )), newend ))
127+ setnames(dt , c(by.x , start , end ))
143128 if (nrow(dt ) > 0L ) {
144- setkeyv(dt , if (sort ) by.x else NULL ) # Original logic
129+ setkeyv(dt , if (sort ) by.x else NULL )
145130 }
146131
147132 resultdupnames = names(dt )[duplicated(names(dt ))]
0 commit comments