11merge.data.table = function (x , y , by = NULL , by.x = NULL , by.y = NULL , all = FALSE , all.x = all ,
2- all.y = all , sort = TRUE , suffixes = c(" .x" , " .y" ), no.dups = TRUE ,
3- allow.cartesian = getOption(" datatable.allow.cartesian" ), incomparables = NULL , ... ) {
4-
5- # Error handling for logical arguments
2+ all.y = all , sort = TRUE , suffixes = c(" .x" , " .y" ), no.dups = TRUE ,
3+ allow.cartesian = getOption(" datatable.allow.cartesian" ),
4+ incomparables = NULL , ... ) {
65 if (! is.logical(sort )) stopf(" Argument 'sort' should be logical TRUE/FALSE" )
76 if (! is.logical(no.dups )) stopf(" Argument 'no.dups' should be logical TRUE/FALSE" )
87
9- # Convert y to data.table if not already
108 class_x = class(x )
119 if (! is.data.table(y )) {
1210 y = as.data.table(y )
1311 if (missing(by ) && missing(by.x )) {
1412 by = key(x )
1513 }
1614 }
15+
1716 x0 = length(x ) == 0L
1817 y0 = length(y ) == 0L
1918
20- # Handle case when either x or y is empty
2119 if (x0 || y0 ) {
2220 if (x0 && y0 ) {
2321 warningf(" Neither of the input data.tables to join have columns." )
@@ -34,41 +32,49 @@ merge.data.table = function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FAL
3432 nm_x = names(x )
3533 nm_y = names(y )
3634
37- # Set up 'by'/'by.x'/'by.y'
38- if ((! is.null(by.x ) || ! is.null(by.y )) && length(by.x ) != length(by.y )) {
39- stopf(" `by.x` and `by.y` must be of same length." )
40- }
41- if (! missing(by ) && ! missing(by.x )) {
42- warningf(" Supplied both `by` and `by.x/by.y`. `by` argument will be ignored." )
43- }
44-
4535 # Setup 'by', 'by.x', 'by.y'
4636 if ((! is.null(by.x ) || ! is.null(by.y )) && length(by.x ) != length(by.y )) {
4737 stopf(" by.x and by.y must be of the same length." )
4838 }
49-
39+
5040 if (! missing(by ) && ! missing(by.x )) {
5141 warningf(" Supplied both 'by' and 'by.x/by.y'. 'by' argument will be ignored." )
5242 }
53-
43+
5444 if (! is.null(by.x )) {
5545 if (length(by.x ) == 0L || ! is.character(by.x ) || ! is.character(by.y )) {
56- stopf(" A non-empty vector of column names is required for `by.x` and `by.y`." )
46+ stopf(" A non-empty vector of column names is required for by.x and by.y." )
47+ }
48+ if (! all(by.x %chin % nm_x )) {
49+ stopf(" Elements listed in by.x must be valid column names in x." )
50+ }
51+ if (! all(by.y %chin % nm_y )) {
52+ stopf(" Elements listed in by.y must be valid column names in y." )
5753 }
58- if (! all(by.x %chin % nm_x )) stopf(" Elements listed in `by.x` must be valid column names in x." )
59- if (! all(by.y %chin % nm_y )) stopf(" Elements listed in `by.y` must be valid column names in y." )
6054 by = by.x
6155 names(by ) = by.y
6256 } else {
6357 if (is.null(by )) by = intersect(key(x ), key(y ))
6458 if (! length(by )) by = key(x )
6559 if (! length(by )) by = intersect(nm_x , nm_y )
6660 if (length(by ) == 0L || ! is.character(by )) {
67- stopf(" A non-empty vector of column names for `by` is required." )
61+ stopf(" A non-empty vector of column names for 'by' is required." )
6862 }
69- if (! all(by %chin % intersect(nm_x , nm_y ))) {
70- stopf(" Elements listed in `by` must be valid column names in x and y" )
63+
64+ # Report which 'by' columns are missing from x and which are missing from y
65+ missing_in_x = setdiff(by , nm_x )
66+ missing_in_y = setdiff(by , nm_y )
67+ if (length(missing_in_x ) > 0 || length(missing_in_y ) > 0 ) {
68+ error_msg = " Columns listed in 'by' must be valid column names in both data.tables.\n "
69+ if (length(missing_in_x ) > 0 ) {
70+ error_msg = paste0(error_msg , sprintf(" ? Missing in x: %s\n " , toString(missing_in_x )))
71+ }
72+ if (length(missing_in_y ) > 0 ) {
73+ error_msg = paste0(error_msg , sprintf(" ? Missing in y: %s" , toString(missing_in_y )))
74+ }
75+ stopf(error_msg )
7176 }
77+
7278 by = unname(by )
7379 by.x = by.y = by
7480 }
@@ -78,9 +84,7 @@ merge.data.table = function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FAL
7884 ell = as.list(substitute(list (... )))[- 1L ]
7985 for (n in setdiff(names(ell ), " " )) warningf(" Unknown argument '%s' has been passed." , n )
8086 unnamed_n = length(ell ) - sum(nzchar(names(ell )))
81- if (unnamed_n ) {
82- warningf(" Passed %d unknown and unnamed arguments." , unnamed_n )
83- }
87+ if (unnamed_n ) warningf(" Passed %d unknown and unnamed arguments." , unnamed_n )
8488 }
8589
8690 # Handle duplicate column names
@@ -91,13 +95,8 @@ merge.data.table = function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FAL
9195 start [chmatch(dupnames , start , 0L )] = paste0(dupnames , suffixes [1L ])
9296 end [chmatch(dupnames , end , 0L )] = paste0(dupnames , suffixes [2L ])
9397 }
94- # If no.dups = TRUE we also need to add the suffix to columns in y that share a name with by.x
95- dupkeyx = intersect(by.x , end )
96- if (no.dups && length(dupkeyx )) {
97- end [chmatch(dupkeyx , end , 0L )] = paste0(dupkeyx , suffixes [2L ])
98- }
99-
100- # Implement incomparables argument
98+
99+ # Handle incomparables argument
101100 if (! is.null(incomparables )) {
102101 " %fin%" = function (x , table ) if (is.character(x ) && is.character(table )) x %chin % table else x %in% table
103102 xind = rowSums(x [, lapply(.SD , function (x ) ! (x %fin % incomparables )), .SDcols = by.x ]) == length(by )
@@ -107,30 +106,28 @@ merge.data.table = function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FAL
107106 }
108107
109108 dt = y [x , nomatch = if (all.x ) NA else NULL , on = by , allow.cartesian = allow.cartesian ]
110-
109+
111110 if (all.y && nrow(y )) {
112111 missingyidx = y [! x , which = TRUE , on = by , allow.cartesian = allow.cartesian ]
113- if (length(missingyidx )) {
114- dt = rbind(dt , y [missingyidx ], use.names = FALSE , fill = TRUE , ignore.attr = TRUE )
115- }
112+ if (length(missingyidx )) dt = rbind(dt , y [missingyidx ], use.names = FALSE , fill = TRUE , ignore.attr = TRUE )
116113 }
117114
118115 # Reorder columns
119116 newend = setdiff(nm_y , by.y )
120117 setcolorder(dt , c(by.y , setdiff(names(dt ), c(by.y , newend )), newend ))
121118 setnames(dt , c(by.x , start , end ))
122-
119+
123120 if (nrow(dt ) > 0L ) {
124121 setkeyv(dt , if (sort ) by.x else NULL )
125122 }
126-
123+
127124 # Warn about duplicate column names in result
128125 resultdupnames = names(dt )[duplicated(names(dt ))]
129126 if (length(resultdupnames )) {
130127 warningf(" Column names %s are duplicated in the result" , toString(resultdupnames ))
131128 }
132129
133- # Retain custom classes of first argument
130+ # Retain custom classes of the first argument (x)
134131 setattr(dt , " class" , class_x )
135132 dt
136133}
0 commit comments