11merge.data.table = function (x , y , by = NULL , by.x = NULL , by.y = NULL , all = FALSE , all.x = all ,
2- all.y = all , sort = TRUE , suffixes = c(" .x" , " .y" ), no.dups = TRUE ,
3- allow.cartesian = getOption(" datatable.allow.cartesian" ),
4- incomparables = NULL , ... ) {
2+ all.y = all , sort = TRUE , suffixes = c(" .x" , " .y" ), no.dups = TRUE ,
3+ allow.cartesian = getOption(" datatable.allow.cartesian" ), incomparables = NULL , ... ) {
4+
55 # Error handling for logical arguments
66 if (! is.logical(sort )) stopf(" Argument 'sort' should be logical TRUE/FALSE" )
77 if (! is.logical(no.dups )) stopf(" Argument 'no.dups' should be logical TRUE/FALSE" )
@@ -14,11 +14,10 @@ merge.data.table = function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FAL
1414 by = key(x )
1515 }
1616 }
17-
18- # Handle case when either x or y is empty
1917 x0 = length(x ) == 0L
2018 y0 = length(y ) == 0L
2119
20+ # Handle case when either x or y is empty
2221 if (x0 || y0 ) {
2322 if (x0 && y0 ) {
2423 warningf(" Neither of the input data.tables to join have columns." )
@@ -35,49 +34,41 @@ merge.data.table = function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FAL
3534 nm_x = names(x )
3635 nm_y = names(y )
3736
# Set up 'by'/'by.x'/'by.y'.
# These checks run exactly once: a second copy of them would emit the
# "Supplied both ..." warning twice for a single call.
if ((!is.null(by.x) || !is.null(by.y)) && length(by.x) != length(by.y)) {
  stopf("`by.x` and `by.y` must be of same length.")
}
if (!missing(by) && !missing(by.x)) {
  warningf("Supplied both `by` and `by.x/by.y`. `by` argument will be ignored.")
}

if (!is.null(by.x)) {
  # Explicit by.x/by.y: both must be non-empty character vectors naming
  # existing columns of x and y respectively.
  if (length(by.x) == 0L || !is.character(by.x) || !is.character(by.y)) {
    stopf("A non-empty vector of column names is required for `by.x` and `by.y`.")
  }
  if (!all(by.x %chin% nm_x)) stopf("Elements listed in `by.x` must be valid column names in x.")
  if (!all(by.y %chin% nm_y)) stopf("Elements listed in `by.y` must be valid column names in y.")
  # 'by' becomes a named vector: values are the x column names, names are the
  # paired y column names. It is later passed as `on=` to the join.
  by = by.x
  names(by) = by.y
} else {
  # No by.x given: fall back, in order, to the keys shared by x and y, then
  # the key of x, then the column names common to both tables.
  if (is.null(by)) by = intersect(key(x), key(y))
  if (!length(by)) by = key(x)
  if (!length(by)) by = intersect(nm_x, nm_y)
  if (length(by) == 0L || !is.character(by)) {
    stopf("A non-empty vector of column names for `by` is required.")
  }
  # Every 'by' column has to exist in both inputs.
  if (!all(by %chin% intersect(nm_x, nm_y))) {
    stopf("Elements listed in `by` must be valid column names in x and y")
  }
  # Drop any names so that 'by', 'by.x' and 'by.y' are the same plain vector.
  by = unname(by)
  by.x = by.y = by
}
@@ -87,7 +78,9 @@ merge.data.table = function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FAL
8778 ell = as.list(substitute(list (... )))[- 1L ]
8879 for (n in setdiff(names(ell ), " " )) warningf(" Unknown argument '%s' has been passed." , n )
8980 unnamed_n = length(ell ) - sum(nzchar(names(ell )))
90- if (unnamed_n ) warningf(" Passed %d unknown and unnamed arguments." , unnamed_n )
81+ if (unnamed_n ) {
82+ warningf(" Passed %d unknown and unnamed arguments." , unnamed_n )
83+ }
9184 }
9285
9386 # Handle duplicate column names
@@ -98,8 +91,13 @@ merge.data.table = function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FAL
9891 start [chmatch(dupnames , start , 0L )] = paste0(dupnames , suffixes [1L ])
9992 end [chmatch(dupnames , end , 0L )] = paste0(dupnames , suffixes [2L ])
10093 }
101-
102- # Handle incomparables argument
94+ # If no.dups = TRUE we also need to add the suffix to columns in y that share a name with by.x
95+ dupkeyx = intersect(by.x , end )
96+ if (no.dups && length(dupkeyx )) {
97+ end [chmatch(dupkeyx , end , 0L )] = paste0(dupkeyx , suffixes [2L ])
98+ }
99+
100+ # Implement incomparables argument
103101 if (! is.null(incomparables )) {
104102 " %fin%" = function (x , table ) if (is.character(x ) && is.character(table )) x %chin % table else x %in% table
105103 xind = rowSums(x [, lapply(.SD , function (x ) ! (x %fin % incomparables )), .SDcols = by.x ]) == length(by )
@@ -109,28 +107,30 @@ merge.data.table = function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FAL
109107 }
110108
111109 dt = y [x , nomatch = if (all.x ) NA else NULL , on = by , allow.cartesian = allow.cartesian ]
112-
110+
113111 if (all.y && nrow(y )) {
114112 missingyidx = y [! x , which = TRUE , on = by , allow.cartesian = allow.cartesian ]
115- if (length(missingyidx )) dt = rbind(dt , y [missingyidx ], use.names = FALSE , fill = TRUE , ignore.attr = TRUE )
113+ if (length(missingyidx )) {
114+ dt = rbind(dt , y [missingyidx ], use.names = FALSE , fill = TRUE , ignore.attr = TRUE )
115+ }
116116 }
117117
118118 # Reorder columns
119119 newend = setdiff(nm_y , by.y )
120120 setcolorder(dt , c(by.y , setdiff(names(dt ), c(by.y , newend )), newend ))
121121 setnames(dt , c(by.x , start , end ))
122-
122+
123123 if (nrow(dt ) > 0L ) {
124124 setkeyv(dt , if (sort ) by.x else NULL )
125125 }
126-
126+
127127 # Warn about duplicate column names in result
128128 resultdupnames = names(dt )[duplicated(names(dt ))]
129129 if (length(resultdupnames )) {
130130 warningf(" Column names %s are duplicated in the result" , toString(resultdupnames ))
131131 }
132132
133- # Retain custom classes
133+ # Retain custom classes of first argument
134134 setattr(dt , " class" , class_x )
135135 dt
136136}
0 commit comments