Skip to content

Commit ee5ac99

Browse files
committed
recheck
1 parent c4c447e commit ee5ac99

File tree

1 file changed

+36
-39
lines changed

1 file changed

+36
-39
lines changed

R/merge.R

Lines changed: 36 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,21 @@
11
merge.data.table = function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FALSE, all.x = all,
2-
all.y = all, sort = TRUE, suffixes = c(".x", ".y"), no.dups = TRUE,
3-
allow.cartesian = getOption("datatable.allow.cartesian"), incomparables = NULL, ...) {
4-
5-
# Error handling for logical arguments
2+
all.y = all, sort = TRUE, suffixes = c(".x", ".y"), no.dups = TRUE,
3+
allow.cartesian = getOption("datatable.allow.cartesian"),
4+
incomparables = NULL, ...) {
65
if (!is.logical(sort)) stopf("Argument 'sort' should be logical TRUE/FALSE")
76
if (!is.logical(no.dups)) stopf("Argument 'no.dups' should be logical TRUE/FALSE")
87

9-
# Convert y to data.table if not already
108
class_x = class(x)
119
if (!is.data.table(y)) {
1210
y = as.data.table(y)
1311
if (missing(by) && missing(by.x)) {
1412
by = key(x)
1513
}
1614
}
15+
1716
x0 = length(x) == 0L
1817
y0 = length(y) == 0L
1918

20-
# Handle case when either x or y is empty
2119
if (x0 || y0) {
2220
if (x0 && y0) {
2321
warningf("Neither of the input data.tables to join have columns.")
@@ -34,41 +32,49 @@ merge.data.table = function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FAL
3432
nm_x = names(x)
3533
nm_y = names(y)
3634

37-
# Set up 'by'/'by.x'/'by.y'
38-
if ((!is.null(by.x) || !is.null(by.y)) && length(by.x) != length(by.y)) {
39-
stopf("`by.x` and `by.y` must be of same length.")
40-
}
41-
if (!missing(by) && !missing(by.x)) {
42-
warningf("Supplied both `by` and `by.x/by.y`. `by` argument will be ignored.")
43-
}
44-
4535
# Set up 'by', 'by.x', 'by.y'
4636
if ((!is.null(by.x) || !is.null(by.y)) && length(by.x) != length(by.y)) {
4737
stopf("by.x and by.y must be of the same length.")
4838
}
49-
39+
5040
if (!missing(by) && !missing(by.x)) {
5141
warningf("Supplied both 'by' and 'by.x/by.y'. 'by' argument will be ignored.")
5242
}
53-
43+
5444
if (!is.null(by.x)) {
5545
if (length(by.x) == 0L || !is.character(by.x) || !is.character(by.y)) {
56-
stopf("A non-empty vector of column names is required for `by.x` and `by.y`.")
46+
stopf("A non-empty vector of column names is required for by.x and by.y.")
47+
}
48+
if (!all(by.x %chin% nm_x)) {
49+
stopf("Elements listed in by.x must be valid column names in x.")
50+
}
51+
if (!all(by.y %chin% nm_y)) {
52+
stopf("Elements listed in by.y must be valid column names in y.")
5753
}
58-
if (!all(by.x %chin% nm_x)) stopf("Elements listed in `by.x` must be valid column names in x.")
59-
if (!all(by.y %chin% nm_y)) stopf("Elements listed in `by.y` must be valid column names in y.")
6054
by = by.x
6155
names(by) = by.y
6256
} else {
6357
if (is.null(by)) by = intersect(key(x), key(y))
6458
if (!length(by)) by = key(x)
6559
if (!length(by)) by = intersect(nm_x, nm_y)
6660
if (length(by) == 0L || !is.character(by)) {
67-
stopf("A non-empty vector of column names for `by` is required.")
61+
stopf("A non-empty vector of column names for 'by' is required.")
6862
}
69-
if (!all(by %chin% intersect(nm_x, nm_y))) {
70-
stopf("Elements listed in `by` must be valid column names in x and y")
63+
64+
# Report which 'by' columns are missing from x and/or from y
65+
missing_in_x = setdiff(by, nm_x)
66+
missing_in_y = setdiff(by, nm_y)
67+
if (length(missing_in_x) > 0 || length(missing_in_y) > 0) {
68+
error_msg = "Columns listed in 'by' must be valid column names in both data.tables.\n"
69+
if (length(missing_in_x) > 0) {
70+
error_msg = paste0(error_msg, sprintf("? Missing in x: %s\n", toString(missing_in_x)))
71+
}
72+
if (length(missing_in_y) > 0) {
73+
error_msg = paste0(error_msg, sprintf("? Missing in y: %s", toString(missing_in_y)))
74+
}
75+
stopf(error_msg)
7176
}
77+
7278
by = unname(by)
7379
by.x = by.y = by
7480
}
@@ -78,9 +84,7 @@ merge.data.table = function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FAL
7884
ell = as.list(substitute(list(...)))[-1L]
7985
for (n in setdiff(names(ell), "")) warningf("Unknown argument '%s' has been passed.", n)
8086
unnamed_n = length(ell) - sum(nzchar(names(ell)))
81-
if (unnamed_n) {
82-
warningf("Passed %d unknown and unnamed arguments.", unnamed_n)
83-
}
87+
if (unnamed_n) warningf("Passed %d unknown and unnamed arguments.", unnamed_n)
8488
}
8589

8690
# Handle duplicate column names
@@ -91,13 +95,8 @@ merge.data.table = function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FAL
9195
start[chmatch(dupnames, start, 0L)] = paste0(dupnames, suffixes[1L])
9296
end[chmatch(dupnames, end, 0L)] = paste0(dupnames, suffixes[2L])
9397
}
94-
# If no.dups = TRUE we also need to add the suffix to columns in y that share a name with by.x
95-
dupkeyx = intersect(by.x, end)
96-
if (no.dups && length(dupkeyx)) {
97-
end[chmatch(dupkeyx, end, 0L)] = paste0(dupkeyx, suffixes[2L])
98-
}
99-
100-
# Implement incomparables argument
98+
99+
# Handle incomparables argument
101100
if (!is.null(incomparables)) {
102101
"%fin%" = function(x, table) if (is.character(x) && is.character(table)) x %chin% table else x %in% table
103102
xind = rowSums(x[, lapply(.SD, function(x) !(x %fin% incomparables)), .SDcols = by.x]) == length(by)
@@ -107,30 +106,28 @@ merge.data.table = function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FAL
107106
}
108107

109108
dt = y[x, nomatch = if (all.x) NA else NULL, on = by, allow.cartesian = allow.cartesian]
110-
109+
111110
if (all.y && nrow(y)) {
112111
missingyidx = y[!x, which = TRUE, on = by, allow.cartesian = allow.cartesian]
113-
if (length(missingyidx)) {
114-
dt = rbind(dt, y[missingyidx], use.names = FALSE, fill = TRUE, ignore.attr = TRUE)
115-
}
112+
if (length(missingyidx)) dt = rbind(dt, y[missingyidx], use.names = FALSE, fill = TRUE, ignore.attr = TRUE)
116113
}
117114

118115
# Reorder columns
119116
newend = setdiff(nm_y, by.y)
120117
setcolorder(dt, c(by.y, setdiff(names(dt), c(by.y, newend)), newend))
121118
setnames(dt, c(by.x, start, end))
122-
119+
123120
if (nrow(dt) > 0L) {
124121
setkeyv(dt, if (sort) by.x else NULL)
125122
}
126-
123+
127124
# Warn about duplicate column names in result
128125
resultdupnames = names(dt)[duplicated(names(dt))]
129126
if (length(resultdupnames)) {
130127
warningf("Column names %s are duplicated in the result", toString(resultdupnames))
131128
}
132129

133-
# Retain custom classes of first argument
130+
# Retain custom classes
134131
setattr(dt, "class", class_x)
135132
dt
136133
}

0 commit comments

Comments
 (0)