Skip to content

Commit 49f849b

Browse files
committed
Revert "remove indent to cater for diff"
This reverts commit 562a9fd.
1 parent 562a9fd commit 49f849b

File tree

1 file changed

+90
-87
lines changed

1 file changed

+90
-87
lines changed

R/bmerge.R

Lines changed: 90 additions & 87 deletions
Original file line numberDiff line numberDiff line change
@@ -35,99 +35,102 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos
3535
}
3636

3737
if (nrow(i)) {
38-
xhead = x[0, ..xcols]
39-
ihead = i[0, ..icols]
40-
xtypes = vapply_1c(xhead, getClass)
41-
itypes = vapply_1c(ihead, getClass)
42-
for (a in seq_along(icols)) {
43-
# - check that join columns have compatible types
44-
# - do type coercions if necessary on just the shallow local copies for the purpose of join
45-
# - handle factor columns appropriately
46-
# Note that if i is keyed and a coercion is applied here, i's key gets dropped by set()
47-
ic = icols[a]
48-
xc = xcols[a]
49-
xtype = xtypes[a]
50-
itype = itypes[a]
51-
xname = paste0("x.", names(xhead)[a])
52-
iname = paste0("i.", names(ihead)[a])
53-
if (!xtype %chin% supported) stopf("%s is type %s which is not supported by data.table join", xname, xtype)
54-
if (!itype %chin% supported) stopf("%s is type %s which is not supported by data.table join", iname, itype)
55-
if (xtype=="factor" || itype=="factor") {
56-
if (roll!=0.0 && a==length(icols))
57-
stopf("Attempting roll join on factor column when joining %s to %s. Only integer, double or character columns may be roll joined.", xname, iname)
58-
if (xtype=="factor" && itype=="factor") {
59-
if (verbose) catf("Matching %s factor levels to %s factor levels.\n", iname, xname)
60-
set(i, j=ic, value=chmatch(levels(i[[ic]]), levels(x[[xc]]), nomatch=0L)[i[[ic]]]) # nomatch=0L otherwise a level that is missing would match to NA values
61-
next
62-
} else {
63-
if (xtype=="character") {
64-
if (verbose) catf("Coercing factor column %s to type character to match type of %s.\n", iname, xname)
65-
set(i, j=ic, value=val<-as.character(i[[ic]]))
66-
set(callersi, j=ic, value=val) # factor in i joining to character in x will return character and not keep x's factor; e.g. for antaresRead #3581
67-
next
68-
} else if (itype=="character") {
69-
if (verbose) catf("Matching character column %s to factor levels in %s.\n", iname, xname)
70-
newvalue = chmatch(i[[ic]], levels(x[[xc]]), nomatch=0L)
71-
if (anyNA(i[[ic]])) newvalue[is.na(i[[ic]])] = NA_integer_ # NA_character_ should match to NA in factor, #3809
72-
set(i, j=ic, value=newvalue)
38+
x_merge_types = vapply_1c(x[0L, ..xcols], getClass)
39+
i_merge_types = vapply_1c(x[0L, ..icols], getClass)
40+
xnames = paste0("x.", names(x)[xcols])
41+
inames = paste0("i.", names(i)[icols])
42+
for (a in seq_along(icols)) {
43+
# - check that join columns have compatible types
44+
# - do type coercions if necessary on just the shallow local copies for the purpose of join
45+
# - handle factor columns appropriately
46+
# Note that if i is keyed and a coercion is applied here, i's key gets dropped by set()
47+
ic = icols[a]
48+
xc = xcols[a]
49+
x_merge_type = x_merge_types[a]
50+
i_merge_type = i_merge_types[a]
51+
xname = xnames[a]
52+
iname = inames[a]
53+
if (!x_merge_type %chin% supported) stopf("%s is type %s which is not supported by data.table join", xname, x_merge_type)
54+
if (!i_merge_type %chin% supported) stopf("%s is type %s which is not supported by data.table join", iname, i_merge_type)
55+
if (x_merge_type=="factor" || i_merge_type=="factor") {
56+
if (roll!=0.0 && a==length(icols))
57+
stopf("Attempting roll join on factor column when joining %s to %s. Only integer, double or character columns may be roll joined.", xname, iname)
58+
if (x_merge_type=="factor" && i_merge_type=="factor") {
59+
if (verbose) catf("Matching %s factor levels to %s factor levels.\n", iname, xname)
60+
set(i, j=ic, value=chmatch(levels(i[[ic]]), levels(x[[xc]]), nomatch=0L)[i[[ic]]]) # nomatch=0L otherwise a level that is missing would match to NA values
61+
next
62+
} else {
63+
if (x_merge_type=="character") {
64+
if (verbose) catf("Coercing factor column %s to type character to match type of %s.\n", iname, xname)
65+
set(i, j=ic, value=val<-as.character(i[[ic]]))
66+
set(callersi, j=ic, value=val) # factor in i joining to character in x will return character and not keep x's factor; e.g. for antaresRead #3581
67+
next
68+
} else if (i_merge_type=="character") {
69+
if (verbose) catf("Matching character column %s to factor levels in %s.\n", iname, xname)
70+
newvalue = chmatch(i[[ic]], levels(x[[xc]]), nomatch=0L)
71+
if (anyNA(i[[ic]])) newvalue[is.na(i[[ic]])] = NA_integer_ # NA_character_ should match to NA in factor, #3809
72+
set(i, j=ic, value=newvalue)
73+
next
74+
}
75+
}
76+
stopf("Incompatible join types: %s (%s) and %s (%s). Factor columns must join to factor or character columns.", xname, x_merge_type, iname, i_merge_type)
77+
}
78+
if (x_merge_type == i_merge_type) {
79+
if (verbose) catf("%s has same type (%s) as %s. No coercion needed.\n", iname, x_merge_type, xname)
7380
next
7481
}
75-
}
76-
stopf("Incompatible join types: %s (%s) and %s (%s). Factor columns must join to factor or character columns.", xname, xtype, iname, itype)
77-
}
78-
if (xtype == itype) {
79-
if (anyDuplicated(icols) && !all() && duplicated(icols, fromLast=TRUE)[a]) {
80-
set(x, j=xc, value=as.double(x[[xc]]))
81-
set(i, j=ic, value=as.double(i[[ic]]))
82-
if (verbose) catf("%s and %s are both Dates. R does not guarentee a type for Date internally, hence, coercing to double.\n", iname, xname)
83-
} else {
84-
if (verbose) catf("%s has same type (%s) as %s. No coercion needed.\n", iname, xtype, xname)
85-
}
86-
next
87-
}
88-
if (xtype=="character" || itype=="character" ||
89-
xtype=="logical" || itype=="logical" ||
90-
xtype=="factor" || itype=="factor") {
91-
if (anyNA(i[[ic]]) && allNA(i[[ic]])) {
92-
if (verbose) catf("Coercing all-NA %s (%s) to type %s to match type of %s.\n", iname, itype, xtype, xname)
93-
set(i, j=ic, value=match.fun(paste0("as.", xtype))(i[[ic]]))
94-
next
95-
}
96-
else if (anyNA(x[[xc]]) && allNA(x[[xc]])) {
97-
if (verbose) catf("Coercing all-NA %s (%s) to type %s to match type of %s.\n", xname, xtype, itype, iname)
98-
set(x, j=xc, value=match.fun(paste0("as.", itype))(x[[xc]]))
99-
next
100-
}
101-
stopf("Incompatible join types: %s (%s) and %s (%s)", xname, xtype, iname, itype)
102-
}
103-
if (xtype=="integer64" || itype=="integer64") {
104-
nm = c(iname, xname)
105-
if (xtype=="integer64") { w=i; wc=ic; wclass=itype; } else { w=x; wc=xc; wclass=xtype; nm=rev(nm) } # w is which to coerce
106-
if (wclass=="integer" || (wclass=="double" && !isReallyReal(w[[wc]]))) {
107-
if (verbose) catf("Coercing %s column %s%s to type integer64 to match type of %s.\n", wclass, nm[1L], if (wclass=="double") " (which contains no fractions)" else "", nm[2L])
108-
set(w, j=wc, value=bit64::as.integer64(w[[wc]]))
109-
} else stopf("Incompatible join types: %s is type integer64 but %s is type double and contains fractions", nm[2L], nm[1L])
110-
} else {
111-
# just integer and double left
112-
if (itype=="double") {
113-
if (!isReallyReal(i[[ic]])) {
114-
# common case of ad hoc user-typed integers missing L postfix joining to correct integer keys
115-
# we've always coerced to int and returned int, for convenience.
116-
if (verbose) catf("Coercing double column %s (which contains no fractions) to type integer to match type of %s.\n", iname, xname)
117-
val = as.integer(i[[ic]])
118-
if (!is.null(attributes(i[[ic]]))) attributes(val) = attributes(i[[ic]]) # to retain Date for example; 3679
119-
set(i, j=ic, value=val)
120-
set(callersi, j=ic, value=val) # change the shallow copy of i up in [.data.table to reflect in the result, too.
82+
if (x_merge_type=="character" || i_merge_type=="character" ||
83+
x_merge_type=="logical" || i_merge_type=="logical" ||
84+
x_merge_type=="factor" || i_merge_type=="factor") {
85+
if (anyNA(i[[ic]]) && allNA(i[[ic]])) {
86+
if (verbose) catf("Coercing all-NA %s (%s) to type %s to match type of %s.\n", iname, i_merge_type, x_merge_type, xname)
87+
set(i, j=ic, value=match.fun(paste0("as.", x_merge_type))(i[[ic]]))
88+
next
89+
}
90+
else if (anyNA(x[[xc]]) && allNA(x[[xc]])) {
91+
if (verbose) catf("Coercing all-NA %s (%s) to type %s to match type of %s.\n", xname, x_merge_type, i_merge_type, iname)
92+
set(x, j=xc, value=match.fun(paste0("as.", i_merge_type))(x[[xc]]))
93+
next
94+
}
95+
stopf("Incompatible join types: %s (%s) and %s (%s)", xname, x_merge_type, iname, i_merge_type)
96+
}
97+
if (x_merge_type=="integer64" || i_merge_type=="integer64") {
98+
nm = c(iname, xname)
99+
if (x_merge_type=="integer64") { w=i; wc=ic; wclass=i_merge_type; } else { w=x; wc=xc; wclass=x_merge_type; nm=rev(nm) } # w is which to coerce
100+
if (wclass=="integer" || (wclass=="double" && !isReallyReal(w[[wc]]))) {
101+
if (verbose) catf("Coercing %s column %s%s to type integer64 to match type of %s.\n", wclass, nm[1L], if (wclass=="double") " (which contains no fractions)" else "", nm[2L])
102+
set(w, j=wc, value=bit64::as.integer64(w[[wc]]))
103+
} else stopf("Incompatible join types: %s is type integer64 but %s is type double and contains fractions", nm[2L], nm[1L])
121104
} else {
122-
if (verbose) catf("Coercing integer column %s to type double to match type of %s which contains fractions.\n", xname, iname)
123-
set(x, j=xc, value=as.double(x[[xc]]))
105+
# just integer and double left
106+
if (i_merge_type=="double") {
107+
if (!isReallyReal(i[[ic]])) {
108+
# common case of ad hoc user-typed integers missing L postfix joining to correct integer keys
109+
# we've always coerced to int and returned int, for convenience.
110+
if (verbose) catf("Coercing double column %s (which contains no fractions) to type integer to match type of %s.\n", iname, xname)
111+
val = as.integer(i[[ic]])
112+
if (!is.null(attributes(i[[ic]]))) attributes(val) = attributes(i[[ic]]) # to retain Date for example; 3679
113+
set(i, j=ic, value=val)
114+
set(callersi, j=ic, value=val) # change the shallow copy of i up in [.data.table to reflect in the result, too.
115+
} else {
116+
if (verbose) catf("Coercing integer column %s to type double to match type of %s which contains fractions.\n", xname, iname)
117+
set(x, j=xc, value=as.double(x[[xc]]))
118+
}
119+
} else {
120+
if (verbose) catf("Coercing integer column %s to type double for join to match type of %s.\n", iname, xname)
121+
set(i, j=ic, value=as.double(i[[ic]]))
122+
ic_idx = which(ic == icols)
123+
if (length(ic_idx)>1) {
124+
for (b in which(x_merge_types[ic_idx] != "double")) {
125+
xb = xcols[b]
126+
if (verbose) catf("Coercing integer column %s to type double for join to match type of %s.\n", xnames[b], xname)
127+
set(x, j=xb, value=as.double(x[[xb]]))
128+
}
129+
}
130+
}
124131
}
125-
} else {
126-
if (verbose) catf("Coercing integer column %s to type double for join to match type of %s.\n", iname, xname)
127-
set(i, j=ic, value=as.double(i[[ic]]))
128132
}
129133
}
130-
}}
131134

132135
## after all modifications of x, check if x has a proper key on all xcols.
133136
## If not, calculate the order. Also for non-equi joins, the order must be calculated.

0 commit comments

Comments
 (0)