Skip to content

Commit e275acf

Browse files
committed
move verbosity to coerce_col, update tests
1 parent a9cd0bf commit e275acf

File tree

2 files changed

+29
-28
lines changed

2 files changed

+29
-28
lines changed

R/bmerge.R

Lines changed: 19 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -11,12 +11,16 @@ mergeType = function(x) {
1111

1212
cast_with_atts = function(x, as.f) {
1313
ans = as.f(x)
14-
if (!is.null(attributes(x))) attributes(ans) = attributes(x)
14+
# do not copy attributes when coercing factor (to character)
15+
if (!is.factor(x) && !is.null(attributes(x))) attributes(ans) = attributes(x)
1516
ans
1617
}
1718

18-
coerce_col = function(dt, col, from_type, to_type, from_name, to_name, verbose_msg=NULL) {
19-
if (!is.null(verbose_msg)) catf(verbose_msg, from_type, from_name, to_type, to_name, domain=NULL)
19+
coerce_col = function(dt, col, from_type, to_type, from_name, to_name, from_detail = "", to_detail = "", verbose) {
20+
if (verbose) catf(
21+
"Coercing %s column %s%s to type %s to match type of %s%s.\n",
22+
from_type, from_name, from_detail, to_type, to_name, to_detail
23+
)
2024
set(dt, j=col, value=cast_with_atts(dt[[col]], match.fun(paste0("as.", to_type))))
2125
}
2226

@@ -68,9 +72,8 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos
6872
next
6973
} else {
7074
if (x_merge_type=="character") {
71-
if (verbose) catf("Coercing factor column %s to type character to match type of %s.\n", iname, xname)
72-
set(i, j=icol, value=val<-as.character(i[[icol]]))
73-
set(callersi, j=icol, value=val) # factor in i joining to character in x will return character and not keep x's factor; e.g. for antaresRead #3581
75+
coerce_col(i, icol, "factor", "character", iname, xname, verbose = verbose)
76+
set(callersi, j=icol, value=i[[icol]]) # factor in i joining to character in x will return character and not keep x's factor; e.g. for antaresRead #3581
7477
next
7578
} else if (i_merge_type=="character") {
7679
if (verbose) catf("Matching character column %s to factor levels in %s.\n", iname, xname)
@@ -89,13 +92,12 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos
8992
}
9093
cfl = c("character", "logical", "factor")
9194
if (x_merge_type %chin% cfl || i_merge_type %chin% cfl) {
92-
msg = if(verbose) gettext("Coercing all-NA %s column %s to type %s to match type of %s.\n") else NULL
9395
if (anyNA(i[[icol]]) && allNA(i[[icol]])) {
94-
coerce_col(i, icol, i_merge_type, x_merge_type, iname, xname, msg)
96+
coerce_col(i, icol, i_merge_type, x_merge_type, iname, xname, gettext(" (all-NA)"), verbose = verbose)
9597
next
9698
}
9799
if (anyNA(x[[xcol]]) && allNA(x[[xcol]])) {
98-
coerce_col(x, xcol, x_merge_type, i_merge_type, xname, iname, msg)
100+
coerce_col(x, xcol, x_merge_type, i_merge_type, xname, iname, gettext(" (all-NA)"), verbose = verbose)
99101
next
100102
}
101103
stopf("Incompatible join types: %s (%s) and %s (%s)", xname, x_merge_type, iname, i_merge_type)
@@ -104,8 +106,8 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos
104106
nm = c(iname, xname)
105107
if (x_merge_type=="integer64") { w=i; wc=icol; wclass=i_merge_type; } else { w=x; wc=xcol; wclass=x_merge_type; nm=rev(nm) } # w is which to coerce
106108
if (wclass=="integer" || (wclass=="double" && fitsInInt64(w[[wc]]))) {
107-
if (verbose) catf("Coercing %s column %s%s to type integer64 to match type of %s.\n", wclass, nm[1L], if (wclass=="double") " (which has integer64 representation, e.g. no fractions)" else "", nm[2L])
108-
set(w, j=wc, value=bit64::as.integer64(w[[wc]]))
109+
from_det_msg = if (wclass == "double") gettext(" (which has integer64 representation, e.g. no fractions)") else ""
110+
coerce_col(w, wc, wclass, "integer64", nm[1L], nm[2L], from_det_msg, verbose = verbose)
109111
} else stopf("Incompatible join types: %s is type integer64 but %s is type double and cannot be coerced to integer64 (e.g. has fractions)", nm[2L], nm[1L])
110112
} else {
111113
# just integer and double left
@@ -126,28 +128,26 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos
126128
}
127129
}
128130
if (coerce_x) {
129-
msg = if (verbose) gettext("Coercing %s column %s (which contains no fractions) to type %s to match type of %s.\n") else NULL
130-
coerce_col(i, icol, "double", "integer", iname, xname, msg)
131+
from_det_msg = gettext(" (which contains no fractions)")
132+
coerce_col(i, icol, "double", "integer", iname, xname, from_det_msg, verbose = verbose)
131133
set(callersi, j=icol, value=i[[icol]]) # change the shallow copy of i up in [.data.table to reflect in the result, too.
132134
if (length(ic_idx)>1L) {
133135
xc_idx = xcols[ic_idx]
134136
for (xb in xc_idx[which(vapply_1c(.shallow(x, xc_idx), mergeType) == "double")]) {
135-
coerce_col(x, xb, "double", "integer", paste0("x.", names(x)[xb]), xname, msg)
137+
coerce_col(x, xb, "double", "integer", paste0("x.", names(x)[xb]), xname, from_det_msg, verbose = verbose)
136138
}
137139
}
138140
}
139141
}
140142
if (!coerce_x) {
141-
msg = if (verbose) gettext("Coercing %s column %s to type %s to match type of %s which contains fractions.\n") else NULL
142-
coerce_col(x, xcol, "integer", "double", xname, iname, msg)
143+
coerce_col(x, xcol, "integer", "double", xname, iname, , gettext(" (which contains fractions)"), verbose = verbose)
143144
}
144145
} else {
145-
msg = if (verbose) gettext("Coercing %s column %s to type %s for join to match type of %s.\n") else NULL
146-
coerce_col(i, icol, "integer", "double", iname, xname, msg)
146+
coerce_col(i, icol, "integer", "double", iname, xname, gettext(" (for join)"), verbose = verbose)
147147
if (length(ic_idx)>1L) {
148148
xc_idx = xcols[ic_idx]
149149
for (xb in xc_idx[which(vapply_1c(.shallow(x, xc_idx), mergeType) == "integer")]) {
150-
coerce_col(x, xb, "integer", "double", paste0("x.", names(x)[xb]), xname, msg)
150+
coerce_col(x, xb, "integer", "double", paste0("x.", names(x)[xb]), xname, verbose = verbose)
151151
}
152152
}
153153
}

inst/tests/tests.Rraw

Lines changed: 10 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -15148,13 +15148,13 @@ test(2044.60, dt1[dt2, ..cols, on="int==doubleInt", verbose=TRUE],
1514815148
test(2044.61, dt1[dt2, ..cols, on="int==realDouble", verbose=TRUE], # this was wrong in v1.12.2 (the fractions were truncated and joined to next lowest int)
1514915149
data.table(x.bool=c(NA,FALSE,NA,FALSE,NA), x.int=INT(NA,1,NA,2,NA), x.doubleInt=c(NA,1,NA,2,NA),
1515015150
i.bool=TRUE, i.int=1:5, i.doubleInt=as.double(1:5), i.char=letters[1:5]),
15151-
output="Coercing integer column x.int to type double to match type of i.realDouble which contains fractions")
15151+
output="Coercing integer column x.int to type double to match type of i.realDouble .which contains fractions.")
1515215152
test(2044.62, dt1[dt2, ..cols, on="doubleInt==int", verbose=TRUE],
1515315153
data.table(x.bool=FALSE, x.int=1:5, x.doubleInt=as.double(1:5), i.bool=TRUE, i.int=1:5, i.doubleInt=as.double(1:5), i.char=letters[1:5]),
15154-
output="Coercing integer column i.int to type double for join to match type of x.doubleInt")
15154+
output="Coercing integer column i.int .for join. to type double to match type of x.doubleInt")
1515515155
test(2044.63, dt1[dt2, ..cols, on="realDouble==int", verbose=TRUE],
1515615156
data.table(x.bool=c(rep(FALSE,4),TRUE), x.int=INT(2,4,6,8,10), x.doubleInt=c(2,4,6,8,10), i.bool=TRUE, i.int=1:5, i.doubleInt=as.double(1:5), i.char=letters[1:5]),
15157-
output="Coercing integer column i.int to type double for join to match type of x.realDouble")
15157+
output="Coercing integer column i.int .for join. to type double to match type of x.realDouble")
1515815158
cols = c("x.int","x.char","x.fact","i.int","i.char","i.char")
1515915159
test(2044.64, dt1[dt2, ..cols, on="char==fact", verbose=TRUE],
1516015160
ans<-data.table(x.int=1:5, x.char=letters[1:5], x.fact=factor(letters[1:5]), i.int=1:5, i.char=letters[1:5], i.char=letters[1:5]),
@@ -15189,15 +15189,15 @@ if (test_bit64) {
1518915189
dt1 = data.table(a=1, b=NA_character_)
1519015190
dt2 = data.table(a=2L, b=NA)
1519115191
test(2044.80, dt1[dt2, on="a==b", verbose=TRUE], data.table(a=NA, b=NA_character_, i.a=2L),
15192-
output=msg<-"Coercing all-NA logical column i.b to type double to match type of x.a")
15192+
output=msg<-"Coercing logical column i.b .all-NA. to type double to match type of x.a")
1519315193
test(2044.81, dt1[dt2, on="a==b", nomatch=0L, verbose=TRUE], data.table(a=logical(), b=character(), i.a=integer()),
1519415194
output=msg)
1519515195
test(2044.82, dt1[dt2, on="b==b", verbose=TRUE], data.table(a=1, b=NA, i.a=2L),
15196-
output=msg<-"Coercing all-NA logical column i.b to type character to match type of x.b")
15196+
output=msg<-"Coercing logical column i.b .all-NA. to type character to match type of x.b")
1519715197
test(2044.83, dt1[dt2, on="b==b", nomatch=0L, verbose=TRUE], data.table(a=1, b=NA, i.a=2L),
1519815198
output=msg)
1519915199
test(2044.84, dt1[dt2, on="b==a", verbose=TRUE], data.table(a=NA_real_, b=2L, i.b=NA),
15200-
output=msg<-"Coercing all-NA character column x.b to type integer to match type of i.a")
15200+
output=msg<-"Coercing character column x.b .all-NA. to type integer to match type of i.a")
1520115201
test(2044.85, dt1[dt2, on="b==a", nomatch=0L, verbose=TRUE], data.table(a=double(), b=integer(), i.b=logical()),
1520215202
output=msg)
1520315203

@@ -15624,7 +15624,7 @@ i = data.table(date = dbl_date, key = 'date')
1562415624
test(2064.1, x[i, class(date), verbose=TRUE], 'Date',
1562515625
output="Coercing double column i.date (which contains no fractions) to type integer to match type of x.date")
1562615626
test(2064.2, i[x, class(date), verbose=TRUE], 'Date',
15627-
output="Coercing integer column i.date to type double for join to match type of x.date")
15627+
output="Coercing integer column i.date .for join. to type double to match type of x.date")
1562815628

1562915629
# complex values in grouping, #3639
1563015630
set.seed(42)
@@ -20634,8 +20634,9 @@ test(2296, d2[x %no such operator% 1], error = '%no such operator%')
2063420634
# fix coercing integer/double for joins on multiple columns, #6602
2063520635
x = data.table(a=1L)
2063620636
y = data.table(c=1L, d=1)
20637-
test(2297.01, y[x, on=.(c == a, d == a), verbose=TRUE], data.table(c=1L, d=1L), output="Coercing .*a to type double.*Coercing .*c to type double")
20638-
test(2297.02, y[x, on=.(d == a, c == a), verbose=TRUE], data.table(c=1L, d=1L), output="Coercing .*a to type double.*Coercing .*c to type double")
20637+
# 2297.01 and 02: previous test was "Coercing .*a to type double.*Coercing .*c to type double" in which 2nd ".*" silently captures "for join"
20638+
test(2297.01, y[x, on=.(c == a, d == a), verbose=TRUE], data.table(c=1L, d=1L), output="Coercing .*a (.for join.)? to type double.*Coercing .*c to type double")
20639+
test(2297.02, y[x, on=.(d == a, c == a), verbose=TRUE], data.table(c=1L, d=1L), output="Coercing .*a (.for join.)? to type double.*Coercing .*c to type double")
2063920640
x = data.table(a=1)
2064020641
y = data.table(c=1, d=1L)
2064120642
test(2297.03, y[x, on=.(c == a, d == a), verbose=TRUE], data.table(c=1L, d=1L), output="Coercing .*a .*no fractions.* to type integer.*Coercing .*c .*no fractions.* to type integer")

0 commit comments

Comments
 (0)