Skip to content

Commit b739ab2

Browse files
Normalizing verbose messages in bmerge.R (#6728)
* move verbosity to coerce_col, update tests
* do not match.fun as.integer64, pull 'factor' cond from cast_with_atts to coerce_col
* delint
* delint
* simplify coerce_col and cast_with_attrs further
* style+readability changes; attempt to fix possible mistaken use of from_detail=
* remove ()? in for join msgs
* fix arg name
* fully normalize useFancyQuotes
* Revert "fully normalize useFancyQuotes"

  This reverts commit 8d50c3a.
* one-off handling of useFancyQuotes instead

---------

Co-authored-by: Michael Chirico <[email protected]>
Co-authored-by: Michael Chirico <[email protected]>
1 parent 3aeaedf commit b739ab2

File tree

2 files changed

+34
-32
lines changed

2 files changed

+34
-32
lines changed

R/bmerge.R

Lines changed: 23 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -9,15 +9,20 @@ mergeType = function(x) {
99
ans
1010
}
1111

12-
cast_with_atts = function(x, as.f) {
13-
ans = as.f(x)
14-
if (!is.null(attributes(x))) attributes(ans) = attributes(x)
12+
cast_with_attrs = function(x, cast_fun) {
13+
ans = cast_fun(x)
14+
# do not copy attributes when coercing factor (to character)
15+
if (!is.factor(x) && !is.null(attributes(x))) attributes(ans) = attributes(x)
1516
ans
1617
}
1718

18-
coerce_col = function(dt, col, from_type, to_type, from_name, to_name, verbose_msg=NULL) {
19-
if (!is.null(verbose_msg)) catf(verbose_msg, from_type, from_name, to_type, to_name, domain=NULL)
20-
set(dt, j=col, value=cast_with_atts(dt[[col]], match.fun(paste0("as.", to_type))))
19+
coerce_col = function(dt, col, from_type, to_type, from_name, to_name, from_detail="", to_detail="", verbose) {
20+
if (verbose) catf(
21+
"Coercing %s column %s%s to type %s to match type of %s%s.\n",
22+
from_type, from_name, from_detail, to_type, to_name, to_detail
23+
)
24+
cast_fun = switch(to_type, integer64 = bit64::as.integer64, match.fun(paste0("as.", to_type)))
25+
set(dt, j=col, value=cast_with_attrs(dt[[col]], cast_fun))
2126
}
2227

2328
bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbose)
@@ -68,9 +73,8 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos
6873
next
6974
} else {
7075
if (x_merge_type=="character") {
71-
if (verbose) catf("Coercing factor column %s to type character to match type of %s.\n", iname, xname)
72-
set(i, j=icol, value=val<-as.character(i[[icol]]))
73-
set(callersi, j=icol, value=val) # factor in i joining to character in x will return character and not keep x's factor; e.g. for antaresRead #3581
76+
coerce_col(i, icol, "factor", "character", iname, xname, verbose=verbose)
77+
set(callersi, j=icol, value=i[[icol]]) # factor in i joining to character in x will return character and not keep x's factor; e.g. for antaresRead #3581
7478
next
7579
} else if (i_merge_type=="character") {
7680
if (verbose) catf("Matching character column %s to factor levels in %s.\n", iname, xname)
@@ -89,13 +93,12 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos
8993
}
9094
cfl = c("character", "logical", "factor")
9195
if (x_merge_type %chin% cfl || i_merge_type %chin% cfl) {
92-
msg = if(verbose) gettext("Coercing all-NA %s column %s to type %s to match type of %s.\n") else NULL
9396
if (anyNA(i[[icol]]) && allNA(i[[icol]])) {
94-
coerce_col(i, icol, i_merge_type, x_merge_type, iname, xname, msg)
97+
coerce_col(i, icol, i_merge_type, x_merge_type, iname, xname, from_detail=gettext(" (all-NA)"), verbose=verbose)
9598
next
9699
}
97100
if (anyNA(x[[xcol]]) && allNA(x[[xcol]])) {
98-
coerce_col(x, xcol, x_merge_type, i_merge_type, xname, iname, msg)
101+
coerce_col(x, xcol, x_merge_type, i_merge_type, xname, iname, from_detail=gettext(" (all-NA)"), verbose=verbose)
99102
next
100103
}
101104
stopf("Incompatible join types: %s (%s) and %s (%s)", xname, x_merge_type, iname, i_merge_type)
@@ -104,8 +107,8 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos
104107
nm = c(iname, xname)
105108
if (x_merge_type=="integer64") { w=i; wc=icol; wclass=i_merge_type; } else { w=x; wc=xcol; wclass=x_merge_type; nm=rev(nm) } # w is which to coerce
106109
if (wclass=="integer" || (wclass=="double" && fitsInInt64(w[[wc]]))) {
107-
if (verbose) catf("Coercing %s column %s%s to type integer64 to match type of %s.\n", wclass, nm[1L], if (wclass=="double") " (which has integer64 representation, e.g. no fractions)" else "", nm[2L])
108-
set(w, j=wc, value=bit64::as.integer64(w[[wc]]))
110+
from_detail = if (wclass == "double") gettext(" (which has integer64 representation, e.g. no fractions)") else ""
111+
coerce_col(w, wc, wclass, "integer64", nm[1L], nm[2L], from_detail, verbose=verbose)
109112
} else stopf("Incompatible join types: %s is type integer64 but %s is type double and cannot be coerced to integer64 (e.g. has fractions)", nm[2L], nm[1L])
110113
} else {
111114
# just integer and double left
@@ -126,28 +129,26 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos
126129
}
127130
}
128131
if (coerce_x) {
129-
msg = if (verbose) gettext("Coercing %s column %s (which contains no fractions) to type %s to match type of %s.\n") else NULL
130-
coerce_col(i, icol, "double", "integer", iname, xname, msg)
132+
from_detail = gettext(" (which contains no fractions)")
133+
coerce_col(i, icol, "double", "integer", iname, xname, from_detail, verbose=verbose)
131134
set(callersi, j=icol, value=i[[icol]]) # change the shallow copy of i up in [.data.table to reflect in the result, too.
132135
if (length(ic_idx)>1L) {
133136
xc_idx = xcols[ic_idx]
134137
for (xb in xc_idx[which(vapply_1c(.shallow(x, xc_idx), mergeType) == "double")]) {
135-
coerce_col(x, xb, "double", "integer", paste0("x.", names(x)[xb]), xname, msg)
138+
coerce_col(x, xb, "double", "integer", paste0("x.", names(x)[xb]), xname, from_detail, verbose=verbose)
136139
}
137140
}
138141
}
139142
}
140143
if (!coerce_x) {
141-
msg = if (verbose) gettext("Coercing %s column %s to type %s to match type of %s which contains fractions.\n") else NULL
142-
coerce_col(x, xcol, "integer", "double", xname, iname, msg)
144+
coerce_col(x, xcol, "integer", "double", xname, iname, to_detail=gettext(" (which contains fractions)"), verbose=verbose)
143145
}
144146
} else {
145-
msg = if (verbose) gettext("Coercing %s column %s to type %s for join to match type of %s.\n") else NULL
146-
coerce_col(i, icol, "integer", "double", iname, xname, msg)
147+
coerce_col(i, icol, "integer", "double", iname, xname, from_detail=gettext(" (for join)"), verbose=verbose)
147148
if (length(ic_idx)>1L) {
148149
xc_idx = xcols[ic_idx]
149150
for (xb in xc_idx[which(vapply_1c(.shallow(x, xc_idx), mergeType) == "integer")]) {
150-
coerce_col(x, xb, "integer", "double", paste0("x.", names(x)[xb]), xname, msg)
151+
coerce_col(x, xb, "integer", "double", paste0("x.", names(x)[xb]), xname, verbose=verbose)
151152
}
152153
}
153154
}

inst/tests/tests.Rraw

Lines changed: 11 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -10994,7 +10994,8 @@ DT = data.table(
1099410994
D = as.POSIXct(dt<-paste(d,t), tz="UTC"),
1099510995
E = as.POSIXct(paste0(dt,c(".999",".0",".5",".111112",".123456",".023",".0",".999999",".99",".0009")), tz="UTC"))
1099610996

10997-
test(1740.1, fwrite(DT,dateTimeAs="iso"), error=base_messages$match_arg_4_choices("ISO", "squash", "epoch", "write.csv"))
10997+
test(1740.1, options=c(useFancyQuotes=FALSE), fwrite(DT,dateTimeAs="iso"),
10998+
error=base_messages$match_arg_4_choices("ISO", "squash", "epoch", "write.csv"))
1099810999
test(1740.2, fwrite(DT,dateTimeAs=c("ISO","squash")), error=base_messages$match_arg_length)
1099911000
test(1740.3, capture.output(fwrite(DT,dateTimeAs="ISO")), c(
1100011001
"A,B,C,D,E",
@@ -15165,13 +15166,13 @@ test(2044.60, dt1[dt2, ..cols, on="int==doubleInt", verbose=TRUE],
1516515166
test(2044.61, dt1[dt2, ..cols, on="int==realDouble", verbose=TRUE], # this was wrong in v1.12.2 (the fractions were truncated and joined to next lowest int)
1516615167
data.table(x.bool=c(NA,FALSE,NA,FALSE,NA), x.int=INT(NA,1,NA,2,NA), x.doubleInt=c(NA,1,NA,2,NA),
1516715168
i.bool=TRUE, i.int=1:5, i.doubleInt=as.double(1:5), i.char=letters[1:5]),
15168-
output="Coercing integer column x.int to type double to match type of i.realDouble which contains fractions")
15169+
output="Coercing integer column x.int to type double to match type of i.realDouble .which contains fractions.")
1516915170
test(2044.62, dt1[dt2, ..cols, on="doubleInt==int", verbose=TRUE],
1517015171
data.table(x.bool=FALSE, x.int=1:5, x.doubleInt=as.double(1:5), i.bool=TRUE, i.int=1:5, i.doubleInt=as.double(1:5), i.char=letters[1:5]),
15171-
output="Coercing integer column i.int to type double for join to match type of x.doubleInt")
15172+
output="Coercing integer column i.int .for join. to type double to match type of x.doubleInt")
1517215173
test(2044.63, dt1[dt2, ..cols, on="realDouble==int", verbose=TRUE],
1517315174
data.table(x.bool=c(rep(FALSE,4),TRUE), x.int=INT(2,4,6,8,10), x.doubleInt=c(2,4,6,8,10), i.bool=TRUE, i.int=1:5, i.doubleInt=as.double(1:5), i.char=letters[1:5]),
15174-
output="Coercing integer column i.int to type double for join to match type of x.realDouble")
15175+
output="Coercing integer column i.int .for join. to type double to match type of x.realDouble")
1517515176
cols = c("x.int","x.char","x.fact","i.int","i.char","i.char")
1517615177
test(2044.64, dt1[dt2, ..cols, on="char==fact", verbose=TRUE],
1517715178
ans<-data.table(x.int=1:5, x.char=letters[1:5], x.fact=factor(letters[1:5]), i.int=1:5, i.char=letters[1:5], i.char=letters[1:5]),
@@ -15206,15 +15207,15 @@ if (test_bit64) {
1520615207
dt1 = data.table(a=1, b=NA_character_)
1520715208
dt2 = data.table(a=2L, b=NA)
1520815209
test(2044.80, dt1[dt2, on="a==b", verbose=TRUE], data.table(a=NA, b=NA_character_, i.a=2L),
15209-
output=msg<-"Coercing all-NA logical column i.b to type double to match type of x.a")
15210+
output=msg<-"Coercing logical column i.b .all-NA. to type double to match type of x.a")
1521015211
test(2044.81, dt1[dt2, on="a==b", nomatch=0L, verbose=TRUE], data.table(a=logical(), b=character(), i.a=integer()),
1521115212
output=msg)
1521215213
test(2044.82, dt1[dt2, on="b==b", verbose=TRUE], data.table(a=1, b=NA, i.a=2L),
15213-
output=msg<-"Coercing all-NA logical column i.b to type character to match type of x.b")
15214+
output=msg<-"Coercing logical column i.b .all-NA. to type character to match type of x.b")
1521415215
test(2044.83, dt1[dt2, on="b==b", nomatch=0L, verbose=TRUE], data.table(a=1, b=NA, i.a=2L),
1521515216
output=msg)
1521615217
test(2044.84, dt1[dt2, on="b==a", verbose=TRUE], data.table(a=NA_real_, b=2L, i.b=NA),
15217-
output=msg<-"Coercing all-NA character column x.b to type integer to match type of i.a")
15218+
output=msg<-"Coercing character column x.b .all-NA. to type integer to match type of i.a")
1521815219
test(2044.85, dt1[dt2, on="b==a", nomatch=0L, verbose=TRUE], data.table(a=double(), b=integer(), i.b=logical()),
1521915220
output=msg)
1522015221

@@ -15641,7 +15642,7 @@ i = data.table(date = dbl_date, key = 'date')
1564115642
test(2064.1, x[i, class(date), verbose=TRUE], 'Date',
1564215643
output="Coercing double column i.date (which contains no fractions) to type integer to match type of x.date")
1564315644
test(2064.2, i[x, class(date), verbose=TRUE], 'Date',
15644-
output="Coercing integer column i.date to type double for join to match type of x.date")
15645+
output="Coercing integer column i.date .for join. to type double to match type of x.date")
1564515646

1564615647
# complex values in grouping, #3639
1564715648
set.seed(42)
@@ -20688,8 +20689,8 @@ test(2296, d2[x %no such operator% 1], error = '%no such operator%')
2068820689
# fix coercing integer/double for joins on multiple columns, #6602
2068920690
x = data.table(a=1L)
2069020691
y = data.table(c=1L, d=1)
20691-
test(2297.01, y[x, on=.(c == a, d == a), verbose=TRUE], data.table(c=1L, d=1L), output="Coercing .*a to type double.*Coercing .*c to type double")
20692-
test(2297.02, y[x, on=.(d == a, c == a), verbose=TRUE], data.table(c=1L, d=1L), output="Coercing .*a to type double.*Coercing .*c to type double")
20692+
test(2297.01, y[x, on=.(c == a, d == a), verbose=TRUE], data.table(c=1L, d=1L), output="Coercing .*a .for join. to type double.*Coercing .*c to type double")
20693+
test(2297.02, y[x, on=.(d == a, c == a), verbose=TRUE], data.table(c=1L, d=1L), output="Coercing .*a .for join. to type double.*Coercing .*c to type double")
2069320694
x = data.table(a=1)
2069420695
y = data.table(c=1, d=1L)
2069520696
test(2297.03, y[x, on=.(c == a, d == a), verbose=TRUE], data.table(c=1L, d=1L), output="Coercing .*a .*no fractions.* to type integer.*Coercing .*c .*no fractions.* to type integer")

0 commit comments

Comments
 (0)