Skip to content

Commit 0cf6ee7

Browse files
authored
Merge branch 'master' into issue6888
2 parents f6b24c7 + 8785804 commit 0cf6ee7

File tree

5 files changed

+35
-8
lines changed

5 files changed

+35
-8
lines changed

NEWS.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66

77
1. New `sort_by()` method for data.tables, [#6662](https://github.com/Rdatatable/data.table/issues/6662). It uses `forder()` to improve upon the data.frame method and also match `DT[order(...)]` behavior with respect to locale. Thanks @rikivillalba for the suggestion and PR.
88

9+
2. `melt()` now supports using `patterns()` with `id.vars`, [#6867](https://github.com/Rdatatable/data.table/issues/6867). Thanks to Toby Dylan Hocking for the suggestion and PR.
10+
911
## BUG FIXES
1012

1113
1. Custom binary operators from the `lubridate` package now work with objects of class `IDate` in the same way as with any other `Date` subclass, [#6839](https://github.com/Rdatatable/data.table/issues/6839). Thanks @emallickhossain for the report and @aitap for the fix.
@@ -20,6 +22,9 @@
2022

2123
6. By-reference sub-assignments to factor columns now match the levels in UTF-8, preventing their duplication when the same level exists in different encodings, [#6886](https://github.com/Rdatatable/data.table/issues/6886). Thanks @iagogv3 for the report and @aitap for the fix.
2224

25+
7. `fwrite()` now avoids a crash when translating strings into a different encoding, [#6883](https://github.com/Rdatatable/data.table/issues/6883). Thanks @filipemsc for the report and @aitap for the fix.
26+
27+
2328
## NOTES
2429

2530
1. Continued work to remove non-API C functions, [#6180](https://github.com/Rdatatable/data.table/issues/6180). Thanks Ivan Krylov for the PRs and for writing a clear and concise guide about the R API: https://aitap.codeberg.page/R-api/.

R/fmelt.R

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -182,13 +182,17 @@ melt.data.table = function(data, id.vars, measure.vars, variable.name = "variabl
182182
value.name = "value", ..., na.rm = FALSE, variable.factor = TRUE, value.factor = FALSE,
183183
verbose = getOption("datatable.verbose")) {
184184
if (!is.data.table(data)) stopf("'data' must be a data.table")
185-
if (missing(id.vars)) id.vars=NULL
186-
if (missing(measure.vars)) measure.vars = NULL
187-
measure.sub = substitute(measure.vars)
188-
if (is.call(measure.sub)) {
189-
eval.result = eval_with_cols(measure.sub, names(data))
190-
if (!is.null(eval.result)) {
191-
measure.vars = eval.result
185+
for(type.vars in c("id.vars","measure.vars")){
186+
sub.lang <- substitute({
187+
if (missing(VAR)) VAR=NULL
188+
substitute(VAR)
189+
}, list(VAR=as.symbol(type.vars)))
190+
sub.result = eval(sub.lang)
191+
if (is.call(sub.result)) {
192+
eval.result = eval_with_cols(sub.result, names(data))
193+
if (!is.null(eval.result)) {
194+
assign(type.vars, eval.result)
195+
}
192196
}
193197
}
194198
if (is.list(measure.vars)) {

R/fwrite.R

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,15 @@ fwrite = function(x, file="", append=FALSE, quote="auto",
111111
}
112112
# nocov end
113113
file = enc2native(file) # CfwriteR cannot handle UTF-8 if that is not the native encoding, see #3078.
114+
# pre-encode any strings or factor levels to avoid translateChar trying to allocate from OpenMP threads
115+
if (encoding %chin% c("UTF-8", "native")) {
116+
enc = switch(encoding, "UTF-8" = enc2utf8, "native" = enc2native)
117+
x = lapply(x, function(x) {
118+
if (is.character(x)) x = enc(x)
119+
if (is.factor(x)) levels(x) = enc(levels(x))
120+
x
121+
})
122+
}
114123
.Call(CfwriteR, x, file, sep, sep2, eol, na, dec, quote, qmethod=="escape", append,
115124
row.names, col.names, logical01, scipen, dateTimeAs, buffMB, nThread,
116125
showProgress, is_gzip, compressLevel, bom, yaml, verbose, encoding)

inst/tests/tests.Rraw

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3220,6 +3220,10 @@ test(1034, as.data.table(x<-as.character(sample(letters, 5))), data.table(V1=x))
32203220
test(1035.12, attr(melt(DT, id.vars=1:2)$x, "foo"), "bla1")
32213221
test(1035.13, attr(melt(DT, id.vars=1:2)$y, "bar"), 1:4)
32223222

3223+
# issue #6867 - id.vars=patterns().
3224+
DT=data.table(x_long=0, x_short=0, z=0, y1=1, y2=2)
3225+
test(1035.131, melt(DT, measure.vars=patterns("y"), id.vars=patterns("x")), data.table(x_long=0, x_short=0, variable=factor(c("y1","y2")), value=c(1,2)))
3226+
32233227
# bug #699 - melt segfaults when vars are not in dt; was test 1316
32243228
x = data.table(a=c(1,2),b=c(2,3),c=c(3,4))
32253229
test(1035.14, melt(x, id.vars="d"), error="One or more values")
@@ -21121,3 +21125,7 @@ DT[1, V1 := samelevel]
2112121125
test(2311.1, nlevels(DT$V1), 1L) # used to be 2
2112221126
DT[1, V1 := factor("a", levels = c("a", samelevel))]
2112321127
test(2311.2, nlevels(DT$V1), 2L) # used to be 3
21128+
21129+
# avoid translateChar*() in OpenMP threads, #6883
21130+
DF = list(rep(iconv("\uf8", from = "UTF-8", to = "latin1"), 2e6))
21131+
test(2312, fwrite(DF, nullfile(), encoding = "UTF-8", nThread = 2L), NULL)

man/melt.data.table.Rd

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ multiple columns simultaneously.
1919
\arguments{
2020
\item{data}{ A \code{data.table} object to melt.}
2121
\item{id.vars}{vector of id variables. Can be integer (corresponding id
22-
column numbers) or character (id column names) vector. If missing, all
22+
column numbers) or character (id column names) vector, optionally created using \code{patterns()}. If missing, all
2323
non-measure columns will be assigned to it. If integer, must be positive; see Details. }
2424
\item{measure.vars}{Measure variables for \code{melt}ing. Can be missing, vector, list, or pattern-based.
2525
@@ -131,6 +131,7 @@ melt(DT, id.vars=1, measure.vars=c("c_1", "c_2"), na.rm=TRUE) # remove NA
131131
# melt "f_1,f_2" and "d_1,d_2" simultaneously, retain 'factor' attribute
132132
# convenient way using internal function patterns()
133133
melt(DT, id.vars=1:2, measure.vars=patterns("^f_", "^d_"), value.factor=TRUE)
134+
melt(DT, id.vars=patterns("[in]"), measure.vars=patterns("^f_", "^d_"), value.factor=TRUE)
134135
# same as above, but provide list of columns directly by column names or indices
135136
melt(DT, id.vars=1:2, measure.vars=list(3:4, c("d_1", "d_2")), value.factor=TRUE)
136137
# same as above, but provide names directly:

0 commit comments

Comments
 (0)