@@ -3568,7 +3568,28 @@ DT[,`:=`(last.x=tail(x,1L),last.x1=tail(x1,1L)),by=y]
35683568test(1086, class(DT$last.x), c("POSIXct", "POSIXt"))
35693569test(1087, class(DT$last.x1), "ITime")
35703570
3571- # Tests 1088-1093 were non-ASCII. Now in DtNonAsciiTests
3571+ # chmatch on 'unknown' encoding (e.g. as.character(as.symbol("\u00E4"))) falling back to match, #2538 and #4818
3572+ x1 <- c("al\u00E4", "ala", "\u00E4allc", "coep")
3573+ x2 <- c("ala", "al\u00E4")
3574+ test(1088.1, requires_utf8=TRUE, chmatch(x1, x2), match(x1, x2)) # should not fall back to "match"
3575+ test(1088.2, requires_utf8=TRUE, x1 %chin% x2, x1 %in% x2)
3576+ # round-trip x1 through symbol and back to character
3577+ x3 <- unlist(lapply(x1, function(x) as.character(as.name(x))), use.names=FALSE)
3578+ test(1089.1, requires_utf8=TRUE, chmatch(x3, x2), match(x3, x2)) # should fall back to match for "x"
3579+ test(1089.2, requires_utf8=TRUE, x3 %chin% x2, x3 %in% x2) # should fall back to match for "x"
3580+ # round-trip x2 through symbol and back to character
3581+ x4 <- unlist(lapply(x2, function(x) as.character(as.name(x))), use.names=FALSE)
3582+ test(1090.1, requires_utf8=TRUE, chmatch(x1,x4), match(x1, x4)) # should fall back to match for "table"
3583+ test(1090.2, requires_utf8=TRUE, x1 %chin% x4, x1 %in% x4)
3584+ # both arguments have been round-tripped through symbols
3585+ test(1091.1, requires_utf8=TRUE, chmatch(x3, x4), match(x3, x4)) # should fall back to "match" for "x" as well.
3586+ test(1091.2, requires_utf8=TRUE, x3 %chin% x4, x3 %in% x4)
3587+ # for completeness, include test from #2528 of non-ASCII LHS of := (it could feasibly fail in future due to something other than chmatch)
3588+
3589+ DT = data.table(pas = c(1:5, NA, 6:10), good = c(1:10, NA))
3590+ setnames(DT, "pas", "p\u00E4s")
3591+ test(1092, requires_utf8=TRUE, eval(parse(text="DT[is.na(p\u00E4s), p\u00E4s := 99L]")), data.table("p\u00E4s" = c(1:5, 99L, 6:10), good = c(1:10,NA)))
3592+ test(1093, requires_utf8=TRUE, eval(parse(text="DT[, p\u00E4s := 34L]")), data.table("p\u00E4s" = 34L, good=c(1:10,NA)))
35723593
35733594# print of unnamed DT with >20 <= 100 rows, #97 (RF#4934)
35743595DT <- data.table(x=1:25, y=letters[1:25])
@@ -4320,7 +4341,10 @@ test(1162.24, is.sorted(rep(NA_character_, 2)))
43204341x <- character(0)
43214342test(1163, last(x), character(0))
43224343
4323- # Test 1164 was a non-ASCII test, now in DtNonAsciiTests
4344+ # Bug fix for #5159 - chmatch and character encoding (for some reason this seems to pass the test on a mac as well)
4345+ a<-c("a","\u00E4","\u00DF","z")
4346+ au<-iconv(a,"UTF-8","latin1") # same strings re-encoded as latin1; "UTF-8" is the portable spelling of the encoding name
4347+ test(1164.1, requires_utf8=TRUE, chmatch(a, au), match(a, au)) # chmatch must agree with base match across the two encodings
43244348
43254349# Bug fix for #73 - segfault when rbindlist on empty data.tables
43264350x <- as.data.table(BOD)
@@ -4606,7 +4630,22 @@ test(1228.4, class(DT), class(DT[, sum(b), by=a]))
46064630test(1228.5, class(DT), class(DT[a>1, sum(b), by=a]))
46074631test(1228.6, class(DT), class(DT[a>1, c:=sum(b), by=a]))
46084632
4609- # test 1229 was non-ASCII, now in package DtNonAsciiTests
4633+ # savetl_init error after a previous error, in v1.9.2, thanks Arun: 1229.1 provokes an error, 1229.2 checks that setkey still works afterwards
4634+ DT <- data.table(x=1:5, y=10:6)
4635+ test(1229.1, DT[forderv(DT, -1)], error="non-existing column")
4636+ test(1229.2, setkey(DT), data.table(x=1:5, y=10:6, key="x,y"))
4637+ # u-umlaut in column names (probably a red herring, but testing anyway): grouped sum over all columns by ID
4638+ sentEx = data.table(abend = c(1, 1, 0, 0, 2),
4639+ aber = c(0, 1, 0, 0, 0),
4640+ "\u00FCber" = c(1, 0, 0, 0, 0),
4641+ "\u00FCberall" = c(0, 0, 0, 0, 0),
4642+ "\u00FCberlegt" = c(0, 0, 0, 0, 0),
4643+ ID = structure(c(1L, 1L, 2L, 2L, 2L), .Label = c("0019", "0021"), class = "factor"),
4644+ abgeandert = c(1, 1, 1, 0, 0),
4645+ abgebildet = c(0, 0, 1, 1, 0),
4646+ abgelegt = c(0, 0, 0, 0, 3))
4647+ test(1229.3, requires_utf8=TRUE, sentEx[, lapply(.SD, sum), by=ID], data.table(ID=factor(c("0019","0021")), abend=c(2,2), aber=c(1,0), "\u00FCber"=c(1,0),
4648+ "\u00FCberall"=c(0,0), "\u00FCberlegt" = c(0,0), abgeandert=c(2,1), abgebildet = c(0,2), abgelegt=c(0,3)))
46104649
46114650# Test that ad hoc by detects if ordered and dogroups switches to memcpy if contiguous, #1050
46124651DT = data.table(a=1:3,b=1:6,key="a")
@@ -17655,11 +17694,7 @@ test(2194.5, endsWithAny(NA_character_, 'a'), FALSE)
1765517694test(2194.6, endsWithAny(character(), 'a'), error="Internal error.*types or lengths incorrect")
1765617695# file used in encoding tests
1765717696txt = readLines(testDir("issue_563_fread.txt"))
17658- local(if (eval(utf8_check_expr)) {
17659- test(2194.7, endsWithAny(txt, 'B'), error="Internal error.*types or lengths incorrect") # txt is length 5
17660- } else {
17661- cat("Test 2194.7 skipped because it needs a UTF-8 locale.\n")
17662- })
17697+ test(2194.7, requires_utf8=TRUE, endsWithAny(txt, 'B'), error="Internal error.*types or lengths incorrect") # txt is length 5; expects the same internal error as the length-0 input in 2194.6
1766317698test(2194.8, endsWith('abcd', 'd'), error="Internal error.*use endsWithAny")
1766417699
1766517700# uniqueN(x, by=character()) was internal error, #4594
@@ -18641,59 +18676,55 @@ test(2252.2, dt[, let(b=2L)], error = "\\[ was called on a data.table.*not data.
1864118676rm(.datatable.aware)
1864218677
1864318678# tests for trunc.char handling wide characters #5096
18644- local(if (eval(utf8_check_expr)) {
18645- accented_a = "\u0061\u0301"
18646- ja_ichi = "\u4E00"
18647- ja_ni = "\u4E8C"
18648- ja_ko = "\u3053"
18649- ja_n = "\u3093"
18650- dots = "..."
18651- clean_regex = "^\\d+:\\s+" # removes row numbering from beginning of output
18652- # Tests for combining character latin a and acute accent, single row
18653- DT = data.table(strrep(accented_a, 4L))
18654- test(2253.01, options=list(datatable.prettyprint.char = 4L), DT, output=strrep(accented_a, 4L))
18655- test(2253.02, options=list(datatable.prettyprint.char = 3L), DT, output=paste0(strrep(accented_a, 3L), dots))
18656- test(2253.03, options=list(datatable.prettyprint.char = 1L), DT, output=paste0(strrep(accented_a, 1L), dots))
18657- # Tests for full-width japanese character ichi, single row
18658- DT = data.table(strrep(ja_ichi, 4L))
18659- test(2253.04, options=list(datatable.prettyprint.char = 4L), DT, output=strrep(ja_ichi, 4L))
18660- test(2253.05, options=list(datatable.prettyprint.char = 3L), DT, output=paste0(strrep(ja_ichi, 3L), dots))
18661- test(2253.06, options=list(datatable.prettyprint.char = 1L), DT, output=paste0(strrep(ja_ichi, 1L), dots))
18662- # Tests for multiple, different length combining character rows
18663- DT = data.table(strrep(accented_a, 1L:4L))
18664- test(2253.07, options=list(datatable.prettyprint.char = 4L), gsub(clean_regex, "", capture.output(print(DT))[-1L]), strrep(accented_a, 1:4L))
18665- test(2253.08, options=list(datatable.prettyprint.char = 3L), gsub(clean_regex, "", capture.output(print(DT))[-1L]), c(strrep(accented_a, 1:3), paste0(strrep(accented_a, 3L), dots)))
18666- test(2253.09, options=list(datatable.prettyprint.char = 1L), gsub(clean_regex, "", capture.output(print(DT))[-1L]), c(accented_a, rep(paste0(accented_a, dots), 3L)))
18667- # Tests for multiple, different length full-width characters
18668- DT = data.table(strrep(ja_ichi, 1L:4L))
18669- test(2253.10, options=list(datatable.prettyprint.char = 4L), gsub(clean_regex, "", capture.output(print(DT))[-1L]), strrep(ja_ichi, 1:4L))
18670- test(2253.11, options=list(datatable.prettyprint.char = 3L), gsub(clean_regex, "", capture.output(print(DT))[-1L]), c(strrep(ja_ichi, 1:3), paste0(strrep(ja_ichi, 3L), dots)))
18671- test(2253.12, options=list(datatable.prettyprint.char = 1L), gsub(clean_regex, "", capture.output(print(DT))[-1L]), c(ja_ichi, rep(paste0(ja_ichi, dots), 3L)))
18672- # Tests for combined characters, multiple columns
18673- DT = data.table(paste0(ja_ichi), strrep(ja_ni, 2L), strrep(ja_ko, 3L), strrep(accented_a, 2L), "aaa")
18674- test(2253.13, options=list(datatable.prettyprint.char = 4L), capture.output(print(DT))[-1L], paste("1:", ja_ichi, strrep(ja_ni, 2L), strrep(ja_ko, 3L), strrep(accented_a, 2L), "aaa"))
18675- test(2253.14, options=list(datatable.prettyprint.char = 3L), capture.output(print(DT))[-1L], paste("1:", ja_ichi, strrep(ja_ni, 2L), strrep(ja_ko, 3L), strrep(accented_a, 2L), "aaa"))
18676- test(2253.15, options=list(datatable.prettyprint.char = 2L), capture.output(print(DT))[-1L], paste("1:", ja_ichi, strrep(ja_ni, 2), paste0(strrep(ja_ko, 2), dots) , strrep(accented_a, 2), "aa..."))
18677- test(2253.16, options=list(datatable.prettyprint.char = 1L), capture.output(print(DT))[-1L], paste("1:", ja_ichi, paste0(ja_ni, dots), paste0(ja_ko, dots), paste0(accented_a, dots), "a..."))
18678- # Tests for multiple columns, multiple rows
18679- DT = data.table(strrep(ja_ko, 1:3L), strrep(ja_n, 2:4L), strrep(accented_a, 3))
18680- test(2253.17, options=list(datatable.prettyprint.char = 4L), gsub(clean_regex, "", capture.output(print(DT))[-1L]),
18681- c(paste0(ja_ko, " ", strrep(ja_n, 2L), " ", strrep(accented_a, 3L)),
18682- paste0(strrep(ja_ko, 2L), " ", strrep(ja_n, 3L), " ", strrep(accented_a, 3L)),
18683- paste(strrep(ja_ko, 3L), strrep(ja_n, 4L), strrep(accented_a, 3L))))
18684- test(2253.18, options=list(datatable.prettyprint.char = 3L), gsub(clean_regex, "", capture.output(print(DT))[-1L]),
18685- c(paste0(ja_ko, " ", strrep(ja_n, 2L), " ", strrep(accented_a, 3L)),
18686- paste0(strrep(ja_ko, 2L), " ", strrep(ja_n, 3L), " ", strrep(accented_a, 3L)),
18687- paste(strrep(ja_ko, 3L), paste0(strrep(ja_n, 3L), dots), strrep(accented_a, 3L))))
18688- test(2253.19, options=list(datatable.prettyprint.char = 1L), gsub(clean_regex, "", capture.output(print(DT))[-1L]),
18689- c(paste0(ja_ko, " ", paste0(ja_n, dots), " ", paste0(accented_a, dots)),
18690- paste0(c(ja_ko, ja_n, accented_a), dots, collapse=" "),
18691- paste0(c(ja_ko, ja_n, accented_a), dots, collapse=" ")))
18692- # test for data.table with NA, #6441
18693- test(2253.20, options=list(datatable.prettyprint.char = 1L), data.table(a = c("abc", NA)), output=" a\n1: a...\n2: <NA>")
18694- } else {
18695- cat("Tests 2253* skipped because they need a UTF-8 locale.\n")
18696- })
18679+ accented_a = "\u0061\u0301" # 'a' followed by a combining acute accent
18680+ ja_ichi = "\u4E00" # full-width character ichi
18681+ ja_ni = "\u4E8C"
18682+ ja_ko = "\u3053"
18683+ ja_n = "\u3093"
18684+ dots = "..." # suffix the expected outputs append to truncated strings
18685+ clean_regex = "^\\d+:\\s+" # removes row numbering from beginning of output
18686+ # Tests for combining character Latin a and acute accent, single row
18687+ DT = data.table(strrep(accented_a, 4L))
18688+ test(2253.01, requires_utf8=TRUE, options=list(datatable.prettyprint.char = 4L), DT, output=strrep(accented_a, 4L))
18689+ test(2253.02, requires_utf8=TRUE, options=list(datatable.prettyprint.char = 3L), DT, output=paste0(strrep(accented_a, 3L), dots))
18690+ test(2253.03, requires_utf8=TRUE, options=list(datatable.prettyprint.char = 1L), DT, output=paste0(strrep(accented_a, 1L), dots))
18691+ # Tests for full-width Japanese character ichi, single row
18692+ DT = data.table(strrep(ja_ichi, 4L))
18693+ test(2253.04, requires_utf8=TRUE, options=list(datatable.prettyprint.char = 4L), DT, output=strrep(ja_ichi, 4L))
18694+ test(2253.05, requires_utf8=TRUE, options=list(datatable.prettyprint.char = 3L), DT, output=paste0(strrep(ja_ichi, 3L), dots))
18695+ test(2253.06, requires_utf8=TRUE, options=list(datatable.prettyprint.char = 1L), DT, output=paste0(strrep(ja_ichi, 1L), dots))
18696+ # Tests for multiple rows of combining characters with different lengths
18697+ DT = data.table(strrep(accented_a, 1L:4L))
18698+ test(2253.07, requires_utf8=TRUE, options=list(datatable.prettyprint.char = 4L), gsub(clean_regex, "", capture.output(print(DT))[-1L]), strrep(accented_a, 1:4L))
18699+ test(2253.08, requires_utf8=TRUE, options=list(datatable.prettyprint.char = 3L), gsub(clean_regex, "", capture.output(print(DT))[-1L]), c(strrep(accented_a, 1:3), paste0(strrep(accented_a, 3L), dots)))
18700+ test(2253.09, requires_utf8=TRUE, options=list(datatable.prettyprint.char = 1L), gsub(clean_regex, "", capture.output(print(DT))[-1L]), c(accented_a, rep(paste0(accented_a, dots), 3L)))
18701+ # Tests for multiple rows of full-width characters with different lengths
18702+ DT = data.table(strrep(ja_ichi, 1L:4L))
18703+ test(2253.10, requires_utf8=TRUE, options=list(datatable.prettyprint.char = 4L), gsub(clean_regex, "", capture.output(print(DT))[-1L]), strrep(ja_ichi, 1:4L))
18704+ test(2253.11, requires_utf8=TRUE, options=list(datatable.prettyprint.char = 3L), gsub(clean_regex, "", capture.output(print(DT))[-1L]), c(strrep(ja_ichi, 1:3), paste0(strrep(ja_ichi, 3L), dots)))
18705+ test(2253.12, requires_utf8=TRUE, options=list(datatable.prettyprint.char = 1L), gsub(clean_regex, "", capture.output(print(DT))[-1L]), c(ja_ichi, rep(paste0(ja_ichi, dots), 3L)))
18706+ # Tests for combined characters, multiple columns
18707+ DT = data.table(paste0(ja_ichi), strrep(ja_ni, 2L), strrep(ja_ko, 3L), strrep(accented_a, 2L), "aaa")
18708+ test(2253.13, requires_utf8=TRUE, options=list(datatable.prettyprint.char = 4L), capture.output(print(DT))[-1L], paste("1:", ja_ichi, strrep(ja_ni, 2L), strrep(ja_ko, 3L), strrep(accented_a, 2L), "aaa"))
18709+ test(2253.14, requires_utf8=TRUE, options=list(datatable.prettyprint.char = 3L), capture.output(print(DT))[-1L], paste("1:", ja_ichi, strrep(ja_ni, 2L), strrep(ja_ko, 3L), strrep(accented_a, 2L), "aaa"))
18710+ test(2253.15, requires_utf8=TRUE, options=list(datatable.prettyprint.char = 2L), capture.output(print(DT))[-1L], paste("1:", ja_ichi, strrep(ja_ni, 2), paste0(strrep(ja_ko, 2), dots) , strrep(accented_a, 2), "aa..."))
18711+ test(2253.16, requires_utf8=TRUE, options=list(datatable.prettyprint.char = 1L), capture.output(print(DT))[-1L], paste("1:", ja_ichi, paste0(ja_ni, dots), paste0(ja_ko, dots), paste0(accented_a, dots), "a..."))
18712+ # Tests for multiple columns, multiple rows
18713+ DT = data.table(strrep(ja_ko, 1:3L), strrep(ja_n, 2:4L), strrep(accented_a, 3))
18714+ test(2253.17, requires_utf8=TRUE, options=list(datatable.prettyprint.char = 4L), gsub(clean_regex, "", capture.output(print(DT))[-1L]),
18715+ c(paste0(ja_ko, " ", strrep(ja_n, 2L), " ", strrep(accented_a, 3L)),
18716+ paste0(strrep(ja_ko, 2L), " ", strrep(ja_n, 3L), " ", strrep(accented_a, 3L)),
18717+ paste(strrep(ja_ko, 3L), strrep(ja_n, 4L), strrep(accented_a, 3L))))
18718+ test(2253.18, requires_utf8=TRUE, options=list(datatable.prettyprint.char = 3L), gsub(clean_regex, "", capture.output(print(DT))[-1L]),
18719+ c(paste0(ja_ko, " ", strrep(ja_n, 2L), " ", strrep(accented_a, 3L)),
18720+ paste0(strrep(ja_ko, 2L), " ", strrep(ja_n, 3L), " ", strrep(accented_a, 3L)),
18721+ paste(strrep(ja_ko, 3L), paste0(strrep(ja_n, 3L), dots), strrep(accented_a, 3L))))
18722+ test(2253.19, requires_utf8=TRUE, options=list(datatable.prettyprint.char = 1L), gsub(clean_regex, "", capture.output(print(DT))[-1L]),
18723+ c(paste0(ja_ko, " ", paste0(ja_n, dots), " ", paste0(accented_a, dots)),
18724+ paste0(c(ja_ko, ja_n, accented_a), dots, collapse=" "),
18725+ paste0(c(ja_ko, ja_n, accented_a), dots, collapse=" ")))
18726+ # test for data.table with NA, #6441
18727+ test(2253.20, requires_utf8=TRUE, options=list(datatable.prettyprint.char = 1L), data.table(a = c("abc", NA)), output=" a\n1: a...\n2: <NA>")
1869718728
1869818729# allow 1-D matrix in j for consistency, #783
1869918730DT=data.table(a = rep(1:2, 3), b = 1:6)
@@ -20830,21 +20861,17 @@ x = data.table(a=1, b=2L)
2083020861y = data.table(c=1.5, d=1L)
2083120862test(2297.31, y[x, on=.(c == a, d == a), nomatch=NULL], output="Empty data.table (0 rows and 3 cols): c,d,b")
2083220863
20833- local(if (eval(utf8_check_expr)) {
20834- # rbindlist(l, use.names=TRUE) should handle different colnames encodings #5452
20835- x = data.table(a = 1, b = 2, c = 3)
20836- y = data.table(x = 4, y = 5, z = 6)
20837- # a-umlaut, o-umlaut, u-umlaut
20838- setnames(x , c("\u00e4", "\u00f6", "\u00fc"))
20839- setnames(y , iconv(c("\u00f6", "\u00fc", "\u00e4"), from = "UTF-8", to = "latin1"))
20840- test(2298.1, rbindlist(list(x,y), use.names=TRUE), data.table("\u00e4"=c(1,6), "\u00f6"=c(2,4), "\u00fc"=c(3,5)))
20841- test(2298.2, rbindlist(list(y,x), use.names=TRUE), data.table("\u00f6"=c(4,2), "\u00fc"=c(5,3), "\u00e4"=c(6,1)))
20842- set(y, j="\u00e4", value=NULL)
20843- test(2298.3, rbindlist(list(x,y), use.names=TRUE, fill=TRUE), data.table("\u00e4"=c(1,NA), "\u00f6"=c(2,4), "\u00fc"=c(3,5)))
20844- test(2298.4, rbindlist(list(y,x), use.names=TRUE, fill=TRUE), data.table("\u00f6"=c(4,2), "\u00fc"=c(5,3), "\u00e4"=c(NA,1)))
20845- } else {
20846- cat("Tests 2298.* skipped because they need a UTF-8 locale.\n")
20847- })
20864+ # rbindlist(l, use.names=TRUE) should handle column names in different encodings, #5452
20865+ x = data.table(a = 1, b = 2, c = 3)
20866+ y = data.table(x = 4, y = 5, z = 6)
20867+ # a-umlaut, o-umlaut, u-umlaut
20868+ setnames(x , c("\u00e4", "\u00f6", "\u00fc"))
20869+ setnames(y , iconv(c("\u00f6", "\u00fc", "\u00e4"), from = "UTF-8", to = "latin1")) # same three names, reordered and re-encoded as latin1
20870+ test(2298.1, requires_utf8=TRUE, rbindlist(list(x,y), use.names=TRUE), data.table("\u00e4"=c(1,6), "\u00f6"=c(2,4), "\u00fc"=c(3,5)))
20871+ test(2298.2, requires_utf8=TRUE, rbindlist(list(y,x), use.names=TRUE), data.table("\u00f6"=c(4,2), "\u00fc"=c(5,3), "\u00e4"=c(6,1)))
20872+ set(y, j="\u00e4", value=NULL) # remove the a-umlaut column from y; the fill=TRUE tests below expect NA in its place
20873+ test(2298.3, requires_utf8=TRUE, rbindlist(list(x,y), use.names=TRUE, fill=TRUE), data.table("\u00e4"=c(1,NA), "\u00f6"=c(2,4), "\u00fc"=c(3,5)))
20874+ test(2298.4, requires_utf8=TRUE, rbindlist(list(y,x), use.names=TRUE, fill=TRUE), data.table("\u00f6"=c(4,2), "\u00fc"=c(5,3), "\u00e4"=c(NA,1)))
2084820875
2084920876# #6592: printing nested single-column frames
2085020877test(2299.01, format_list_item(data.frame(a=1)), output="<data.frame[1x1]>")
@@ -21615,13 +21642,13 @@ if (base::getRversion() >= "4.3.0") { ## follow up of #7213, see #7321
2161521642}
2161621643
2161721644# fwrite: allow dec=',' with single column, #7227
21618- test(2337.1, fwrite(data.table(1), dec=","), NULL )
21645+ test(2337.1, fwrite(data.table(1), dec=","), output = "V1\n1" ) # no file argument is given, so the test checks the printed CSV text
2161921646if (base::getRversion() >= "4.0.0") { # rely on stopifnot(named = ...) for correct message
2162021647 test(2337.2, fwrite(data.table(0.1, 0.2), dec=",", sep=","), error = "dec and sep must be distinct")
2162121648}
21622- test(2337.3, is.null( fwrite(data.table(c(0.1, 0.2)), dec=",", sep="\t")) )
21623- test(2337.4, is.null( fwrite(data.table(a=numeric(), b=numeric()), dec=",", sep=",")) )
21624- test(2337.5, is.null( fwrite(data.table(a=numeric()), dec=",", sep=",")) )
21649+ test(2337.3, fwrite(data.table(c(0.1, 0.2)), dec=",", sep="\t"), output = "V1\n0,1\n0,2" ) # decimals are written with a comma
21650+ test(2337.4, fwrite(data.table(a=numeric(), b=numeric()), dec=",", sep=","), output = "a,b" ) # zero-row table: only the header is expected
21651+ test(2337.5, fwrite(data.table(a=numeric()), dec=",", sep=","), output = "a" ) # zero-row, single column: only the header is expected
2162521652
2162621653# 2864 force decimal points for whole numbers in numeric columns
2162721654dd = data.table(x=c(1, 2, 3))
0 commit comments