Skip to content

Commit c905427

Browse files
authored
escape two tests for non utf8 environment (#7335)
1 parent 466a2fd commit c905427

File tree

1 file changed

+36
-25
lines changed

1 file changed

+36
-25
lines changed

inst/tests/tests.Rraw

Lines changed: 36 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,20 @@ if (!test_longdouble) {
147147
tt = Sys.getenv("TZ", unset=NA)
148148
TZnotUTC = !identical(tt,"") && !is_utc(tt)
149149

150+
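## UTF-8 locale check added in #7210: TRUE if already in a UTF-8 locale or if LC_CTYPE could be switched to en_US.UTF-8. Use as eval(utf8_check_expr) from inside local(), because it may register an on.exit() handler that restores LC_CTYPE. NOTE(review): on.exit() may attach to the eval() call itself rather than to the enclosing local() - confirm the locale is still switched while the guarded tests run.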
151+
utf8_check_expr = quote(l10n_info()$`UTF-8` || {
152+
lc_ctype = Sys.getlocale('LC_CTYPE')
153+
lc_wantctype = 'en_US.UTF-8'
154+
# The guarded tests use non-ASCII data (e.g. Japanese multibyte characters, umlauts), which requires UTF-8. As of 2025, we're likely to be already running in a UTF-8 locale, but if not, try this setlocale() call as a last chance.
155+
# Unfortunately, there is no guaranteed, portable way of switching to UTF-8 US English.
156+
# Avoid the warning upon possible failure, #7210.
157+
lc_newctype = suppressWarnings(Sys.setlocale('LC_CTYPE', lc_wantctype))
158+
if (identical(lc_newctype, lc_wantctype)) {
159+
on.exit(Sys.setlocale('LC_CTYPE', lc_ctype))
160+
TRUE
161+
} else FALSE
162+
})
163+
150164
# generate simple error messages from base that are checked against in our tests. this helps
151165
# protect us against these messages evolving in base in the future, and against these messages
152166
# potentially not being produced in English.
@@ -17638,7 +17652,11 @@ test(2194.5, endsWithAny(NA_character_, 'a'), FALSE)
1763817652
test(2194.6, endsWithAny(character(), 'a'), error="Internal error.*types or lengths incorrect")
1763917653
# file used in encoding tests
1764017654
txt = readLines(testDir("issue_563_fread.txt"))
17641-
test(2194.7, endsWithAny(txt, 'B'), error="Internal error.*types or lengths incorrect") # txt is length 5
17655+
local(if (eval(utf8_check_expr)) {
17656+
test(2194.7, endsWithAny(txt, 'B'), error="Internal error.*types or lengths incorrect") # txt is length 5
17657+
} else {
17658+
cat("Test 2194.7 skipped because it needs a UTF-8 locale.\n")
17659+
})
1764217660
test(2194.8, endsWith('abcd', 'd'), error="Internal error.*use endsWithAny")
1764317661

1764417662
# uniqueN(x, by=character()) was internal error, #4594
@@ -18619,19 +18637,8 @@ test(2252.1, dt[, b:=2L], error = "\\[ was called on a data.table.*not data.tabl
1861918637
test(2252.2, dt[, let(b=2L)], error = "\\[ was called on a data.table.*not data.table-aware.*'let'")
1862018638
rm(.datatable.aware)
1862118639

18622-
# tests for trunc.char handling wide characters # 5096
18623-
local(if (l10n_info()$`UTF-8` || {
18624-
lc_ctype = Sys.getlocale('LC_CTYPE')
18625-
lc_wantctype = 'en_US.UTF-8'
18626-
# Japanese multibyte characters require utf8. As of 2025, we're likely to be already running in a UTF-8 locale, but if not, try this setlocale() call as a last chance.
18627-
# Unfortunately, there is no guaranteed, portable way of switching to UTF-8 US English.
18628-
# Avoid the warning upon possible failure, #7210.
18629-
lc_newctype = suppressWarnings(Sys.setlocale('LC_CTYPE', lc_wantctype))
18630-
if (identical(lc_newctype, lc_wantctype)) {
18631-
on.exit(Sys.setlocale('LC_CTYPE', lc_ctype))
18632-
TRUE
18633-
} else FALSE
18634-
}) {
18640+
# tests for trunc.char handling wide characters #5096
18641+
local(if (eval(utf8_check_expr)) {
1863518642
accented_a = "\u0061\u0301"
1863618643
ja_ichi = "\u4E00"
1863718644
ja_ni = "\u4E8C"
@@ -20820,17 +20827,21 @@ x = data.table(a=1, b=2L)
2082020827
y = data.table(c=1.5, d=1L)
2082120828
test(2297.31, y[x, on=.(c == a, d == a), nomatch=NULL], output="Empty data.table (0 rows and 3 cols): c,d,b")
2082220829

20823-
# rbindlist(l, use.names=TRUE) should handle different colnames encodings #5452
20824-
x = data.table(a = 1, b = 2, c = 3)
20825-
y = data.table(x = 4, y = 5, z = 6)
20826-
# a-umlaut, o-umlaut, u-umlaut
20827-
setnames(x , c("\u00e4", "\u00f6", "\u00fc"))
20828-
setnames(y , iconv(c("\u00f6", "\u00fc", "\u00e4"), from = "UTF-8", to = "latin1"))
20829-
test(2298.1, rbindlist(list(x,y), use.names=TRUE), data.table("\u00e4"=c(1,6), "\u00f6"=c(2,4), "\u00fc"=c(3,5)))
20830-
test(2298.2, rbindlist(list(y,x), use.names=TRUE), data.table("\u00f6"=c(4,2), "\u00fc"=c(5,3), "\u00e4"=c(6,1)))
20831-
set(y, j="\u00e4", value=NULL)
20832-
test(2298.3, rbindlist(list(x,y), use.names=TRUE, fill=TRUE), data.table("\u00e4"=c(1,NA), "\u00f6"=c(2,4), "\u00fc"=c(3,5)))
20833-
test(2298.4, rbindlist(list(y,x), use.names=TRUE, fill=TRUE), data.table("\u00f6"=c(4,2), "\u00fc"=c(5,3), "\u00e4"=c(NA,1)))
20830+
local(if (eval(utf8_check_expr)) {
20831+
# rbindlist(l, use.names=TRUE) should handle different colnames encodings #5452
20832+
x = data.table(a = 1, b = 2, c = 3)
20833+
y = data.table(x = 4, y = 5, z = 6)
20834+
# a-umlaut, o-umlaut, u-umlaut
20835+
setnames(x , c("\u00e4", "\u00f6", "\u00fc"))
20836+
setnames(y , iconv(c("\u00f6", "\u00fc", "\u00e4"), from = "UTF-8", to = "latin1"))
20837+
test(2298.1, rbindlist(list(x,y), use.names=TRUE), data.table("\u00e4"=c(1,6), "\u00f6"=c(2,4), "\u00fc"=c(3,5)))
20838+
test(2298.2, rbindlist(list(y,x), use.names=TRUE), data.table("\u00f6"=c(4,2), "\u00fc"=c(5,3), "\u00e4"=c(6,1)))
20839+
set(y, j="\u00e4", value=NULL)
20840+
test(2298.3, rbindlist(list(x,y), use.names=TRUE, fill=TRUE), data.table("\u00e4"=c(1,NA), "\u00f6"=c(2,4), "\u00fc"=c(3,5)))
20841+
test(2298.4, rbindlist(list(y,x), use.names=TRUE, fill=TRUE), data.table("\u00f6"=c(4,2), "\u00fc"=c(5,3), "\u00e4"=c(NA,1)))
20842+
} else {
20843+
cat("Tests 2298.* skipped because they need a UTF-8 locale.\n")
20844+
})
2083420845

2083520846
# #6592: printing nested single-column frames
2083620847
test(2299.01, format_list_item(data.frame(a=1)), output="<data.frame[1x1]>")

0 commit comments

Comments
 (0)