Skip to content

Commit 10c7dd6

Browse files
authored
Fix Windows Parsing Issue (#6150)
* refactor tests to no longer use explicit japanese characters, surround by setlocale * wrap failing tests to use utf8 locale * remove trailing whitespaces * review suggestions * move strrep to top of file
1 parent cbe491f commit 10c7dd6

File tree

1 file changed

+57
-38
lines changed

1 file changed

+57
-38
lines changed

inst/tests/tests.Rraw

Lines changed: 57 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -207,6 +207,11 @@ with_c_collate = function(expr) {
207207
expr
208208
}
209209

210+
# strrep is used many times in tests, but was only added to base in R 3.3.0, so define an
# equivalent when it is missing. The fallback follows base::strrep's vector semantics:
#   x     - character vector (coerced with as.character) of strings to repeat
#   times - number of repetitions for each element
# Both arguments are recycled to the longer of the two lengths, a zero-length argument gives
# character(0), and an NA in either argument gives NA_character_ for that element.
if (!exists("strrep", "package:base")) {
  strrep = function(x, times) {
    if (!length(x) || !length(times)) return(character())
    n = max(length(x), length(times))
    # recycle both inputs to the common length; the previous rep_len(x, length(times))
    # silently dropped elements of x whenever x was longer than times
    x = rep_len(as.character(x), n)
    times = rep_len(times, n)
    out = rep_len(NA_character_, n)
    ok = which(!is.na(x) & !is.na(times))
    # vapply (not mapply) so that an empty selection still yields a character vector
    out[ok] = vapply(ok, function(i) paste(rep(x[i], times[i]), collapse=""), "")
    out
  }
}
214+
210215
##########################
211216
.do_not_rm = ls() # objects that exist at this point should not be removed by rm_all(); e.g. test_*, base_messages, Ctest_dt_win_snprintf, prevtest, etc
212217
##########################
@@ -18428,44 +18433,58 @@ test(2252.2, dt[, let(b=2L)], error = "\\[ was called on a data.table.*not data.
1842818433
rm(.datatable.aware)
1842918434

1843018435
# tests for trunc.char handling wide characters # 5096
18431-
accented_a = "\u0061\u0301"
18432-
ja_ichi = "\u4E00"
18433-
ja_ni = "\u4E8C"
18434-
ja_ko = "\u3053"
18435-
ja_n = "\u3093"
18436-
dots = "..."
18437-
clean_regex = "^\\d+:\\s+" # removes row numbering from beginning of output
18438-
# Tests for combining character latin a and acute accent, single row
18439-
DT = data.table(strrep(accented_a, 4L))
18440-
test(2253.01, options=list(datatable.prettyprint.char = 4L), DT, output=strrep(accented_a, 4L))
18441-
test(2253.02, options=list(datatable.prettyprint.char = 3L), DT, output=paste0(strrep(accented_a, 3L), dots))
18442-
test(2253.03, options=list(datatable.prettyprint.char = 1L), DT, output=paste0(strrep(accented_a, 1L), dots))
18443-
# Tests for full-width japanese character ichi, single row
18444-
DT = data.table(strrep(ja_ichi, 4L))
18445-
test(2253.04, options=list(datatable.prettyprint.char = 4L), DT, output=strrep(ja_ichi, 4L))
18446-
test(2253.05, options=list(datatable.prettyprint.char = 3L), DT, output=paste0(strrep(ja_ichi, 3L), dots))
18447-
test(2253.06, options=list(datatable.prettyprint.char = 1L), DT, output=paste0(strrep(ja_ichi, 1L), dots))
18448-
# Tests for multiple, different length combining character rows
18449-
DT = data.table(strrep(accented_a, 1L:4L))
18450-
test(2253.07, options=list(datatable.prettyprint.char = 4L), gsub(clean_regex, "", capture.output(print(DT))[-1L]), c("á", "áá", "ááá", "áááá"))
18451-
test(2253.08, options=list(datatable.prettyprint.char = 3L), gsub(clean_regex, "", capture.output(print(DT))[-1L]), c("á", "áá", "ááá", "ááá..."))
18452-
test(2253.09, options=list(datatable.prettyprint.char = 1L), gsub(clean_regex, "", capture.output(print(DT))[-1L]), c("á", "á...", "á...", "á..."))
18453-
# Tests for multiple, different length full-width characters
18454-
DT = data.table(strrep(ja_ichi, 1L:4L))
18455-
test(2253.10, options=list(datatable.prettyprint.char = 4L), gsub(clean_regex, "", capture.output(print(DT))[-1L]), c("一", "一一", "一一一", "一一一一"))
18456-
test(2253.11, options=list(datatable.prettyprint.char = 3L), gsub(clean_regex, "", capture.output(print(DT))[-1L]), c("一", "一一", "一一一", "一一一..."))
18457-
test(2253.12, options=list(datatable.prettyprint.char = 1L), gsub(clean_regex, "", capture.output(print(DT))[-1L]), c("一", "一...", "一...", "一..."))
18458-
# Tests for combined characters, multiple columns
18459-
DT = data.table(paste0(ja_ichi), strrep(ja_ni, 2L), strrep(ja_ko, 3L), strrep(accented_a, 2L), "aaa")
18460-
test(2253.13, options=list(datatable.prettyprint.char = 4L), capture.output(print(DT))[-1L], "1: 一 二二 こここ áá aaa")
18461-
test(2253.14, options=list(datatable.prettyprint.char = 3L), capture.output(print(DT))[-1L], "1: 一 二二 こここ áá aaa")
18462-
test(2253.15, options=list(datatable.prettyprint.char = 2L), capture.output(print(DT))[-1L], "1: 一 二二 ここ... áá aa...")
18463-
test(2253.16, options=list(datatable.prettyprint.char = 1L), capture.output(print(DT))[-1L], "1: 一 二... こ... á... a...")
18464-
# Tests for multiple columns, multiple rows
18465-
DT = data.table(strrep(ja_ko, 1:3L), strrep(ja_n, 2:4L), strrep(accented_a, 3))
18466-
test(2253.17, options=list(datatable.prettyprint.char = 4L), gsub(clean_regex, "", capture.output(print(DT))[-1L]), c("こ んん ááá", "ここ んんん ááá", "こここ んんんん ááá"))
18467-
test(2253.18, options=list(datatable.prettyprint.char = 3L), gsub(clean_regex, "", capture.output(print(DT))[-1L]), c("こ んん ááá", "ここ んんん ááá", "こここ んんん... ááá"))
18468-
test(2253.19, options=list(datatable.prettyprint.char = 1L), gsub(clean_regex, "", capture.output(print(DT))[-1L]), c("こ ん... á...", "こ... ん... á...", "こ... ん... á..."))
18436+
local({
  # The expected output below contains multibyte characters (a combining accent and full-width
  # Japanese characters), so these tests are only meaningful when LC_CTYPE is a UTF-8 locale.
  # All strings are built from \u escapes so that this file itself stays ASCII-only.
  utf8_ready = isTRUE(l10n_info()[["UTF-8"]])
  if (!utf8_ready) {
    # Not in UTF-8 yet: try to switch, and always restore the original locale on exit.
    lc_ctype = Sys.getlocale('LC_CTYPE')
    on.exit(Sys.setlocale('LC_CTYPE', lc_ctype), add=TRUE)
    # Sys.setlocale() warns and returns "" when the OS cannot honor the request (the name
    # "en_US.UTF-8" is not available on every platform), so silence the warning and verify
    # the outcome instead of assuming the switch worked.
    lc_new = suppressWarnings(Sys.setlocale('LC_CTYPE', "en_US.UTF-8"))
    utf8_ready = nzchar(lc_new) && isTRUE(l10n_info()[["UTF-8"]])
  }
  if (utf8_ready) {
    accented_a = "\u0061\u0301"
    ja_ichi = "\u4E00"
    ja_ni = "\u4E8C"
    ja_ko = "\u3053"
    ja_n = "\u3093"
    dots = "..."
    clean_regex = "^\\d+:\\s+" # removes row numbering from beginning of output
    # Tests for combining character latin a and acute accent, single row
    DT = data.table(strrep(accented_a, 4L))
    test(2253.01, options=list(datatable.prettyprint.char = 4L), DT, output=strrep(accented_a, 4L))
    test(2253.02, options=list(datatable.prettyprint.char = 3L), DT, output=paste0(strrep(accented_a, 3L), dots))
    test(2253.03, options=list(datatable.prettyprint.char = 1L), DT, output=paste0(strrep(accented_a, 1L), dots))
    # Tests for full-width japanese character ichi, single row
    DT = data.table(strrep(ja_ichi, 4L))
    test(2253.04, options=list(datatable.prettyprint.char = 4L), DT, output=strrep(ja_ichi, 4L))
    test(2253.05, options=list(datatable.prettyprint.char = 3L), DT, output=paste0(strrep(ja_ichi, 3L), dots))
    test(2253.06, options=list(datatable.prettyprint.char = 1L), DT, output=paste0(strrep(ja_ichi, 1L), dots))
    # Tests for multiple, different length combining character rows
    DT = data.table(strrep(accented_a, 1L:4L))
    test(2253.07, options=list(datatable.prettyprint.char = 4L), gsub(clean_regex, "", capture.output(print(DT))[-1L]), strrep(accented_a, 1:4L))
    test(2253.08, options=list(datatable.prettyprint.char = 3L), gsub(clean_regex, "", capture.output(print(DT))[-1L]), c(strrep(accented_a, 1:3), paste0(strrep(accented_a, 3L), dots)))
    test(2253.09, options=list(datatable.prettyprint.char = 1L), gsub(clean_regex, "", capture.output(print(DT))[-1L]), c(accented_a, rep(paste0(accented_a, dots), 3L)))
    # Tests for multiple, different length full-width characters
    DT = data.table(strrep(ja_ichi, 1L:4L))
    test(2253.10, options=list(datatable.prettyprint.char = 4L), gsub(clean_regex, "", capture.output(print(DT))[-1L]), strrep(ja_ichi, 1:4L))
    test(2253.11, options=list(datatable.prettyprint.char = 3L), gsub(clean_regex, "", capture.output(print(DT))[-1L]), c(strrep(ja_ichi, 1:3), paste0(strrep(ja_ichi, 3L), dots)))
    test(2253.12, options=list(datatable.prettyprint.char = 1L), gsub(clean_regex, "", capture.output(print(DT))[-1L]), c(ja_ichi, rep(paste0(ja_ichi, dots), 3L)))
    # Tests for combined characters, multiple columns
    DT = data.table(paste0(ja_ichi), strrep(ja_ni, 2L), strrep(ja_ko, 3L), strrep(accented_a, 2L), "aaa")
    test(2253.13, options=list(datatable.prettyprint.char = 4L), capture.output(print(DT))[-1L], paste("1:", ja_ichi, strrep(ja_ni, 2L), strrep(ja_ko, 3L), strrep(accented_a, 2L), "aaa"))
    test(2253.14, options=list(datatable.prettyprint.char = 3L), capture.output(print(DT))[-1L], paste("1:", ja_ichi, strrep(ja_ni, 2L), strrep(ja_ko, 3L), strrep(accented_a, 2L), "aaa"))
    test(2253.15, options=list(datatable.prettyprint.char = 2L), capture.output(print(DT))[-1L], paste("1:", ja_ichi, strrep(ja_ni, 2), paste0(strrep(ja_ko, 2), dots), strrep(accented_a, 2), "aa..."))
    test(2253.16, options=list(datatable.prettyprint.char = 1L), capture.output(print(DT))[-1L], paste("1:", ja_ichi, paste0(ja_ni, dots), paste0(ja_ko, dots), paste0(accented_a, dots), "a..."))
    # Tests for multiple columns, multiple rows
    DT = data.table(strrep(ja_ko, 1:3L), strrep(ja_n, 2:4L), strrep(accented_a, 3))
    test(2253.17, options=list(datatable.prettyprint.char = 4L), gsub(clean_regex, "", capture.output(print(DT))[-1L]),
         c(paste0(ja_ko, " ", strrep(ja_n, 2L), " ", strrep(accented_a, 3L)),
           paste0(strrep(ja_ko, 2L), " ", strrep(ja_n, 3L), " ", strrep(accented_a, 3L)),
           paste(strrep(ja_ko, 3L), strrep(ja_n, 4L), strrep(accented_a, 3L))))
    test(2253.18, options=list(datatable.prettyprint.char = 3L), gsub(clean_regex, "", capture.output(print(DT))[-1L]),
         c(paste0(ja_ko, " ", strrep(ja_n, 2L), " ", strrep(accented_a, 3L)),
           paste0(strrep(ja_ko, 2L), " ", strrep(ja_n, 3L), " ", strrep(accented_a, 3L)),
           paste(strrep(ja_ko, 3L), paste0(strrep(ja_n, 3L), dots), strrep(accented_a, 3L))))
    test(2253.19, options=list(datatable.prettyprint.char = 1L), gsub(clean_regex, "", capture.output(print(DT))[-1L]),
         c(paste0(ja_ko, " ", paste0(ja_n, dots), " ", paste0(accented_a, dots)),
           paste0(c(ja_ko, ja_n, accented_a), dots, collapse=" "),
           paste0(c(ja_ko, ja_n, accented_a), dots, collapse=" ")))
  } else {
    # No UTF-8 locale could be obtained; the comparisons above would fail for reasons unrelated
    # to trunc.char, so skip them visibly rather than report spurious failures.
    cat("Tests 2253.* skipped because they need a UTF-8 locale.\n")
  }
})
1846918488

1847018489
# allow 1-D matrix in j for consistency, #783
1847118490
DT=data.table(a = rep(1:2, 3), b = 1:6)

0 commit comments

Comments
 (0)