Skip to content

Commit 38b7af2

Browse files
committed
tests/lapi: utf8 tests
1 parent 0508724 commit 38b7af2

File tree

5 files changed

+202
-0
lines changed

5 files changed

+202
-0
lines changed

tests/lapi/utf8_char_test.lua

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
--[[
2+
SPDX-License-Identifier: ISC
3+
Copyright (c) 2023-2025, Sergey Bronnikov.
4+
5+
6.5 – UTF-8 Support
6+
https://www.lua.org/manual/5.3/manual.html#6.5
7+
8+
'utf8.codes' does not raise an error on spurious continuation bytes,
9+
https://github.com/lua/lua/commit/a1089b415a3f5c753aa1b40758ffdaf28d5701b0
10+
11+
Synopsis: utf8.char(...)
12+
]]
13+
14+
-- TODO: https://github.com/tst2005/lua-utf8string
15+
-- - https://www.lua.org/manual/5.3/manual.html#6.5
16+
-- - Numbers taken from table 3-7 in
17+
-- - www.unicode.org/versions/Unicode6.2.0/UnicodeStandard-6.2.pdf
18+
-- - Find-based solution inspired by
19+
-- - http://notebook.kulchenko.com/programming/fixing-malformed-utf8-in-lua
20+
-- - check utf8 https://www.cl.cam.ac.uk/~mgk25/ucs/utf8_check.c
21+
-- - corpus: https://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt
22+
-- - https://git.neulandlabor.de/j3d1/json/commit/15b6421d0789a5402275358d43719f4b37979929
23+
-- - https://github.com/geoffmcl/utf8-test/tree/master/src
24+
-- - https://github.com/kikito/utf8_validator.lua/blob/master/utf8_validator.lua
25+
-- - https://en.wikipedia.org/wiki/UTF-8#Invalid_sequences_and_error_handling
26+
27+
local luzer = require("luzer")
28+
local test_lib = require("lib")
29+
local MAX_INT = test_lib.MAX_INT
30+
31+
-- The utf8 library was introduced in Lua 5.3.
32+
if test_lib.lua_version() == "LuaJIT" then
33+
os.exit()
34+
end
35+
36+
-- `unpack` became `table.unpack` in Lua 5.2; the bare global is nil on the
-- Lua 5.3+ interpreters that provide `utf8`, so resolve it explicitly.
local unpack_values = table.unpack or unpack

--- Fuzz target for utf8.char(...).
-- Draws a fuzzer-chosen number of integers, switches to a fuzzer-chosen
-- locale and passes the integers to utf8.char as code points.
-- @param buf raw fuzzer input handed over by luzer
local function TestOneInput(buf)
    local fdp = luzer.FuzzedDataProvider(buf)
    local n = fdp:consume_integer(1, MAX_INT)
    local ch = fdp:consume_integers(0, MAX_INT, n)
    os.setlocale(test_lib.random_locale(fdp), "all")
    -- utf8.char raises "value out of range" for code points it cannot
    -- encode, and unpack raises when the list has too many elements. Both
    -- are expected outcomes for random input, so run the call protected.
    -- The unpack happens inside the closure so its error is caught as well.
    -- NOTE(review): this discards every Lua-level error; narrow it to an
    -- allow-list of messages if unexpected errors should still surface.
    pcall(function()
        return utf8.char(unpack_values(ch))
    end)
end
43+
44+
local args = {
45+
artifact_prefix = "utf8_char_",
46+
}
47+
luzer.Fuzz(TestOneInput, nil, args)

tests/lapi/utf8_codepoint_test.lua

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
--[=[[
2+
SPDX-License-Identifier: ISC
3+
Copyright (c) 2023-2025, Sergey Bronnikov.
4+
5+
6.5 – UTF-8 Support
6+
https://www.lua.org/manual/5.3/manual.html#6.5
7+
8+
'utf8.codes' does not raise an error on spurious continuation bytes,
9+
https://github.com/lua/lua/commit/a1089b415a3f5c753aa1b40758ffdaf28d5701b0
10+
11+
Synopsis: utf8.codepoint(s [, i [, j [, lax]]])
12+
]]=]
13+
14+
-- http://howardhinnant.github.io/utf_summary.html
15+
-- TODO: https://github.com/tst2005/lua-utf8string
16+
17+
local luzer = require("luzer")
18+
local test_lib = require("lib")
19+
local MAX_INT = test_lib.MAX_INT
20+
21+
-- The utf8 library was introduced in Lua 5.3.
22+
if test_lib.lua_version() == "LuaJIT" then
23+
os.exit()
24+
end
25+
26+
--- Fuzz target for utf8.codepoint(s [, i [, j [, lax]]]).
-- Draws a string, two byte positions and the `lax` flag from the fuzzer
-- input, switches to a fuzzer-chosen locale and decodes the requested range.
-- @param buf raw fuzzer input handed over by luzer
local function TestOneInput(buf)
    local fdp = luzer.FuzzedDataProvider(buf)
    local max_len = fdp:consume_integer(1, MAX_INT)
    local s = fdp:consume_string(max_len)
    local i = fdp:consume_integer(0, max_len)
    local j = fdp:consume_integer(0, max_len)
    -- `lax` is a boolean flag. Any integer (including 0) is truthy in Lua,
    -- so drawing an integer would only ever exercise the lax decoder.
    local lax = fdp:consume_boolean()
    os.setlocale(test_lib.random_locale(fdp), "all")
    -- utf8.codepoint raises on invalid byte sequences and on positions
    -- outside the string; both are expected for random input, so the call
    -- is protected.
    -- NOTE(review): this discards every Lua-level error; narrow it to an
    -- allow-list of messages if unexpected errors should still surface.
    pcall(utf8.codepoint, s, i, j, lax)
end
36+
37+
local args = {
38+
artifact_prefix = "utf8_codepoint_",
39+
}
40+
luzer.Fuzz(TestOneInput, nil, args)

tests/lapi/utf8_codes_test.lua

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
--[[
2+
SPDX-License-Identifier: ISC
3+
Copyright (c) 2023-2025, Sergey Bronnikov.
4+
5+
6.5 – UTF-8 Support
6+
https://www.lua.org/manual/5.3/manual.html#6.5
7+
8+
'utf8.codes' does not raise an error on spurious continuation bytes,
9+
https://github.com/lua/lua/commit/a1089b415a3f5c753aa1b40758ffdaf28d5701b0
10+
11+
Synopsis: utf8.codes(s [, lax])
12+
]]
13+
14+
-- TODO: https://github.com/tst2005/lua-utf8string
15+
16+
local luzer = require("luzer")
17+
local test_lib = require("lib")
18+
local MAX_INT = test_lib.MAX_INT
19+
20+
-- The utf8 library was introduced in Lua 5.3.
21+
if test_lib.lua_version() == "LuaJIT" then
22+
os.exit()
23+
end
24+
25+
--- Fuzz target for utf8.codes(s [, lax]).
-- Draws a string and the `lax` flag from the fuzzer input, switches to a
-- fuzzer-chosen locale and walks the whole string with the utf8.codes
-- iterator.
-- @param buf raw fuzzer input handed over by luzer
local function TestOneInput(buf)
    local fdp = luzer.FuzzedDataProvider(buf)
    local max_len = fdp:consume_integer(1, MAX_INT)
    local s = fdp:consume_string(max_len)
    -- `lax` is a boolean flag. Any integer (including 0) is truthy in Lua,
    -- so drawing an integer would only ever exercise the lax decoder.
    local lax = fdp:consume_boolean()
    os.setlocale(test_lib.random_locale(fdp), "all")
    -- utf8.codes only returns an iterator; the decoding happens while the
    -- iterator is driven, so the loop is what actually exercises the
    -- library. The iterator raises on invalid byte sequences, which is
    -- expected for random input, hence the protected call.
    -- NOTE(review): this discards every Lua-level error; narrow it to an
    -- allow-list of messages if unexpected errors should still surface.
    pcall(function()
        for _ in utf8.codes(s, lax) do
            -- Nothing to do: iterating is the test.
        end
    end)
end
33+
34+
local args = {
35+
artifact_prefix = "utf8_codes_",
36+
}
37+
luzer.Fuzz(TestOneInput, nil, args)

tests/lapi/utf8_len_test.lua

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
--[=[[
2+
SPDX-License-Identifier: ISC
3+
Copyright (c) 2023-2025, Sergey Bronnikov.
4+
5+
6.5 – UTF-8 Support
6+
https://www.lua.org/manual/5.3/manual.html#6.5
7+
8+
'utf8.codes' does not raise an error on spurious continuation bytes,
9+
https://github.com/lua/lua/commit/a1089b415a3f5c753aa1b40758ffdaf28d5701b0
10+
11+
Synopsis: utf8.len(s [, i [, j [, lax]]])
12+
]]=]
13+
14+
-- TODO: https://github.com/tst2005/lua-utf8string
15+
16+
local luzer = require("luzer")
17+
local test_lib = require("lib")
18+
local MAX_INT = test_lib.MAX_INT
19+
20+
-- The utf8 library was introduced in Lua 5.3.
21+
if test_lib.lua_version() == "LuaJIT" then
22+
os.exit()
23+
end
24+
25+
--- Fuzz target for utf8.len(s [, i [, j [, lax]]]).
-- Draws a string, two byte positions and the `lax` flag from the fuzzer
-- input, switches to a fuzzer-chosen locale and counts the characters in
-- the requested range.
-- @param buf raw fuzzer input handed over by luzer
local function TestOneInput(buf)
    local fdp = luzer.FuzzedDataProvider(buf)
    local max_len = fdp:consume_integer(1, MAX_INT)
    local s = fdp:consume_string(max_len)
    local i = fdp:consume_integer(0, MAX_INT)
    local j = fdp:consume_integer(0, MAX_INT)
    -- `lax` is a boolean flag. Any integer (including 0) is truthy in Lua,
    -- so drawing an integer would only ever exercise the lax decoder.
    local lax = fdp:consume_boolean()
    os.setlocale(test_lib.random_locale(fdp), "all")
    -- utf8.len raises when `i` or `j` lies outside the string, which is
    -- expected for randomly drawn positions, so the call is protected.
    -- NOTE(review): this discards every Lua-level error; narrow it to an
    -- allow-list of messages if unexpected errors should still surface.
    pcall(utf8.len, s, i, j, lax)
end
35+
36+
local args = {
37+
artifact_prefix = "utf8_len_",
38+
}
39+
luzer.Fuzz(TestOneInput, nil, args)

tests/lapi/utf8_offset_test.lua

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
--[[
2+
SPDX-License-Identifier: ISC
3+
Copyright (c) 2023-2025, Sergey Bronnikov.
4+
5+
6.5 – UTF-8 Support
6+
https://www.lua.org/manual/5.3/manual.html#6.5
7+
8+
'utf8.codes' does not raise an error on spurious continuation bytes,
9+
https://github.com/lua/lua/commit/a1089b415a3f5c753aa1b40758ffdaf28d5701b0
10+
11+
Synopsis: utf8.offset(s, n [, i])
12+
]]
13+
14+
-- TODO: https://github.com/tst2005/lua-utf8string
15+
16+
local luzer = require("luzer")
17+
local test_lib = require("lib")
18+
local MAX_INT = test_lib.MAX_INT
19+
local MIN_INT = test_lib.MIN_INT
20+
21+
-- The utf8 library was introduced in Lua 5.3.
22+
if test_lib.lua_version() == "LuaJIT" then
23+
os.exit()
24+
end
25+
26+
--- Fuzz target for utf8.offset(s, n [, i]).
-- Draws a string, a character count `n` and a start position `i` from the
-- fuzzer input, switches to a fuzzer-chosen locale and asks for the byte
-- offset of the n-th character.
-- @param buf raw fuzzer input handed over by luzer
local function TestOneInput(buf)
    local fdp = luzer.FuzzedDataProvider(buf)
    local max_len = fdp:consume_integer(0, MAX_INT)
    local s = fdp:consume_string(max_len)
    local n = fdp:consume_integer(MIN_INT, MAX_INT)
    local i = fdp:consume_integer(1, MAX_INT)
    os.setlocale(test_lib.random_locale(fdp), "all")
    -- utf8.offset raises when `i` lies outside the string or points at a
    -- continuation byte; both are expected for random input, so the call
    -- is protected.
    -- NOTE(review): this discards every Lua-level error; narrow it to an
    -- allow-list of messages if unexpected errors should still surface.
    pcall(utf8.offset, s, n, i)
end
35+
36+
local args = {
37+
artifact_prefix = "utf8_offset_",
38+
}
39+
luzer.Fuzz(TestOneInput, nil, args)

0 commit comments

Comments
 (0)