From 0837c064f7785e5b74dab434ce4d66d49668db57 Mon Sep 17 00:00:00 2001
From: Sergey Bronnikov
Date: Wed, 26 Mar 2025 14:06:34 +0300
Subject: [PATCH] tests/lapi: add string.buffer tests

The patch adds tests for LuaJIT's string buffer library [1].

Note that, as stated in the documentation [1], this serialization
format is designed for internal use by LuaJIT applications, and it is
explicitly not intended to be a 'public standard' for structured data
interchange across computer languages (like JSON or MessagePack). The
purpose of the proposed tests is to test the library, because other
LuaJIT components rely on it; the proposed tests also indirectly test
the FFI library.

1. https://luajit.org/ext_buffer.html
---
 tests/lapi/string_buffer_encode_test.lua  |  52 ++++
 tests/lapi/string_buffer_torture_test.lua | 288 ++++++++++++++++++++++
 2 files changed, 340 insertions(+)
 create mode 100644 tests/lapi/string_buffer_encode_test.lua
 create mode 100644 tests/lapi/string_buffer_torture_test.lua

diff --git a/tests/lapi/string_buffer_encode_test.lua b/tests/lapi/string_buffer_encode_test.lua
new file mode 100644
index 0000000..3aac0e8
--- /dev/null
+++ b/tests/lapi/string_buffer_encode_test.lua
@@ -0,0 +1,52 @@
+--[[
+SPDX-License-Identifier: ISC
+Copyright (c) 2023-2025, Sergey Bronnikov.
+
+String Buffer Library,
+https://luajit.org/ext_buffer.html
+
+ITERN deoptimization might skip elements,
+https://github.com/LuaJIT/LuaJIT/issues/727
+
+buffer.decode() may produce ill-formed cdata resulting in invalid memory accesses,
+https://github.com/LuaJIT/LuaJIT/issues/795
+
+Add missing GC steps to string buffer methods,
+https://github.com/LuaJIT/LuaJIT/commit/9c3df68a
+
+Fix string buffer method recording,
+https://github.com/LuaJIT/LuaJIT/commit/bfd07653
+]]
+
+local luzer = require("luzer")
+local test_lib = require("lib")
+
+-- LuaJIT only.
+if test_lib.lua_version() ~= "LuaJIT" then
+    print("Unsupported version.")
+    os.exit(0)
+end
+
+local string_buf = require("string.buffer")
+
+-- Decodes fuzzer-provided bytes; on success re-encodes the object
+-- and checks that it decodes again to a value of the same type.
+local function TestOneInput(buf, _size)
+    local fdp = luzer.FuzzedDataProvider(buf)
+    local obj = fdp:consume_string(test_lib.MAX_STR_LEN)
+    local MAX_SIZE = 1000
+    local b = string_buf.new(fdp:consume_integer(0, MAX_SIZE))
+    -- pcall() returns the status first; malformed input throws.
+    local ok, decoded = pcall(b.decode, b:put(obj))
+    if ok then
+        -- No byte comparison: the input need not be canonical.
+        local ok2, again = pcall(b.decode, b:reset():encode(decoded))
+        assert(ok2 and type(again) == type(decoded), tostring(again))
+    end
+    b:free()
+end
+
+local args = {
+    artifact_prefix = "string_buffer_encode_",
+}
+luzer.Fuzz(TestOneInput, nil, args)
diff --git a/tests/lapi/string_buffer_torture_test.lua b/tests/lapi/string_buffer_torture_test.lua
new file mode 100644
index 0000000..70724b3
--- /dev/null
+++ b/tests/lapi/string_buffer_torture_test.lua
@@ -0,0 +1,288 @@
+--[[
+SPDX-License-Identifier: ISC
+Copyright (c) 2023-2025, Sergey Bronnikov.
+
+String Buffer Library,
+https://luajit.org/ext_buffer.html
+
+Recording of buffer:set can anchor wrong object,
+https://github.com/LuaJIT/LuaJIT/issues/1125
+
+String buffer methods may be called one extra time after loop,
+https://github.com/LuaJIT/LuaJIT/issues/755
+
+Traceexit in recff_buffer_method_put and recff_buffer_method_get
+might redo work, https://github.com/LuaJIT/LuaJIT/issues/798
+
+Invalid bufput_bufstr fold over lj_serialize_encode,
+https://github.com/LuaJIT/LuaJIT/issues/799
+
+COW buffer might not copy,
+https://github.com/LuaJIT/LuaJIT/issues/816
+
+String buffer API,
+https://github.com/LuaJIT/LuaJIT/issues/14
+
+Add missing GC steps to string buffer methods,
+https://github.com/LuaJIT/LuaJIT/commit/9c3df68a
+]]
+
+local luzer = require("luzer")
+local test_lib = require("lib")
+
+-- LuaJIT only.
+if test_lib.lua_version() ~= "LuaJIT" then
+    print("Unsupported version.")
+    os.exit(0)
+end
+
+local ffi = require("ffi")
+local string_buf = require("string.buffer")
+local unpack = unpack or table.unpack
+
+-- Kinds of serialized objects (presumably); currently unused.
+local formats = { -- luacheck: no unused
+    "complex",
+    "false",
+    "int",
+    "int64",
+    "lightud32",
+    "lightud64",
+    "nil",
+    "null",
+    "num",
+    "string",
+    "tab",
+    "tab_mt",
+    "true",
+    "uint64",
+}
+
+-- Empties the buffer. The allocated buffer space is kept and may
+-- be reused.
+-- Usage: buf = buf:reset()
+local function buffer_reset(self)
+    self.buf:reset()
+end
+
+-- Appends formatted arguments to the buffer; the format string
+-- accepts the same options as `string.format()`.
+-- Usage: buf = buf:putf(format, ...)
+local function buffer_putf(self) -- luacheck: no unused
+    local MAX_LEN = 1000
+    local payload = self.fdp:consume_string(MAX_LEN)
+    self.buf:putf("%s", payload)
+end
+
+-- Appends `len` bytes from the memory that the FFI cdata object
+-- points to. The object needs to be convertible to a (constant)
+-- pointer.
+-- Usage: buf = buf:putcdata(cdata, len)
+local function buffer_putcdata(self) -- luacheck: no unused
+    local bytes = ffi.new("uint8_t[?]", 1)
+    self.buf:putcdata(bytes, ffi.sizeof(bytes))
+end
+
+-- Consumes a string or an FFI cdata object as a buffer without
+-- copying it: a reference to the passed string `str` or cdata
+-- object is stored and any buffer space originally allocated is
+-- freed. Unlike the buf:put*() methods, this does not append.
+local function buffer_set(self)
+    local MAX_LEN = 1000
+    local payload = self.fdp:consume_string(MAX_LEN)
+    self.buf:set(payload)
+end
+
+-- Appends a string str, a number num or any object obj with
+-- a `__tostring` metamethod to the buffer. Multiple arguments are
+-- appended in the given order. Appending a buffer to a buffer is
+-- possible and short-circuited internally. But it still involves
+-- a copy.
+-- Better combine the buffer writes to use a single buffer.
+-- Usage: buf = buf:put([str | num | obj] [, ...])
+local function buffer_put(self)
+    local obj_type = self.fdp:oneof({ "string", "number" })
+    local MAX_COUNT, MAX_N = 10, 1000
+    local count = self.fdp:consume_integer(0, MAX_COUNT)
+    local objects
+    if obj_type == "string" then
+        objects = self.fdp:consume_strings(MAX_N, count)
+    else
+        objects = self.fdp:consume_numbers(
+            test_lib.MIN_INT64, test_lib.MAX_INT64, count)
+    end
+    -- put() returns the buffer object itself (not an FFI cdata).
+    local buf = self.buf:put(unpack(objects))
+    assert(rawequal(buf, self.buf))
+end
+
+-- Consumes the buffer data and returns one or more strings. If
+-- called without arguments, the whole buffer data is consumed.
+-- If called with a number, up to len bytes are consumed. A `nil`
+-- argument consumes the remaining buffer space (this only makes
+-- sense as the last argument). Multiple arguments consume the
+-- buffer data in the given order.
+-- Note: a zero length or no remaining buffer data returns an
+-- empty string and not nil.
+-- Usage: str, ... = buf:get([ len|nil ] [,...])
+local function buffer_get(self)
+    local MAX_N = 1000
+    local str = self.buf:get(self.fdp:consume_integer(0, MAX_N))
+    assert(type(str) == "string")
+end
+
+local function buffer_tostring(self)
+    local str = self.buf:tostring()
+    assert(type(str) == "string")
+end
+
+-- The commit method appends the `used` bytes of the previously
+-- returned write space to the buffer data.
+-- Usage: buf = buf:commit(used)
+local function buffer_commit(self)
+    local MAX_N = 1000
+    local used = self.fdp:consume_integer(0, MAX_N)
+    -- Throws when `used` exceeds the available write space.
+    local ok, res = pcall(self.buf.commit, self.buf, used)
+    assert(not ok or rawequal(res, self.buf))
+end
+
+-- The reserve method reserves at least `size` bytes of write
+-- space in the buffer. It returns an `uint8_t *` FFI cdata
+-- pointer `ptr` that points to this space.
+-- The space returned by `buf:reserve()` starts at the returned
+-- pointer and ends before len bytes after that.
+-- Usage: ptr, len = buf:reserve(size)
+local function buffer_reserve(self)
+    -- Bounded like the other sizes; a huge request may not fit.
+    local MAX_N = 1000
+    local size = self.fdp:consume_integer(0, MAX_N)
+    local ptr, len = self.buf:reserve(size)
+    assert(type(ptr) == "cdata" and len >= size)
+end
+
+-- Skips (consumes) `len` bytes from the buffer up to the current
+-- length of the buffer data.
+-- Usage: buf = buf:skip(len)
+local function buffer_skip(self)
+    local len = self.fdp:consume_integer(0, test_lib.MAX_INT)
+    local buf = self.buf:skip(len)
+    assert(rawequal(buf, self.buf)) -- the buffer itself is returned
+end
+
+-- Returns an uint8_t * FFI cdata pointer ptr that points to the
+-- buffer data. The length of the buffer data in bytes is returned
+-- in `len`. The space returned by `buf:ref()` starts at the
+-- returned pointer and ends before len bytes after that.
+-- Synopsis: ptr, len = buf:ref()
+local function buffer_ref(self)
+    local ptr, len = self.buf:ref()
+    assert(type(ptr) == "cdata" and type(len) == "number")
+end
+
+-- Returns the current length of the buffer data in bytes.
+local function buffer_len(self)
+    local len = #self.buf
+    assert(type(len) == "number")
+end
+
+-- The Lua concatenation operator `..` also accepts buffers, just
+-- like strings or numbers. It always returns a string and not
+-- a buffer.
+local function buffer_concat(self)
+    local MAX_N = 1000
+    local str = self.fdp:consume_string(MAX_N)
+    assert(type(self.buf .. str) == "string")
+end
+
+-- Serializes (encodes) the Lua object `obj`. The stand-alone
+-- function returns a string `str`. The buffer method appends the
+-- encoding to the buffer. `obj` can be any of the supported Lua
+-- types - it doesn't need to be a Lua table.
+-- This function may throw an error when attempting to serialize
+-- unsupported object types, circular references or deeply nested
+-- tables.
+-- Usage:
+--   str = buffer.encode(obj)
+--   buf = buf:encode(obj)
+local function buffer_encode(self)
+    local MAX_N = 1000
+    local obj = { self.fdp:consume_string(MAX_N) }
+    -- The method needs an object and returns the buffer itself.
+    assert(rawequal(self.buf:encode(obj), self.buf))
+end
+
+-- The stand-alone function deserializes (decodes) the string
+-- `str`, the buffer method deserializes one object from the
+-- buffer. Both return a Lua object `obj`.
+-- The returned object may be any of the supported Lua types -
+-- even nil. This function may throw an error when fed with
+-- malformed or incomplete encoded data. The stand-alone function
+-- throws when there's left-over data after decoding a single
+-- top-level object. The buffer method leaves any left-over data
+-- in the buffer.
+-- Attempting to deserialize an FFI type will throw an error, if
+-- the FFI library is not built-in or has not been loaded, yet.
+-- Usage:
+--   obj = buffer.decode(str)
+--   obj = buf:decode()
+local function buffer_decode(self)
+    -- The buffer holds arbitrary data here, so decoding may throw.
+    pcall(self.buf.decode, self.buf)
+end
+
+-- The buffer space of the buffer object is freed. The object
+-- itself remains intact, empty and may be reused.
+local function buffer_free(self)
+    self.buf:free()
+    assert(#self.buf == 0)
+end
+
+-- Buffer operations that buffer_random_op() chooses from.
+local buffer_methods = {
+    buffer_commit, buffer_concat, buffer_decode, buffer_encode,
+    buffer_get, buffer_len, buffer_put, buffer_putcdata, buffer_putf,
+    buffer_ref, buffer_reserve, buffer_reset, buffer_set, buffer_skip,
+    buffer_tostring,
+}
+
+-- Runs one operation chosen by the fuzzer data on the buffer.
+local function buffer_random_op(self)
+    local buffer_method = self.fdp:oneof(buffer_methods)
+    buffer_method(self)
+end
+
+-- Creates the object under test: a string buffer, the fuzzer data
+-- provider and the methods called by TestOneInput().
+local function buffer_new(fdp)
+    -- buffer.new() takes an integer size; the bound keeps the
+    -- initial allocation small.
+    local MAX_SIZE = 1000
+    local buf_size = fdp:consume_integer(1, MAX_SIZE)
+    local b = string_buf.new(buf_size)
+    return {
+        buf = b,
+        fdp = fdp,
+        free = buffer_free,
+        random_operation = buffer_random_op,
+    }
+end
+
+-- Fuzzing entry point: applies a bounded number of random
+-- operations to a fresh buffer and frees it afterwards.
+local function TestOneInput(buf, _size)
+    local fdp = luzer.FuzzedDataProvider(buf)
+    -- The loop has no early exit, so bound the operation count.
+    local MAX_OPS = 1000
+    local nops = fdp:consume_integer(1, MAX_OPS)
+    local b = buffer_new(fdp)
+    for _ = 1, nops do
+        b:random_operation()
+    end
+    b:free()
+end
+
+local args = {
+    artifact_prefix = "string_buffer_torture_",
+}
+luzer.Fuzz(TestOneInput, nil, args)