From e5b5d8ab9368ee3c0d072ecb5d10b9675e2aa19c Mon Sep 17 00:00:00 2001 From: Sergey Bronnikov Date: Tue, 21 Jan 2025 14:56:37 +0300 Subject: [PATCH 1/8] Fix typos in memprof doc --- doc/tooling/luajit_memprof.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/tooling/luajit_memprof.rst b/doc/tooling/luajit_memprof.rst index 87a793e9f5..73d71b82b2 100644 --- a/doc/tooling/luajit_memprof.rst +++ b/doc/tooling/luajit_memprof.rst @@ -60,7 +60,7 @@ Below is a chunk of Lua code named ``test.lua`` to illustrate this. append('q', i) ) end - local stp, err = misc.memprof.stop() + local str, err = misc.memprof.stop() The Lua code for starting the profiler -- as in line 3 in the test.lua example above -- is: @@ -81,7 +81,7 @@ The Lua code for stopping the profiler -- as in line 18 in the test.lua example .. code-block:: lua - local stp, err = misc.memprof.stop() + local str, err = misc.memprof.stop() If the operation fails, for example if there is an error when the file descriptor is being closed From 6f2116102c98152f004014f5bdc9dcc29cf2e839 Mon Sep 17 00:00:00 2001 From: Sergey Bronnikov Date: Mon, 20 Jan 2025 12:38:59 +0300 Subject: [PATCH 2/8] Fix a broken link in memprof doc --- doc/tooling/luajit_memprof.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/tooling/luajit_memprof.rst b/doc/tooling/luajit_memprof.rst index 73d71b82b2..6e548a1f2d 100644 --- a/doc/tooling/luajit_memprof.rst +++ b/doc/tooling/luajit_memprof.rst @@ -115,7 +115,7 @@ a profiling report: :ref:`note about jitoff ` for more details. * Lines 6-8: Tail call optimization doesn't create a new call frame, so all - allocations inside the function called via the ``CALLT/CALLMT`` `bytecodes `_ + allocations inside the function called via the ``CALLT/CALLMT`` `bytecodes `_ are attributed to the function's caller. See also the comments preceding these lines. * Lines 14-16: Usually the information about allocations inside Lua built‑ins From a1210441e69a2b13209b32dae7d61e48343f155d Mon Sep 17 00:00:00 2001 From: Sergey Bronnikov Date: Fri, 17 Jan 2025 15:17:39 +0300 Subject: [PATCH 3/8] LuaJIT platform profiler documentation Introduce a new document on LuaJIT platform profiler * LuaJIT platform profiler is a new feature implemented in Tarantool 2.10.0. The document describes the profiler's behavior as of this and next Tarantool versions. * The document is placed in the Tooling chapter. Closes #2587 --- doc/tooling/index.rst | 1 + doc/tooling/luajit_sysprof.rst | 320 +++++++++++++++++++++++++++++++++ 2 files changed, 321 insertions(+) create mode 100644 doc/tooling/luajit_sysprof.rst diff --git a/doc/tooling/index.rst b/doc/tooling/index.rst index b078a6a6ab..218586f315 100644 --- a/doc/tooling/index.rst +++ b/doc/tooling/index.rst @@ -11,4 +11,5 @@ to work with Tarantool. tcm/index interactive_console luajit_memprof + luajit_sysprof luajit_getmetrics diff --git a/doc/tooling/luajit_sysprof.rst b/doc/tooling/luajit_sysprof.rst new file mode 100644 index 0000000000..1037766295 --- /dev/null +++ b/doc/tooling/luajit_sysprof.rst @@ -0,0 +1,320 @@ +.. _luajit_sysprof: + +LuaJIT platform profiler +======================== + +The default profiling options for LuaJIT are not fine enough to +get an understanding of performance. For example, performance only +able to show host stack, so all the Lua calls are seen as single +``pcall()``. Oppositely, the ``jit.p`` module provided with LuaJIT +is not able to give any information about the host stack. + +Starting from version :doc:`2.10.0 `, Tarantool +has a built‑in module called ``misc.syprof`` that implements a +LuaJIT sampling profiler (which we will just call *the profiler* +in this section). The profiler is able to capture both guest and +host stacks simultaneously, along with virtual machine states, so +it can show the whole picture. + +The following profiling modes are available: + +* **Default**: only virtual machine state counters. +* **Leaf**: shows the last frame on the stack. +* **Callchain**: performs a complete stack dump. + +The profiler comes with the default parser, which produces output in +a `flamegraph.pl`-suitable format. + +.. contents:: + :local: + :depth: 2 + +.. _profiler_usage: + +Working with the profiler +------------------------- + +Usage of the profiler involves two steps: + +1. :ref:`Collecting ` a binary profile of + stacks, (further, *binary sampling profile* or *binary profile* + for short). +2. :ref:`Parsing ` the collected binary + profile to get a human-readable profiling report. + +.. _profiler_usage_get: + +Collecting binary profile +~~~~~~~~~~~~~~~~~~~~~~~~~ + +To collect a binary profile for a particular part of the Lua and C code, +you need to place this part between two ``misc.sysprof`` functions, +namely, ``misc.sysprof.start()`` and ``misc.sysprof.stop()``, and +then execute the code under Tarantool. + +Below is a chunk of Lua code named ``test.lua`` to illustrate this. + +.. _profiler_usage_example01: + +.. code-block:: lua + :linenos: + + local function payload() + local function fib(n) + if n <= 1 then + return n + end + return fib(n - 1) + fib(n - 2) + end + return fib(32) + end + + payload() + + local res, err = misc.sysprof.start({mode = 'C', interval = 1, path = 'sysprof.bin'}) + assert(res, err) + + payload() + + res, err = misc.sysprof.stop() + assert(res, err) + +The Lua code for starting the profiler -- as in line 1 in the +``test.lua`` example above -- is: + +.. code-block:: lua + + local str, err = misc.sysprof.start({mode = 'C', interval = 1, path = 'sysprof.bin'}) + +where ``mode`` is a profiling mode, ``interval`` is a sampling interval, +and ``sysprof.bin`` is the name of the binary file where +profiling events are written. + +If the operation fails, for example if it is not possible to open +a file for writing or if the profiler is already running, +``misc.sysprof.start()`` returns ``nil`` as the first result, +an error-message string as the second result, +and a system-dependent error code number as the third result. +If the operation succeeds, ``misc.sysprof.start()`` returns ``true``. + +The Lua code for stopping the profiler -- as in line 15 in the +``test.lua`` example above -- is: + +.. code-block:: lua + + local res, err = misc.sysprof.stop() + +If the operation fails, for example if there is an error when the +file descriptor is being closed or if there is a failure during +reporting, ``misc.sysprof.stop()`` returns ``nil`` as the first +result, an error-message string as the second result, +and a system-dependent error code number as the third result. +If the operation succeeds, ``misc.sysprof.stop()`` returns ``true``. + +.. _profiler_usage_generate: + +To generate the file with memory profile in binary format +(in the :ref:`test.lua code example above ` +the file name is ``sysprof.bin``), execute the code under Tarantool: + +.. code-block:: console + + $ tarantool test.lua + +Tarantool collects the allocation events in ``sysprof.bin``, puts +the file in its :ref:`working directory `, +and closes the session. + +.. _profiler_usage_parse: + +Parsing binary profile and generating profiling report +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. _profiler_usage_parse_command: + +After getting the platform profile in binary format, the next step is +to parse it to get a human-readable profiling report. You can do this +via Tarantool by using the following command +(mind the hyphen ``-`` before the filename): + +.. code-block:: console + + $ tarantool -e 'require("sysprof")(arg)' - sysprof.bin > tmp + $ curl -O https://raw.githubusercontent.com/brendangregg/FlameGraph/refs/heads/master/flamegraph.pl + $ perl flamegraph.pl tmp > sysprof.svg + +where ``sysprof.bin`` is the binary profile +:ref:`generated earlier ` by ``tarantool test.lua``. +(Warning: there is a slight behavior change here, the ``tarantool -e ...`` +command was slightly different in Tarantool versions prior to Tarantool 2.8.1.) +Resulted SVG image contains a flamegraph with collected stacks and can be opened +by modern web-browser for analysis. + +As for investigating the Lua code with the help of profiling reports, +it is always code-dependent and there can't be hundred per cent definite +recommendations in this regard. Nevertheless, you can see some of the things +in the :ref:`Profiling report analysis example ` later. + +.. _profiler_api: + +The Lua API +~~~~~~~~~~~ + +* ``misc.sysprof.start(opts)`` +* ``misc.sysprof.stop()`` +* ``misc.sysprof.report()`` + +First two functions return boolean ``res`` and ``err``, which is +``nil`` on success and contains an error message on failure. + +``misc.sysprof.report`` returns a Lua table containing the +following counters: + +.. code-block:: console + + { + "samples" = int, + "INTERP" = int, + "LFUNC" = int, + "FFUNC" = int, + "CFUNC" = int, + "GC" = int, + "EXIT" = int, + "RECORD" = int, + "OPT" = int, + "ASM" = int, + "TRACE" = int + } + +Parameter opts for the ``misc.sysprof.start`` can contain the +following parameters: + +.. code-block:: console + + { + mode = 'D'/'L'/'C', -- 'D' = DEFAULT, 'L' = LEAF, 'C' = CALLGRAPH + interval = 10, -- sampling interval in msec. + path = '/path/to/file' -- location to store profile data. + } + +Mode MUST be provided always, interval and path are optional. +The default interval is 10 msec, default path is ``sysprof.bin``. + +The C API +~~~~~~~~~ + +The platform profiler provides a low-level C interface: + +.. code-block:: console + + int luaM_sysprof_set_writer(sp_writer writer). Sets writer function for sysprof. + + int luaM_sysprof_set_on_stop(sp_on_stop on_stop). Sets on stop callback for sysprof to clear resources. + + int luaM_sysprof_set_backtracer(sp_backtracer backtracer). Sets backtracking function. If backtracer arg is NULL, the default backtracer is set. + There is no need to call the configuration functions multiple times, if you are starting and stopping profiler several times in a single program. Also, it is not necessary to configure sysprof for the default mode, however, one MUST configure it for the other modes. + + int luaM_sysprof_start(lua_State *L, const struct luam_Sysprof_Options *opt) + + int luaM_sysprof_stop(lua_State *L) + + int luaM_sysprof_report(struct luam_Sysprof_Counters *counters). Writes profiling counters for each vmstate. + +All of the functions return 0 on success and an error code on failure. + +The configuration C types are: + +.. code-block:: console + + /* Profiler configurations. */ + /* + ** Writer function for profile events. Must be async-safe, see also + ** `man 7 signal-safety`. + ** Should return amount of written bytes on success or zero in case of error. + ** Setting *data to NULL means end of profiling. + ** For details see . + */ + + typedef size_t (*sp_writer)(const void **data, size_t len, void *ctx); + /* + ** Callback on profiler stopping. Required for correctly cleaning + ** at VM finalization when profiler is still running. + ** Returns zero on success. + */ + typedef int (*sp_on_stop)(void *ctx, uint8_t *buf); + /* + ** Backtracing function for the host stack. Should call `frame_writer` on + ** each frame in the stack in the order from the stack top to the stack + ** bottom. The `frame_writer` function is implemented inside the sysprof + ** and will be passed to the `backtracer` function. If `frame_writer` returns + ** NULL, backtracing should be stopped. If `frame_writer` returns not NULL, + ** the backtracing should be continued if there are frames left. + */ + typedef void (*sp_backtracer)(void *(*frame_writer)(int frame_no, void *addr)); + +Profiler options are the following: + +.. code-block:: console + + struct luam_Sysprof_Options { + /* Profiling mode. */ + uint8_t mode; + /* Sampling interval in msec. */ + uint64_t interval; + /* Custom buffer to write data. */ + uint8_t *buf; + /* The buffer's size. */ + size_t len; + /* Context for the profile writer and final callback. */ + void *ctx; + }; + +Profiling modes: + +.. code-block:: console + + /* + ** DEFAULT mode collects only data for luam_sysprof_counters, which is stored + ** in memory and can be collected with luaM_sysprof_report after profiler + ** stops. + */ + #define LUAM_SYSPROF_DEFAULT 0 + /* + ** LEAF mode = DEFAULT + streams samples with only top frames of host and + ** guests stacks in format described in + */ + #define LUAM_SYSPROF_LEAF 1 + /* + ** CALLGRAPH mode = DEFAULT + streams samples with full callchains of host + ** and guest stacks in format described in + */ + #define LUAM_SYSPROF_CALLGRAPH 2 + +Counters structure for the luaM_Sysprof_Report: + +.. code-block:: console + + struct luam_Sysprof_Counters { + uint64_t vmst_interp; + uint64_t vmst_lfunc; + uint64_t vmst_ffunc; + uint64_t vmst_cfunc; + uint64_t vmst_gc; + uint64_t vmst_exit; + uint64_t vmst_record; + uint64_t vmst_opt; + uint64_t vmst_asm; + uint64_t vmst_trace; + /* + ** XXX: Order of vmst counters is important: it should be the same as the + ** order of the vmstates. + */ + uint64_t samples; + }; + +Caveats: + +* Providing writers, backtracers, etc; in the Default mode is pointless, since + it just collect counters. +* There is NO default configuration for sysprof, so the ``luaM_Sysprof_Configure`` + must be called before the first run of the sysprof. Mind the async-safety. From 0173beffce0cab0e937387ca74d1692c7602d578 Mon Sep 17 00:00:00 2001 From: lenkis Date: Tue, 4 Feb 2025 20:55:08 +0300 Subject: [PATCH 4/8] Review by doc team --- doc/tooling/luajit_memprof.rst | 27 ++-- doc/tooling/luajit_sysprof.rst | 273 +++++++++++++++++++-------------- 2 files changed, 172 insertions(+), 128 deletions(-) diff --git a/doc/tooling/luajit_memprof.rst b/doc/tooling/luajit_memprof.rst index 6e548a1f2d..617f46a8e1 100644 --- a/doc/tooling/luajit_memprof.rst +++ b/doc/tooling/luajit_memprof.rst @@ -62,7 +62,7 @@ Below is a chunk of Lua code named ``test.lua`` to illustrate this. end local str, err = misc.memprof.stop() -The Lua code for starting the profiler -- as in line 3 in the test.lua example above -- is: +The Lua code for starting the profiler -- as in line 3 in the ``test.lua`` example above -- is: .. code-block:: lua @@ -77,14 +77,14 @@ an error-message string as the second result, and a system-dependent error code number as the third result. If the operation succeeds, ``misc.memprof.start()`` returns ``true``. -The Lua code for stopping the profiler -- as in line 18 in the test.lua example above -- is: +The Lua code for stopping the profiler -- as in line 18 in the ``test.lua`` example above -- is: .. code-block:: lua local str, err = misc.memprof.stop() If the operation fails, -for example if there is an error when the file descriptor is being closed +for example if there is an error when the file descriptor is being closed or if there is a failure during reporting, ``misc.memprof.stop()`` returns ``nil`` as the first result, an error-message string as the second result, @@ -203,21 +203,20 @@ An event record has the following format: .. code-block:: text @:: events + bytes - bytes - +where: -* —a name of the file containing Lua code. -* —the line number where the event is detected. -* —a number of events for this code line. -* + bytes—amount of memory allocated during all the events on this line. -* - bytes—amount of memory freed during all the events on this line. +* ```` -— a name of the file containing Lua code. +* ```` -— the line number where the event is detected. +* ```` —- a number of events for this code line. +* ``+ bytes`` —- amount of memory allocated during all the events on this line. +* ``- bytes`` —- amount of memory freed during all the events on this line. The ``Overrides`` label shows what allocation has been overridden. See the :ref:`test.lua chunk above ` with the explanation in the comments for some examples. - .. _profiler_usage_internal_jitoff: The ``INTERNAL`` label indicates that this event is caused by internal LuaJIT @@ -423,9 +422,9 @@ you will get the following profiling report: Reasonable questions regarding the report can be: -* Why are there no allocations related to the ``concat()`` function? -* Why is the number of allocations not a round number? -* Why are there about 20K allocations instead of 10K? +* Why are there no allocations related to the ``concat()`` function? +* Why is the number of allocations not a round number? +* Why are there about 20K allocations instead of 10K? First of all, LuaJIT doesn't create a new string if the string with the same payload exists (see details on `lua-users.org/wiki `_). @@ -494,8 +493,6 @@ allocations are JIT-related (see also the related @format_concat.lua:0 holds 640 bytes: 4 allocs, 0 frees INTERNAL holds 360 bytes: 2 allocs, 1 frees - - This happens because a trace has been compiled after 56 iterations (the default value of the ``hotloop`` compiler parameter). Then, the JIT-compiler removed the unused variable ``c`` from the trace, and, therefore, diff --git a/doc/tooling/luajit_sysprof.rst b/doc/tooling/luajit_sysprof.rst index 1037766295..1f7fce6d85 100644 --- a/doc/tooling/luajit_sysprof.rst +++ b/doc/tooling/luajit_sysprof.rst @@ -4,26 +4,26 @@ LuaJIT platform profiler ======================== The default profiling options for LuaJIT are not fine enough to -get an understanding of performance. For example, performance only -able to show host stack, so all the Lua calls are seen as single +get an understanding of performance. For example, ``perf`` is only +able to show the host stack, so all the Lua calls are displayed as a single ``pcall()``. Oppositely, the ``jit.p`` module provided with LuaJIT is not able to give any information about the host stack. Starting from version :doc:`2.10.0 `, Tarantool -has a built‑in module called ``misc.syprof`` that implements a +has a built‑in module called ``misc.sysprof`` that implements a LuaJIT sampling profiler (which we will just call *the profiler* in this section). The profiler is able to capture both guest and host stacks simultaneously, along with virtual machine states, so it can show the whole picture. -The following profiling modes are available: +Three profiling modes are available: -* **Default**: only virtual machine state counters. -* **Leaf**: shows the last frame on the stack. -* **Callchain**: performs a complete stack dump. +* **Default**: shows only virtual machine state counters. +* **Leaf**: shows the last frame on the stack. +* **Callchain**: performs a complete stack dump. -The profiler comes with the default parser, which produces output in -a `flamegraph.pl`-suitable format. +The profiler comes with a default parser, which produces output in +a ``flamegraph.pl``-suitable format. .. contents:: :local: @@ -34,23 +34,23 @@ a `flamegraph.pl`-suitable format. Working with the profiler ------------------------- -Usage of the profiler involves two steps: +The profiler usage involves two steps: 1. :ref:`Collecting ` a binary profile of - stacks, (further, *binary sampling profile* or *binary profile* + stacks (further referred as *binary sampling profile* or *binary profile* for short). 2. :ref:`Parsing ` the collected binary profile to get a human-readable profiling report. .. _profiler_usage_get: -Collecting binary profile -~~~~~~~~~~~~~~~~~~~~~~~~~ +Collecting a binary profile +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ To collect a binary profile for a particular part of the Lua and C code, -you need to place this part between two ``misc.sysprof`` functions, -namely, ``misc.sysprof.start()`` and ``misc.sysprof.stop()``, and -then execute the code under Tarantool. +you need to place this part between two ``misc.sysprof`` functions -- +namely, ``misc.sysprof.start()`` and ``misc.sysprof.stop()`` -- and +then execute the code in Tarantool. Below is a chunk of Lua code named ``test.lua`` to illustrate this. @@ -86,15 +86,17 @@ The Lua code for starting the profiler -- as in line 1 in the local str, err = misc.sysprof.start({mode = 'C', interval = 1, path = 'sysprof.bin'}) -where ``mode`` is a profiling mode, ``interval`` is a sampling interval, -and ``sysprof.bin`` is the name of the binary file where -profiling events are written. +where: +* ``mode`` is a profiling mode, +* ``interval`` is a sampling interval, +* ``sysprof.bin`` is the name of the binary file where profiling events are written. If the operation fails, for example if it is not possible to open a file for writing or if the profiler is already running, ``misc.sysprof.start()`` returns ``nil`` as the first result, an error-message string as the second result, and a system-dependent error code number as the third result. + If the operation succeeds, ``misc.sysprof.start()`` returns ``true``. The Lua code for stopping the profiler -- as in line 15 in the @@ -104,37 +106,38 @@ The Lua code for stopping the profiler -- as in line 15 in the local res, err = misc.sysprof.stop() -If the operation fails, for example if there is an error when the +If the operation fails, for example if there is an error when the file descriptor is being closed or if there is a failure during reporting, ``misc.sysprof.stop()`` returns ``nil`` as the first result, an error-message string as the second result, and a system-dependent error code number as the third result. + If the operation succeeds, ``misc.sysprof.stop()`` returns ``true``. .. _profiler_usage_generate: -To generate the file with memory profile in binary format +To generate a file with the memory profile in the binary format (in the :ref:`test.lua code example above ` -the file name is ``sysprof.bin``), execute the code under Tarantool: +the file name is ``sysprof.bin``), execute the code in Tarantool: .. code-block:: console $ tarantool test.lua -Tarantool collects the allocation events in ``sysprof.bin``, puts +Tarantool collects allocation events in ``sysprof.bin``, puts the file in its :ref:`working directory `, and closes the session. .. _profiler_usage_parse: -Parsing binary profile and generating profiling report -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Parsing a binary profile and generating a profiling report +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. _profiler_usage_parse_command: -After getting the platform profile in binary format, the next step is +After getting the platform profile in the binary format, the next step is to parse it to get a human-readable profiling report. You can do this -via Tarantool by using the following command +via Tarantool with the following command (mind the hyphen ``-`` before the filename): .. code-block:: console @@ -145,26 +148,31 @@ via Tarantool by using the following command where ``sysprof.bin`` is the binary profile :ref:`generated earlier ` by ``tarantool test.lua``. -(Warning: there is a slight behavior change here, the ``tarantool -e ...`` -command was slightly different in Tarantool versions prior to Tarantool 2.8.1.) -Resulted SVG image contains a flamegraph with collected stacks and can be opened -by modern web-browser for analysis. + +.. note:: + + There is a slight behavior change here: the ``tarantool -e ...`` command + was slightly different in Tarantool versions prior to Tarantool 2.8.1. + The resulting SVG image contains a flamegraph with collected stacks + and can be opened by a modern web-browser for analysis. As for investigating the Lua code with the help of profiling reports, -it is always code-dependent and there can't be hundred per cent definite -recommendations in this regard. Nevertheless, you can see some of the things -in the :ref:`Profiling report analysis example ` later. +it is always code-dependent and there are no definite recommendations +in this regard. Nevertheless, you can see some of the things +in the :ref:`Profiling report analysis example ` below. + +.. _profiler_lua_api: -.. _profiler_api: +Profiler Lua API +---------------- -The Lua API -~~~~~~~~~~~ +The platform profiler provides a Lua interface: -* ``misc.sysprof.start(opts)`` -* ``misc.sysprof.stop()`` -* ``misc.sysprof.report()`` +* ``misc.sysprof.start(opts)`` +* ``misc.sysprof.stop()`` +* ``misc.sysprof.report()`` -First two functions return boolean ``res`` and ``err``, which is +The first two functions return boolean ``res`` and ``err``, which is ``nil`` on success and contains an error message on failure. ``misc.sysprof.report`` returns a Lua table containing the @@ -186,111 +194,145 @@ following counters: "TRACE" = int } -Parameter opts for the ``misc.sysprof.start`` can contain the +The ``opts`` argument of ``misc.sysprof.start`` can contain the following parameters: -.. code-block:: console +* ``mode`` (required) -- one of the supported profiling modes: - { - mode = 'D'/'L'/'C', -- 'D' = DEFAULT, 'L' = LEAF, 'C' = CALLGRAPH - interval = 10, -- sampling interval in msec. - path = '/path/to/file' -- location to store profile data. - } + * ``'D'`` = DEFAULT + * ``'L'`` = LEAF + * ``'C'`` = CALLGRAPH + +* ``interval`` (optional) -- sampling interval in msec (default is 10 msec). +* ``path`` (optional) -- path to a file to store profile data + (default is ``sysprof.bin``). -Mode MUST be provided always, interval and path are optional. -The default interval is 10 msec, default path is ``sysprof.bin``. +.. _profiler_c_api: -The C API -~~~~~~~~~ +Profiler C API +-------------- The platform profiler provides a low-level C interface: -.. code-block:: console +* ``int luaM_sysprof_set_writer(sp_writer writer)`` -- sets a writer function + for sysprof. + +* ``int luaM_sysprof_set_on_stop(sp_on_stop on_stop)`` -- sets + an on-stop callback for ``sysprof`` to clear resources. - int luaM_sysprof_set_writer(sp_writer writer). Sets writer function for sysprof. +* ``int luaM_sysprof_set_backtracer(sp_backtracer backtracer)`` -- sets + a backtracing function. If the ``backtracer`` argument is NULL, + the default backtracer is set. - int luaM_sysprof_set_on_stop(sp_on_stop on_stop). Sets on stop callback for sysprof to clear resources. +.. note:: - int luaM_sysprof_set_backtracer(sp_backtracer backtracer). Sets backtracking function. If backtracer arg is NULL, the default backtracer is set. - There is no need to call the configuration functions multiple times, if you are starting and stopping profiler several times in a single program. Also, it is not necessary to configure sysprof for the default mode, however, one MUST configure it for the other modes. + There is no need to call the configuration functions multiple times + if you are starting and stopping the profiler several times + in a single program. - int luaM_sysprof_start(lua_State *L, const struct luam_Sysprof_Options *opt) + Also, it is not necessary to configure ``sysprof`` for the ``Default`` mode. + However, you MUST configure it for other modes. - int luaM_sysprof_stop(lua_State *L) +* ``int luaM_sysprof_start(lua_State *L, const struct luam_Sysprof_Options *opt)`` -- + see :ref:`Profiler options `. - int luaM_sysprof_report(struct luam_Sysprof_Counters *counters). Writes profiling counters for each vmstate. +* ``int luaM_sysprof_stop(lua_State *L)`` + +* ``int luaM_sysprof_report(struct luam_Sysprof_Counters *counters)`` -- writes + :ref:`profiling counters ` for each vmstate. All of the functions return 0 on success and an error code on failure. -The configuration C types are: +.. _profiler_c_config_types: -.. code-block:: console +Configuration C types +~~~~~~~~~~~~~~~~~~~~~ + +Profiler configuration settings include: + +* ``typedef size_t (*sp_writer)(const void **data, size_t len, void *ctx)`` -- + a writer function for profile events. + + Must be async-safe, see also ``man 7 signal-safety``. + + Should return the amount of written bytes on success, or zero in case of error. + + Setting ``*data`` to NULL means end of profiling. + For details see ``lj_wbuf.h``. + +* ``typedef int (*sp_on_stop)(void *ctx, uint8_t *buf)`` -- a callback + on profiler stopping. Required for a correct cleanup at VM finalization + when the profiler is still running. + + Returns zero on success. + +* ``typedef void (*sp_backtracer)(void *(*frame_writer)(int frame_no, void *addr))`` -- + a backtracing function for the host stack. + Should call ``frame_writer`` on each frame in the stack, in the order + from the stack top to the stack bottom. - /* Profiler configurations. */ - /* - ** Writer function for profile events. Must be async-safe, see also - ** `man 7 signal-safety`. - ** Should return amount of written bytes on success or zero in case of error. - ** Setting *data to NULL means end of profiling. - ** For details see . - */ - - typedef size_t (*sp_writer)(const void **data, size_t len, void *ctx); - /* - ** Callback on profiler stopping. Required for correctly cleaning - ** at VM finalization when profiler is still running. - ** Returns zero on success. - */ - typedef int (*sp_on_stop)(void *ctx, uint8_t *buf); - /* - ** Backtracing function for the host stack. Should call `frame_writer` on - ** each frame in the stack in the order from the stack top to the stack - ** bottom. The `frame_writer` function is implemented inside the sysprof - ** and will be passed to the `backtracer` function. If `frame_writer` returns - ** NULL, backtracing should be stopped. If `frame_writer` returns not NULL, - ** the backtracing should be continued if there are frames left. - */ - typedef void (*sp_backtracer)(void *(*frame_writer)(int frame_no, void *addr)); - -Profiler options are the following: + The ``frame_writer`` function is implemented inside ``sysprof`` + and will be passed to the ``backtracer`` function. + + If ``frame_writer`` returns NULL, backtracing should be stopped. + If ``frame_writer`` returns not NULL, the backtracing should be continued + if there are frames left. + +.. _profiler_c_options: + +Profiler options +~~~~~~~~~~~~~~~~ + +The options structure for ``luaM_sysprof_start`` is as follows: .. code-block:: console struct luam_Sysprof_Options { /* Profiling mode. */ uint8_t mode; + /* Sampling interval in msec. */ uint64_t interval; + /* Custom buffer to write data. */ uint8_t *buf; + /* The buffer's size. */ size_t len; + /* Context for the profile writer and final callback. */ void *ctx; }; -Profiling modes: +.. _profiler_c_modes: + +Profiling modes +~~~~~~~~~~~~~~~ + +The platform profiler supports three profiling modes: + +* ``DEFAULT`` mode collects only data for ``luam_sysprof_counters``, + which is stored in memory and can be collected with ``luaM_sysprof_report`` + after the profiler stops. + +* ``LEAF`` mode = ``DEFAULT`` + streams samples with only top frames of the host + and guests stacks in the format described in ``lj_sysprof.h``. + +* ``CALLGRAPH`` mode = DEFAULT + streams samples with full callchains of the host + and guest stacks in the format described in ``lj_sysprof.h``. .. code-block:: console - /* - ** DEFAULT mode collects only data for luam_sysprof_counters, which is stored - ** in memory and can be collected with luaM_sysprof_report after profiler - ** stops. - */ #define LUAM_SYSPROF_DEFAULT 0 - /* - ** LEAF mode = DEFAULT + streams samples with only top frames of host and - ** guests stacks in format described in - */ #define LUAM_SYSPROF_LEAF 1 - /* - ** CALLGRAPH mode = DEFAULT + streams samples with full callchains of host - ** and guest stacks in format described in - */ #define LUAM_SYSPROF_CALLGRAPH 2 -Counters structure for the luaM_Sysprof_Report: +.. _profiler_c_counters: + +Profiling counters +~~~~~~~~~~~~~~~~~~ + +The counters structure for ``luaM_sysprof_report`` is as follows: .. code-block:: console @@ -305,16 +347,21 @@ Counters structure for the luaM_Sysprof_Report: uint64_t vmst_opt; uint64_t vmst_asm; uint64_t vmst_trace; - /* - ** XXX: Order of vmst counters is important: it should be the same as the - ** order of the vmstates. - */ + uint64_t samples; }; -Caveats: +.. note:: + + The order of ``vmst_*`` counters is important: it should be the same as + the order of the vmstates. + +.. _profiler_caveats: + +Caveats +~~~~~~~ -* Providing writers, backtracers, etc; in the Default mode is pointless, since - it just collect counters. -* There is NO default configuration for sysprof, so the ``luaM_Sysprof_Configure`` - must be called before the first run of the sysprof. Mind the async-safety. +* Providing writers, backtracers and other settings in the ``Default`` mode + is pointless, since it only collects counters. +* There is NO default configuration for ``sysprof``, so ``luaM_Sysprof_Configure`` + must be called before the first run of ``sysprof``. Mind the async safety. From 0e623f280500bcc8ec3e4ad164a0d54a2c70bef1 Mon Sep 17 00:00:00 2001 From: lenkis Date: Wed, 5 Feb 2025 11:56:50 +0300 Subject: [PATCH 5/8] Review by doc team: some more text brushup --- doc/tooling/luajit_memprof.rst | 58 +++++++++++++++++++--------------- doc/tooling/luajit_sysprof.rst | 13 +++++--- 2 files changed, 40 insertions(+), 31 deletions(-) diff --git a/doc/tooling/luajit_memprof.rst b/doc/tooling/luajit_memprof.rst index 617f46a8e1..fb89ab06df 100644 --- a/doc/tooling/luajit_memprof.rst +++ b/doc/tooling/luajit_memprof.rst @@ -3,11 +3,13 @@ LuaJIT memory profiler ====================== -Starting from version :doc:`2.7.1 `, Tarantool +Since version :doc:`2.7.1 `, Tarantool has a built‑in module called ``misc.memprof`` that implements a LuaJIT memory -profiler (which we will just call *the profiler* in this section). The profiler provides -a memory allocation report that helps analyze Lua code and find the places -that put the most pressure on the Lua garbage collector (GC). +profiler (further in this section we call it *the profiler* for short). +The profiler provides a memory allocation report that helps analyze Lua code +and find the places that put the most pressure on the Lua garbage collector (GC). + +Inside this section: .. contents:: :local: @@ -18,7 +20,7 @@ that put the most pressure on the Lua garbage collector (GC). Working with the profiler ------------------------- -Usage of the profiler involves two steps: +The profiler usage involves two steps: 1. :ref:`Collecting ` a binary profile of allocations, reallocations, and deallocations in memory related to Lua @@ -28,13 +30,13 @@ Usage of the profiler involves two steps: .. _profiler_usage_get: -Collecting binary profile -~~~~~~~~~~~~~~~~~~~~~~~~~ +Collecting a binary profile +~~~~~~~~~~~~~~~~~~~~~~~~~~~ To collect a binary profile for a particular part of the Lua code, you need to place this part between two ``misc.memprof`` functions, namely, ``misc.memprof.start()`` and ``misc.memprof.stop()``, and then execute -the code under Tarantool. +the code in Tarantool. Below is a chunk of Lua code named ``test.lua`` to illustrate this. @@ -75,6 +77,7 @@ for example if it is not possible to open a file for writing or if the profiler ``misc.memprof.start()`` returns ``nil`` as the first result, an error-message string as the second result, and a system-dependent error code number as the third result. + If the operation succeeds, ``misc.memprof.start()`` returns ``true``. The Lua code for stopping the profiler -- as in line 18 in the ``test.lua`` example above -- is: @@ -89,13 +92,14 @@ or if there is a failure during reporting, ``misc.memprof.stop()`` returns ``nil`` as the first result, an error-message string as the second result, and a system-dependent error code number as the third result. + If the operation succeeds, ``misc.memprof.stop()`` returns ``true``. .. _profiler_usage_generate: To generate the file with memory profile in binary format (in the :ref:`test.lua code example above ` -the file name is ``memprof_new.bin``), execute the code under Tarantool: +the file name is ``memprof_new.bin``), execute the code in Tarantool: .. code-block:: console @@ -127,8 +131,8 @@ a profiling report: .. _profiler_usage_parse: -Parsing binary profile and generating profiling report -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Parsing a binary profile and generating a profiling report +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. _profiler_usage_parse_command: @@ -143,8 +147,11 @@ via Tarantool by using the following command where ``memprof_new.bin`` is the binary profile :ref:`generated earlier ` by ``tarantool test.lua``. -(Warning: there is a slight behavior change here, the ``tarantool -e ...`` -command was slightly different in Tarantool versions prior to Tarantool 2.8.1.) + +.. note:: + + There is a slight behavior change here: the ``tarantool -e ...`` command + was slightly different in Tarantool versions prior to Tarantool 2.8.1. Tarantool generates a profiling report and displays it on the console before closing the session: @@ -187,7 +194,6 @@ the session: On macOS, a report will be different for the same chunk of code because Tarantool and LuaJIT are built with the GC64 mode enabled for macOS. - Let's examine the report structure. A report has four sections: * ALLOCATIONS @@ -255,10 +261,10 @@ structures. As for investigating the Lua code with the help of profiling reports, it is always code-dependent and there can't be hundred per cent definite recommendations in this regard. Nevertheless, you can see some of the things -in the :ref:`Profiling report analysis example ` later. +in the :ref:`Profiling a report analysis example ` later. Also, below is the :ref:`FAQ ` section with the questions that -most probably can arise while using profiler. +most probably can arise while using the profiler. .. _profiler_faq: @@ -342,18 +348,18 @@ console for a running instance. log.warn("end of profile") end) -where +where: -* ``FILENAME``—the name of the binary file where profiling events are written -* ``TIME``—duration of profiling, in seconds. +* ``FILENAME``—the name of the binary file where profiling events are written +* ``TIME``—duration of profiling, in seconds. Also, you can directly call ``misc.memprof.start()`` and ``misc.memprof.stop()`` from a console. .. _profiler_analysis: -Profiling report analysis example ---------------------------------- +Profiling a report analysis example +----------------------------------- In the example below, the following Lua code named ``format_concat.lua`` is investigated with the help of the memory profiler reports. @@ -395,7 +401,7 @@ investigated with the help of the memory profiler reports. os.exit() -When you run this code :ref:`under Tarantool ` and +When you run this code :ref:`in Tarantool ` and then :ref:`parse ` the binary memory profile in /tmp/memprof_format_concat.bin, you will get the following profiling report: @@ -468,8 +474,8 @@ To profile only the ``concat()`` function, comment out line 23 (which is **Q**: But what will change if JIT compilation is enabled? **A**: In the :ref:`code `, comment out line 2 (which is -``jit.off()``) and run -the profiler . Now there are only 56 allocations in the report, and all other +``jit.off()``) and run the profiler. +Now there are only 56 allocations in the report, and all the other allocations are JIT-related (see also the related `dev issue `_): @@ -535,7 +541,7 @@ identifier is not yet compiled via LuaJIT. So, a trace can't be recorded and the compiler doesn't perform the corresponding optimizations. If we change the ``format()`` function in lines 9-12 of the -:ref:`Profiling report analysis example ` +:ref:`Profiling a report analysis example ` in the following way .. code-block:: lua @@ -591,4 +597,4 @@ for example $ tarantool -e 'require("memprof")(arg)' - --leak-only memprof_new.bin -When `--leak-only`` is used, only the HEAP SUMMARY section is displayed. +When ``--leak-only`` is used, only the HEAP SUMMARY section is displayed. diff --git a/doc/tooling/luajit_sysprof.rst b/doc/tooling/luajit_sysprof.rst index 1f7fce6d85..9f2f18e70c 100644 --- a/doc/tooling/luajit_sysprof.rst +++ b/doc/tooling/luajit_sysprof.rst @@ -9,10 +9,10 @@ able to show the host stack, so all the Lua calls are displayed as a single ``pcall()``. Oppositely, the ``jit.p`` module provided with LuaJIT is not able to give any information about the host stack. -Starting from version :doc:`2.10.0 `, Tarantool +Since version :doc:`2.10.0 `, Tarantool has a built‑in module called ``misc.sysprof`` that implements a -LuaJIT sampling profiler (which we will just call *the profiler* -in this section). The profiler is able to capture both guest and +LuaJIT sampling profiler (further in this section we call it *the profiler* +for short). The profiler can capture both guest and host stacks simultaneously, along with virtual machine states, so it can show the whole picture. @@ -25,6 +25,8 @@ Three profiling modes are available: The profiler comes with a default parser, which produces output in a ``flamegraph.pl``-suitable format. +Inside this section: + .. contents:: :local: :depth: 2 @@ -87,8 +89,9 @@ The Lua code for starting the profiler -- as in line 1 in the local str, err = misc.sysprof.start({mode = 'C', interval = 1, path = 'sysprof.bin'}) where: -* ``mode`` is a profiling mode, -* ``interval`` is a sampling interval, + +* ``mode`` is a profiling mode; +* ``interval`` is a sampling interval; * ``sysprof.bin`` is the name of the binary file where profiling events are written. If the operation fails, for example if it is not possible to open From 80bd48f6538b5ce5fafee6551448fa12217502d8 Mon Sep 17 00:00:00 2001 From: lenkis Date: Wed, 5 Feb 2025 12:45:52 +0300 Subject: [PATCH 6/8] Fix line numbers in Lua code example --- doc/tooling/luajit_memprof.rst | 1 - doc/tooling/luajit_sysprof.rst | 1 - 2 files changed, 2 deletions(-) diff --git a/doc/tooling/luajit_memprof.rst b/doc/tooling/luajit_memprof.rst index fb89ab06df..d277a6fa66 100644 --- a/doc/tooling/luajit_memprof.rst +++ b/doc/tooling/luajit_memprof.rst @@ -43,7 +43,6 @@ Below is a chunk of Lua code named ``test.lua`` to illustrate this. .. _profiler_usage_example01: .. code-block:: lua - :linenos: -- Prevent allocations on traces. jit.off() diff --git a/doc/tooling/luajit_sysprof.rst b/doc/tooling/luajit_sysprof.rst index 9f2f18e70c..8fa1231ea9 100644 --- a/doc/tooling/luajit_sysprof.rst +++ b/doc/tooling/luajit_sysprof.rst @@ -59,7 +59,6 @@ Below is a chunk of Lua code named ``test.lua`` to illustrate this. .. _profiler_usage_example01: .. code-block:: lua - :linenos: local function payload() local function fib(n) From 64d2c9a073748b5d5f2ca18e94f272da91344b7b Mon Sep 17 00:00:00 2001 From: lenkis Date: Wed, 5 Feb 2025 12:57:22 +0300 Subject: [PATCH 7/8] Fix line numbers in Lua code example - 2 --- doc/tooling/luajit_memprof.rst | 1 + doc/tooling/luajit_sysprof.rst | 29 +++++++++++++++-------------- 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/doc/tooling/luajit_memprof.rst b/doc/tooling/luajit_memprof.rst index d277a6fa66..fb89ab06df 100644 --- a/doc/tooling/luajit_memprof.rst +++ b/doc/tooling/luajit_memprof.rst @@ -43,6 +43,7 @@ Below is a chunk of Lua code named ``test.lua`` to illustrate this. .. _profiler_usage_example01: .. code-block:: lua + :linenos: -- Prevent allocations on traces. jit.off() diff --git a/doc/tooling/luajit_sysprof.rst b/doc/tooling/luajit_sysprof.rst index 8fa1231ea9..495581f9e3 100644 --- a/doc/tooling/luajit_sysprof.rst +++ b/doc/tooling/luajit_sysprof.rst @@ -59,26 +59,27 @@ Below is a chunk of Lua code named ``test.lua`` to illustrate this. .. _profiler_usage_example01: .. code-block:: lua - - local function payload() - local function fib(n) - if n <= 1 then - return n + :linenos: + + local function payload() + local function fib(n) + if n <= 1 then + return n + end + return fib(n - 1) + fib(n - 2) end - return fib(n - 1) + fib(n - 2) + return fib(32) end - return fib(32) - end - payload() + payload() - local res, err = misc.sysprof.start({mode = 'C', interval = 1, path = 'sysprof.bin'}) - assert(res, err) + local res, err = misc.sysprof.start({mode = 'C', interval = 1, path = 'sysprof.bin'}) + assert(res, err) - payload() + payload() - res, err = misc.sysprof.stop() - assert(res, err) + res, err = misc.sysprof.stop() + assert(res, err) The Lua code for starting the profiler -- as in line 1 in the ``test.lua`` example above -- is: From ca88d619453528e9b669c9ffdf6046ae7e61efc5 Mon Sep 17 00:00:00 2001 From: lenkis Date: Wed, 5 Feb 2025 13:12:49 +0300 Subject: [PATCH 8/8] Fix line numbers in Lua code example - 3 --- doc/tooling/luajit_memprof.rst | 116 ++++++++++++++++----------------- 1 file changed, 58 insertions(+), 58 deletions(-) diff --git a/doc/tooling/luajit_memprof.rst b/doc/tooling/luajit_memprof.rst index fb89ab06df..cb8e76601b 100644 --- a/doc/tooling/luajit_memprof.rst +++ b/doc/tooling/luajit_memprof.rst @@ -45,24 +45,24 @@ Below is a chunk of Lua code named ``test.lua`` to illustrate this. .. code-block:: lua :linenos: - -- Prevent allocations on traces. - jit.off() - local str, err = misc.memprof.start("memprof_new.bin") - -- Lua doesn't create a new frame to call string.rep, and all allocations - -- are attributed not to the append() function but to the parent scope. - local function append(str, rep) - return string.rep(str, rep) - end + -- Prevent allocations on traces. + jit.off() + local str, err = misc.memprof.start("memprof_new.bin") + -- Lua doesn't create a new frame to call string.rep, and all allocations + -- are attributed not to the append() function but to the parent scope. + local function append(str, rep) + return string.rep(str, rep) + end - local t = {} - for i = 1, 1e4 do - -- table.insert is the built-in function and all corresponding - -- allocations are reported in the scope of the main chunk. - table.insert(t, - append('q', i) - ) - end - local str, err = misc.memprof.stop() + local t = {} + for i = 1, 1e4 do + -- table.insert is the built-in function and all corresponding + -- allocations are reported in the scope of the main chunk. + table.insert(t, + append('q', i) + ) + end + local str, err = misc.memprof.stop() The Lua code for starting the profiler -- as in line 3 in the ``test.lua`` example above -- is: @@ -322,31 +322,31 @@ console for a running instance. .. code-block:: lua :linenos: - local fiber = require "fiber" - local log = require "log" + local fiber = require "fiber" + local log = require "log" - fiber.create(function() - fiber.name("memprof") + fiber.create(function() + fiber.name("memprof") - collectgarbage() -- Collect all objects already dead - log.warn("start of profile") + collectgarbage() -- Collect all objects already dead + log.warn("start of profile") - local st, err = misc.memprof.start(FILENAME) - if not st then - log.error("failed to start profiler: %s", err) - end + local st, err = misc.memprof.start(FILENAME) + if not st then + log.error("failed to start profiler: %s", err) + end - fiber.sleep(TIME) + fiber.sleep(TIME) - collectgarbage() - st, err = misc.memprof.stop() + collectgarbage() + st, err = misc.memprof.stop() - if not st then - log.error("profiler on stop error: %s", err) - end + if not st then + log.error("profiler on stop error: %s", err) + end - log.warn("end of profile") - end) + log.warn("end of profile") + end) where: @@ -369,37 +369,37 @@ investigated with the help of the memory profiler reports. .. code-block:: lua :linenos: - -- Prevent allocations on new traces. - jit.off() + -- Prevent allocations on new traces. + jit.off() - local function concat(a) - local nstr = a.."a" - return nstr - end + local function concat(a) + local nstr = a.."a" + return nstr + end - local function format(a) - local nstr = string.format("%sa", a) - return nstr - end + local function format(a) + local nstr = string.format("%sa", a) + return nstr + end - collectgarbage() + collectgarbage() - local binfile = "/tmp/memprof_"..(arg[0]):match("([^/]*).lua")..".bin" + local binfile = "/tmp/memprof_"..(arg[0]):match("([^/]*).lua")..".bin" - local st, err = misc.memprof.start(binfile) - assert(st, err) + local st, err = misc.memprof.start(binfile) + assert(st, err) - -- Payload. - for i = 1, 10000 do - local f = format(i) - local c = concat(i) - end - collectgarbage() + -- Payload. + for i = 1, 10000 do + local f = format(i) + local c = concat(i) + end + collectgarbage() - local st, err = misc.memprof.stop() - assert(st, err) + local st, err = misc.memprof.stop() + assert(st, err) - os.exit() + os.exit() When you run this code :ref:`in Tarantool ` and then :ref:`parse ` the binary memory profile