diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index edb7c8701b..ba1358926a 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -47,7 +47,7 @@ jobs: - uses: actions/checkout@v6 - name: Set up dependencies run: | - apk add --update build-base libunwind-dev lz4-dev musl-dev python3-dev python3-dbg gdb lldb git bash perl perl-datetime build-base perl-app-cpanminus + apk add --update build-base lz4-dev musl-dev python3-dev python3-dbg gdb lldb git bash perl perl-datetime build-base perl-app-cpanminus cpanm Date::Parse cpanm Capture::Tiny # Build elfutils @@ -59,6 +59,14 @@ jobs: cd elfutils-$VERS CFLAGS='-Wno-error -DFNM_EXTMATCH=0 -g -O3' CXXFLAGS='-Wno-error -DFNM_EXTMATCH=0 -g -O3' ./configure --enable-libdebuginfod --disable-debuginfod --disable-nls --with-zstd make install + # Build libunwind from source + cd / + LIBUNWIND_VERS=1.8.3 + curl -LO https://github.com/libunwind/libunwind/releases/download/v$LIBUNWIND_VERS/libunwind-$LIBUNWIND_VERS.tar.gz + tar xf libunwind-$LIBUNWIND_VERS.tar.gz + cd libunwind-$LIBUNWIND_VERS + ./configure --disable-minidebuginfo --prefix=/usr + make install - name: Create virtual environment run: | python3 -m venv /venv diff --git a/.github/workflows/test_uv_python.yml b/.github/workflows/test_uv_python.yml index fa20eb71e4..c63a79c966 100644 --- a/.github/workflows/test_uv_python.yml +++ b/.github/workflows/test_uv_python.yml @@ -34,11 +34,22 @@ jobs: sudo apt-get install -qy \ pkg-config \ libdebuginfod-dev \ - libunwind-dev \ liblz4-dev \ gdb \ npm + - name: Build libunwind from source + run: | + cd /tmp + LIBUNWIND_VERS=1.8.3 + curl -LO https://github.com/libunwind/libunwind/releases/download/v$LIBUNWIND_VERS/libunwind-$LIBUNWIND_VERS.tar.gz + tar xf libunwind-$LIBUNWIND_VERS.tar.gz + cd libunwind-$LIBUNWIND_VERS + ./configure --disable-minidebuginfo --prefix=/usr/local + make + sudo make install + sudo ldconfig + - name: Install Python dependencies run: | uv pip install --upgrade pip cython pkgconfig diff --git a/.gitignore b/.gitignore index 32fc3e5815..30f8046ae5 100644 --- a/.gitignore +++ b/.gitignore @@ -179,3 +179,6 @@ src/vendor/libbacktrace/install # pytest-textual-snapshot snapshot_report.html + +# Object files +*.o diff --git a/package-lock.json b/package-lock.json index 1e63c010c8..800dd7f886 100644 --- a/package-lock.json +++ b/package-lock.json @@ -65,6 +65,7 @@ "resolved": "https://registry.npmjs.org/@babel/core/-/core-7.17.9.tgz", "integrity": "sha512-5ug+SfZCpDAkVp9SFIZAzlW18rlzsOcJGaetCjkySnrXXDUw9AR8cDUm1iByTmdWM6yxX6/zycaV76w3YTF2gw==", "dev": true, + "peer": true, "dependencies": { "@ampproject/remapping": "^2.1.0", "@babel/code-frame": "^7.16.7", @@ -2743,6 +2744,7 @@ "resolved": "https://registry.npmjs.org/acorn/-/acorn-8.8.2.tgz", "integrity": "sha512-xjIYgE8HBrkpd/sJqOGNspf8uHG+NOHGOw6a/Urj8taM2EXfdNAH2oFcPeIFfsv3+kz/mJrS5VuMqbNLjCa2vw==", "dev": true, + "peer": true, "bin": { "acorn": "bin/acorn" }, @@ -2807,6 +2809,7 @@ "resolved": "https://registry.npmjs.org/ajv/-/ajv-6.12.6.tgz", "integrity": "sha512-j3fVLgvTo527anyYyJOGTYJbG+vnnQYvE0m5mmkc1TK+nxAppkCLMIL0aZ4dblVCNoGShhm+kzE4ZUykBoMg4g==", "dev": true, + "peer": true, "dependencies": { "fast-deep-equal": "^3.1.1", "fast-json-stable-stringify": "^2.0.0", @@ -3208,6 +3211,7 @@ "url": "https://github.com/sponsors/ai" } ], + "peer": true, "dependencies": { "caniuse-lite": "^1.0.30001646", "electron-to-chromium": "^1.5.4", @@ -3470,6 +3474,7 @@ "integrity": 
"sha512-B/gBuNg5SiMTrPkC+A2+cW0RszwxYmn6VYxB/inlBStS5nx6xHIt/ehKRhIMhqusl7a8LjQoZnjCs5vhwxOQ1g==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "fast-deep-equal": "^3.1.3", "fast-uri": "^3.0.1", @@ -3660,6 +3665,7 @@ "version": "3.0.0", "resolved": "https://registry.npmjs.org/d3-selection/-/d3-selection-3.0.0.tgz", "integrity": "sha512-fmTRWbNMmsmWq6xJV8D19U/gw/bwrHfNXxrIN+HfZgnzqTHp9jOmKMhsTUjXOJnZOdZY9Q28y4yebKzqDKlxlQ==", + "peer": true, "engines": { "node": ">=12" } @@ -3838,6 +3844,7 @@ "version": "3.0.0", "resolved": "https://registry.npmjs.org/d3-selection/-/d3-selection-3.0.0.tgz", "integrity": "sha512-fmTRWbNMmsmWq6xJV8D19U/gw/bwrHfNXxrIN+HfZgnzqTHp9jOmKMhsTUjXOJnZOdZY9Q28y4yebKzqDKlxlQ==", + "peer": true, "engines": { "node": ">=12" } @@ -4049,6 +4056,7 @@ "version": "3.0.0", "resolved": "https://registry.npmjs.org/d3-selection/-/d3-selection-3.0.0.tgz", "integrity": "sha512-fmTRWbNMmsmWq6xJV8D19U/gw/bwrHfNXxrIN+HfZgnzqTHp9jOmKMhsTUjXOJnZOdZY9Q28y4yebKzqDKlxlQ==", + "peer": true, "engines": { "node": ">=12" } @@ -4075,6 +4083,7 @@ "version": "3.0.0", "resolved": "https://registry.npmjs.org/d3-selection/-/d3-selection-3.0.0.tgz", "integrity": "sha512-fmTRWbNMmsmWq6xJV8D19U/gw/bwrHfNXxrIN+HfZgnzqTHp9jOmKMhsTUjXOJnZOdZY9Q28y4yebKzqDKlxlQ==", + "peer": true, "engines": { "node": ">=12" } @@ -8540,6 +8549,7 @@ "resolved": "https://registry.npmjs.org/webpack/-/webpack-5.94.0.tgz", "integrity": "sha512-KcsGn50VT+06JH/iunZJedYGUJS5FGjow8wb9c0v5n1Om8O1g4L6LjtfxwlXIATopoQu+vOXXa7gYisWxCoPyg==", "dev": true, + "peer": true, "dependencies": { "@types/estree": "^1.0.5", "@webassemblyjs/ast": "^1.12.1", @@ -8586,6 +8596,7 @@ "resolved": "https://registry.npmjs.org/webpack-cli/-/webpack-cli-4.9.2.tgz", "integrity": "sha512-m3/AACnBBzK/kMTcxWHcZFPrw/eQuY4Df1TxvIWfWM2x7mRqBQCqKEd96oCUa9jkapLBaFfRce33eGDb4Pr7YQ==", "dev": true, + "peer": true, "dependencies": { "@discoveryjs/json-ext": "^0.5.0", "@webpack-cli/configtest": "^1.1.1", @@ -8883,6 +8894,7 @@ "resolved": "https://registry.npmjs.org/@babel/core/-/core-7.17.9.tgz", "integrity": "sha512-5ug+SfZCpDAkVp9SFIZAzlW18rlzsOcJGaetCjkySnrXXDUw9AR8cDUm1iByTmdWM6yxX6/zycaV76w3YTF2gw==", "dev": true, + "peer": true, "requires": { "@ampproject/remapping": "^2.1.0", "@babel/code-frame": "^7.16.7", @@ -10878,7 +10890,8 @@ "version": "8.8.2", "resolved": "https://registry.npmjs.org/acorn/-/acorn-8.8.2.tgz", "integrity": "sha512-xjIYgE8HBrkpd/sJqOGNspf8uHG+NOHGOw6a/Urj8taM2EXfdNAH2oFcPeIFfsv3+kz/mJrS5VuMqbNLjCa2vw==", - "dev": true + "dev": true, + "peer": true }, "acorn-globals": { "version": "6.0.0", @@ -10925,6 +10938,7 @@ "resolved": "https://registry.npmjs.org/ajv/-/ajv-6.12.6.tgz", "integrity": "sha512-j3fVLgvTo527anyYyJOGTYJbG+vnnQYvE0m5mmkc1TK+nxAppkCLMIL0aZ4dblVCNoGShhm+kzE4ZUykBoMg4g==", "dev": true, + "peer": true, "requires": { "fast-deep-equal": "^3.1.1", "fast-json-stable-stringify": "^2.0.0", @@ -11220,6 +11234,7 @@ "resolved": "https://registry.npmjs.org/browserslist/-/browserslist-4.23.3.tgz", "integrity": "sha512-btwCFJVjI4YWDNfau8RhZ+B1Q/VLoUITrm3RlP6y1tYGWIOa+InuYiRGXUBXo8nA1qKmHMyLB/iVQg5TT4eFoA==", "dev": true, + "peer": true, "requires": { "caniuse-lite": "^1.0.30001646", "electron-to-chromium": "^1.5.4", @@ -11419,6 +11434,7 @@ "resolved": "https://registry.npmjs.org/ajv/-/ajv-8.17.1.tgz", "integrity": "sha512-B/gBuNg5SiMTrPkC+A2+cW0RszwxYmn6VYxB/inlBStS5nx6xHIt/ehKRhIMhqusl7a8LjQoZnjCs5vhwxOQ1g==", "dev": true, + "peer": true, "requires": { "fast-deep-equal": "^3.1.3", "fast-uri": "^3.0.1", @@ -11547,7 +11563,8 
@@ "d3-selection": { "version": "3.0.0", "resolved": "https://registry.npmjs.org/d3-selection/-/d3-selection-3.0.0.tgz", - "integrity": "sha512-fmTRWbNMmsmWq6xJV8D19U/gw/bwrHfNXxrIN+HfZgnzqTHp9jOmKMhsTUjXOJnZOdZY9Q28y4yebKzqDKlxlQ==" + "integrity": "sha512-fmTRWbNMmsmWq6xJV8D19U/gw/bwrHfNXxrIN+HfZgnzqTHp9jOmKMhsTUjXOJnZOdZY9Q28y4yebKzqDKlxlQ==", + "peer": true }, "d3-transition": { "version": "3.0.1", @@ -11591,7 +11608,8 @@ "d3-selection": { "version": "3.0.0", "resolved": "https://registry.npmjs.org/d3-selection/-/d3-selection-3.0.0.tgz", - "integrity": "sha512-fmTRWbNMmsmWq6xJV8D19U/gw/bwrHfNXxrIN+HfZgnzqTHp9jOmKMhsTUjXOJnZOdZY9Q28y4yebKzqDKlxlQ==" + "integrity": "sha512-fmTRWbNMmsmWq6xJV8D19U/gw/bwrHfNXxrIN+HfZgnzqTHp9jOmKMhsTUjXOJnZOdZY9Q28y4yebKzqDKlxlQ==", + "peer": true }, "d3-transition": { "version": "3.0.1", @@ -11718,7 +11736,8 @@ "d3-selection": { "version": "3.0.0", "resolved": "https://registry.npmjs.org/d3-selection/-/d3-selection-3.0.0.tgz", - "integrity": "sha512-fmTRWbNMmsmWq6xJV8D19U/gw/bwrHfNXxrIN+HfZgnzqTHp9jOmKMhsTUjXOJnZOdZY9Q28y4yebKzqDKlxlQ==" + "integrity": "sha512-fmTRWbNMmsmWq6xJV8D19U/gw/bwrHfNXxrIN+HfZgnzqTHp9jOmKMhsTUjXOJnZOdZY9Q28y4yebKzqDKlxlQ==", + "peer": true }, "d3-transition": { "version": "3.0.1", @@ -11869,7 +11888,8 @@ "d3-selection": { "version": "3.0.0", "resolved": "https://registry.npmjs.org/d3-selection/-/d3-selection-3.0.0.tgz", - "integrity": "sha512-fmTRWbNMmsmWq6xJV8D19U/gw/bwrHfNXxrIN+HfZgnzqTHp9jOmKMhsTUjXOJnZOdZY9Q28y4yebKzqDKlxlQ==" + "integrity": "sha512-fmTRWbNMmsmWq6xJV8D19U/gw/bwrHfNXxrIN+HfZgnzqTHp9jOmKMhsTUjXOJnZOdZY9Q28y4yebKzqDKlxlQ==", + "peer": true }, "d3-transition": { "version": "3.0.1", @@ -15161,6 +15181,7 @@ "resolved": "https://registry.npmjs.org/webpack/-/webpack-5.94.0.tgz", "integrity": "sha512-KcsGn50VT+06JH/iunZJedYGUJS5FGjow8wb9c0v5n1Om8O1g4L6LjtfxwlXIATopoQu+vOXXa7gYisWxCoPyg==", "dev": true, + "peer": true, "requires": { "@types/estree": "^1.0.5", "@webassemblyjs/ast": "^1.12.1", @@ -15192,6 +15213,7 @@ "resolved": "https://registry.npmjs.org/webpack-cli/-/webpack-cli-4.9.2.tgz", "integrity": "sha512-m3/AACnBBzK/kMTcxWHcZFPrw/eQuY4Df1TxvIWfWM2x7mRqBQCqKEd96oCUa9jkapLBaFfRce33eGDb4Pr7YQ==", "dev": true, + "peer": true, "requires": { "@discoveryjs/json-ext": "^0.5.0", "@webpack-cli/configtest": "^1.1.1", diff --git a/pyproject.toml b/pyproject.toml index 5078f5d77d..a7f4bfbe91 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -99,8 +99,14 @@ before-all = [ "CFLAGS='-Wno-error -g -O3' CXXFLAGS='-Wno-error -g -O3' LDFLAGS=-lrt ./configure --enable-libdebuginfod --disable-debuginfod --disable-nls --with-zstd", "make install", - # Install Memray's other build and test dependencies - "yum install -y libunwind-devel", + # Build libunwind from source + "cd /", + "LIBUNWIND_VERS=1.8.3", + "/usr/bin/curl -LO https://github.com/libunwind/libunwind/releases/download/v$LIBUNWIND_VERS/libunwind-$LIBUNWIND_VERS.tar.gz", + "tar xf libunwind-$LIBUNWIND_VERS.tar.gz", + "cd libunwind-$LIBUNWIND_VERS", + "./configure --disable-minidebuginfo", + "make install", ] [tool.cibuildwheel.macos] @@ -138,6 +144,9 @@ show_missing = true # Override the default linux before-all for musl linux [[tool.cibuildwheel.overrides]] select = "*-musllinux*" +# Exclude libgcc_s from bundling - it contains _Unwind_* symbols and having +# multiple copies (bundled + system) breaks C++ exception handling. 
+repair-wheel-command = "auditwheel repair --exclude libgcc_s.so.1 -w {dest_dir} {wheel}"
 before-all = [
     # Remove gettext-dev, which conflicts with musl-libintl, a build
     # dependency of elfutils.
@@ -161,5 +170,14 @@ before-all = [
     "apk add libintl",
 
     # Install Memray's other build and test dependencies
-    "apk add --update libunwind-dev lz4-dev"
+    "apk add --update lz4-dev",
+
+    # Build libunwind from source
+    "cd /",
+    "LIBUNWIND_VERS=1.8.3",
+    "curl -LO https://github.com/libunwind/libunwind/releases/download/v$LIBUNWIND_VERS/libunwind-$LIBUNWIND_VERS.tar.gz",
+    "tar xf libunwind-$LIBUNWIND_VERS.tar.gz",
+    "cd libunwind-$LIBUNWIND_VERS",
+    "./configure --disable-minidebuginfo",
+    "make install",
 ]
diff --git a/setup.py b/setup.py
index c57166c910..fb66b9438f 100644
--- a/setup.py
+++ b/setup.py
@@ -2,6 +2,7 @@
 import distutils.log
 import os
 import pathlib
+import platform as platform_module
 import subprocess
 import sys
 import tempfile
@@ -39,8 +40,29 @@ class BuildMemray(build_ext_orig):
     def run(self):
         self.build_js_files()
         self.build_libbacktrace()
+        self.build_ghost_stack_asm()
         super().run()
 
+    def build_ghost_stack_asm(self):
+        """Compile ghost_stack assembly files to object files."""
+        if not GHOST_STACK_ASM_FILES:
+            return
+
+        for asm_file in GHOST_STACK_ASM_FILES:
+            asm_path = pathlib.Path(asm_file)
+            obj_path = GHOST_STACK_LOCATION / "src" / (asm_path.stem + ".o")
+
+            if obj_path.exists():
+                continue
+
+            self.announce(
+                f"Compiling assembly file: {asm_file}",
+                level=distutils.log.INFO,
+            )
+            self.announce_and_run(
+                ["cc", "-c", str(asm_path), "-o", str(obj_path)],
+            )
+
     def announce_and_run(self, command, **kwargs):
         self.announce(
             "executing command: `{}`".format(" ".join(command)),
@@ -217,6 +239,47 @@ def build_js_files(self):
 BINARY_FORMATS = {"darwin": "macho", "linux": "elf"}
 BINARY_FORMAT = BINARY_FORMATS.get(sys.platform, "elf")
 
+# Ghost Stack configuration for fast native unwinding
+GHOST_STACK_LOCATION = (
+    pathlib.Path(__file__).parent / "src" / "memray" / "_memray" / "ghost_stack"
+).resolve()
+
+ARCH = platform_module.machine()
+if ARCH == "x86_64":
+    GHOST_STACK_ARCH = "x86_64"
+elif ARCH in ("aarch64", "arm64"):
+    GHOST_STACK_ARCH = "aarch64"
+else:
+    GHOST_STACK_ARCH = None
+
+# Ghost stack sources (cpp only) and assembly files (compiled separately)
+GHOST_STACK_SOURCES = []
+GHOST_STACK_ASM_FILES = []
+GHOST_STACK_OBJECTS = []
+if IS_LINUX and GHOST_STACK_ARCH:
+    GHOST_STACK_SOURCES = [
+        "src/memray/_memray/ghost_stack/src/ghost_stack.cpp",
+    ]
+    GHOST_STACK_ASM_FILES = [
+        f"src/memray/_memray/ghost_stack/src/{GHOST_STACK_ARCH}_linux_trampoline.s",
+    ]
+    GHOST_STACK_OBJECTS = [
+        str(GHOST_STACK_LOCATION / "src" / f"{GHOST_STACK_ARCH}_linux_trampoline.o"),
+    ]
+elif IS_MAC and GHOST_STACK_ARCH == "aarch64":
+    GHOST_STACK_SOURCES = [
+        "src/memray/_memray/ghost_stack/src/ghost_stack.cpp",
+    ]
+    GHOST_STACK_ASM_FILES = [
+        f"src/memray/_memray/ghost_stack/src/{GHOST_STACK_ARCH}_darwin_trampoline.s",
+    ]
+    GHOST_STACK_OBJECTS = [
+        str(GHOST_STACK_LOCATION / "src" / f"{GHOST_STACK_ARCH}_darwin_trampoline.o"),
+    ]
+
+if GHOST_STACK_SOURCES:
+    DEFINE_MACROS.append(("MEMRAY_HAS_GHOST_STACK", "1"))
+
 library_flags = {"libraries": ["lz4"]}
 if IS_LINUX:
     library_flags["libraries"].append("unwind")
@@ -251,17 +314,22 @@
         "src/memray/_memray/snapshot.cpp",
         "src/memray/_memray/socket_reader_thread.cpp",
         "src/memray/_memray/native_resolver.cpp",
+        *GHOST_STACK_SOURCES,
     ],
     language="c++",
    extra_compile_args=["-std=c++17", "-Wall",
*EXTRA_COMPILE_ARGS], - extra_objects=[str(LIBBACKTRACE_LIBDIR / "libbacktrace.a")], + extra_objects=[str(LIBBACKTRACE_LIBDIR / "libbacktrace.a"), *GHOST_STACK_OBJECTS], extra_link_args=["-std=c++17", *EXTRA_LINK_ARGS], define_macros=DEFINE_MACROS, undef_macros=UNDEF_MACROS, **library_flags, ) -MEMRAY_EXTENSION.include_dirs[:0] = ["src", str(LIBBACKTRACE_INCLUDEDIRS)] +MEMRAY_EXTENSION.include_dirs[:0] = [ + "src", + str(LIBBACKTRACE_INCLUDEDIRS), + str(GHOST_STACK_LOCATION / "include"), +] MEMRAY_EXTENSION.libraries.append("dl") @@ -269,14 +337,25 @@ def build_js_files(self): name="memray._test_utils", sources=[ "src/memray/_memray_test_utils.pyx", + "src/memray/_memray/ghost_stack_test_utils.cpp", + *GHOST_STACK_SOURCES, ], language="c++", extra_compile_args=["-std=c++17", "-Wall", *EXTRA_COMPILE_ARGS], extra_link_args=["-std=c++17", *EXTRA_LINK_ARGS], + extra_objects=[*GHOST_STACK_OBJECTS], define_macros=DEFINE_MACROS, undef_macros=UNDEF_MACROS, ) +MEMRAY_TEST_EXTENSION.include_dirs = [ + "src", + str(GHOST_STACK_LOCATION / "include"), +] + +if IS_LINUX: + MEMRAY_TEST_EXTENSION.libraries = ["unwind"] + MEMRAY_INJECT_EXTENSION = Extension( name="memray._inject", sources=[ diff --git a/src/memray/_memray.pyi b/src/memray/_memray.pyi index e25e04c7e2..e3e671a488 100644 --- a/src/memray/_memray.pyi +++ b/src/memray/_memray.pyi @@ -242,6 +242,7 @@ class Tracker: file_name: Union[Path, str], *, native_traces: bool = ..., + fast_unwind: bool = ..., memory_interval_ms: int = ..., follow_fork: bool = ..., trace_python_allocators: bool = ..., @@ -255,6 +256,7 @@ class Tracker: *, destination: Destination, native_traces: bool = ..., + fast_unwind: bool = ..., memory_interval_ms: int = ..., follow_fork: bool = ..., trace_python_allocators: bool = ..., @@ -285,6 +287,7 @@ class SymbolicSupport(enum.IntEnum): TOTAL = 3 def get_symbolic_support() -> SymbolicSupport: ... +def has_fast_unwind_support() -> bool: ... RTLD_NOW: int RTLD_DEFAULT: int diff --git a/src/memray/_memray.pyx b/src/memray/_memray.pyx index eee0661463..3e240be637 100644 --- a/src/memray/_memray.pyx +++ b/src/memray/_memray.pyx @@ -725,6 +725,9 @@ cdef class Tracker: native_traces (bool): Whether or not to capture native stack frames, in addition to Python stack frames (see :ref:`Native Tracking`). Defaults to False. + fast_unwind (bool): Whether to use optimized native stack unwinding with + shadow stack caching. This can significantly improve performance when + native_traces is enabled. Requires native_traces=True. Defaults to False. trace_python_allocators (bool): Whether or not to trace Python allocators as independent allocations. (see :ref:`Python allocators`). Defaults to False. @@ -748,6 +751,7 @@ cdef class Tracker: of supported file formats and their limitations. 
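+
+        Example (illustrative sketch - ``run_workload`` stands in for your
+        own code, and ``fast_unwind`` requires a build with ghost_stack
+        support, which you can check with ``has_fast_unwind_support()``)::
+
+            with Tracker("output.bin", native_traces=True, fast_unwind=True):
+                run_workload()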
""" cdef bool _native_traces + cdef bool _fast_unwind cdef bool _track_object_lifetimes cdef unsigned int _memory_interval_ms cdef bool _follow_fork @@ -778,7 +782,8 @@ cdef class Tracker: raise TypeError("destination must be a SocketDestination or FileDestination") def __cinit__(self, object file_name=None, *, object destination=None, - bool native_traces=False, unsigned int memory_interval_ms = 10, + bool native_traces=False, bool fast_unwind=False, + unsigned int memory_interval_ms = 10, bool follow_fork=False, bool trace_python_allocators=False, bool track_object_lifetimes=False, FileFormat file_format=FileFormat.ALL_ALLOCATIONS): @@ -792,8 +797,13 @@ cdef class Tracker: f"Current version: {'.'.join(map(str, sys.version_info[:3]))}" ) + # Validate fast_unwind requires native_traces + if fast_unwind and not native_traces: + raise ValueError("fast_unwind requires native_traces to be enabled") + cdef cppstring command_line = " ".join(sys.argv) self._native_traces = native_traces + self._fast_unwind = fast_unwind self._track_object_lifetimes = track_object_lifetimes self._memory_interval_ms = memory_interval_ms self._follow_fork = follow_fork @@ -857,6 +867,7 @@ cdef class Tracker: NativeTracker.createTracker( move(writer), self._native_traces, + self._fast_unwind, self._memory_interval_ms, self._follow_fork, self._trace_python_allocators, @@ -1677,6 +1688,26 @@ def get_symbolic_support(): return SymbolicSupport.NONE +cdef extern from *: + """ + #ifdef MEMRAY_HAS_GHOST_STACK + constexpr bool _has_fast_unwind_support = true; + #else + constexpr bool _has_fast_unwind_support = false; + #endif + """ + bool _has_fast_unwind_support + + +def has_fast_unwind_support() -> bool: + """Check if fast_unwind support is available. + + Returns True if memray was compiled with ghost_stack support, + which enables the fast_unwind option for native stack unwinding. + """ + return _has_fast_unwind_support + + cdef extern from "": int _RTLD_NOW "RTLD_NOW" void* _RTLD_DEFAULT "RTLD_DEFAULT" diff --git a/src/memray/_memray/ghost_stack/include/ghost_stack.h b/src/memray/_memray/ghost_stack/include/ghost_stack.h new file mode 100644 index 0000000000..650c5546e9 --- /dev/null +++ b/src/memray/_memray/ghost_stack/include/ghost_stack.h @@ -0,0 +1,98 @@ +/** + * GhostStack - Fast Stack Unwinding via Shadow Stacks + * ==================================================== + * + * Drop-in replacement for unw_backtrace() that provides O(1) stack capture + * after initial setup by patching return addresses with trampolines. + * + * Basic Usage: + * + * // Initialize once at startup (optional - will auto-init if needed) + * ghost_stack_init(NULL); + * + * // Capture stack trace (same signature as unw_backtrace) + * void* frames[128]; + * size_t n = ghost_stack_backtrace(frames, 128); + * + * // When done with this call stack (e.g., returning to event loop) + * ghost_stack_reset(); + * + * Thread Safety: + * Each thread has its own shadow stack (thread-local storage). + * + * Exception Safety: + * C++ exceptions propagate correctly through patched frames. + */ + +#ifndef GHOST_STACK_H +#define GHOST_STACK_H + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Unwinder function signature - same as unw_backtrace(). + * @param buffer Array to fill with instruction pointers + * @param size Maximum frames to capture + * @return Number of frames captured + */ +typedef size_t (*ghost_stack_unwinder_t)(void** buffer, size_t size); + +/** + * Initialize GhostStack. 
+ * + * @param unwinder Custom unwinder function, or NULL to use default (unw_backtrace). + * The unwinder is called for initial stack capture; subsequent + * captures use the shadow stack for O(1) performance. + * + * Thread-safe. Can be called multiple times (subsequent calls are no-ops). + * Will be called automatically on first ghost_stack_backtrace() if not + * explicitly initialized. + */ +void +ghost_stack_init(ghost_stack_unwinder_t unwinder); + +/** + * Capture stack trace - drop-in replacement for unw_backtrace(). + * + * First call from a given call stack: uses the unwinder + installs trampolines. + * Subsequent calls from same/deeper stack: O(1) retrieval from shadow stack. + * + * @param buffer Array to fill with return addresses (instruction pointers) + * @param size Maximum number of frames to capture + * @return Number of frames captured (0 on error) + */ +size_t +ghost_stack_backtrace(void** buffer, size_t size); + +/** + * Reset the shadow stack, restoring all original return addresses. + * + * Call this when you want to invalidate the cached stack, e.g.: + * - Returning to an event loop + * - Before making a call that significantly changes the stack + * - On thread exit + * + * Safe to call even if no capture has occurred. + */ +void +ghost_stack_reset(void); + +/** + * Clean up thread-local resources. + * + * Optional - resources are cleaned up automatically on thread exit. + * Call explicitly if you want immediate cleanup. + */ +void +ghost_stack_thread_cleanup(void); + +#ifdef __cplusplus +} +#endif + +#endif /* GHOST_STACK_H */ diff --git a/src/memray/_memray/ghost_stack/src/aarch64_darwin_trampoline.s b/src/memray/_memray/ghost_stack/src/aarch64_darwin_trampoline.s new file mode 100644 index 0000000000..f35fe00edb --- /dev/null +++ b/src/memray/_memray/ghost_stack/src/aarch64_darwin_trampoline.s @@ -0,0 +1,189 @@ +/** + * GhostStack Return Trampoline - AArch64 macOS (Darwin) + * ====================================================== + * + * This assembly implements the return address trampoline for shadow stack unwinding + * on Apple Silicon (AArch64) macOS systems. + * + * When GhostStack patches a return address to point here, this trampoline: + * 1. Saves the function's return value registers (x0-x7) + * 2. Calls _ghost_trampoline_handler() to get the real return address + * 3. Restores the return value registers and returns to the real address + * + * macOS/Darwin Differences from Linux: + * - Symbols are prefixed with underscore (_ghost_ret_trampoline vs ghost_ret_trampoline) + * - Uses Mach-O object format instead of ELF + * - Section names differ (__TEXT,__text vs .text) + * - Exception table goes in __TEXT,__gcc_except_tab + * - Uses .private_extern instead of .hidden + * - No .type directive (Mach-O doesn't use it) + * + * Apple ARM64 ABI Notes: + * - Return values: x0-x7 (same as AAPCS64) + * - Link register: x30 (LR) or 'lr' alias + * - Frame pointer: x29 (FP) or 'fp' alias + * - Stack: 16-byte aligned + * + * Pointer Authentication (PAC): + * On Apple Silicon with PAC enabled, return addresses are cryptographically + * signed. The C++ code uses xpaclri to strip the PAC before use. 
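+ *
+ *   The stripping helper amounts to this sketch (condensed from
+ *   ptrauth_strip() in ghost_stack.cpp):
+ *
+ *       static inline uintptr_t ptrauth_strip(uintptr_t val) {
+ *           uint64_t ret;
+ *           asm volatile("mov x30, %1\n\txpaclri\n\tmov %0, x30"
+ *                        : "=r"(ret) : "r"(val) : "x30");
+ *           return ret;
+ *       }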
+ */ + +.section __TEXT,__text,regular,pure_instructions +.build_version macos, 14, 0 sdk_version 15, 1 +.p2align 2 + +/* ========================================================================== + * _ghost_ret_trampoline_start - Exception handling anchor + * ========================================================================== + * This symbol marks the function start for DWARF exception handling. + * macOS uses the same CFI mechanism as Linux but with Darwin-specific + * section names and symbol conventions. + * + * CFI Directives: + * - .cfi_personality 155: Encoding for ___gxx_personality_v0 + * - .cfi_lsda 16: Reference to our exception handling data + * - .cfi_undefined lr: Signal that return address is non-standard + */ +.globl _ghost_ret_trampoline_start +.private_extern _ghost_ret_trampoline_start + +_ghost_ret_trampoline_start: +.cfi_startproc +.cfi_personality 155, ___gxx_personality_v0 +.cfi_lsda 16,LLSDA0 +.cfi_undefined lr +.cfi_endproc + +/* Exception try region - any exception here redirects to L3 */ +LEHB0: + nop /* Placeholder marking exception region start */ +LEHE0: + +/* ========================================================================== + * _ghost_ret_trampoline - The actual trampoline entry point + * ========================================================================== + * When a function returns through a patched return address, execution + * lands here. We retrieve the real return address from GhostStack's + * shadow stack and continue execution transparently. + */ +.globl _ghost_ret_trampoline +.private_extern _ghost_ret_trampoline +_ghost_ret_trampoline: + + /* ------------------------------------------------------------------------- + * Step 1: Save return value registers + * ------------------------------------------------------------------------- + * The Apple ARM64 ABI uses x0-x7 for return values (same as AAPCS64). + * We save all 8 to handle any return type (scalars, structs, HFA/HVA). + * + * Stack layout after save (64 bytes total): + * sp+48: x6, x7 + * sp+32: x4, x5 + * sp+16: x2, x3 + * sp+0: x0, x1 (most common return value location) + */ + sub sp, sp, #64 /* Allocate 64 bytes (8 * 8 = 64) */ + stp x0, x1, [sp, #0] /* Save x0, x1 (primary return values) */ + stp x2, x3, [sp, #16] /* Save x2, x3 */ + stp x4, x5, [sp, #32] /* Save x4, x5 */ + stp x6, x7, [sp, #48] /* Save x6, x7 */ + + /* ------------------------------------------------------------------------- + * Step 2: Call into C++ to get the real return address + * ------------------------------------------------------------------------- + * First argument (x0): Original stack pointer location + * = current sp + 64 (our saved registers) + * + * This allows the C++ code to verify stack consistency if needed. + * Returns the real return address in x0. + */ + mov x0, sp + add x0, x0, #64 /* x0 = original stack pointer */ + bl _ghost_trampoline_handler /* Call C++ handler */ + + /* ------------------------------------------------------------------------- + * Step 3: Prepare return address and restore registers + * ------------------------------------------------------------------------- + * Move real return address to lr (x30) BEFORE restoring x0, + * since x0 will be overwritten by ldp. 
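+ * (If the ldp ran first, the handler's result in x0 would be overwritten
+ * by the saved return value before it could be moved into lr.)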
+ */ + mov lr, x0 /* lr = real return address */ + + /* Restore all return value registers */ + ldp x0, x1, [sp, #0] /* Restore x0, x1 */ + ldp x2, x3, [sp, #16] /* Restore x2, x3 */ + ldp x4, x5, [sp, #32] /* Restore x4, x5 */ + ldp x6, x7, [sp, #48] /* Restore x6, x7 */ + add sp, sp, #64 /* Deallocate stack frame */ + + /* ------------------------------------------------------------------------- + * Step 4: Return to real caller + * ------------------------------------------------------------------------- + * 'ret' uses lr (x30) as the return address by default. + * The branch predictor will see this as a normal return. + */ + ret + +/* ========================================================================== + * Exception landing pad + * ========================================================================== + * When a C++ exception propagates through our patched frame, the unwinder + * uses our LSDA to find this landing pad. We: + * 1. Call _ghost_exception_handler to get real return addr + * 2. Restore lr with the real address + * 3. Tail-call ___cxa_rethrow to continue exception propagation + * + * The exception object pointer is passed in x0 by the runtime. + */ +L3: + bl _ghost_exception_handler /* Get real return addr in x0 */ + mov lr, x0 /* Restore lr with real return address */ + b ___cxa_rethrow /* Tail-call rethrow (never returns) */ + + +/* ========================================================================== + * LSDA (Language Specific Data Area) + * ========================================================================== + * Exception handling metadata for ___gxx_personality_v0. + * This tells the C++ runtime: + * - Where our "try" region is (LEHB0 to LEHE0) + * - Where to jump on exception (L3) + * - What types to catch (0 = catch all, i.e., catch(...)) + * + * Format follows DWARF exception handling specification. + */ +.section __TEXT,__gcc_except_tab +.align 2 +LLSDA0: + .byte 0xff /* @LPStart encoding: omit */ + .byte 0x9b /* @TType encoding: indirect pcrel sdata4 */ + .uleb128 LLSDATT0-LLSDATTD0 /* @TType base offset */ +LLSDATTD0: + .byte 0x1 /* Call site encoding: uleb128 */ + .uleb128 LLSDACSE0-LLSDACSB0 /* Call site table length */ +LLSDACSB0: + /* Call site entry: our try region */ + .uleb128 LEHB0-_ghost_ret_trampoline_start /* Region start (relative) */ + .uleb128 LEHE0-LEHB0 /* Region length */ + .uleb128 L3-_ghost_ret_trampoline_start /* Landing pad (relative) */ + .uleb128 0x1 /* Action: index 1 in action table */ +LLSDACSE0: + .byte 0x1 /* Action table entry */ + .byte 0 /* No next action */ + .align 2 + .long 0 /* Type table: 0 = catch(...) */ +LLSDATT0: + +/* ========================================================================== + * Symbol declarations + * ========================================================================== + * Declare reference to the C++ personality function. + * On macOS, this is ___gxx_personality_v0 (three underscores total). 
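+ * (Mach-O prepends an underscore to every C-level symbol, so the C++
+ * runtime's __gxx_personality_v0 is referenced here as ___gxx_personality_v0.)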
+ */ +.section __DATA,__data +.align 3 +.private_extern ___gxx_personality_v0 + +/* Enable dead code stripping optimization */ +.subsections_via_symbols diff --git a/src/memray/_memray/ghost_stack/src/aarch64_linux_trampoline.s b/src/memray/_memray/ghost_stack/src/aarch64_linux_trampoline.s new file mode 100644 index 0000000000..62e85e4a84 --- /dev/null +++ b/src/memray/_memray/ghost_stack/src/aarch64_linux_trampoline.s @@ -0,0 +1,202 @@ +/** + * GhostStack Return Trampoline - AArch64 Linux + * ============================================= + * + * This assembly implements the return address trampoline for shadow stack unwinding + * on 64-bit ARM (AArch64) Linux systems. + * + * When GhostStack patches a return address to point here, this trampoline: + * 1. Saves the function's return value registers (x0-x7) + * 2. Calls ghost_trampoline_handler() to get the real return address + * 3. Restores the return value registers and branches to the real address + * + * Exception Handling: + * The trampoline includes DWARF unwind info and an LSDA so C++ exceptions + * propagate correctly through patched frames. When an exception passes through, + * control goes to .L3 which calls ghost_exception_handler() + * to restore the real return address before rethrowing. + * + * AArch64 AAPCS64 ABI Notes: + * - Return values: x0-x7 (up to 8 registers for HFA/HVA or multi-value returns) + * - Link register: x30 (LR) holds return address + * - Frame pointer: x29 (FP) + * - Stack: 16-byte aligned, grows downward + * + * Pointer Authentication: + * If PAC is enabled, return addresses may be signed. The C++ code handles + * stripping the PAC before use (via xpaclri instruction). + */ + + .arch armv8-a + .text + .align 2 + .p2align 3,,7 + + /* ========================================================================== + * ghost_ret_trampoline_start - Exception handling anchor + * ========================================================================== + * This symbol marks the start of the function for DWARF unwinding. + * CFI directives establish exception handling context: + * - .cfi_personality: Use __gxx_personality_v0 for C++ exceptions + * - .cfi_lsda: Point to our Language Specific Data Area + * - .cfi_undefined x30: Signal that LR (return address) is non-standard + */ + .global ghost_ret_trampoline_start + .type ghost_ret_trampoline_start, %function +ghost_ret_trampoline_start: +.LFB0: + .cfi_startproc + .cfi_personality 0x9b,DW.ref.__gxx_personality_v0 + .cfi_lsda 0x1b,.LLSDA0 + .cfi_undefined x30 + + /* Exception try region - exceptions here redirect to .L3 */ +.LEHB0: + nop /* Placeholder marking exception region start */ +.LEHE0: + + /* ========================================================================== + * ghost_ret_trampoline - The actual trampoline entry point + * ========================================================================== + * When a function's return address has been patched to point here, + * execution continues at this label upon function return (via RET). + * The original return address is stored in GhostStack's shadow stack. + */ +.globl ghost_ret_trampoline +.type ghost_ret_trampoline, @function +ghost_ret_trampoline: + + /* ------------------------------------------------------------------------- + * Step 1: Save return value registers + * ------------------------------------------------------------------------- + * AAPCS64 uses x0-x7 for return values (e.g., HFA types can use all 8). + * We must preserve these across our callback. 
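+ * (For example, a 16-byte struct { long a, b; } is returned in x0/x1, so
+ * preserving only x0 would corrupt half of the caller's result.)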
+ * + * Stack layout after save (64 bytes = 8 registers * 8 bytes): + * sp+56: x7 + * sp+48: x6 + * sp+40: x5 + * sp+32: x4 + * sp+24: x3 + * sp+16: x2 + * sp+8: x1 + * sp+0: x0 + * + * Note: stp stores pairs of registers efficiently. + */ + sub sp, sp, #(8 * 8) /* Allocate 64 bytes (8 registers) */ + stp x0, x1, [sp, 0] /* Save x0, x1 (primary return value pair) */ + stp x2, x3, [sp, 16] /* Save x2, x3 */ + stp x4, x5, [sp, 32] /* Save x4, x5 */ + stp x6, x7, [sp, 48] /* Save x6, x7 */ + + /* ------------------------------------------------------------------------- + * Step 2: Call into C++ to get the real return address + * ------------------------------------------------------------------------- + * Argument (x0): Pointer to original stack location + * = sp (current) + 64 (saved regs) = original sp + * + * ghost_trampoline_handler() returns the real return address in x0. + */ + mov x0, sp + add x0, x0, #64 /* x0 = original stack pointer */ + bl ghost_trampoline_handler /* Call C++ handler; result in x0 */ + + /* ------------------------------------------------------------------------- + * Step 3: Prepare return address and restore registers + * ------------------------------------------------------------------------- + * Move real return address to x30 (LR) first, then restore x0-x7. + * This order matters because x0 gets overwritten by ldp. + */ + mov x30, x0 /* x30 (LR) = real return address */ + + /* Restore all return value registers */ + ldp x0, x1, [sp, 0] /* Restore x0, x1 */ + ldp x2, x3, [sp, 16] /* Restore x2, x3 */ + ldp x4, x5, [sp, 32] /* Restore x4, x5 */ + ldp x6, x7, [sp, 48] /* Restore x6, x7 */ + add sp, sp, #(8 * 8) /* Deallocate stack frame */ + + /* ------------------------------------------------------------------------- + * Step 4: Return to real caller + * ------------------------------------------------------------------------- + * br x30 is an indirect branch to the address in x30. + * Unlike 'ret', 'br' doesn't interact with return prediction, + * which is appropriate since we're branching to an arbitrary address. + */ + br x30 /* Branch to real return address */ + nop /* Padding for alignment */ + + /* ========================================================================== + * Exception landing pad + * ========================================================================== + * When an exception propagates through our patched frame: + * 1. Personality routine finds our LSDA entry + * 2. Stack is unwound to our frame + * 3. Control transfers here with exception object in x0 + * + * We restore the real return address and rethrow so unwinding continues + * correctly through the original call stack. + */ +.L3: + /* x0 already contains exception object pointer from runtime */ + bl ghost_exception_handler /* Get real return addr */ + mov x30, x0 /* Restore LR with real return address */ + b __cxa_rethrow /* Rethrow exception (tail call) */ + + .cfi_endproc +.LFE0: + + /* ========================================================================== + * LSDA (Language Specific Data Area) + * ========================================================================== + * This data tells __gxx_personality_v0 how to handle exceptions. 
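+ * (For reference, the encodings used below: 0x9b is
+ * DW_EH_PE_indirect|DW_EH_PE_pcrel|DW_EH_PE_sdata4, and 0x1 is
+ * DW_EH_PE_uleb128 - matching the inline comments on each entry.)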
+ * + * Structure: + * - Header: encoding information + * - Call site table: maps PC ranges to landing pads + * - Action table: what to do at each landing pad + * - Type table: exception types to catch (0 = catch all) + */ + .global __gxx_personality_v0 + .section .gcc_except_table,"a",@progbits + .align 2 +.LLSDA0: + .byte 0xff /* @LPStart encoding: omit (use function start) */ + .byte 0x9b /* @TType encoding: indirect pcrel sdata4 */ + .uleb128 .LLSDATT0-.LLSDATTD0 /* @TType base offset */ +.LLSDATTD0: + .byte 0x1 /* Call site encoding: uleb128 */ + .uleb128 .LLSDACSE0-.LLSDACSB0 /* Call site table length */ +.LLSDACSB0: + /* Call site entry for our try region */ + .uleb128 .LEHB0-.LFB0 /* Start of region (relative to function) */ + .uleb128 .LEHE0-.LEHB0 /* Length of region */ + .uleb128 .L3-.LFB0 /* Landing pad address (relative) */ + .uleb128 0x1 /* Action: index 1 in action table */ +.LLSDACSE0: + .byte 0x1 /* Action table: filter type 1 */ + .byte 0 /* No next action */ + .align 2 + .4byte 0 /* Type table: 0 = catch(...) */ + +.LLSDATT0: + .text + .size ghost_ret_trampoline_start, .-ghost_ret_trampoline_start + + /* ========================================================================== + * Symbol references + * ========================================================================== + * Weak reference to __gxx_personality_v0 in a COMDAT group. + * This allows multiple translation units to define it without conflicts. + */ + .weak DW.ref.__gxx_personality_v0 + .section .data.rel.local.DW.ref.__gxx_personality_v0,"awG",@progbits,DW.ref.__gxx_personality_v0,comdat + .align 3 + .type DW.ref.__gxx_personality_v0, %object + .size DW.ref.__gxx_personality_v0, 8 +DW.ref.__gxx_personality_v0: + .xword __gxx_personality_v0 + + /* Mark stack as non-executable (security hardening) */ + .section .note.GNU-stack,"",@progbits diff --git a/src/memray/_memray/ghost_stack/src/ghost_stack.cpp b/src/memray/_memray/ghost_stack/src/ghost_stack.cpp new file mode 100644 index 0000000000..431f123617 --- /dev/null +++ b/src/memray/_memray/ghost_stack/src/ghost_stack.cpp @@ -0,0 +1,825 @@ +/** + * GhostStack Implementation + * ========================= + * Shadow stack-based fast unwinding with O(1) cached captures. 
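+ *
+ * Call pattern, as documented in ghost_stack.h (sketch):
+ *
+ *     void* buf[128];
+ *     size_t n = ghost_stack_backtrace(buf, 128);  // first call: unwind + patch
+ *     n = ghost_stack_backtrace(buf, 128);         // cached: read shadow stack
+ *     ghost_stack_reset();                         // restore return addresses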
+ */
+
+#include "ghost_stack.h"
+
+#include <atomic>
+#include <cstddef>
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <cxxabi.h>
+#include <mutex>
+#include <pthread.h>
+#include <vector>
+
+#define UNW_LOCAL_ONLY
+#include <libunwind.h>
+
+#ifdef __APPLE__
+# include <execinfo.h>
+#endif
+
+// Assembly trampoline (defined in *_trampoline.s)
+// The 'used' attribute prevents LTO from stripping the symbol and its eh_frame data
+extern "C" void
+ghost_ret_trampoline();
+extern "C" void
+ghost_ret_trampoline_start();
+
+// Force references to trampoline symbols to prevent LTO from stripping eh_frame
+// These are never called, just referenced to keep the symbols alive
+__attribute__((used)) static void* const _ghost_trampoline_refs[] = {
+    reinterpret_cast<void*>(&ghost_ret_trampoline),
+    reinterpret_cast<void*>(&ghost_ret_trampoline_start),
+};
+
+// ============================================================================
+// Platform Configuration
+// ============================================================================
+
+#if defined(__aarch64__) || defined(__arm64__)
+# define GS_ARCH_AARCH64 1
+# define GS_SP_REGISTER UNW_AARCH64_X29
+# define GS_RA_REGISTER UNW_AARCH64_X30
+#elif defined(__x86_64__)
+# define GS_ARCH_X86_64 1
+# define GS_SP_REGISTER UNW_X86_64_RBP
+# define GS_RA_REGISTER UNW_X86_64_RIP
+#else
+# error "Unsupported architecture"
+#endif
+
+#ifndef GHOST_STACK_MAX_FRAMES
+# define GHOST_STACK_MAX_FRAMES 512
+#endif
+
+// ============================================================================
+// Logging (minimal, stderr only)
+// ============================================================================
+
+// GS_FORCE_DEBUG can be defined via compiler flag (-DGS_FORCE_DEBUG) for test builds
+#if defined(DEBUG) || defined(GS_FORCE_DEBUG)
+# define LOG_DEBUG(...) \
+    do { \
+        fprintf(stderr, "[GhostStack][DEBUG] " __VA_ARGS__); \
+        fflush(stderr); \
+    } while (0)
+#else
+# define LOG_DEBUG(...) ((void)0)
+#endif
+
+#define LOG_ERROR(...) \
+    do { \
+        fprintf(stderr, "[GhostStack][ERROR] " __VA_ARGS__); \
+        fflush(stderr); \
+    } while (0)
+#define LOG_INFO(...)
\ + do { \ + fprintf(stderr, "[GhostStack][INFO] " __VA_ARGS__); \ + fflush(stderr); \ + } while (0) + +// ============================================================================ +// Utilities +// ============================================================================ + +#ifdef GS_ARCH_AARCH64 +static inline uintptr_t +ptrauth_strip(uintptr_t val) +{ + uint64_t ret; + asm volatile("mov x30, %1\n\t" + "xpaclri\n\t" + "mov %0, x30\n\t" + : "=r"(ret) + : "r"(val) + : "x30"); + return ret; +} +#else +static inline uintptr_t +ptrauth_strip(uintptr_t val) +{ + return val; +} +#endif + +// ============================================================================ +// Stack Entry +// ============================================================================ + +struct StackEntry +{ + uintptr_t ip; // Instruction pointer of this frame (what to return to caller) + uintptr_t return_address; // Original return address (what we replaced with trampoline) + uintptr_t* location; // Where it lives on the stack + uintptr_t stack_pointer; // SP at capture time (for validation) +}; + +// ============================================================================ +// GhostStack Core (thread-local) +// ============================================================================ + +class GhostStackImpl +{ + public: + GhostStackImpl() + { + entries_.reserve(64); + } + + ~GhostStackImpl() + { + reset(); + } + + // Set custom unwinder (NULL = use default libunwind) + void set_unwinder(ghost_stack_unwinder_t unwinder) + { + custom_unwinder_ = unwinder; + } + + // Main capture function - returns number of frames + size_t backtrace(void** buffer, size_t max_frames) + { + LOG_DEBUG("=== backtrace ENTER ===\n"); + LOG_DEBUG(" this=%p, buffer=%p, max_frames=%zu\n", (void*)this, (void*)buffer, max_frames); + LOG_DEBUG( + " is_capturing_=%d, trampolines_installed_=%d, entries_.size()=%zu, tail_=%zu\n", + (int)is_capturing_, + (int)trampolines_installed_, + entries_.size(), + tail_.load(std::memory_order_acquire)); + + if (is_capturing_) { + LOG_DEBUG(" Recursive call detected, returning 0\n"); + return 0; // Recursive call, bail out + } + is_capturing_ = true; + + size_t result = 0; + + // Fast path: trampolines installed, return cached frames + if (trampolines_installed_ && !entries_.empty()) { + LOG_DEBUG(" Taking FAST PATH (cached frames)\n"); + result = copy_cached_frames(buffer, max_frames); + is_capturing_ = false; + LOG_DEBUG("=== backtrace EXIT (fast path) result=%zu ===\n", result); + return result; + } + + // Slow path: capture with unwinder and install trampolines + LOG_DEBUG(" Taking SLOW PATH (capture and install)\n"); + + // Clear any stale entries from a previous reset before starting fresh capture + if (!entries_.empty() && !trampolines_installed_) { + LOG_DEBUG(" Clearing %zu stale entries from previous reset\n", entries_.size()); + entries_.clear(); + tail_.store(0, std::memory_order_release); + } + + result = capture_and_install(buffer, max_frames); + is_capturing_ = false; + LOG_DEBUG("=== backtrace EXIT (slow path) result=%zu ===\n", result); + return result; + } + + /** + * Reset the shadow stack, restoring all original return addresses. + * + * On ARM64, stale trampolines may still fire after reset() because the LR + * register may have already been loaded with the trampoline address before + * we restored the stack location. We keep entries_ around to handle these + * stale trampolines gracefully. 
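+     * (Illustrative timeline: a returning frame loads LR with the trampoline
+     * address, reset() then restores the stack slot, and the in-flight RET
+     * still lands in the trampoline - which must find its entry here.)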
+     *
+     * We restore ALL entries (not just 0 to tail-1) but only if the location
+     * still contains the trampoline address. This handles the case where a
+     * location was reused by a new frame after its original trampoline fired.
+     */
+    void reset()
+    {
+        LOG_DEBUG("=== reset ENTER ===\n");
+        LOG_DEBUG(
+            "    this=%p, trampolines_installed_=%d, entries_.size()=%zu, tail_=%zu\n",
+            (void*)this,
+            (int)trampolines_installed_,
+            entries_.size(),
+            tail_.load(std::memory_order_acquire));
+
+        if (trampolines_installed_) {
+            uintptr_t tramp_addr = reinterpret_cast<uintptr_t>(ghost_ret_trampoline);
+            LOG_DEBUG(
+                "    Restoring locations that still have trampoline (0x%lx)\n",
+                (unsigned long)tramp_addr);
+
+            // Restore ALL entries whose locations still contain the trampoline.
+            // This handles both pending entries AND already-fired entries whose
+            // locations haven't been reused by new frames.
+            for (size_t i = 0; i < entries_.size(); ++i) {
+                uintptr_t current_value = *entries_[i].location;
+                // Strip PAC bits before comparison - on ARM64 with PAC enabled,
+                // the value read from stack may be PAC-signed while tramp_addr is not
+                uintptr_t stripped_value = ptrauth_strip(current_value);
+                if (stripped_value == tramp_addr) {
+                    LOG_DEBUG(
+                        "    [%zu] location=%p, restoring 0x%lx\n",
+                        i,
+                        (void*)entries_[i].location,
+                        (unsigned long)entries_[i].return_address);
+                    *entries_[i].location = entries_[i].return_address;
+                } else {
+                    LOG_DEBUG(
+                        "    [%zu] location=%p, skipping (current=0x%lx, not trampoline)\n",
+                        i,
+                        (void*)entries_[i].location,
+                        (unsigned long)current_value);
+                }
+            }
+
+            // Mark trampolines as not installed, but DON'T clear entries_!
+            // On ARM64, stale trampolines may still fire because LR was loaded
+            // before we restored the stack. Keep entries_ so we can still
+            // return the correct address.
+            trampolines_installed_ = false;
+
+            // Increment epoch to signal state change
+            epoch_.fetch_add(1, std::memory_order_release);
+            LOG_DEBUG(
+                "    New epoch=%lu (entries preserved for stale trampolines)\n",
+                (unsigned long)epoch_.load(std::memory_order_acquire));
+        }
+        LOG_DEBUG("=== reset EXIT ===\n");
+    }
+
+  public:
+    /**
+     * Direct entry access method for exception handling.
+     * Decrements tail and returns the return address without longjmp checking.
+     */
+    uintptr_t pop_entry()
+    {
+        LOG_DEBUG("=== pop_entry ENTER ===\n");
+        LOG_DEBUG(
+            "    this=%p, entries_.size()=%zu, tail_=%zu\n",
+            (void*)this,
+            entries_.size(),
+            tail_.load(std::memory_order_acquire));
+
+        size_t tail = tail_.fetch_sub(1, std::memory_order_acq_rel) - 1;
+        LOG_DEBUG("    After fetch_sub: tail=%zu\n", tail);
+
+        if (tail >= entries_.size()) {
+            LOG_ERROR("Stack corruption in pop_entry!\n");
+            LOG_ERROR("    tail=%zu, entries_.size()=%zu\n", tail, entries_.size());
+            std::abort();
+        }
+        uintptr_t ret = entries_[tail].return_address;
+        LOG_DEBUG("    Returning address 0x%lx\n", (unsigned long)ret);
+        LOG_DEBUG("=== pop_entry EXIT ===\n");
+        return ret;
+    }
+
+  private:
+    /**
+     * Internal helper to clear all state.
+     * Increments epoch to invalidate any in-flight trampoline operations.
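+     * (The epoch behaves like a generation counter: on_ret_trampoline()
+     * records it on entry and aborts if it changed underneath, instead of
+     * consuming an entry that no longer describes the live stack.)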
+ */ + void clear_entries() + { + LOG_DEBUG("=== clear_entries ENTER ===\n"); + LOG_DEBUG( + " this=%p, entries_.size()=%zu, tail_=%zu, epoch_=%lu\n", + (void*)this, + entries_.size(), + tail_.load(std::memory_order_acquire), + (unsigned long)epoch_.load(std::memory_order_acquire)); + + // Increment epoch FIRST to signal any in-flight operations + epoch_.fetch_add(1, std::memory_order_release); + LOG_DEBUG(" New epoch=%lu\n", (unsigned long)epoch_.load(std::memory_order_acquire)); + + entries_.clear(); + tail_.store(0, std::memory_order_release); + trampolines_installed_ = false; + LOG_DEBUG("=== clear_entries EXIT ===\n"); + } + + public: + /** + * Called by trampoline when a function returns. + * + * Handles three scenarios: + * 1. Normal operation: trampolines installed, decrement tail and return + * 2. Post-reset stale trampoline (ARM64): search entries by SP, don't modify state + * 3. Longjmp detection: SP mismatch, search backward for matching entry + * + * @param sp Stack pointer at return time (for longjmp detection / entry lookup) + * @return Original return address to jump to + */ + uintptr_t on_ret_trampoline(uintptr_t sp) + { + LOG_DEBUG("=== on_ret_trampoline ENTER ===\n"); + LOG_DEBUG(" this=%p, sp=0x%lx\n", (void*)this, (unsigned long)sp); + + // Log state + size_t tail_before = tail_.load(std::memory_order_acquire); + size_t entries_size = entries_.size(); + LOG_DEBUG( + " BEFORE: tail_=%zu, entries_.size()=%zu, trampolines_installed_=%d\n", + tail_before, + entries_size, + (int)trampolines_installed_); + + // ========================================================= + // POST-RESET STALE TRAMPOLINE HANDLING (ARM64) + // ========================================================= + // On ARM64, reset() may have been called but stale trampolines can still + // fire because LR was loaded before we restored the stack location. + // In this case, trampolines_installed_ is false but entries_ still has data. + // + // Stale trampolines fire in predictable order: the deepest pending frame + // (highest index that wasn't consumed) fires first, then the next one up. + // We simply return entries in order starting from tail_-1 and decrementing. + if (!trampolines_installed_ && !entries_.empty()) { + size_t current_tail = tail_.load(std::memory_order_acquire); + LOG_DEBUG( + " POST-RESET stale trampoline! 
tail_=%zu, entries_.size()=%zu\n", + current_tail, + entries_.size()); + + if (current_tail > 0 && current_tail <= entries_.size()) { + // Return the entry at tail-1 (the deepest pending entry) + size_t idx = current_tail - 1; + uintptr_t ret = entries_[idx].return_address; + + // Decrement tail_ for the next stale trampoline (if any) + tail_.store(idx, std::memory_order_release); + + LOG_DEBUG(" Returning entry[%zu].return_address=0x%lx\n", idx, (unsigned long)ret); + LOG_DEBUG("=== on_ret_trampoline EXIT (post-reset) ===\n"); + return ret; + } + + // tail_ is 0 or invalid - this shouldn't happen + LOG_ERROR("POST-RESET trampoline: tail_=%zu is invalid!\n", current_tail); + LOG_ERROR(" entries_.size()=%zu\n", entries_.size()); + std::abort(); + } + + // ========================================================= + // NORMAL OPERATION + // ========================================================= + // Capture current epoch - if it changes during execution, reset() was called + uint64_t current_epoch = epoch_.load(std::memory_order_acquire); + LOG_DEBUG(" current_epoch=%lu\n", (unsigned long)current_epoch); + + // Decrement tail first, like nwind does + size_t tail = tail_.fetch_sub(1, std::memory_order_acq_rel) - 1; + LOG_DEBUG(" AFTER fetch_sub: tail=%zu (was %zu)\n", tail, tail_before); + + if (entries_.empty()) { + LOG_ERROR("Stack corruption in trampoline: entries_ is EMPTY!\n"); + LOG_ERROR(" tail_before=%zu, entries_.size()=%zu\n", tail_before, entries_size); + LOG_ERROR(" this=%p\n", (void*)this); + std::abort(); + } + + if (tail >= entries_.size()) { + LOG_ERROR("Stack corruption in trampoline: tail >= entries_.size()!\n"); + LOG_ERROR( + " tail=%zu, entries_.size()=%zu, tail_before=%zu\n", + tail, + entries_.size(), + tail_before); + LOG_ERROR(" this=%p\n", (void*)this); + std::abort(); + } + + auto& entry = entries_[tail]; + LOG_DEBUG( + " entry[%zu]: ip=0x%lx, return_address=0x%lx, location=%p, stack_pointer=0x%lx\n", + tail, + (unsigned long)entry.ip, + (unsigned long)entry.return_address, + (void*)entry.location, + (unsigned long)entry.stack_pointer); + + // Check for longjmp: if SP doesn't match expected, search backward + // through shadow stack for matching entry (frames were skipped) + if (sp != 0 && entry.stack_pointer != 0 && entry.stack_pointer != sp) { + LOG_DEBUG( + "SP mismatch at index %zu: expected 0x%lx, got 0x%lx - checking for longjmp\n", + tail, + (unsigned long)entry.stack_pointer, + (unsigned long)sp); + + // Search backward through shadow stack for matching SP (nwind style) + // Only update tail_ if we find a match - don't corrupt it during search + for (size_t i = tail; i > 0; --i) { + if (entries_[i - 1].stack_pointer == sp) { + LOG_DEBUG( + "longjmp detected: found matching SP at index %zu (skipped %zu frames)\n", + i - 1, + tail - (i - 1)); + + // Update tail_ to skip all the frames that were bypassed by longjmp + tail_.store(i - 1, std::memory_order_release); + tail = i - 1; + break; + } + } + // If no match found, continue with current entry (SP calculation may differ by platform) + } + + // Verify epoch hasn't changed (reset wasn't called during our execution) + uint64_t final_epoch = epoch_.load(std::memory_order_acquire); + if (final_epoch != current_epoch) { + LOG_ERROR("Reset detected during trampoline - aborting\n"); + LOG_ERROR( + " current_epoch=%lu, final_epoch=%lu\n", + (unsigned long)current_epoch, + (unsigned long)final_epoch); + std::abort(); + } + + uintptr_t ret_addr = entries_[tail].return_address; + LOG_DEBUG(" Returning to address 
0x%lx\n", (unsigned long)ret_addr); + LOG_DEBUG("=== on_ret_trampoline EXIT ===\n"); + return ret_addr; + } + + private: + /** + * Copy cached frames to output buffer (fast path). + * + * Called when trampolines are already installed and we can read + * directly from the shadow stack. + */ + size_t copy_cached_frames(void** buffer, size_t max_frames) + { + size_t tail = tail_.load(std::memory_order_acquire); + size_t available = tail; // frames from 0 to tail-1 + size_t count = (available < max_frames) ? available : max_frames; + + for (size_t i = 0; i < count; ++i) { + buffer[i] = reinterpret_cast(entries_[count - 1 - i].ip); + } + + LOG_DEBUG("Fast path: %zu frames\n", count); + return count; + } + + // Capture frames using unwinder, install trampolines + size_t capture_and_install(void** buffer, size_t max_frames) + { + LOG_DEBUG("=== capture_and_install ENTER ===\n"); + LOG_DEBUG(" this=%p, max_frames=%zu\n", (void*)this, max_frames); + + // First, capture IPs using the unwinder + std::vector raw_frames(max_frames); + size_t raw_count = do_unwind(raw_frames.data(), max_frames); + LOG_DEBUG(" do_unwind returned %zu frames\n", raw_count); + + if (raw_count == 0) { + LOG_DEBUG(" No frames captured, returning 0\n"); + return 0; + } + + // Now walk the stack to get return address locations and install trampolines + std::vector new_entries; + new_entries.reserve(raw_count); + bool found_existing = false; + + unw_context_t ctx; + unw_cursor_t cursor; + unw_getcontext(&ctx); + unw_init_local(&cursor, &ctx); + LOG_DEBUG(" Initialized libunwind cursor\n"); + + // Skip the current frame to avoid patching our own return address + if (unw_step(&cursor) > 0) { + // Skipped internal frame + } + + // Process frames: read current frame, then step to next + // Note: After skip loop, cursor is positioned AT the first frame we want + // We need to read first, then step (not step-then-read) + size_t frame_idx = 0; + int step_result; + do { + if (frame_idx >= raw_count) break; + + unw_word_t ip, sp; + unw_get_reg(&cursor, UNW_REG_IP, &ip); + unw_get_reg(&cursor, GS_SP_REGISTER, &sp); + + // On ARM64, strip PAC (Pointer Authentication Code) bits from IP. + // PAC-signed addresses have authentication bits in the upper bits + // that must be stripped for valid address comparison and symbolization. +#ifdef GS_ARCH_AARCH64 + ip = ptrauth_strip(ip); +#endif + + // Get location where return address is stored + uintptr_t* ret_loc = nullptr; +#ifdef __linux__ + unw_save_loc_t loc; + if (unw_get_save_loc(&cursor, GS_RA_REGISTER, &loc) == 0 && loc.type == UNW_SLT_MEMORY) { + ret_loc = reinterpret_cast(loc.u.addr); + } +#else + // macOS: return address is at fp + sizeof(void*) + ret_loc = reinterpret_cast(sp + sizeof(void*)); +#endif + if (!ret_loc) break; + + uintptr_t ret_addr = *ret_loc; + + // Strip PAC (Pointer Authentication Code) if present. + // On ARM64 with PAC, return addresses have authentication bits + // that must be stripped before comparison or storage. + uintptr_t stripped_ret_addr = ptrauth_strip(ret_addr); + + // Check if already patched (cache hit) + // Compare against stripped address since trampoline address doesn't have PAC + if (stripped_ret_addr == reinterpret_cast(ghost_ret_trampoline)) { + found_existing = true; + LOG_DEBUG("Found existing trampoline at frame %zu\n", frame_idx); + break; + } + + // Store the stack pointer that the trampoline will pass. 
+ // Linux: libunwind's SP matches what the trampoline passes + // macOS: trampoline passes ret_loc + sizeof(void*), NOT libunwind's SP +#ifdef __APPLE__ + uintptr_t expected_sp = reinterpret_cast(ret_loc) + sizeof(void*); +#else + unw_word_t actual_sp; + unw_get_reg(&cursor, UNW_REG_SP, &actual_sp); + uintptr_t expected_sp = static_cast(actual_sp); +#endif + // Store both IP (for returning to caller) and return_address (for trampoline restoration) + // Insert at beginning to reverse order (oldest at index 0, newest at end) + new_entries.insert(new_entries.begin(), {ip, ret_addr, ret_loc, expected_sp}); + frame_idx++; + + step_result = unw_step(&cursor); + } while (step_result > 0); + + LOG_DEBUG( + " Collected %zu new entries, found_existing=%d\n", + new_entries.size(), + (int)found_existing); + + // Install trampolines on new entries + LOG_DEBUG(" Installing trampolines (trampoline addr=%p):\n", (void*)ghost_ret_trampoline); + for (size_t i = 0; i < new_entries.size(); ++i) { + auto& e = new_entries[i]; + LOG_DEBUG( + " [%zu] location=%p, old_value=0x%lx, ip=0x%lx, expected_sp=0x%lx\n", + i, + (void*)e.location, + (unsigned long)*e.location, + (unsigned long)e.ip, + (unsigned long)e.stack_pointer); + *e.location = reinterpret_cast(ghost_ret_trampoline); + } + + // Merge with existing entries if we found a patched frame + if (found_existing && !entries_.empty()) { + size_t tail = tail_.load(std::memory_order_acquire); + LOG_DEBUG(" Merging with %zu existing entries\n", tail); + // With reversed order, entries below tail are still valid + // Insert existing valid entries at the beginning of new_entries + new_entries.insert(new_entries.begin(), entries_.begin(), entries_.begin() + tail); + } + + entries_ = std::move(new_entries); + tail_.store(entries_.size(), std::memory_order_release); + trampolines_installed_ = true; + + LOG_DEBUG( + " Final state: entries_.size()=%zu, tail_=%zu\n", + entries_.size(), + tail_.load(std::memory_order_acquire)); + + // Copy to output buffer - return the IP of each frame (what unw_backtrace returns) + // Reverse order: newest frame at buffer[0], oldest at buffer[count-1] + size_t count = (entries_.size() < max_frames) ? entries_.size() : max_frames; + for (size_t i = 0; i < count; ++i) { + buffer[i] = reinterpret_cast(entries_[count - 1 - i].ip); + } + + LOG_DEBUG("=== capture_and_install EXIT, returning %zu frames ===\n", count); + return count; + } + + // Call the unwinder (custom or default) + size_t do_unwind(void** buffer, size_t max_frames) + { + if (custom_unwinder_) { + return custom_unwinder_(buffer, max_frames); + } + +#ifdef __APPLE__ + // macOS: use standard backtrace function + int ret = ::backtrace(buffer, static_cast(max_frames)); + return (ret > 0) ? static_cast(ret) : 0; +#else + // Linux: use libunwind's unw_backtrace + int ret = unw_backtrace(buffer, static_cast(max_frames)); + return (ret > 0) ? 
static_cast<size_t>(ret) : 0; +#endif + } + + // Shadow stack entries (return addresses and their locations) + std::vector entries_; + + // Current position in the shadow stack (atomic for signal safety) + std::atomic<size_t> tail_{0}; + + // Epoch counter - incremented on reset to invalidate in-flight operations + std::atomic<size_t> epoch_{0}; + + // Guards against recursive calls (e.g., from signal handlers during capture) + bool is_capturing_ = false; + + // Whether trampolines are currently installed + bool trampolines_installed_ = false; + + // Optional custom unwinder function + ghost_stack_unwinder_t custom_unwinder_ = nullptr; +}; + +// ============================================================================ +// Thread-Local Instance Management +// ============================================================================ + +/** + * RAII wrapper for thread-local GhostStackImpl. + * + * When a thread exits, C++ automatically calls this destructor, which resets + * the shadow stack (restoring original return addresses). This matches nwind's + * approach using pthread_key_t destructors, but uses idiomatic C++11. + */ +struct ThreadLocalInstance +{ + GhostStackImpl* ptr = nullptr; + + ~ThreadLocalInstance() + { + if (ptr) { + LOG_DEBUG("Thread exit: resetting shadow stack\n"); + ptr->reset(); + delete ptr; + ptr = nullptr; + } + } +}; + +static thread_local ThreadLocalInstance t_instance; + +static GhostStackImpl& +get_instance() +{ + if (!t_instance.ptr) { + t_instance.ptr = new GhostStackImpl(); + LOG_DEBUG( + "Created new shadow stack instance for thread: this=%p, tid=%lu\n", + (void*)t_instance.ptr, + (unsigned long)pthread_self()); + } + return *t_instance.ptr; +} + +// ============================================================================ +// Global State +// ============================================================================ + +static std::once_flag g_init_flag; +static std::once_flag g_atfork_flag; +static ghost_stack_unwinder_t g_custom_unwinder = nullptr; + +// ============================================================================ +// Fork Safety +// ============================================================================ + +/** + * Called in child process after fork() to reset thread-local state. + * + * After fork(), the child process has a copy of the parent's shadow stack + * entries. The virtual addresses are identical, so entries point to valid + * locations in the child's own stack. We must restore the original return + * addresses before the child returns through any trampolined frames. + */ +static void +fork_child_handler() +{ + if (t_instance.ptr) { + t_instance.ptr->reset(); + } + LOG_DEBUG("Fork child handler: reset shadow stack\n"); +} + +static void +register_atfork_handler() +{ + std::call_once(g_atfork_flag, []() { + pthread_atfork(nullptr, nullptr, fork_child_handler); + LOG_DEBUG("Registered pthread_atfork handler\n"); + }); +} + +// ============================================================================ +// C API Implementation +// ============================================================================ + +extern "C" { + +void +ghost_stack_init(ghost_stack_unwinder_t unwinder) +{ + std::call_once(g_init_flag, [unwinder]() { + g_custom_unwinder = unwinder; + LOG_DEBUG("Initialized with %s unwinder\n", unwinder ?
"custom" : "default"); + }); + + // Register fork handler (idempotent, safe to call multiple times) + register_atfork_handler(); +} + +size_t +ghost_stack_backtrace(void** buffer, size_t size) +{ + // Auto-init if needed + std::call_once(g_init_flag, []() { g_custom_unwinder = nullptr; }); + + // Ensure fork handler is registered (idempotent) + register_atfork_handler(); + + auto& impl = get_instance(); + + // Apply global unwinder setting if not already set + static thread_local bool unwinder_set = false; + if (!unwinder_set) { + impl.set_unwinder(g_custom_unwinder); + unwinder_set = true; + } + + return impl.backtrace(buffer, size); +} + +void +ghost_stack_reset(void) +{ + if (t_instance.ptr) { + t_instance.ptr->reset(); + } +} + +void +ghost_stack_thread_cleanup(void) +{ + if (t_instance.ptr) { + t_instance.ptr->reset(); + delete t_instance.ptr; + t_instance.ptr = nullptr; + } +} + +// Called by assembly trampoline +uintptr_t +ghost_trampoline_handler(uintptr_t sp) +{ + LOG_DEBUG( + ">>> ghost_trampoline_handler called, sp=0x%lx, tid=%lu\n", + (unsigned long)sp, + (unsigned long)pthread_self()); + auto& impl = get_instance(); + LOG_DEBUG(">>> got instance=%p\n", (void*)&impl); + uintptr_t result = impl.on_ret_trampoline(sp); + LOG_DEBUG(">>> ghost_trampoline_handler returning 0x%lx\n", (unsigned long)result); + return result; +} + +// Called when exception passes through trampoline +uintptr_t +ghost_exception_handler(void* exception) +{ + LOG_DEBUG("Exception through trampoline\n"); + + auto& impl = get_instance(); + uintptr_t ret = impl.pop_entry(); // Direct pop, no longjmp check + impl.reset(); + + __cxxabiv1::__cxa_begin_catch(exception); + return ret; +} + +} // extern diff --git a/src/memray/_memray/ghost_stack/src/x86_64_linux_trampoline.s b/src/memray/_memray/ghost_stack/src/x86_64_linux_trampoline.s new file mode 100644 index 0000000000..42061dab84 --- /dev/null +++ b/src/memray/_memray/ghost_stack/src/x86_64_linux_trampoline.s @@ -0,0 +1,253 @@ +/** + * GhostStack Return Trampoline - x86_64 Linux + * ============================================ + * + * This assembly implements the return address trampoline for shadow stack unwinding. + * When GhostStack patches a return address to point here, this trampoline: + * 1. Saves the function's return value (preserved across the callback) + * 2. Calls ghost_trampoline_handler() to get the real return address + * 3. Restores the return value and jumps to the real return address + * + * Exception Handling: + * The trampoline includes DWARF unwind info and an LSDA (Language Specific Data Area) + * so that C++ exceptions can propagate correctly through patched frames. When an + * exception passes through, the personality routine directs control to .L3, which + * calls ghost_exception_handler() to restore the real return address + * before rethrowing. + * + * Key insight: The .cfi_undefined rip directive tells the unwinder that the return + * address is not in a standard location - this is intentional since we've patched it. 
+ * + * x86_64 SysV ABI Notes: + * - Return values: rax (integer/pointer), rdx (second value), xmm0/xmm1 (floating point) + * - We save rax, rdx, and rcx (used by some ABIs like Rust for extra return values) + * - Stack must be 16-byte aligned before CALL instruction + */ + + .text + .section .text.unlikely,"ax",@progbits +.LCOLDB0: + .text +.LHOTB0: + .p2align 4 + + /* ========================================================================== + * ghost_ret_trampoline_start - Exception handling anchor + * ========================================================================== + * This symbol marks the start of the function for DWARF unwinding purposes. + * The CFI directives set up exception handling: + * - .cfi_personality: Use __gxx_personality_v0 for C++ exceptions + * - .cfi_lsda: Point to our Language Specific Data Area for catch clauses + * - .cfi_undefined rip: Signal that return address is non-standard + */ + .globl ghost_ret_trampoline_start + .hidden ghost_ret_trampoline_start + .type ghost_ret_trampoline_start, @function +ghost_ret_trampoline_start: +.LFB0: + .cfi_startproc + .cfi_personality 0x9b,DW.ref.__gxx_personality_v0 + .cfi_lsda 0x1b,.LLSDA0 + .cfi_undefined rip + + /* Exception try region starts here - any exception in this region + * will be caught and redirected to .L3 for proper handling */ +.LEHB0: + nop /* Placeholder for exception region start */ +.LEHE0: + + /* ========================================================================== + * ghost_ret_trampoline - The actual trampoline entry point + * ========================================================================== + * When a function returns and its return address has been patched to point + * here, execution continues at this label. The original return address is + * stored in GhostStack's shadow stack and will be retrieved via callback. + */ +.globl ghost_ret_trampoline +.type ghost_ret_trampoline, @function +ghost_ret_trampoline: +.intel_syntax noprefix + + /* ------------------------------------------------------------------------- + * Step 1: Save return values + * ------------------------------------------------------------------------- + * The function we're returning from may have placed values in these registers. + * We must preserve them across our callback to ghost_trampoline_handler(). + * + * Stack layout after saves: + * rsp+24: rsp value at trampoline entry (one slot above where the + * patched return address was just popped from) + * rsp+16: saved rax (primary return value) + * rsp+8: saved rdx (secondary return value, e.g., for 128-bit returns) + * rsp: saved rcx (used by Rust ABI, also scratch in some cases) + * [then -8 for alignment] + */ + push rax /* Save primary return value */ + push rdx /* Save secondary return value */ + push rcx /* Save rcx (Rust ABI uses this) */ + + /* Align stack to 16-byte boundary (required by SysV ABI before CALL). + * We've pushed 3 * 8 = 24 bytes. Adding 8 makes it 32, which is aligned. */ + sub rsp, 8 + + /* ------------------------------------------------------------------------- + * Step 2: Call into C++ to get the real return address + * ------------------------------------------------------------------------- + * Argument (rdi): the stack pointer value at trampoline entry + * = rsp + 8 (alignment) + 8 (rcx) + 8 (rdx) + 8 (rax) = rsp + 32 + * This lets the C++ code verify stack pointer consistency if desired. + * + * ghost_trampoline_handler() returns the real return address in rax.
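+ * + * Worked example (illustrative): if rsp held E at trampoline entry (i.e. + * just after 'ret' popped the patched return address from E-8), the pushes + * store rax at E-8, rdx at E-16 and rcx at E-24, and the padding leaves + * rsp = E-32; rdi = rsp + 32 = E, exactly the expected_sp value recorded + * on the shadow stack.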
+ */ + mov rdi, rsp + add rdi, 32 /* rdi = stack pointer at trampoline entry */ + call ghost_trampoline_handler + + /* ------------------------------------------------------------------------- + * Step 3: Restore and jump to real return address + * ------------------------------------------------------------------------- + * rax now contains the real return address. Move it to rsi (which the + * restores below do not touch), restore the original return values, then jump. + */ + mov rsi, rax /* Save real return address */ + add rsp, 8 /* Remove alignment padding */ + pop rcx /* Restore rcx */ + pop rdx /* Restore secondary return value */ + pop rax /* Restore primary return value */ + jmp rsi /* Jump to real return address */ + +.att_syntax + + /* ========================================================================== + * Exception landing pad (hot path handoff) + * ========================================================================== + * If an exception is thrown while executing in the try region (.LEHB0-.LEHE0), + * the C++ runtime's personality function sees our LSDA entry and directs + * unwinding here. We save the exception object and jump to the cold handler. + */ +.L3: + movq %rax, %rdi /* Exception object pointer -> first argument */ + jmp .L2 /* Jump to cold exception handler */ + + .globl __gxx_personality_v0 + + /* ========================================================================== + * LSDA (Language Specific Data Area) + * ========================================================================== + * This data tells __gxx_personality_v0 how to handle exceptions in our code. + * Format: DWARF exception handling tables + * + * Key fields: + * - Call site table: Maps PC ranges to landing pads + * - Action table: What to do when landing (0 = cleanup, >0 = catch) + * - Type table: Exception types to catch (not used here, we catch all) + */ + .section .gcc_except_table,"a",@progbits + .align 4 +.LLSDA0: + .byte 0xff /* @LPStart encoding: omit (use function start) */ + .byte 0x9b /* @TType encoding: indirect pcrel sdata4 */ + .uleb128 .LLSDATT0-.LLSDATTD0 /* @TType base offset */ +.LLSDATTD0: + .byte 0x1 /* Call site encoding: uleb128 */ + .uleb128 .LLSDACSE0-.LLSDACSB0 /* Call site table length */ +.LLSDACSB0: + /* Call site entry: try region that catches exceptions */ + .uleb128 .LEHB0-.LFB0 /* Region start (relative to function) */ + .uleb128 .LEHE0-.LEHB0 /* Region length */ + .uleb128 .L3-.LFB0 /* Landing pad (where to go on exception) */ + .uleb128 0x1 /* Action: index into action table (catch-all) */ +.LLSDACSE0: + .byte 0x1 /* Action table entry: catch type index 1 */ + .byte 0 /* No next action */ + .align 4 + .long 0 /* Type table entry: 0 = catch(...) */ + +.LLSDATT0: + .text + .cfi_endproc + + /* ========================================================================== + * Cold exception handler + * ========================================================================== + * This is the "cold" (unlikely) path for exception handling. Placed in a + * separate section to improve instruction cache locality of the hot path. + * + * When we get here: + * 1. An exception was thrown + * 2. The personality function found our LSDA + * 3. Stack was unwound to our frame + * 4. Control transferred to .L3, then here + * + * We must: + * 1. Get the real return address from GhostStack + * 2. Push it so __cxa_rethrow can continue unwinding correctly + * 3.
Rethrow the exception + */ + .section .text.unlikely + .cfi_startproc + .cfi_personality 0x9b,DW.ref.__gxx_personality_v0 + .cfi_lsda 0x1b,.LLSDAC0 + .type ghost_ret_trampoline_start.cold, @function +ghost_ret_trampoline_start.cold: +.LFSB0: +.L2: + /* rdi already contains exception pointer from .L3 */ + call ghost_exception_handler + + /* rax = real return address. Push it onto stack so the unwinder + * sees correct return address when __cxa_rethrow continues. */ + push %rax + + /* Rethrow the exception - unwinding continues from real return address */ + jmp __cxa_rethrow@PLT + .cfi_endproc +.LFE0: + + /* LSDA for cold section (empty - no more catching needed) */ + .section .gcc_except_table + .align 4 +.LLSDAC0: + .byte 0xff + .byte 0x9b + .uleb128 .LLSDATTC0-.LLSDATTDC0 +.LLSDATTDC0: + .byte 0x1 + .uleb128 .LLSDACSEC0-.LLSDACSBC0 +.LLSDACSBC0: +.LLSDACSEC0: + .byte 0x1 + .byte 0 + .align 4 + .long 0 + +.LLSDATTC0: + .section .text.unlikely + .text + .size ghost_ret_trampoline_start, .-ghost_ret_trampoline_start + .section .text.unlikely + .size ghost_ret_trampoline_start.cold, .-ghost_ret_trampoline_start.cold +.LCOLDE0: + .text +.LHOTE0: + + /* ========================================================================== + * Symbol definitions + * ========================================================================== + * Reference to __gxx_personality_v0 for exception handling. + * Placed in a COMDAT group so multiple TUs can define it. + */ + .hidden DW.ref.__gxx_personality_v0 + .weak DW.ref.__gxx_personality_v0 + .section .data.rel.local.DW.ref.__gxx_personality_v0,"awG",@progbits,DW.ref.__gxx_personality_v0,comdat + .align 8 + .type DW.ref.__gxx_personality_v0, @object + .size DW.ref.__gxx_personality_v0, 8 +DW.ref.__gxx_personality_v0: + .quad __gxx_personality_v0 + + /* Hide internal symbols from dynamic linking */ + .hidden ghost_exception_handler + .hidden ghost_trampoline_handler + + /* Mark stack as non-executable (security) */ + .section .note.GNU-stack,"",@progbits diff --git a/src/memray/_memray/ghost_stack_test_utils.cpp b/src/memray/_memray/ghost_stack_test_utils.cpp new file mode 100644 index 0000000000..5919caa833 --- /dev/null +++ b/src/memray/_memray/ghost_stack_test_utils.cpp @@ -0,0 +1,93 @@ +#include "ghost_stack_test_utils.h" + +#include +#include + +#ifdef MEMRAY_HAS_GHOST_STACK +# include "ghost_stack.h" +# ifdef __APPLE__ +# include +# else +# define UNW_LOCAL_ONLY +# include +# endif +#endif + +extern "C" { + +PyObject* +ghost_stack_test_backtrace(void) +{ +#ifdef MEMRAY_HAS_GHOST_STACK + void* frames[256]; + size_t n = ghost_stack_backtrace(frames, 256); + PyObject* result = PyList_New(static_cast(n)); + if (!result) return nullptr; + for (size_t i = 0; i < n; i++) { + PyObject* addr = PyLong_FromUnsignedLongLong(reinterpret_cast(frames[i])); + if (!addr) { + Py_DECREF(result); + return nullptr; + } + PyList_SET_ITEM(result, static_cast(i), addr); + } + return result; +#else + Py_RETURN_NONE; +#endif +} + +PyObject* +libunwind_test_backtrace(void) +{ +#ifdef MEMRAY_HAS_GHOST_STACK + void* frames[256]; +# ifdef __APPLE__ + int n = backtrace(frames, 256); +# else + int n = unw_backtrace(frames, 256); +# endif + if (n < 0) n = 0; + PyObject* result = PyList_New(static_cast(n)); + if (!result) return nullptr; + for (int i = 0; i < n; i++) { + PyObject* addr = PyLong_FromUnsignedLongLong(reinterpret_cast(frames[i])); + if (!addr) { + Py_DECREF(result); + return nullptr; + } + PyList_SET_ITEM(result, static_cast(i), addr); + } + return result; +#else + 
Py_RETURN_NONE; +#endif +} + +void +ghost_stack_test_reset(void) +{ +#ifdef MEMRAY_HAS_GHOST_STACK + ghost_stack_reset(); +#endif +} + +void +ghost_stack_test_init(void) +{ +#ifdef MEMRAY_HAS_GHOST_STACK + ghost_stack_init(nullptr); +#endif +} + +int +ghost_stack_test_has_support(void) +{ +#ifdef MEMRAY_HAS_GHOST_STACK + return 1; +#else + return 0; +#endif +} + +} // extern "C" diff --git a/src/memray/_memray/ghost_stack_test_utils.h b/src/memray/_memray/ghost_stack_test_utils.h new file mode 100644 index 0000000000..829085b125 --- /dev/null +++ b/src/memray/_memray/ghost_stack_test_utils.h @@ -0,0 +1,34 @@ +#pragma once + +#include <Python.h> +#include + +#ifdef __cplusplus +extern "C" { +#endif + +// Returns a Python list of frame addresses from ghost_stack_backtrace +// Returns Py_None if MEMRAY_HAS_GHOST_STACK is not defined +PyObject* +ghost_stack_test_backtrace(void); + +// Returns a Python list of frame addresses from unw_backtrace (libunwind) +// Returns Py_None if MEMRAY_HAS_GHOST_STACK is not defined +PyObject* +libunwind_test_backtrace(void); + +// Reset ghost_stack shadow stack +void +ghost_stack_test_reset(void); + +// Initialize ghost_stack +void +ghost_stack_test_init(void); + +// Check if ghost_stack support is available +int +ghost_stack_test_has_support(void); + +#ifdef __cplusplus +} +#endif diff --git a/src/memray/_memray/tracking_api.cpp b/src/memray/_memray/tracking_api.cpp index e63bfaffae..3aa40f15fc 100644 --- a/src/memray/_memray/tracking_api.cpp +++ b/src/memray/_memray/tracking_api.cpp @@ -768,19 +768,21 @@ PythonStackTracker::clear() Tracker::Tracker( std::unique_ptr<RecordWriter> record_writer, bool native_traces, + bool fast_unwind, unsigned int memory_interval, bool follow_fork, bool trace_python_allocators, bool reference_tracking) : d_writer(std::move(record_writer)) , d_unwind_native_frames(native_traces) +, d_fast_unwind(fast_unwind) , d_memory_interval(memory_interval) , d_follow_fork(follow_fork) , d_trace_python_allocators(trace_python_allocators) , d_reference_tracking(reference_tracking) { static std::once_flag once; - call_once(once, [] { + call_once(once, [fast_unwind] { // We use the pthread TLS API for this vector because we must be able // to re-create it while TLS destructors are running (a destructor can // call malloc, hitting our malloc hook).
POSIX guarantees multiple @@ -794,7 +796,7 @@ Tracker::Tracker( } hooks::ensureAllHooksAreValid(); - NativeTrace::setup(); + NativeTrace::setup(fast_unwind); #if PY_VERSION_HEX >= 0x030C0000 PyCode_AddWatcher([](PyCodeEvent event, PyCodeObject* code) { @@ -1064,6 +1066,7 @@ Tracker::childFork() s_instance_owner.reset(new Tracker( std::move(new_writer), old_tracker->d_unwind_native_frames, + old_tracker->d_fast_unwind, old_tracker->d_memory_interval, old_tracker->d_follow_fork, old_tracker->d_trace_python_allocators, @@ -1438,6 +1441,7 @@ PyObject* Tracker::createTracker( std::unique_ptr<RecordWriter> record_writer, bool native_traces, + bool fast_unwind, unsigned int memory_interval, bool follow_fork, bool trace_python_allocators, @@ -1446,6 +1450,7 @@ Tracker::createTracker( s_instance_owner.reset(new Tracker( std::move(record_writer), native_traces, + fast_unwind, memory_interval, follow_fork, trace_python_allocators, diff --git a/src/memray/_memray/tracking_api.h b/src/memray/_memray/tracking_api.h index e9febf2ac2..d886e2bd8d 100644 --- a/src/memray/_memray/tracking_api.h +++ b/src/memray/_memray/tracking_api.h @@ -25,6 +25,12 @@ # include #endif +#ifdef MEMRAY_HAS_GHOST_STACK +# include "ghost_stack.h" +// ghost_stack skips 1 internal frame, we skip 1 more for our tracking frame +# define GHOST_STACK_SKIP_FRAMES 1 +#endif + #include "frame_tree.h" #include "hooks.h" #include "linker_shenanigans.h" @@ -136,6 +142,7 @@ class NativeTrace { public: using ip_t = frame_id_t; + static inline bool s_use_fast_unwind = false; NativeTrace(std::vector<ip_t>& data) : d_data(data) @@ -163,9 +170,25 @@ size_t size; while (true) { #ifdef __linux__ +# ifdef MEMRAY_HAS_GHOST_STACK + if (s_use_fast_unwind) { + size = ghost_stack_backtrace((void**)d_data.data(), d_data.size()); + } else { + size = unw_backtrace((void**)d_data.data(), d_data.size()); + } +# else size = unw_backtrace((void**)d_data.data(), d_data.size()); +# endif #elif defined(__APPLE__) +# ifdef MEMRAY_HAS_GHOST_STACK + if (s_use_fast_unwind) { + size = ghost_stack_backtrace((void**)d_data.data(), d_data.size()); + } else { + size = ::backtrace((void**)d_data.data(), d_data.size()); + } +# else size = ::backtrace((void**)d_data.data(), d_data.size()); +# endif #else return 0; #endif @@ -176,11 +199,15 @@ d_data.resize(d_data.size() * 2); } d_size = size > skip ? size - skip : 0; +#ifdef MEMRAY_HAS_GHOST_STACK + d_skip = skip + (s_use_fast_unwind ?
GHOST_STACK_SKIP_FRAMES : 0); +#else + d_skip = skip; +#endif return d_size > 0; } - static void setup() + static void setup(bool use_fast_unwind = false) { #ifdef __linux__ // configure libunwind for better speed @@ -192,7 +219,21 @@ fprintf(stderr, "WARNING: Failed to set libunwind cache size.\n"); } # endif +# ifdef MEMRAY_HAS_GHOST_STACK + if (use_fast_unwind) { + ghost_stack_init(nullptr); + s_use_fast_unwind = true; + } +# endif +#elif defined(__APPLE__) +# ifdef MEMRAY_HAS_GHOST_STACK + if (use_fast_unwind) { + ghost_stack_init(nullptr); + s_use_fast_unwind = true; + } +# endif #else + (void)use_fast_unwind; return; #endif } @@ -206,6 +247,17 @@ #endif } + + static inline void resetGhostStack() + { +#if defined(__linux__) || defined(__APPLE__) +# ifdef MEMRAY_HAS_GHOST_STACK + if (s_use_fast_unwind) { + ghost_stack_reset(); + } +# endif +#endif + } + private: size_t d_size = 0; size_t d_skip = 0; @@ -236,6 +288,7 @@ class Tracker static PyObject* createTracker( std::unique_ptr<RecordWriter> record_writer, bool native_traces, + bool fast_unwind, unsigned int memory_interval, bool follow_fork, bool trace_python_allocators, @@ -438,6 +491,7 @@ class Tracker std::shared_ptr<RecordWriter> d_writer; FrameTree d_native_trace_tree; const bool d_unwind_native_frames; + const bool d_fast_unwind; const unsigned int d_memory_interval; const bool d_follow_fork; const bool d_trace_python_allocators; @@ -473,6 +527,7 @@ class Tracker explicit Tracker( std::unique_ptr<RecordWriter> record_writer, bool native_traces, + bool fast_unwind, unsigned int memory_interval, bool follow_fork, bool trace_python_allocators, diff --git a/src/memray/_memray/tracking_api.pxd b/src/memray/_memray/tracking_api.pxd index 2a748d4b0e..7d74d41a12 100644 --- a/src/memray/_memray/tracking_api.pxd +++ b/src/memray/_memray/tracking_api.pxd @@ -19,6 +19,7 @@ cdef extern from "tracking_api.h" namespace "memray::tracking_api": object createTracker( unique_ptr[RecordWriter] record_writer, bool native_traces, + bool fast_unwind, unsigned int memory_interval, bool follow_fork, bool trace_pymalloc, diff --git a/src/memray/_memray_test_utils.pyx b/src/memray/_memray_test_utils.pyx index d3fa2352ef..ee4c1e65f4 100644 --- a/src/memray/_memray_test_utils.pyx +++ b/src/memray/_memray_test_utils.pyx @@ -285,3 +285,50 @@ cdef class PrimeCaches: return self def __exit__(self, *args): sys.setprofile(self.old_profile) + + +# Ghost stack test utilities +cdef extern from "_memray/ghost_stack_test_utils.h": + object ghost_stack_test_backtrace() + object libunwind_test_backtrace() + void ghost_stack_test_reset() + void ghost_stack_test_init() + int ghost_stack_test_has_support() + + +def has_ghost_stack_support(): + """Check if ghost_stack support is available.""" + return ghost_stack_test_has_support() != 0 + + +cdef class GhostStackTestContext: + """Context manager for ghost_stack testing.
+ + Usage: + with GhostStackTestContext() as ctx: + frames = ctx.backtrace() + libunwind_frames = ctx.libunwind_backtrace() + """ + + def __enter__(self): + # init is defensive in case ghost_stack wasn't initialized globally; + # reset clears any stale shadow stack state from previous operations + ghost_stack_test_init() + ghost_stack_test_reset() + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + ghost_stack_test_reset() + return False + + def backtrace(self): + """Capture ghost_stack frames and return as list of addresses.""" + return ghost_stack_test_backtrace() + + def libunwind_backtrace(self): + """Capture libunwind frames for comparison.""" + return libunwind_test_backtrace() + + def reset(self): + """Reset ghost_stack shadow stack.""" + ghost_stack_test_reset() diff --git a/src/memray/commands/run.py b/src/memray/commands/run.py index 4f006cf1ca..cfbd3be378 100644 --- a/src/memray/commands/run.py +++ b/src/memray/commands/run.py @@ -50,9 +50,13 @@ def _run_tracker( kwargs["trace_python_allocators"] = True if args.aggregate: kwargs["file_format"] = FileFormat.AGGREGATED_ALLOCATIONS + if args.fast_unwind: + kwargs["fast_unwind"] = True tracker = Tracker(destination=destination, native_traces=args.native, **kwargs) except OSError as error: raise MemrayCommandError(str(error), exit_code=1) + except ValueError as error: + raise MemrayCommandError(str(error), exit_code=1) with tracker: pid = os.getpid() @@ -83,6 +87,7 @@ def _run_tracker( def _child_process( port: int, native: bool, + fast_unwind: bool, trace_python_allocators: bool, run_as_module: bool, run_as_cmd: bool, @@ -92,6 +97,7 @@ def _child_process( ) -> None: args = argparse.Namespace( native=native, + fast_unwind=fast_unwind, trace_python_allocators=trace_python_allocators, follow_fork=False, aggregate=False, @@ -112,7 +118,7 @@ def _run_child_process_and_attach(args: argparse.Namespace) -> None: raise MemrayCommandError(f"Invalid port: {port}", exit_code=1) arguments = ( - f"{port},{args.native},{args.trace_python_allocators}," + f"{port},{args.native},{args.fast_unwind},{args.trace_python_allocators}," f"{args.run_as_module},{args.run_as_cmd},{args.quiet}," f"{args.script!r},{args.script_args}" ) @@ -240,6 +246,13 @@ def prepare_parser(self, parser: argparse.ArgumentParser) -> None: dest="native", default=False, ) + parser.add_argument( + "--fast-unwind", + help="Use optimized native stack unwinding with shadow stack caching (requires --native)", + action="store_true", + dest="fast_unwind", + default=False, + ) parser.add_argument( "--follow-fork", action="store_true", diff --git a/src/vendor/libbacktrace/install_arm64/include/libbacktrace/backtrace-supported.h b/src/vendor/libbacktrace/install_arm64/include/libbacktrace/backtrace-supported.h new file mode 100644 index 0000000000..39482feb9e --- /dev/null +++ b/src/vendor/libbacktrace/install_arm64/include/libbacktrace/backtrace-supported.h @@ -0,0 +1,66 @@ +/* backtrace-supported.h.in -- Whether stack backtrace is supported. + Copyright (C) 2012-2024 Free Software Foundation, Inc. + Written by Ian Lance Taylor, Google. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + (1) Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ + (2) Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + + (3) The name of the author may not be used to + endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, +INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING +IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. */ + +/* The file backtrace-supported.h.in is used by configure to generate + the file backtrace-supported.h. The file backtrace-supported.h may + be #include'd to see whether the backtrace library will be able to + get a backtrace and produce symbolic information. */ + + +/* BACKTRACE_SUPPORTED will be #define'd as 1 if the backtrace library + should work, 0 if it will not. Libraries may #include this to make + other arrangements. */ + +#define BACKTRACE_SUPPORTED 1 + +/* BACKTRACE_USES_MALLOC will be #define'd as 1 if the backtrace + library will call malloc as it works, 0 if it will call mmap + instead. This may be used to determine whether it is safe to call + the backtrace functions from a signal handler. In general this + only applies to calls like backtrace and backtrace_pcinfo. It does + not apply to backtrace_simple, which never calls malloc. It does + not apply to backtrace_print, which always calls fprintf and + therefore malloc. */ + +#define BACKTRACE_USES_MALLOC 0 + +/* BACKTRACE_SUPPORTS_THREADS will be #define'd as 1 if the backtrace + library is configured with threading support, 0 if not. If this is + 0, the threaded parameter to backtrace_create_state must be passed + as 0. */ + +#define BACKTRACE_SUPPORTS_THREADS 1 + +/* BACKTRACE_SUPPORTS_DATA will be #defined'd as 1 if the backtrace_syminfo + will work for variables. It will always work for functions. */ + +#define BACKTRACE_SUPPORTS_DATA 1 diff --git a/src/vendor/libbacktrace/install_arm64/include/libbacktrace/backtrace.h b/src/vendor/libbacktrace/install_arm64/include/libbacktrace/backtrace.h new file mode 100644 index 0000000000..de92a3afb3 --- /dev/null +++ b/src/vendor/libbacktrace/install_arm64/include/libbacktrace/backtrace.h @@ -0,0 +1,189 @@ +/* backtrace.h -- Public header file for stack backtrace library. + Copyright (C) 2012-2024 Free Software Foundation, Inc. + Written by Ian Lance Taylor, Google. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + (1) Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + (2) Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. 
+ + (3) The name of the author may not be used to + endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, +INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING +IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. */ + +#ifndef BACKTRACE_H +#define BACKTRACE_H + +#include <stddef.h> +#include <stdint.h> +#include <stdio.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* The backtrace state. This struct is intentionally not defined in + the public interface. */ + +struct backtrace_state; + +/* The type of the error callback argument to backtrace functions. + This function, if not NULL, will be called for certain error cases. + The DATA argument is passed to the function that calls this one. + The MSG argument is an error message. The ERRNUM argument, if + greater than 0, holds an errno value. The MSG buffer may become + invalid after this function returns. + + As a special case, the ERRNUM argument will be passed as -1 if no + debug info can be found for the executable, or if the debug info + exists but has an unsupported version, but the function requires + debug info (e.g., backtrace_full, backtrace_pcinfo). The MSG in + this case will be something along the lines of "no debug info". + Similarly, ERRNUM will be passed as -1 if there is no symbol table, + but the function requires a symbol table (e.g., backtrace_syminfo). + This may be used as a signal that some other approach should be + tried. */ + +typedef void (*backtrace_error_callback) (void *data, const char *msg, + int errnum); + +/* Create state information for the backtrace routines. This must be + called before any of the other routines, and its return value must + be passed to all of the other routines. FILENAME is the path name + of the executable file; if it is NULL the library will try + system-specific path names. If not NULL, FILENAME must point to a + permanent buffer. If THREADED is non-zero the state may be + accessed by multiple threads simultaneously, and the library will + use appropriate atomic operations. If THREADED is zero the state + may only be accessed by one thread at a time. This returns a state + pointer on success, NULL on error. If an error occurs, this will + call the ERROR_CALLBACK routine. + + Calling this function allocates resources that cannot be freed. + There is no backtrace_free_state function. The state is used to + cache information that is expensive to recompute. Programs are + expected to call this function at most once and to save the return + value for all later calls to backtrace functions. */ + +extern struct backtrace_state *backtrace_create_state ( + const char *filename, int threaded, + backtrace_error_callback error_callback, void *data); + +/* The type of the callback argument to the backtrace_full function. + DATA is the argument passed to backtrace_full. PC is the program + counter. FILENAME is the name of the file containing PC, or NULL + if not available.
LINENO is the line number in FILENAME containing + PC, or 0 if not available. FUNCTION is the name of the function + containing PC, or NULL if not available. This should return 0 to + continuing tracing. The FILENAME and FUNCTION buffers may become + invalid after this function returns. */ + +typedef int (*backtrace_full_callback) (void *data, uintptr_t pc, + const char *filename, int lineno, + const char *function); + +/* Get a full stack backtrace. SKIP is the number of frames to skip; + passing 0 will start the trace with the function calling + backtrace_full. DATA is passed to the callback routine. If any + call to CALLBACK returns a non-zero value, the stack backtrace + stops, and backtrace returns that value; this may be used to limit + the number of stack frames desired. If all calls to CALLBACK + return 0, backtrace returns 0. The backtrace_full function will + make at least one call to either CALLBACK or ERROR_CALLBACK. This + function requires debug info for the executable. */ + +extern int backtrace_full (struct backtrace_state *state, int skip, + backtrace_full_callback callback, + backtrace_error_callback error_callback, + void *data); + +/* The type of the callback argument to the backtrace_simple function. + DATA is the argument passed to simple_backtrace. PC is the program + counter. This should return 0 to continue tracing. */ + +typedef int (*backtrace_simple_callback) (void *data, uintptr_t pc); + +/* Get a simple backtrace. SKIP is the number of frames to skip, as + in backtrace. DATA is passed to the callback routine. If any call + to CALLBACK returns a non-zero value, the stack backtrace stops, + and backtrace_simple returns that value. Otherwise + backtrace_simple returns 0. The backtrace_simple function will + make at least one call to either CALLBACK or ERROR_CALLBACK. This + function does not require any debug info for the executable. */ + +extern int backtrace_simple (struct backtrace_state *state, int skip, + backtrace_simple_callback callback, + backtrace_error_callback error_callback, + void *data); + +/* Print the current backtrace in a user readable format to a FILE. + SKIP is the number of frames to skip, as in backtrace_full. Any + error messages are printed to stderr. This function requires debug + info for the executable. */ + +extern void backtrace_print (struct backtrace_state *state, int skip, FILE *); + +/* Given PC, a program counter in the current program, call the + callback function with filename, line number, and function name + information. This will normally call the callback function exactly + once. However, if the PC happens to describe an inlined call, and + the debugging information contains the necessary information, then + this may call the callback function multiple times. This will make + at least one call to either CALLBACK or ERROR_CALLBACK. This + returns the first non-zero value returned by CALLBACK, or 0. */ + +extern int backtrace_pcinfo (struct backtrace_state *state, uintptr_t pc, + backtrace_full_callback callback, + backtrace_error_callback error_callback, + void *data); + +/* The type of the callback argument to backtrace_syminfo. DATA and + PC are the arguments passed to backtrace_syminfo. SYMNAME is the + name of the symbol for the corresponding code. SYMVAL is the + value and SYMSIZE is the size of the symbol. SYMNAME will be NULL + if no error occurred but the symbol could not be found. 
*/ + +typedef void (*backtrace_syminfo_callback) (void *data, uintptr_t pc, + const char *symname, + uintptr_t symval, + uintptr_t symsize); + +/* Given ADDR, an address or program counter in the current program, + call the callback information with the symbol name and value + describing the function or variable in which ADDR may be found. + This will call either CALLBACK or ERROR_CALLBACK exactly once. + This returns 1 on success, 0 on failure. This function requires + the symbol table but does not require the debug info. Note that if + the symbol table is present but ADDR could not be found in the + table, CALLBACK will be called with a NULL SYMNAME argument. + Returns 1 on success, 0 on error. */ + +extern int backtrace_syminfo (struct backtrace_state *state, uintptr_t addr, + backtrace_syminfo_callback callback, + backtrace_error_callback error_callback, + void *data); + +#ifdef __cplusplus +} /* End extern "C". */ +#endif + +#endif diff --git a/src/vendor/libbacktrace/install_arm64/include/libbacktrace/debuginfod_support.h b/src/vendor/libbacktrace/install_arm64/include/libbacktrace/debuginfod_support.h new file mode 100644 index 0000000000..78f4d8df29 --- /dev/null +++ b/src/vendor/libbacktrace/install_arm64/include/libbacktrace/debuginfod_support.h @@ -0,0 +1,115 @@ +/* External declarations for the libdebuginfod client library. + Copyright (C) 2019-2020 Red Hat, Inc. + This file is part of elfutils. + + This file is free software; you can redistribute it and/or modify + it under the terms of either + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at + your option) any later version + + or + + * the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at + your option) any later version + + or both in parallel, as here. + + elfutils is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received copies of the GNU General Public License and + the GNU Lesser General Public License along with this program. If + not, see . */ + +#ifndef _DEBUGINFOD_CLIENT_H +#define _DEBUGINFOD_CLIENT_H 1 + +/* Names of environment variables that control the client logic. */ +#define DEBUGINFOD_URLS_ENV_VAR "DEBUGINFOD_URLS" +#define DEBUGINFOD_CACHE_PATH_ENV_VAR "DEBUGINFOD_CACHE_PATH" +#define DEBUGINFOD_TIMEOUT_ENV_VAR "DEBUGINFOD_TIMEOUT" +#define DEBUGINFOD_PROGRESS_ENV_VAR "DEBUGINFOD_PROGRESS" +#define DEBUGINFOD_VERBOSE_ENV_VAR "DEBUGINFOD_VERBOSE" +#define DEBUGINFOD_RETRY_LIMIT_ENV_VAR "DEBUGINFOD_RETRY_LIMIT" +#define DEBUGINFOD_MAXSIZE_ENV_VAR "DEBUGINFOD_MAXSIZE" +#define DEBUGINFOD_MAXTIME_ENV_VAR "DEBUGINFOD_MAXTIME" +#define DEBUGINFOD_HEADERS_FILE_ENV_VAR "DEBUGINFOD_HEADERS_FILE" + +/* Handle for debuginfod-client connection. */ +typedef struct debuginfod_client debuginfod_client; + +#ifdef __cplusplus +extern "C" { +#endif + +/* Create a handle for a new debuginfod-client session. */ +debuginfod_client *debuginfod_begin (void); + +/* Query the urls contained in $DEBUGINFOD_URLS for a file with + the specified type and build id. If build_id_len == 0, the + build_id is supplied as a lowercase hexadecimal string; otherwise + it is a binary blob of given length. + + If successful, return a file descriptor to the target, otherwise + return a posix error code. 
If successful, set *path to a + strdup'd copy of the name of the same file in the cache. + Caller must free() it later. */ + +int debuginfod_find_debuginfo (debuginfod_client *client, + const unsigned char *build_id, + int build_id_len, + char **path); + +int debuginfod_find_executable (debuginfod_client *client, + const unsigned char *build_id, + int build_id_len, + char **path); + +int debuginfod_find_source (debuginfod_client *client, + const unsigned char *build_id, + int build_id_len, + const char *filename, + char **path); + +int debuginfod_find_section (debuginfod_client *client, + const unsigned char *build_id, + int build_id_len, + const char *section, + char **path); + +typedef int (*debuginfod_progressfn_t)(debuginfod_client *c, long a, long b); +void debuginfod_set_progressfn(debuginfod_client *c, + debuginfod_progressfn_t fn); + +void debuginfod_set_verbose_fd(debuginfod_client *c, int fd); + +/* Set the user parameter. */ +void debuginfod_set_user_data (debuginfod_client *client, void *value); + +/* Get the user parameter. */ +void* debuginfod_get_user_data (debuginfod_client *client); + +/* Get the current or last active URL, if known. */ +const char* debuginfod_get_url (debuginfod_client *client); + +/* Returns set of x-debuginfod* header lines received from current or + last active transfer, \n separated, if known. */ +const char* debuginfod_get_headers(debuginfod_client *client); + +/* Add an outgoing HTTP request "Header: Value". Copies string. */ +int debuginfod_add_http_header (debuginfod_client *client, const char* header); + +/* Release debuginfod client connection context handle. */ +void debuginfod_end (debuginfod_client *client); + +#ifdef __cplusplus +} +#endif + + +#endif /* _DEBUGINFOD_CLIENT_H */ diff --git a/src/vendor/libbacktrace/install_arm64/include/libbacktrace/internal.h b/src/vendor/libbacktrace/install_arm64/include/libbacktrace/internal.h new file mode 100644 index 0000000000..fdadc24ec0 --- /dev/null +++ b/src/vendor/libbacktrace/install_arm64/include/libbacktrace/internal.h @@ -0,0 +1,467 @@ +/* internal.h -- Internal header file for stack backtrace library. + Copyright (C) 2012-2024 Free Software Foundation, Inc. + Written by Ian Lance Taylor, Google. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + (1) Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + (2) Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + + (3) The name of the author may not be used to + endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, +INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING +IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. */ + +#ifndef BACKTRACE_INTERNAL_H +#define BACKTRACE_INTERNAL_H + +#ifdef __cplusplus +extern "C" { +#endif + +/* We assume that and "backtrace.h" have already been + included. */ + +#ifndef GCC_VERSION +# define GCC_VERSION (__GNUC__ * 1000 + __GNUC_MINOR__) +#endif + +#if (GCC_VERSION < 2007) +# define __attribute__(x) +#endif + +#ifndef ATTRIBUTE_UNUSED +# define ATTRIBUTE_UNUSED __attribute__ ((__unused__)) +#endif + +#ifndef ATTRIBUTE_MALLOC +# if (GCC_VERSION >= 2096) +# define ATTRIBUTE_MALLOC __attribute__ ((__malloc__)) +# else +# define ATTRIBUTE_MALLOC +# endif +#endif + +#ifdef __has_attribute +# if __has_attribute(fallthrough) +# define ATTRIBUTE_FALLTHROUGH __attribute__ ((fallthrough)) +# endif +#endif +#ifndef ATTRIBUTE_FALLTHROUGH +# if (GCC_VERSION >= 7000) +# define ATTRIBUTE_FALLTHROUGH __attribute__ ((__fallthrough__)) +# else +# define ATTRIBUTE_FALLTHROUGH +# endif +#endif + +#ifndef HAVE_SYNC_FUNCTIONS + +/* Define out the sync functions. These should never be called if + they are not available. */ + +#define __sync_bool_compare_and_swap(A, B, C) (abort(), 1) +#define __sync_lock_test_and_set(A, B) (abort(), 0) +#define __sync_lock_release(A) abort() + +#endif /* !defined (HAVE_SYNC_FUNCTIONS) */ + +#ifdef HAVE_ATOMIC_FUNCTIONS + +/* We have the atomic builtin functions. */ + +#define backtrace_atomic_load_pointer(p) \ + __atomic_load_n ((p), __ATOMIC_ACQUIRE) +#define backtrace_atomic_load_int(p) \ + __atomic_load_n ((p), __ATOMIC_ACQUIRE) +#define backtrace_atomic_store_pointer(p, v) \ + __atomic_store_n ((p), (v), __ATOMIC_RELEASE) +#define backtrace_atomic_store_size_t(p, v) \ + __atomic_store_n ((p), (v), __ATOMIC_RELEASE) +#define backtrace_atomic_store_int(p, v) \ + __atomic_store_n ((p), (v), __ATOMIC_RELEASE) + +#else /* !defined (HAVE_ATOMIC_FUNCTIONS) */ +#ifdef HAVE_SYNC_FUNCTIONS + +/* We have the sync functions but not the atomic functions. Define + the atomic ones in terms of the sync ones. */ + +extern void *backtrace_atomic_load_pointer (void *); +extern int backtrace_atomic_load_int (int *); +extern void backtrace_atomic_store_pointer (void *, void *); +extern void backtrace_atomic_store_size_t (size_t *, size_t); +extern void backtrace_atomic_store_int (int *, int); + +#else /* !defined (HAVE_SYNC_FUNCTIONS) */ + +/* We have neither the sync nor the atomic functions. These will + never be called. */ + +#define backtrace_atomic_load_pointer(p) (abort(), (void *) NULL) +#define backtrace_atomic_load_int(p) (abort(), 0) +#define backtrace_atomic_store_pointer(p, v) abort() +#define backtrace_atomic_store_size_t(p, v) abort() +#define backtrace_atomic_store_int(p, v) abort() + +#endif /* !defined (HAVE_SYNC_FUNCTIONS) */ +#endif /* !defined (HAVE_ATOMIC_FUNCTIONS) */ + +/* The type of the function that collects file/line information. This + is like backtrace_pcinfo. 
*/ + +typedef int (*fileline) (struct backtrace_state *state, uintptr_t pc, + backtrace_full_callback callback, + backtrace_error_callback error_callback, void *data); + +/* The type of the function that collects symbol information. This is + like backtrace_syminfo. */ + +typedef void (*syminfo) (struct backtrace_state *state, uintptr_t pc, + backtrace_syminfo_callback callback, + backtrace_error_callback error_callback, void *data); + +/* What the backtrace state pointer points to. */ + +struct backtrace_state +{ + /* The name of the executable. */ + const char *filename; + /* Non-zero if threaded. */ + int threaded; + /* The master lock for fileline_fn, fileline_data, syminfo_fn, + syminfo_data, fileline_initialization_failed and everything the + data pointers point to. */ + void *lock; + /* The function that returns file/line information. */ + fileline fileline_fn; + /* The data to pass to FILELINE_FN. */ + void *fileline_data; + /* The function that returns symbol information. */ + syminfo syminfo_fn; + /* The data to pass to SYMINFO_FN. */ + void *syminfo_data; + /* Whether initializing the file/line information failed. */ + int fileline_initialization_failed; + /* The lock for the freelist. */ + int lock_alloc; + /* The freelist when using mmap. */ + struct backtrace_freelist_struct *freelist; +}; + +/* Open a file for reading. Returns -1 on error. If DOES_NOT_EXIST + is not NULL, *DOES_NOT_EXIST will be set to 0 normally and set to 1 + if the file does not exist. If the file does not exist and + DOES_NOT_EXIST is not NULL, the function will return -1 and will + not call ERROR_CALLBACK. On other errors, or if DOES_NOT_EXIST is + NULL, the function will call ERROR_CALLBACK before returning. */ +extern int backtrace_open (const char *filename, + backtrace_error_callback error_callback, + void *data, + int *does_not_exist); + +/* A view of the contents of a file. This supports mmap when + available. A view will remain in memory even after backtrace_close + is called on the file descriptor from which the view was + obtained. */ + +struct backtrace_view +{ + /* The data that the caller requested. */ + const void *data; + /* The base of the view. */ + void *base; + /* The total length of the view. */ + size_t len; +}; + +/* Create a view of SIZE bytes from DESCRIPTOR at OFFSET. Store the + result in *VIEW. Returns 1 on success, 0 on error. */ +extern int backtrace_get_view (struct backtrace_state *state, int descriptor, + off_t offset, uint64_t size, + backtrace_error_callback error_callback, + void *data, struct backtrace_view *view); + +/* Release a view created by backtrace_get_view. */ +extern void backtrace_release_view (struct backtrace_state *state, + struct backtrace_view *view, + backtrace_error_callback error_callback, + void *data); + +/* Close a file opened by backtrace_open. Returns 1 on success, 0 on + error. */ + +extern int backtrace_close (int descriptor, + backtrace_error_callback error_callback, + void *data); + +/* Sort without using memory. */ + +extern void backtrace_qsort (void *base, size_t count, size_t size, + int (*compar) (const void *, const void *)); + +/* Allocate memory. This is like malloc. If ERROR_CALLBACK is NULL, + this does not report an error, it just returns NULL. */ + +extern void *backtrace_alloc (struct backtrace_state *state, size_t size, + backtrace_error_callback error_callback, + void *data) ATTRIBUTE_MALLOC; + +/* Free memory allocated by backtrace_alloc. If ERROR_CALLBACK is + NULL, this does not report an error. 
*/ + +extern void backtrace_free (struct backtrace_state *state, void *mem, + size_t size, + backtrace_error_callback error_callback, + void *data); + +/* A growable vector of some struct. This is used for more efficient + allocation when we don't know the final size of some group of data + that we want to represent as an array. */ + +struct backtrace_vector +{ + /* The base of the vector. */ + void *base; + /* The number of bytes in the vector. */ + size_t size; + /* The number of bytes available at the current allocation. */ + size_t alc; +}; + +/* Grow VEC by SIZE bytes. Return a pointer to the newly allocated + bytes. Note that this may move the entire vector to a new memory + location. Returns NULL on failure. */ + +extern void *backtrace_vector_grow (struct backtrace_state *state, size_t size, + backtrace_error_callback error_callback, + void *data, + struct backtrace_vector *vec); + +/* Finish the current allocation on VEC. Prepare to start a new + allocation. The finished allocation will never be freed. Returns + a pointer to the base of the finished entries, or NULL on + failure. */ + +extern void* backtrace_vector_finish (struct backtrace_state *state, + struct backtrace_vector *vec, + backtrace_error_callback error_callback, + void *data); + +/* Release any extra space allocated for VEC. This may change + VEC->base. Returns 1 on success, 0 on failure. */ + +extern int backtrace_vector_release (struct backtrace_state *state, + struct backtrace_vector *vec, + backtrace_error_callback error_callback, + void *data); + +/* Free the space managed by VEC. This will reset VEC. */ + +static inline void +backtrace_vector_free (struct backtrace_state *state, + struct backtrace_vector *vec, + backtrace_error_callback error_callback, void *data) +{ + vec->alc += vec->size; + vec->size = 0; + backtrace_vector_release (state, vec, error_callback, data); +} + +/* Read initial debug data from a descriptor, and set the + fileline_data, syminfo_fn, and syminfo_data fields of STATE. + Return the fileln_fn field in *FILELN_FN--this is done this way so + that the synchronization code is only implemented once. This is + called after the descriptor has first been opened. It will close + the descriptor if it is no longer needed. Returns 1 on success, 0 + on error. There will be multiple implementations of this function, + for different file formats. Each system will compile the + appropriate one. */ + +extern int backtrace_initialize (struct backtrace_state *state, + const char *filename, + int descriptor, + backtrace_error_callback error_callback, + void *data, + fileline *fileline_fn); + +/* An enum for the DWARF sections we care about. */ + +enum dwarf_section +{ + DEBUG_INFO, + DEBUG_LINE, + DEBUG_ABBREV, + DEBUG_RANGES, + DEBUG_STR, + DEBUG_ADDR, + DEBUG_STR_OFFSETS, + DEBUG_LINE_STR, + DEBUG_RNGLISTS, + + DEBUG_MAX +}; + +/* Data for the DWARF sections we care about. */ + +struct dwarf_sections +{ + const unsigned char *data[DEBUG_MAX]; + size_t size[DEBUG_MAX]; +}; + +/* DWARF data read from a file, used for .gnu_debugaltlink. */ + +struct dwarf_data; + +/* The load address mapping. 
*/ + +#if defined(__FDPIC__) && defined(HAVE_DL_ITERATE_PHDR) && (defined(HAVE_LINK_H) || defined(HAVE_SYS_LINK_H)) + +#ifdef HAVE_LINK_H + #include <link.h> +#endif +#ifdef HAVE_SYS_LINK_H + #include <sys/link.h> +#endif + +#define libbacktrace_using_fdpic() (1) + +struct libbacktrace_base_address +{ + struct elf32_fdpic_loadaddr m; +}; + +#define libbacktrace_add_base(pc, base) \ + ((uintptr_t) (__RELOC_POINTER ((pc), (base).m))) + +#else /* not _FDPIC__ */ + +#define libbacktrace_using_fdpic() (0) + +struct libbacktrace_base_address +{ + uintptr_t m; +}; + +#define libbacktrace_add_base(pc, base) ((pc) + (base).m) + +#endif /* not _FDPIC__ */ + +/* Add file/line information for a DWARF module. */ + +extern int backtrace_dwarf_add (struct backtrace_state *state, + struct libbacktrace_base_address base_address, + const struct dwarf_sections *dwarf_sections, + int is_bigendian, + struct dwarf_data *fileline_altlink, + backtrace_error_callback error_callback, + void *data, fileline *fileline_fn, + struct dwarf_data **fileline_entry); + +/* A data structure to pass to backtrace_syminfo_to_full. */ + +struct backtrace_call_full +{ + backtrace_full_callback full_callback; + backtrace_error_callback full_error_callback; + void *full_data; + int ret; +}; + +/* A backtrace_syminfo_callback that can call into a + backtrace_full_callback, used when we have a symbol table but no + debug info. */ + +extern void backtrace_syminfo_to_full_callback (void *data, uintptr_t pc, + const char *symname, + uintptr_t symval, + uintptr_t symsize); + +/* An error callback that corresponds to + backtrace_syminfo_to_full_callback. */ + +extern void backtrace_syminfo_to_full_error_callback (void *, const char *, + int); + +/* A test-only hook for elf_uncompress_zdebug. */ + +extern int backtrace_uncompress_zdebug (struct backtrace_state *, + const unsigned char *compressed, + size_t compressed_size, + backtrace_error_callback, void *data, + unsigned char **uncompressed, + size_t *uncompressed_size); + +/* A test-only hook for elf_zstd_decompress. */ + +extern int backtrace_uncompress_zstd (struct backtrace_state *, + const unsigned char *compressed, + size_t compressed_size, + backtrace_error_callback, void *data, + unsigned char *uncompressed, + size_t uncompressed_size); + +/* A test-only hook for elf_uncompress_lzma.
*/ + +extern int backtrace_uncompress_lzma (struct backtrace_state *, + const unsigned char *compressed, + size_t compressed_size, + backtrace_error_callback, void *data, + unsigned char **uncompressed, + size_t *uncompressed_size); + +struct elf_ppc64_opd_data; +extern int elf_add (struct backtrace_state *state, const char *filename, int descriptor, + const unsigned char *memory, size_t memory_size, + struct libbacktrace_base_address base_address, + struct elf_ppc64_opd_data *caller_opd, + backtrace_error_callback error_callback, void *data, + fileline *fileline_fn, int *found_sym, int *found_dwarf, + struct dwarf_data **fileline_entry, int exe, int debuginfo, + const char *with_buildid_data, uint32_t with_buildid_size); +extern void elf_syminfo (struct backtrace_state *state, uintptr_t addr, + backtrace_syminfo_callback callback, + backtrace_error_callback error_callback ATTRIBUTE_UNUSED, + void *data); +extern void elf_nosyms (struct backtrace_state *state ATTRIBUTE_UNUSED, + uintptr_t addr ATTRIBUTE_UNUSED, + backtrace_syminfo_callback callback ATTRIBUTE_UNUSED, + backtrace_error_callback error_callback, void *data); + +extern int macho_add (struct backtrace_state *state, const char *filename, int descriptor, + off_t offset, const unsigned char *match_uuid, + struct libbacktrace_base_address base_address, int skip_symtab, + backtrace_error_callback error_callback, void *data, + fileline *fileline_fn, int *found_sym); +extern void macho_syminfo (struct backtrace_state *state, uintptr_t addr, + backtrace_syminfo_callback callback, + backtrace_error_callback error_callback ATTRIBUTE_UNUSED, + void *data); +extern void macho_nosyms (struct backtrace_state *state ATTRIBUTE_UNUSED, + uintptr_t addr ATTRIBUTE_UNUSED, + backtrace_syminfo_callback callback ATTRIBUTE_UNUSED, + backtrace_error_callback error_callback, void *data); +#ifdef __cplusplus +} +#endif + +#endif diff --git a/tests/integration/ghost_stack_test_extension/ghost_stack_test.cpp b/tests/integration/ghost_stack_test_extension/ghost_stack_test.cpp new file mode 100644 index 0000000000..96f7a07dbc --- /dev/null +++ b/tests/integration/ghost_stack_test_extension/ghost_stack_test.cpp @@ -0,0 +1,225 @@ +#define PY_SSIZE_T_CLEAN +#include + +#include +#include +#include + +#pragma GCC push_options +#pragma GCC optimize("O0") + +// ============================================================================ +// Exception Test Helpers +// ============================================================================ + +static int destructor_count = 0; +static std::vector cleanup_order; + +struct RAIIGuard +{ + RAIIGuard() + { + destructor_count = 0; + } + ~RAIIGuard() + { + destructor_count++; + } +}; + +struct OrderedGuard +{ + int id; + OrderedGuard(int i) + : id(i) + { + cleanup_order.push_back(id * 10); + } // construct + ~OrderedGuard() + { + cleanup_order.push_back(id); + } // destruct +}; + +// Callback to Python function that calls ghost_stack_backtrace +static PyObject* capture_callback = nullptr; + +__attribute__((noinline)) static void +call_capture_callback() +{ + if (capture_callback) { + PyObject* result = PyObject_CallObject(capture_callback, nullptr); + Py_XDECREF(result); + } +} + +__attribute__((noinline)) static void +throw_with_trace() +{ + call_capture_callback(); + throw std::runtime_error("test exception"); +} + +__attribute__((noinline)) static void +raii_throw() +{ + RAIIGuard guard; + call_capture_callback(); + throw std::runtime_error("raii test"); +} + +__attribute__((noinline)) static void +multi_raii_throw() 
+{ + OrderedGuard g1(1); + call_capture_callback(); + OrderedGuard g2(2); + call_capture_callback(); + OrderedGuard g3(3); + throw std::runtime_error("multi raii"); +} + +__attribute__((noinline)) static std::string +nested_try_catch() +{ + try { + call_capture_callback(); + try { + call_capture_callback(); + throw std::runtime_error("inner"); + } catch (const std::runtime_error&) { + call_capture_callback(); + throw std::runtime_error("outer"); + } + } catch (const std::runtime_error& e) { + return e.what(); + } + return ""; +} + +// ============================================================================ +// Python-exposed test functions +// ============================================================================ + +static PyObject* +set_capture_callback(PyObject* self, PyObject* args) +{ + PyObject* callback; + if (!PyArg_ParseTuple(args, "O", &callback)) return nullptr; + Py_XDECREF(capture_callback); + capture_callback = callback; + Py_INCREF(capture_callback); + Py_RETURN_NONE; +} + +static PyObject* +test_basic_exception(PyObject* self, PyObject* args) +{ + try { + throw_with_trace(); + Py_RETURN_FALSE; // Should not reach here + } catch (const std::runtime_error& e) { + if (std::string(e.what()) == "test exception") { + Py_RETURN_TRUE; + } + Py_RETURN_FALSE; + } +} + +static PyObject* +test_raii_cleanup(PyObject* self, PyObject* args) +{ + destructor_count = 0; + try { + raii_throw(); + } catch (const std::runtime_error&) { + // Expected + } + return PyLong_FromLong(destructor_count); +} + +static PyObject* +test_raii_cleanup_order(PyObject* self, PyObject* args) +{ + cleanup_order.clear(); + try { + multi_raii_throw(); + } catch (const std::runtime_error&) { + // Expected + } + // Return cleanup_order as a list + PyObject* result = PyList_New(cleanup_order.size()); + for (size_t i = 0; i < cleanup_order.size(); i++) { + PyList_SET_ITEM(result, i, PyLong_FromLong(cleanup_order[i])); + } + return result; +} + +static PyObject* +test_nested_try_catch(PyObject* self, PyObject* args) +{ + std::string result = nested_try_catch(); + return PyUnicode_FromString(result.c_str()); +} + +static PyObject* +test_different_exception_types(PyObject* self, PyObject* args) +{ + // Test int exception + try { + call_capture_callback(); + throw 42; + } catch (int e) { + if (e != 42) Py_RETURN_FALSE; + } + + // Test const char* exception + try { + call_capture_callback(); + throw "test string"; + } catch (const char* e) { + if (std::string(e) != "test string") Py_RETURN_FALSE; + } + + // Test std::string exception + try { + call_capture_callback(); + throw std::string("string exception"); + } catch (const std::string& e) { + if (e != "string exception") Py_RETURN_FALSE; + } + + Py_RETURN_TRUE; +} + +#pragma GCC pop_options + +static PyMethodDef methods[] = { + {"set_capture_callback", + set_capture_callback, + METH_VARARGS, + "Set callback for ghost_stack capture"}, + {"test_basic_exception", + test_basic_exception, + METH_NOARGS, + "Test basic exception through ghost_stack"}, + {"test_raii_cleanup", test_raii_cleanup, METH_NOARGS, "Test RAII cleanup during unwinding"}, + {"test_raii_cleanup_order", + test_raii_cleanup_order, + METH_NOARGS, + "Test RAII cleanup order (LIFO)"}, + {"test_nested_try_catch", test_nested_try_catch, METH_NOARGS, "Test nested try/catch"}, + {"test_different_exception_types", + test_different_exception_types, + METH_NOARGS, + "Test different exception types"}, + {nullptr, nullptr, 0, nullptr}, +}; + +static struct PyModuleDef moduledef = {PyModuleDef_HEAD_INIT, 
"ghost_stack_test", "", -1, methods}; + +PyMODINIT_FUNC +PyInit_ghost_stack_test(void) +{ + return PyModule_Create(&moduledef); +} diff --git a/tests/integration/ghost_stack_test_extension/setup.py b/tests/integration/ghost_stack_test_extension/setup.py new file mode 100644 index 0000000000..941a2fe457 --- /dev/null +++ b/tests/integration/ghost_stack_test_extension/setup.py @@ -0,0 +1,15 @@ +from distutils.core import Extension +from distutils.core import setup + +setup( + name="ghost_stack_test", + ext_modules=[ + Extension( + "ghost_stack_test", + language="c++", + sources=["ghost_stack_test.cpp"], + extra_compile_args=["-O0", "-g3", "-fno-omit-frame-pointer"], + ), + ], + zip_safe=False, +) diff --git a/tests/integration/test_ghost_stack.py b/tests/integration/test_ghost_stack.py new file mode 100644 index 0000000000..e35a787098 --- /dev/null +++ b/tests/integration/test_ghost_stack.py @@ -0,0 +1,212 @@ +"""Tests for ghost_stack functionality. + +These tests verify that ghost_stack (fast unwinding) works correctly: +1. C++ exceptions propagate correctly through patched frames +2. Ghost_stack frames exactly match libunwind frames +""" + +import shutil +import subprocess +import sys +from pathlib import Path + +import pytest + +from memray._test_utils import GhostStackTestContext +from memray._test_utils import has_ghost_stack_support + +HERE = Path(__file__).parent +TEST_GHOST_STACK_EXTENSION = HERE / "ghost_stack_test_extension" + +pytestmark = pytest.mark.skipif( + not has_ghost_stack_support(), + reason="ghost_stack not available on this platform", +) + + +@pytest.fixture +def ghost_stack_extension(tmpdir, monkeypatch): + """Compile and import the ghost_stack test extension.""" + extension_path = tmpdir / "ghost_stack_test_extension" + shutil.copytree(TEST_GHOST_STACK_EXTENSION, extension_path) + subprocess.run( + [sys.executable, str(extension_path / "setup.py"), "build_ext", "--inplace"], + check=True, + cwd=extension_path, + capture_output=True, + ) + with monkeypatch.context() as ctx: + ctx.setattr(sys, "path", [*sys.path, str(extension_path)]) + import ghost_stack_test + + yield ghost_stack_test + + +class TestGhostStackExceptions: + """Test C++ exception safety through ghost_stack trampolines.""" + + def test_basic_exception(self, ghost_stack_extension): + """Verify std::runtime_error works through ghost_stack frames.""" + with GhostStackTestContext() as ctx: + ghost_stack_extension.set_capture_callback(ctx.backtrace) + assert ghost_stack_extension.test_basic_exception() is True + + def test_raii_cleanup(self, ghost_stack_extension): + """Verify RAII destructors are called during exception unwinding.""" + with GhostStackTestContext() as ctx: + ghost_stack_extension.set_capture_callback(ctx.backtrace) + destructor_count = ghost_stack_extension.test_raii_cleanup() + assert destructor_count == 1, "destructor should be called during unwinding" + + def test_raii_cleanup_order(self, ghost_stack_extension): + """Verify LIFO destructor order (3 guards).""" + with GhostStackTestContext() as ctx: + ghost_stack_extension.set_capture_callback(ctx.backtrace) + cleanup_order = ghost_stack_extension.test_raii_cleanup_order() + # Expected: [10, 20, 30, 3, 2, 1] = construct g1, g2, g3, then destruct g3, g2, g1 + assert cleanup_order == [10, 20, 30, 3, 2, 1] + + def test_nested_try_catch(self, ghost_stack_extension): + """Verify nested exception handling.""" + with GhostStackTestContext() as ctx: + ghost_stack_extension.set_capture_callback(ctx.backtrace) + result = 
ghost_stack_extension.test_nested_try_catch() + assert result == "outer" + + def test_different_exception_types(self, ghost_stack_extension): + """Verify int, const char*, std::string exceptions work.""" + with GhostStackTestContext() as ctx: + ghost_stack_extension.set_capture_callback(ctx.backtrace) + assert ghost_stack_extension.test_different_exception_types() is True + + +class TestGhostStackEquivalence: + """Test that ghost_stack frames exactly match libunwind frames.""" + + def _capture_frames_at_depth(self, ctx, depth=0): + """Capture ghost_stack and libunwind frames at given recursion depth.""" + ctx.reset() + + if depth > 0: + return self._capture_frames_at_depth(ctx, depth - 1) + + # Capture libunwind first (before ghost_stack patches return addresses) + libunwind_frames = ctx.libunwind_backtrace() + + # Now capture ghost_stack + ghost_frames = ctx.backtrace() + + ctx.reset() + return ghost_frames, libunwind_frames + + def _find_common_start(self, ghost_frames, libunwind_frames, max_skip=3): + """Find indices where frames start matching (max skip of 3 frames each).""" + libunwind_set = set(libunwind_frames[: max_skip + 1]) + for gi in range(min(max_skip + 1, len(ghost_frames))): + gf = ghost_frames[gi] + if gf in libunwind_set: + li = libunwind_frames.index(gf) + if li <= max_skip: + return gi, li + return None, None + + def test_frames_match_shallow(self): + """Verify ghost_stack frame IPs match libunwind frame IPs.""" + with GhostStackTestContext() as ctx: + ghost_frames, libunwind_frames = self._capture_frames_at_depth(ctx, depth=0) + + assert len(ghost_frames) > 0, "ghost_stack should capture frames" + assert len(libunwind_frames) > 0, "libunwind should capture frames" + + # Find where frames start matching (skip at most 3 capture internals) + gi, li = self._find_common_start(ghost_frames, libunwind_frames) + assert gi is not None, ( + f"should find common frames within first 3\n" + f"ghost: {[hex(f) for f in ghost_frames]}\n" + f"libunwind: {[hex(f) for f in libunwind_frames]}" + ) + + ghost_tail = ghost_frames[gi:] + libunwind_tail = libunwind_frames[li:] + + # Allow up to 1/3 of frames to differ at the end (system frames) + max_diff = max(1, len(ghost_tail) // 3) + common_len = min(len(ghost_tail), len(libunwind_tail)) + compare_len = max(1, common_len - max_diff) + + assert ghost_tail[:compare_len] == libunwind_tail[:compare_len], ( + f"frame IPs must match from common start (comparing first {compare_len} frames)\n" + f"ghost[{gi}:]: {[hex(f) for f in ghost_tail]}\n" + f"libunwind[{li}:]: {[hex(f) for f in libunwind_tail]}" + ) + + def test_frames_match_deep(self): + """Verify frame matching at recursion depth 10.""" + with GhostStackTestContext() as ctx: + ghost_frames, libunwind_frames = self._capture_frames_at_depth( + ctx, depth=10 + ) + + assert len(ghost_frames) >= 10, "should capture at least 10 frames" + assert ( + len(libunwind_frames) >= 10 + ), "libunwind should capture at least 10 frames" + + # Find where frames start matching (skip at most 3 capture internals) + gi, li = self._find_common_start(ghost_frames, libunwind_frames) + assert gi is not None, ( + f"should find common frames within first 3\n" + f"ghost: {[hex(f) for f in ghost_frames]}\n" + f"libunwind: {[hex(f) for f in libunwind_frames]}" + ) + + ghost_tail = ghost_frames[gi:] + libunwind_tail = libunwind_frames[li:] + + # Allow up to 1/3 of frames to differ at the end (system frames) + max_diff = max(1, len(ghost_tail) // 3) + common_len = min(len(ghost_tail), len(libunwind_tail)) + compare_len = 
max(1, common_len - max_diff) + + assert ghost_tail[:compare_len] == libunwind_tail[:compare_len], ( + f"frame IPs must match from common start (comparing first {compare_len} frames)\n" + f"ghost[{gi}:]: {[hex(f) for f in ghost_tail]}\n" + f"libunwind[{li}:]: {[hex(f) for f in libunwind_tail]}" + ) + + +class TestGhostStackThreadSafety: + """Test thread safety of ghost_stack.""" + + def test_rapid_reset(self): + """Verify rapid reset/capture cycles work.""" + with GhostStackTestContext() as ctx: + for _ in range(1000): + frames = ctx.backtrace() + assert len(frames) > 0, "should capture frames" + ctx.reset() + + def test_multiple_threads(self): + """Verify ghost_stack works correctly across multiple threads.""" + import threading + + errors = [] + + def thread_func(): + try: + with GhostStackTestContext() as ctx: + for _ in range(100): + frames = ctx.backtrace() + if len(frames) == 0: + errors.append("No frames captured") + ctx.reset() + except Exception as e: + errors.append(str(e)) + + threads = [threading.Thread(target=thread_func) for _ in range(4)] + for t in threads: + t.start() + for t in threads: + t.join() + + assert not errors, f"Thread errors: {errors}" diff --git a/tests/integration/test_native_tracking.py b/tests/integration/test_native_tracking.py index db60beefea..fdbb564771 100644 --- a/tests/integration/test_native_tracking.py +++ b/tests/integration/test_native_tracking.py @@ -12,6 +12,7 @@ from memray import AllocatorType from memray import FileReader from memray import Tracker +from memray._memray import has_fast_unwind_support from memray._test import MemoryAllocator from tests.utils import filter_relevant_allocations @@ -20,7 +21,20 @@ TEST_NATIVE_EXTENSION = HERE / "native_extension" -def test_multithreaded_extension_with_native_tracking(tmpdir, monkeypatch): +# Dynamic parametrization based on platform fast unwind support +def _get_fast_unwind_params(): + """Returns parametrization values for fast_unwind based on platform support.""" + if has_fast_unwind_support(): + return [False, True] + else: + return [False] + + +fast_unwind_params = _get_fast_unwind_params() + + +@pytest.mark.parametrize("fast_unwind", fast_unwind_params) +def test_multithreaded_extension_with_native_tracking(tmpdir, monkeypatch, fast_unwind): """Test tracking allocations in a native extension which spawns multiple threads, each thread allocating and freeing memory.""" # GIVEN @@ -40,7 +54,7 @@ def test_multithreaded_extension_with_native_tracking(tmpdir, monkeypatch): ctx.setattr(sys, "path", [*sys.path, str(extension_path)]) from testext import run # type: ignore - with Tracker(output, native_traces=True): + with Tracker(output, native_traces=True, fast_unwind=fast_unwind): run() # THEN @@ -75,7 +89,8 @@ def test_multithreaded_extension_with_native_tracking(tmpdir, monkeypatch): @pytest.mark.valgrind -def test_simple_call_chain_with_native_tracking(tmpdir, monkeypatch): +@pytest.mark.parametrize("fast_unwind", fast_unwind_params) +def test_simple_call_chain_with_native_tracking(tmpdir, monkeypatch, fast_unwind): # GIVEN output = Path(tmpdir) / "test.bin" extension_name = "multithreaded_extension" @@ -93,7 +108,7 @@ def test_simple_call_chain_with_native_tracking(tmpdir, monkeypatch): ctx.setattr(sys, "path", [*sys.path, str(extension_path)]) from native_ext import run_simple # type: ignore - with Tracker(output, native_traces=True): + with Tracker(output, native_traces=True, fast_unwind=fast_unwind): run_simple() # THEN @@ -120,7 +135,8 @@ def 
test_simple_call_chain_with_native_tracking(tmpdir, monkeypatch): sys.platform == "darwin", reason="we cannot use debug information to resolve inline functions on macOS", ) -def test_inlined_call_chain_with_native_tracking(tmpdir, monkeypatch): +@pytest.mark.parametrize("fast_unwind", fast_unwind_params) +def test_inlined_call_chain_with_native_tracking(tmpdir, monkeypatch, fast_unwind): # GIVEN output = Path(tmpdir) / "test.bin" extension_name = "multithreaded_extension" @@ -138,7 +154,7 @@ def test_inlined_call_chain_with_native_tracking(tmpdir, monkeypatch): ctx.setattr(sys, "path", [*sys.path, str(extension_path)]) from native_ext import run_inline - with Tracker(output, native_traces=True): + with Tracker(output, native_traces=True, fast_unwind=fast_unwind): run_inline() # THEN @@ -162,7 +178,8 @@ def test_inlined_call_chain_with_native_tracking(tmpdir, monkeypatch): @pytest.mark.valgrind -def test_deep_call_chain_with_native_tracking(tmpdir, monkeypatch): +@pytest.mark.parametrize("fast_unwind", fast_unwind_params) +def test_deep_call_chain_with_native_tracking(tmpdir, monkeypatch, fast_unwind): # GIVEN output = Path(tmpdir) / "test.bin" extension_name = "multithreaded_extension" @@ -180,7 +197,7 @@ def test_deep_call_chain_with_native_tracking(tmpdir, monkeypatch): ctx.setattr(sys, "path", [*sys.path, str(extension_path)]) from native_ext import run_deep - with Tracker(output, native_traces=True): + with Tracker(output, native_traces=True, fast_unwind=fast_unwind): run_deep(2048) # THEN @@ -206,7 +223,8 @@ def test_deep_call_chain_with_native_tracking(tmpdir, monkeypatch): assert all("deep_call" in stack[0] for stack in native_stack[3 : 3 + 2048]) -def test_hybrid_stack_in_pure_python(tmpdir): +@pytest.mark.parametrize("fast_unwind", fast_unwind_params) +def test_hybrid_stack_in_pure_python(tmpdir, fast_unwind): # GIVEN allocator = MemoryAllocator() output = Path(tmpdir) / "test.bin" @@ -219,7 +237,7 @@ def recursive_func(n): # WHEN - with Tracker(output, native_traces=True): + with Tracker(output, native_traces=True, fast_unwind=fast_unwind): recursive_func(MAX_RECURSIONS) # THEN @@ -254,7 +272,8 @@ def recursive_func(n): assert hybrid_stack[-1] == "test_hybrid_stack_in_pure_python" -def test_hybrid_stack_in_pure_python_with_callbacks(tmpdir): +@pytest.mark.parametrize("fast_unwind", fast_unwind_params) +def test_hybrid_stack_in_pure_python_with_callbacks(tmpdir, fast_unwind): # GIVEN allocator = MemoryAllocator() output = Path(tmpdir) / "test.bin" @@ -278,7 +297,7 @@ def baz(): # WHEN - with Tracker(output, native_traces=True): + with Tracker(output, native_traces=True, fast_unwind=fast_unwind): ham() # THEN @@ -314,7 +333,8 @@ def baz(): assert [frame[0] for frame in valloc.stack_trace()].count("valloc") == 1 -def test_hybrid_stack_of_allocations_inside_ceval(tmpdir): +@pytest.mark.parametrize("fast_unwind", fast_unwind_params) +def test_hybrid_stack_of_allocations_inside_ceval(tmpdir, fast_unwind): # GIVEN output = Path(tmpdir) / "test.bin" @@ -330,7 +350,7 @@ def test_hybrid_stack_of_allocations_inside_ceval(tmpdir): # WHEN program = textwrap.dedent( - """ + f""" import functools import sys @@ -352,7 +372,7 @@ def baz(): pass - with memray.Tracker(sys.argv[1], native_traces=True): + with memray.Tracker(sys.argv[1], native_traces=True, fast_unwind={fast_unwind}): functools.partial(foo)() """ ) @@ -372,10 +392,12 @@ def baz(): for record in records: try: stack = [frame[0] for frame in record.hybrid_stack_trace()] + native_stack = [frame[0] for frame in 
record.native_stack_trace()] except NotImplementedError: continue # Must be a free; we don't have its stack. print(stack) + print(native_stack) # This function never allocates anything, so we should never see it. assert "baz" not in stack @@ -388,7 +410,8 @@ def baz(): assert found_an_interesting_stack -def test_hybrid_stack_in_recursive_python_c_call(tmpdir, monkeypatch): +@pytest.mark.parametrize("fast_unwind", fast_unwind_params) +def test_hybrid_stack_in_recursive_python_c_call(tmpdir, monkeypatch, fast_unwind): # GIVEN output = Path(tmpdir) / "test.bin" extension_name = "multithreaded_extension" @@ -411,7 +434,7 @@ def test_hybrid_stack_in_recursive_python_c_call(tmpdir, monkeypatch): def callback(n): return run_recursive(n, callback) - with Tracker(output, native_traces=True): + with Tracker(output, native_traces=True, fast_unwind=fast_unwind): run_recursive(MAX_RECURSIONS, callback) # THEN @@ -445,7 +468,8 @@ def callback(n): assert hybrid_stack[-1] == "test_hybrid_stack_in_recursive_python_c_call" -def test_hybrid_stack_in_a_thread(tmpdir, monkeypatch): +@pytest.mark.parametrize("fast_unwind", fast_unwind_params) +def test_hybrid_stack_in_a_thread(tmpdir, monkeypatch, fast_unwind): # GIVEN output = Path(tmpdir) / "test.bin" extension_name = "multithreaded_extension" @@ -463,7 +487,7 @@ def test_hybrid_stack_in_a_thread(tmpdir, monkeypatch): ctx.setattr(sys, "path", [*sys.path, str(extension_path)]) from native_ext import run_in_thread - with Tracker(output, native_traces=True): + with Tracker(output, native_traces=True, fast_unwind=fast_unwind): run_in_thread() # THEN @@ -482,7 +506,8 @@ def test_hybrid_stack_in_a_thread(tmpdir, monkeypatch): assert expected_symbols == [stack[0] for stack in valloc.hybrid_stack_trace()][:3] -def test_hybrid_stack_of_python_thread_starts_with_native_frames(tmp_path): +@pytest.mark.parametrize("fast_unwind", fast_unwind_params) +def test_hybrid_stack_of_python_thread_starts_with_native_frames(tmp_path, fast_unwind): """Ensure there are native frames above a thread's first Python frame.""" # GIVEN allocator = MemoryAllocator() @@ -493,7 +518,7 @@ def func(): allocator.free() # WHEN - with Tracker(output, native_traces=True): + with Tracker(output, native_traces=True, fast_unwind=fast_unwind): thread = threading.Thread(target=func) thread.start() thread.join() @@ -511,14 +536,18 @@ def func(): @pytest.mark.parametrize("native_traces", [True, False]) -def test_native_tracing_header(native_traces, tmpdir): +@pytest.mark.parametrize("fast_unwind", fast_unwind_params) +def test_native_tracing_header(native_traces, fast_unwind, tmpdir): # GIVEN allocator = MemoryAllocator() output = Path(tmpdir) / "test.bin" # WHEN + kwargs = {"native_traces": native_traces} + if native_traces and fast_unwind: + kwargs["fast_unwind"] = fast_unwind - with Tracker(output, native_traces=native_traces): + with Tracker(output, **kwargs): allocator.valloc(1234) # THEN diff --git a/tests/unit/test_cli.py b/tests/unit/test_cli.py index 942a12f438..b185c5097c 100644 --- a/tests/unit/test_cli.py +++ b/tests/unit/test_cli.py @@ -168,7 +168,7 @@ def test_run_with_live( sys.executable, "-c", "from memray.commands.run import _child_process;" - "_child_process(1234,False,False,False,False,False," + "_child_process(1234,False,False,False,False,False,False," "'./directory/foobar.py',['arg1', 'arg2'])", ], stderr=-1, @@ -209,7 +209,7 @@ def test_run_with_live_and_trace_python_allocators( sys.executable, "-c", "from memray.commands.run import _child_process;" - 
"_child_process(1234,False,True,False,False,False," + "_child_process(1234,False,False,True,False,False,False," "'./directory/foobar.py',['arg1', 'arg2'])", ], stderr=-1,