diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 32a3ae7df..6a0ba7217 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -4,9 +4,9 @@ name: snmalloc CI on: # Triggers the workflow on push or pull request events but only for the master branch push: - branches: [ master ] + branches: [ main ] pull_request: - branches: [ master, snmalloc2 ] + branches: [ main, snmalloc1 ] # Allows you to run this workflow manually from the Actions tab workflow_dispatch: @@ -18,39 +18,65 @@ jobs: # Build each combination of OS and release/debug variants os: [ "ubuntu-latest", "ubuntu-18.04", "macos-11", "macos-10.15", "freebsd-12.2", "freebsd-13.0" ] build-type: [ Release, Debug ] - extra-cmake-flags: [ " " ] + # Extra cmake flags. GitHub Actions matrix overloads `include` to mean + # 'add extra things to a job' and 'add jobs'. You can add extra things + # to a job by specifying things that exist in a job created from the + # matrix definition and adding things. You can specify extra jobs by + # specifying properties that don't match existing jobs. We use + # `cmake-flags` to add cmake flags to all jobs matching a pattern and + # `extra-cmake-flags` to specify a new job with custom CMake flags. + extra-cmake-flags: [ "" ] # Modify the complete matrix include: # Provide the dependency installation for each platform - os: "ubuntu-18.04" dependencies: "sudo apt install ninja-build" + cmake-flags: "-DSNMALLOC_USE_CXX17=ON" - os: "ubuntu-latest" dependencies: "sudo apt install ninja-build" - os: "macos-11" - dependencies: "brew update && brew install ninja" + # The homebrew packages are broken at the moment and error out + # after trying to install Python as a dependency of ninja because + # 2to3 exists. As a quick hack, delete it first. This should be + # removed once the homebrew install is fixed. + dependencies: "rm -f /usr/local/bin/2to3 ; brew update && brew install ninja" - os: "macos-10.15" - dependencies: "brew update && brew install ninja" + dependencies: "rm -f /usr/local/bin/2to3 ; brew update && brew install ninja" # Skip the tests for the FreeBSD release builds + # Also build-test the checked memcpy implementation while doing these. + # It is run-tested on Linux and should be the same everywhere. - os: "freebsd-13.0" build-type: Release build-only: yes + cmake-flags: "-DSNMALLOC_MEMCPY_BOUNDS=ON -DSNMALLOC_CHECK_LOADS=ON" - os: "freebsd-12.2" build-type: Debug build-only: yes - # Add the self-host build - - os: ubuntu-latest + cmake-flags: "-DSNMALLOC_MEMCPY_BOUNDS=ON -DSNMALLOC_CHECK_LOADS=ON" + # Add the self-host build, using the bounds-checked memcpy in + # maximally paranoid mode (checking loads and stores) + - os: "ubuntu-latest" build-type: Debug self-host: true + extra-cmake-flags: "-DSNMALLOC_MEMCPY_BOUNDS=ON -DSNMALLOC_CHECK_LOADS=ON" + dependencies: "sudo apt install ninja-build" + # Extra build to check using pthread library for destructing local state. + - os: "ubuntu-latest" + variant: "Ubuntu (with pthread destructors)." + dependencies: "sudo apt install ninja-build" + build-type: Debug + self-host: true + extra-cmake-flags: "-DSNMALLOC_USE_PTHREAD_DESTRUCTORS=On -DCMAKE_CXX_COMPILER=clang++-10 -DCMAKE_C_COMPILER=clang-10" # Add an extra element to the matrix that does a build with clang 12 # but doesn't run tests. 
- os: "freebsd-13.0" variant: Clang 12 (Build only) extra-cmake-flags: "-DCMAKE_CXX_COMPILER=clang++12" build-only: yes - - os: ubuntu-latest + - os: "ubuntu-latest" variant: Clang 10 libstdc++ (Build only) dependencies: "sudo apt install ninja-build" - extra-cmake-flags: "-DCMAKE_CXX_COMPILER=clang++-10 -DCMAKE_CXX_FLAGS=-stdlib=libstdc++" + extra-cmake-flags: "-DCMAKE_CXX_COMPILER=clang++-10 -DCMAKE_C_COMPILER=clang-10 -DCMAKE_CXX_FLAGS=-stdlib=libstdc++" build-only: yes # Don't abort runners if a single one fails fail-fast: false @@ -61,30 +87,58 @@ jobs: - name: Install build dependencies run: ${{ matrix.dependencies }} - name: Configure CMake - run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{matrix.build-type}} -G Ninja ${{ matrix.extra-cmake-flags }} + run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{matrix.build-type}} -G Ninja ${{ matrix.cmake-flags }} ${{ matrix.extra-cmake-flags }} # Build with a nice ninja status line - name: Build working-directory: ${{github.workspace}}/build run: NINJA_STATUS="%p [%f:%s/%t] %o/s, %es" ninja - name: Test file size of binaries is sane working-directory: ${{github.workspace}}/build - run: "ls -l func-first_operation-1 ; [ $(ls -l func-first_operation-1 | awk '{ print $5}') -lt 10000000 ]" + run: "ls -l func-first_operation-fast ; [ $(ls -l func-first_operation-fast | awk '{ print $5}') -lt 10000000 ]" # If the tests are enabled for this job, run them - name: Test if: ${{ matrix.build-only != 'yes' }} working-directory: ${{github.workspace}}/build - run: ctest --output-on-failure -j 4 + run: ctest --output-on-failure -j 4 -C ${{ matrix.build-type }} --timeout 400 - name: Selfhost if: ${{ matrix.self-host }} working-directory: ${{github.workspace}}/build run: | - sudo cp libsnmallocshim.so libsnmallocshim-16mib.so libsnmallocshim-oe.so /usr/local/lib/ + sudo cp libsnmallocshim.so libsnmallocshim-checks.so /usr/local/lib/ ninja clean LD_PRELOAD=/usr/local/lib/libsnmallocshim.so ninja ninja clean - LD_PRELOAD=/usr/local/lib/libsnmallocshim-16mib.so ninja - ninja clean - LD_PRELOAD=/usr/local/lib/libsnmallocshim-oe.so ninja + LD_PRELOAD=/usr/local/lib/libsnmallocshim-checks.so ninja + + sanitizer: + strategy: + matrix: + # Build each combination of OS and release/debug variants + os: [ "ubuntu-latest"] + build-type: [ Release, Debug ] + include: + - os: "ubuntu-latest" + continue-on-error: # Don't class as an error if this fails, until we have a more reliablity. 
+ variant: Clang 10 libc++ (TSan + UBSan) + dependencies: "sudo apt install ninja-build" + extra-cmake-flags: "-DCMAKE_CXX_COMPILER=clang++-10 -DCMAKE_CXX_FLAGS=-stdlib=\"libc++ -g\" -DSNMALLOC_SANITIZER=undefined,thread" + # Don't abort runners if a single one fails + fail-fast: false + runs-on: ${{ matrix.os }} + name: ${{ matrix.os }} ${{ matrix.build-type }} ${{ matrix.variant }} + steps: + - uses: actions/checkout@v2 + - name: Install build dependencies + run: ${{ matrix.dependencies }} + - name: Configure CMake + run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{matrix.build-type}} -G Ninja ${{ matrix.cmake-flags }} ${{ matrix.extra-cmake-flags }} + # Build with a nice ninja status line + - name: Build + working-directory: ${{github.workspace}}/build + run: NINJA_STATUS="%p [%f:%s/%t] %o/s, %es" ninja + - name: Test + working-directory: ${{github.workspace}}/build + run: ctest --output-on-failure -j 4 -C ${{ matrix.build-type }} --timeout 400 -E "memcpy|external_pointer" --repeat-until-fail 2 qemu-crossbuild: strategy: @@ -95,18 +149,28 @@ jobs: system-processor: arm triple: arm-linux-gnueabihf rtld: ld-linux-armhf.so.3 + ld-flavour: lld - name: arm64 system-processor: aarch64 triple: aarch64-linux-gnu rtld: ld-linux-aarch64.so.1 + ld-flavour: lld - name: ppc64el system-processor: powerpc64le triple: powerpc64le-linux-gnu rtld: ld64.so.2 + ld-flavour: lld + - name: riscv64 + system-processor: riscv64 + triple: riscv64-linux-gnu + rtld: ld-linux-riscv64-lp64d.so.1 + extra-packages: binutils-riscv64-linux-gnu + ld-flavour: bfd + ld: /usr/bin/riscv64-linux-gnu-ld.bfd # Don't abort runners if a single one fails fail-fast: false runs-on: ubuntu-latest - name: Cross-build for ${{ matrix.arch.triple }} + name: ${{matrix.build-type}} cross-build for ${{ matrix.arch.triple }} steps: - uses: actions/checkout@v2 - name: Install cross-compile toolchain and QEMU @@ -117,6 +181,14 @@ jobs: sudo add-apt-repository "deb http://apt.llvm.org/focal/ llvm-toolchain-focal-13 main" sudo apt update sudo apt install libstdc++-9-dev-${{ matrix.arch.name }}-cross qemu-user ninja-build clang-13 lld-13 + sudo apt install ${{matrix.arch.extra-packages}} + # The default PowerPC qemu configuration uses the wrong page size. + # Wrap it in a script that fixes this. 
+ sudo update-binfmts --disable qemu-ppc64le + sudo sh -c 'echo ":qemu-ppc64le:M:0:\x7f\x45\x4c\x46\x02\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x15\x00:\xff\xff\xff\xff\xff\xff\xff\xfc\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\x00:`pwd`/ppc64.sh:" > /proc/sys/fs/binfmt_misc/register' + echo '#!/bin/sh' > ppc64.sh + echo '/usr/bin/qemu-ppc64le -p 65536 $@' >> ppc64.sh + chmod +x ppc64.sh - name: Configure run: > RTLD_NAME=${{ matrix.arch.rtld }} @@ -130,6 +202,8 @@ jobs: -DSNMALLOC_QEMU_WORKAROUND=ON -DSNMALLOC_STATIC_LIBRARY=OFF -DCMAKE_TOOLCHAIN_FILE=ci/Toolchain.cmake + -DSNMALLOC_LINKER=${{matrix.arch.ld}} + -DSNMALLOC_LINKER_FLAVOUR=${{matrix.arch.ld-flavour}} - name: Build working-directory: ${{github.workspace}}/build run: NINJA_STATUS="%p [%f:%s/%t] %o/s, %es" ninja @@ -138,31 +212,43 @@ jobs: # QEMU) - name: Test working-directory: ${{github.workspace}}/build - run: ctest --output-on-failure -E '(perf-.*)|(.*-malloc$)' + run: ctest --output-on-failure -E '(perf-.*)|(.*-malloc$)' --timeout 400 timeout-minutes: 30 windows: strategy: matrix: # Build each combination of OS and release/debug variants - os: [ windows-2016, windows-2019 ] + os: [ windows-2019 ] build-type: [ Release, Debug ] arch: [ Win32, x64 ] toolchain: [ "", "-T ClangCL" ] extra-cmake-flags: [ "" ] - # The ClangCL toolchain was added in Visual Studio 2019, the Windows - # 2016 runners have only VS 2017, so skip them for this configuration - exclude: - - os: windows-2016 - toolchain: "-T ClangCL" # Add an extra check for the Windows 8 compatible PAL include: - - os: windows-2016 + - os: windows-2019 build-type: Release arch: x64 toolchain: "" extra-cmake-flags: -DWIN8COMPAT=TRUE variant: Windows 8 compatible + - os: windows-2022 + build-type: Release + arch: Win32 + toolchain: "" + - os: windows-2022 + build-type: Debug + arch: Win32 + toolchain: "" + - os: windows-2022 + build-type: Release + arch: x64 + toolchain: "" + - os: windows-2022 + build-type: Debug + arch: x64 + toolchain: "" + # Don't abort runners if a single one fails fail-fast: false runs-on: ${{ matrix.os }} @@ -178,7 +264,8 @@ jobs: # Run the tests. 
- name: Test working-directory: ${{ github.workspace }}/build - run: ctest -j 2 --interactive-debug-mode 0 --output-on-failure -C ${{ matrix.build-type }} + run: ctest -j 2 --interactive-debug-mode 0 --output-on-failure -C ${{ matrix.build-type }} --timeout 400 + timeout-minutes: 20 # Job to run clang-format and report errors @@ -188,7 +275,7 @@ jobs: steps: - uses: actions/checkout@v2 - name: Configure CMake - run: cmake -B ${{github.workspace}}/build + run: cmake -B ${{github.workspace}}/build -DSNMALLOC_USE_CXX17=ON - name: Install clang-tidy run: sudo apt install clang-tidy-9 # Run the clang-format check and error if it generates a diff @@ -200,7 +287,7 @@ jobs: git diff --exit-code - name: Run clang-tidy run: | - clang-tidy-9 src/override/malloc.cc -header-filter="`pwd`/*" -warnings-as-errors='*' -export-fixes=tidy.fail -- -std=c++17 -mcx16 -DSNMALLOC_PLATFORM_HAS_GETENTROPY=0 + clang-tidy-9 src/snmalloc/override/malloc.cc -header-filter="`pwd`/*" -warnings-as-errors='*' -export-fixes=tidy.fail -- -std=c++17 -mcx16 -DSNMALLOC_PLATFORM_HAS_GETENTROPY=0 if [ -f tidy.fail ] ; then cat tidy.fail exit 1 diff --git a/.github/workflows/morello.yml b/.github/workflows/morello.yml new file mode 100644 index 000000000..31e7a5fcd --- /dev/null +++ b/.github/workflows/morello.yml @@ -0,0 +1,93 @@ +name: snmalloc CI for Morello + +# Controls when the workflow will run +on: + # Triggers the workflow on push or pull request events but only for the master branch + push: + branches: [ main ] + pull_request: + branches: [ main ] + + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + +jobs: + # UNIX-like, but with some needed TLC for the moment, so not folded in to the main unixlike CI. + # Morello can't easily self-host since it mixes purecap and hybrid build tools. + # We point cmake at the LLVM Clang frontend explicitly lest it pick up the more magic wrappers. + morello: + strategy: + matrix: + # Build each combination of OS and release/debug variants + os: [ "morello" ] + build-type: [ Release, Debug ] + caps: [ Hybrid, Purecap ] + include: + - os: "morello" + caps: Hybrid + dependencies: | + pkg64 install -y llvm-localbase cmake ninja + cmake-flags: > + -DCMAKE_CXX_COMPILER=/usr/local64/bin/clang++ + -DCMAKE_C_COMPILER=/usr/local64/bin/clang + -DCMAKE_CXX_FLAGS='-march=morello -Xclang -morello-vararg=new' + -DCMAKE_C_FLAGS='-march=morello -Xclang -morello-vararg=new' + - os: "morello" + caps: Purecap + dependencies: | + pkg64 install -y llvm-localbase cmake ninja + cmake-flags: > + -DCMAKE_CXX_COMPILER=/usr/local64/bin/clang++ + -DCMAKE_C_COMPILER=/usr/local64/bin/clang + -DCMAKE_CXX_FLAGS='-march=morello -mabi=purecap -Xclang -morello-vararg=new' + -DCMAKE_C_FLAGS='-march=morello -mabi=purecap -Xclang -morello-vararg=new' + -DSNMALLOC_LINK_ICF=OFF + # SNMALLOC_LINK_ICF=OFF until the fix for + # https://git.morello-project.org/morello/llvm-project/-/issues/51 , namely + # https://git.morello-project.org/morello/llvm-project/-/commit/f41cb9104e0793de87d98b38acb274b2e3266091 + # lands in a package or a release. That's probably the Fall 2022 release. + # Don't abort runners if a single one fails + fail-fast: false + # Morello needs special handling to get picked up by our self-hosted runners + # When/if we do integrate this back into the unixlike matrix above, we could + # say something like:: + # runs-on: >- + # ${{ (matrix.os == 'morello' && fromJSON('["self-hosted", "msr-morello", "2022.05-benchmark"]')) + # || matrix.os }} + # to dispatch dynamically. 
+ runs-on: ["self-hosted", "msr-morello", "2022.05-benchmark"] + name: ${{ matrix.os }} ${{ matrix.build-type }} ${{ matrix.caps }} + steps: + - uses: actions/checkout@v2 + - name: Install build dependencies + run: ${{ matrix.dependencies }} + - name: Configure CMake + run: > + cmake + -B ${{github.workspace}}/build + -DCMAKE_BUILD_TYPE=${{matrix.build-type}} + -G Ninja ${{ matrix.cmake-flags }} + ${{ matrix.extra-cmake-flags }} + # Build with a nice ninja status line + - name: Build + # https://github.com/ChristopherHX/github-act-runner seemingly ignores working-directory, + # but everyone agrees that we start in ${{github.workspace}} by default, so just cd into + # the build directory ourselves. See + # https://docs.github.com/en/actions/learn-github-actions/contexts#github-context + # https://github.com/ChristopherHX/github-act-runner/issues/61 + # working-directory: ${{github.workspace}}/build + run: | + cd build + NINJA_STATUS="%p [%f:%s/%t] %o/s, %es" ninja + - name: Test file size of binaries is sane + # working-directory: ${{github.workspace}}/build + run: | + cd build + ls -l func-first_operation-fast ; [ $(ls -l func-first_operation-fast | awk '{ print $5}') -lt 10000000 ] + # If the tests are enabled for this job, run them + - name: Test + if: ${{ matrix.build-only != 'yes' }} + # working-directory: ${{github.workspace}}/build + run: | + cd build + ctest --output-on-failure -j 4 -C ${{ matrix.build-type }} --timeout 400 diff --git a/.gitignore b/.gitignore index f25dbf668..8737c737a 100644 --- a/.gitignore +++ b/.gitignore @@ -1,8 +1,20 @@ +# conventional build dirs release*/ debug*/ build*/ +cmake-build-*/ + +# cmake intermediate files CMakeFiles/ + +# vscode dirs .vscode/ .vs/ + +# jetbrains IDE dirs +.idea/ + +# special endings *~ *.sw? + diff --git a/CMakeLists.txt b/CMakeLists.txt index 613af3db5..6a43a43ba 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,5 @@ -cmake_minimum_required(VERSION 3.8) -project(snmalloc C CXX) +cmake_minimum_required(VERSION 3.14) +project(snmalloc CXX) if (NOT CMAKE_BUILD_TYPE) message(STATUS "No build type selected, default to: Release") @@ -7,24 +7,81 @@ if (NOT CMAKE_BUILD_TYPE) endif() include(CheckCXXCompilerFlag) -include(CheckCSourceCompiles) +include(CheckCXXSourceCompiles) +include(CheckIncludeFileCXX) +include(CMakeDependentOption) -option(USE_SNMALLOC_STATS "Track allocation stats" OFF) +option(SNMALLOC_HEADER_ONLY_LIBRARY "Use snmalloc has a header-only library" OFF) +# Options that apply globally option(SNMALLOC_CI_BUILD "Disable features not sensible for CI" OFF) -option(EXPOSE_EXTERNAL_PAGEMAP "Expose the global pagemap" OFF) -option(EXPOSE_EXTERNAL_RESERVE "Expose an interface to reserve memory using the default memory provider" OFF) -option(SNMALLOC_RUST_SUPPORT "Build static library for rust" OFF) -option(SNMALLOC_STATIC_LIBRARY "Build static libraries" ON) option(SNMALLOC_QEMU_WORKAROUND "Disable using madvise(DONT_NEED) to zero memory on Linux" Off) -option(SNMALLOC_OPTIMISE_FOR_CURRENT_MACHINE "Compile for current machine architecture" Off) -set(SNMALLOC_STATIC_LIBRARY_PREFIX "sn_" CACHE STRING "Static library function prefix") -option(SNMALLOC_USE_CXX20 "Build as C++20, not C++17; experimental as yet" OFF) +option(SNMALLOC_USE_CXX17 "Build as C++17 for legacy support." OFF) +option(SNMALLOC_TRACING "Enable large quantities of debug output." 
OFF) +option(SNMALLOC_NO_REALLOCARRAY "Build without reallocarray exported" ON) +option(SNMALLOC_NO_REALLOCARR "Build without reallocarr exported" ON) +option(SNMALLOC_LINK_ICF "Link with Identical Code Folding" ON) +# Options that apply only if we're not building the header-only library +cmake_dependent_option(SNMALLOC_RUST_SUPPORT "Build static library for rust" OFF "NOT SNMALLOC_HEADER_ONLY_LIBRARY" OFF) +cmake_dependent_option(SNMALLOC_STATIC_LIBRARY "Build static libraries" ON "NOT SNMALLOC_HEADER_ONLY_LIBRARY" OFF) +cmake_dependent_option(SNMALLOC_CHECK_LOADS "Perform bounds checks on the source argument to memcpy with heap objects" OFF "NOT SNMALLOC_HEADER_ONLY_LIBRARY" OFF) +cmake_dependent_option(SNMALLOC_OPTIMISE_FOR_CURRENT_MACHINE "Compile for current machine architecture" Off "NOT SNMALLOC_HEADER_ONLY_LIBRARY" OFF) +cmake_dependent_option(SNMALLOC_PAGEID "Set an id to memory regions" OFF "NOT SNMALLOC_PAGEID" OFF) +if (NOT SNMALLOC_HEADER_ONLY_LIBRARY) + # Pick a sensible default for the thread cleanup mechanism + if (${CMAKE_SYSTEM_NAME} STREQUAL FreeBSD) + set(SNMALLOC_CLEANUP_DEFAULT THREAD_CLEANUP) + elseif (UNIX AND NOT APPLE) + set(SNMALLOC_CLEANUP_DEFAULT PTHREAD_DESTRUCTORS) + else () + set(SNMALLOC_CLEANUP_DEFAULT CXX11_DESTRUCTORS) + endif() + # Specify the thread cleanup mechanism to use. + set(SNMALLOC_CLEANUP ${SNMALLOC_CLEANUP_DEFAULT} CACHE STRING "The mechanism that snmalloc will use for thread destructors. Valid options are: CXX11_DESTRUCTORS (use C++11 destructors, may depend on the C++ runtime library), PTHREAD_DESTRUCTORS (use pthreads, may interact badly with C++ on some platforms, such as macOS) THREAD_CLEANUP (depend on an explicit call to _malloc_thread_cleanup on thread exit, supported by FreeBSD's threading implementation and possibly elsewhere)") + set_property(CACHE SNMALLOC_CLEANUP PROPERTY STRINGS THREAD_CLEANUP PTHREAD_DESTRUCTORS CXX11_DESTRUCTORS) + + set(SNMALLOC_STATIC_LIBRARY_PREFIX "sn_" CACHE STRING "Static library function prefix") +else () + unset(SNMALLOC_STATIC_LIBRARY_PREFIX CACHE) + unset(SNMALLOC_CLEANUP CACHE) +endif () + +if (NOT SNMALLOC_CLEANUP STREQUAL CXX11_DESTRUCTORS) + set(CMAKE_CXX_IMPLICIT_LINK_LIBRARIES "") +endif() + +set(SNMALLOC_SANITIZER "" CACHE STRING "Use sanitizer type (undefined|thread|...)") +if (SNMALLOC_SANITIZER) + message(STATUS "Using sanitizer=${SNMALLOC_SANITIZER}") +endif() + +# If CheckLinkerFlag doesn't exist then provide a dummy implementation that +# always fails. The fallback can be removed when we move to CMake 3.18 as the +# baseline. +include(CheckLinkerFlag OPTIONAL RESULT_VARIABLE CHECK_LINKER_FLAG) +if (NOT CHECK_LINKER_FLAG) + function(check_linker_flag) + endfunction() +endif () + +if (NOT MSVC AND NOT (SNMALLOC_CLEANUP STREQUAL CXX11_DESTRUCTORS)) + # If the target compiler doesn't support -nostdlib++ then we must enable C at + # the global scope for the fallbacks to work. + check_linker_flag(CXX "-nostdlib++" SNMALLOC_LINKER_SUPPORT_NOSTDLIBXX) + if (NOT SNMALLOC_LINKER_SUPPORT_NOSTDLIBXX AND NOT SNMALLOC_HEADER_ONLY_LIBRARY) + enable_language(C) + endif() +endif() + +# Define a generator expression for things that will be enabled in either CI +# builds or debug mode. +set(ci_or_debug "$,$>") # malloc.h will error if you include it on FreeBSD, so this test must not # unconditionally include it. 
-CHECK_C_SOURCE_COMPILES(" +CHECK_CXX_SOURCE_COMPILES(" #if __has_include() #include +#endif #if __has_include() #include #else @@ -34,14 +91,14 @@ size_t malloc_usable_size(const void* ptr) { return 0; } int main() { return 0; } " CONST_QUALIFIED_MALLOC_USABLE_SIZE) -# older libcs might not have getentropy, e.g. it appeared in gliobc 2.25 +# Some libcs might not have getentropy, e.g. it appeared in glibc 2.25 # so we need to fallback if we cannot compile this -CHECK_C_SOURCE_COMPILES(" +CHECK_CXX_SOURCE_COMPILES(" #if __has_include() # include #endif #if __has_include() -#include +# include #endif int main() { int entropy = 0; @@ -50,15 +107,18 @@ int main() { } " SNMALLOC_PLATFORM_HAS_GETENTROPY) -if (NOT SNMALLOC_CI_BUILD) - option(USE_POSIX_COMMIT_CHECKS "Instrument Posix PAL to check for access to unused blocks of memory." Off) -else () - # This is enabled in every bit of CI to detect errors. - option(USE_POSIX_COMMIT_CHECKS "Instrument Posix PAL to check for access to unused blocks of memory." On) -endif () - -# Provide as macro so other projects can reuse -macro(warnings_high) +# check if linux/random.h is available +# older libcs might not have sys/random.h +# but some might provide the necessary flags via linux/random.h +# the __has_include macro isn't working properly on all platforms for that header +# this is why we check its existence here +CHECK_INCLUDE_FILE_CXX(linux/random.h SNMALLOC_HAS_LINUX_RANDOM_H) + +# Provide as function so other projects can reuse +# FIXME: This modifies some variables that may or may not be the ones that +# provide flags and so is broken by design. It should be removed once Verona +# no longer uses it. +function(warnings_high) if(MSVC) # Force to always compile with W4 if(CMAKE_CXX_FLAGS MATCHES "/W[0-4]") @@ -66,6 +126,7 @@ macro(warnings_high) else() set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /W4") endif() + # /Wv18 is required for the annotation to force inline a lambda. add_compile_options(/WX /wd4127 /wd4324 /wd4201) else() if (CMAKE_CXX_COMPILER_ID MATCHES "Clang") @@ -73,16 +134,12 @@ macro(warnings_high) endif () add_compile_options(-Wall -Wextra -Werror -Wundef) endif() -endmacro() - -macro(oe_simulate target) - target_compile_definitions(${target} PRIVATE SNMALLOC_USE_SMALL_CHUNKS) -endmacro() +endfunction() -macro(clangformat_targets) +function(clangformat_targets) # The clang-format tool is installed under a variety of different names. Try # to find a sensible one. Only look for versions 9 explicitly - we don't - # know whether our clang-format file will work with newer versions of the + # know whether our clang-format file will work with newer versions of the # tool. It does not work with older versions as AfterCaseLabel is not supported # in earlier versions. find_program(CLANG_FORMAT NAMES @@ -94,7 +151,7 @@ macro(clangformat_targets) message(WARNING "Not generating clangformat target, no clang-format tool found") else () message(STATUS "Generating clangformat target using ${CLANG_FORMAT}") - file(GLOB_RECURSE ALL_SOURCE_FILES src/*.cc src/*.h src/*.hh) + file(GLOB_RECURSE ALL_SOURCE_FILES CONFIGURE_DEPENDS src/*.cc src/*.h src/*.hh) # clangformat does not yet understand concepts well; for the moment, don't # ask it to format them. 
See https://reviews.llvm.org/D79773 list(FILTER ALL_SOURCE_FILES EXCLUDE REGEX "src/[^/]*/[^/]*_concept\.h$") @@ -104,182 +161,206 @@ macro(clangformat_targets) -i ${ALL_SOURCE_FILES}) endif() -endmacro() +endfunction() -# The main target for snmalloc -add_library(snmalloc_lib INTERFACE) -target_include_directories(snmalloc_lib INTERFACE src/) +# The main target for snmalloc. This is the exported target for the +# header-only configuration and is used as a dependency for all of the builds +# that compile anything. +add_library(snmalloc INTERFACE) + +if(SNMALLOC_USE_CXX17) + target_compile_definitions(snmalloc INTERFACE -DSNMALLOC_USE_CXX17) + target_compile_features(snmalloc INTERFACE cxx_std_17) +else() + target_compile_features(snmalloc INTERFACE cxx_std_20) +endif() + +# Add header paths. +target_include_directories(snmalloc + INTERFACE + $ + $) if(NOT MSVC) - find_package(Threads REQUIRED COMPONENTS snmalloc_lib) - target_link_libraries(snmalloc_lib INTERFACE ${CMAKE_THREAD_LIBS_INIT}) - if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU") - target_link_libraries(snmalloc_lib INTERFACE atomic) - endif() + find_package(Threads REQUIRED COMPONENTS snmalloc) + target_link_libraries(snmalloc INTERFACE + ${CMAKE_THREAD_LIBS_INIT} $<$:atomic>) endif() if (WIN32) set(WIN8COMPAT FALSE CACHE BOOL "Avoid Windows 10 APIs") - if (WIN8COMPAT) - target_compile_definitions(snmalloc_lib INTERFACE -DWINVER=0x0603) - message(STATUS "snmalloc: Avoiding Windows 10 APIs") - else() - message(STATUS "snmalloc: Using Windows 10 APIs") - # VirtualAlloc2 is exposed by mincore.lib, not Kernel32.lib (as the - # documentation says) - target_link_libraries(snmalloc_lib INTERFACE mincore) - endif() + target_compile_definitions(snmalloc INTERFACE $<$:WINVER=0x0603>) + # VirtualAlloc2 is exposed by mincore.lib, not Kernel32.lib (as the + # documentation says) + target_link_libraries(snmalloc INTERFACE $<$>:mincore>) + message(STATUS "snmalloc: Avoiding Windows 10 APIs is ${WIN8COMPAT}") endif() -# detect support for cmpxchg16b; werror is needed to make sure mcx16 must be used by targets +# Detect support for cmpxchg16b; Werror is needed to make sure mcx16 must be used by targets check_cxx_compiler_flag("-Werror -Wextra -Wall -mcx16" SNMALLOC_COMPILER_SUPPORT_MCX16) if(SNMALLOC_COMPILER_SUPPORT_MCX16) - target_compile_options(snmalloc_lib INTERFACE $<$:-mcx16>) + target_compile_options(snmalloc INTERFACE $<$:-mcx16>) endif() -# Have to set this globally, as can't be set on an interface target. -if(SNMALLOC_USE_CXX20) - set(CMAKE_CXX_STANDARD 20) -else() - set(CMAKE_CXX_STANDARD 17) +# Helper function that conditionally defines a macro for the build target if +# the CMake variable of the same name is set. 
+function(add_as_define FLAG) + target_compile_definitions(snmalloc INTERFACE $<$:${FLAG}>) +endfunction() + +add_as_define(SNMALLOC_QEMU_WORKAROUND) +add_as_define(SNMALLOC_TRACING) +add_as_define(SNMALLOC_CI_BUILD) +add_as_define(SNMALLOC_PLATFORM_HAS_GETENTROPY) +add_as_define(SNMALLOC_HAS_LINUX_RANDOM_H) +if (SNMALLOC_NO_REALLOCARRAY) + add_as_define(SNMALLOC_NO_REALLOCARRAY) endif() - -if(USE_SNMALLOC_STATS) - target_compile_definitions(snmalloc_lib INTERFACE -DUSE_SNMALLOC_STATS) +if (SNMALLOC_NO_REALLOCARR) + add_as_define(SNMALLOC_NO_REALLOCARR) endif() -if(SNMALLOC_QEMU_WORKAROUND) - target_compile_definitions(snmalloc_lib INTERFACE -DSNMALLOC_QEMU_WORKAROUND) +target_compile_definitions(snmalloc INTERFACE $<$:MALLOC_USABLE_SIZE_QUALIFIER=const>) + +# In debug and CI builds, link the backtrace library so that we can get stack +# traces on errors. +find_package(Backtrace) +if(${Backtrace_FOUND}) + target_compile_definitions(snmalloc INTERFACE + $<${ci_or_debug}:SNMALLOC_BACKTRACE_HEADER="${Backtrace_HEADER}">) + target_link_libraries(snmalloc INTERFACE + $<${ci_or_debug}:${Backtrace_LIBRARIES}>) + target_include_directories(snmalloc INTERFACE + $<${ci_or_debug}:${Backtrace_INCLUDE_DIRS}>) endif() -if(SNMALLOC_CI_BUILD) - target_compile_definitions(snmalloc_lib INTERFACE -DSNMALLOC_CI_BUILD) -endif() - -if(USE_POSIX_COMMIT_CHECKS) - target_compile_definitions(snmalloc_lib INTERFACE -DUSE_POSIX_COMMIT_CHECKS) -endif() - -if(SNMALLOC_PLATFORM_HAS_GETENTROPY) - target_compile_definitions(snmalloc_lib INTERFACE -DSNMALLOC_PLATFORM_HAS_GETENTROPY) +if(MSVC) + target_compile_definitions(snmalloc INTERFACE -D_HAS_EXCEPTIONS=0) +else() + # All symbols are always dynamic on haiku and -rdynamic is redundant (and unsupported). + if (NOT CMAKE_SYSTEM_NAME STREQUAL "Haiku") + # Get better stack traces in CI and debug builds. + target_link_options(snmalloc INTERFACE $<${ci_or_debug}:-rdynamic>) + endif() endif() -if(CONST_QUALIFIED_MALLOC_USABLE_SIZE) - target_compile_definitions(snmalloc_lib INTERFACE -DMALLOC_USABLE_SIZE_QUALIFIER=const) +if (NOT ${CMAKE_SYSTEM_NAME} MATCHES "OpenBSD") + check_linker_flag(CXX "-Wl,--no-undefined" SNMALLOC_LINKER_SUPPORT_NO_ALLOW_SHLIB_UNDEF) endif() +function(add_warning_flags name) + target_compile_options(${name} PRIVATE + $<$:/Zi /W4 /WX /wd4127 /wd4324 /wd4201> + $<$,$>>:-fno-exceptions -fno-rtti -Wall -Wextra -Werror -Wundef> + $<$:-Wsign-conversion -Wconversion>) + target_link_options(${name} PRIVATE + $<$:-Wl,--no-undefined> + $<$:$<${ci_or_debug}:/DEBUG>>) +endfunction() -# To build with just the header library target define SNMALLOC_ONLY_HEADER_LIBRARY -# in containing Cmake file. -if(NOT DEFINED SNMALLOC_ONLY_HEADER_LIBRARY) - warnings_high() - - if(MSVC) - set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /Zi") - set(CMAKE_EXE_LINKER_FLAGS_RELEASE "${CMAKE_EXE_LINKER_FLAGS_RELEASE} /DEBUG") - else() - add_compile_options(-fno-exceptions -fno-rtti -g -fomit-frame-pointer) - # Static TLS model is unsupported on Haiku. - # All symbols are always dynamic on haiku and -rdynamic is redundant (and unsupported). - if (NOT CMAKE_SYSTEM_NAME MATCHES "Haiku") - add_compile_options(-ftls-model=initial-exec) - if(SNMALLOC_CI_BUILD OR (${CMAKE_BUILD_TYPE} MATCHES "Debug")) - # Get better stack traces in CI and Debug. 
- target_link_libraries(snmalloc_lib INTERFACE "-rdynamic") - endif() - endif() +# To build with just the header library target define SNMALLOC_HEADER_ONLY_LIBRARY +if(NOT SNMALLOC_HEADER_ONLY_LIBRARY) - if(SNMALLOC_OPTIMISE_FOR_CURRENT_MACHINE) - check_cxx_compiler_flag(-march=native SUPPORT_MARCH_NATIVE) - if (SUPPORT_MARCH_NATIVE) - add_compile_options(-march=native) - else() - message(WARNING "Compiler does not support `-march=native` required by SNMALLOC_OPTIMISE_FOR_CURRENT_MACHINE") - endif() - endif() - - find_package(Backtrace) - if(${Backtrace_FOUND}) - target_compile_definitions(snmalloc_lib INTERFACE -DBACKTRACE_HEADER="${Backtrace_HEADER}") - target_link_libraries(snmalloc_lib INTERFACE ${Backtrace_LIBRARIES}) - target_include_directories(snmalloc_lib INTERFACE ${Backtrace_INCLUDE_DIRS}) - endif() - - endif() - - macro(subdirlist result curdir) - file(GLOB children LIST_DIRECTORIES true RELATIVE ${curdir} ${curdir}/*) + function(subdirlist result curdir) + file(GLOB children CONFIGURE_DEPENDS LIST_DIRECTORIES true RELATIVE ${curdir} ${curdir}/* ) set(dirlist "") foreach(child ${children}) if(IS_DIRECTORY ${curdir}/${child}) list(APPEND dirlist ${child}) endif() endforeach() - set(${result} ${dirlist}) - endmacro() + set(${result} ${dirlist} PARENT_SCOPE) + endfunction() + + if(NOT (DEFINED SNMALLOC_LINKER_FLAVOUR) OR ("${SNMALLOC_LINKER_FLAVOUR}" MATCHES "^$")) + # Linker not specified externally; probe to see if we can make lld work + set(CMAKE_REQUIRED_LINK_OPTIONS -fuse-ld=lld) + check_cxx_source_compiles("int main() { return 1; }" LLD_WORKS) + if (LLD_WORKS) + message(STATUS "Using LLD to link snmalloc shims") + endif() + elseif(SNMALLOC_LINKER_FLAVOUR STREQUAL "lld") + # Linker specified externally to be lld; assume it works and that the flags + # have also been set for us + set(LLD_WORKS TRUE) + else() + # Linker specified externally as something other than lld; presume it + # doesn't work and don't add its flags, below + set(LLD_WORKS FALSE) + endif() - macro(add_shim name type) + function(add_shim name type) add_library(${name} ${type} ${ARGN}) - target_link_libraries(${name} snmalloc_lib) + target_link_libraries(${name} snmalloc) + set_target_properties(${name} PROPERTIES CXX_VISIBILITY_PRESET hidden) + target_compile_definitions(${name} PRIVATE "SNMALLOC_USE_${SNMALLOC_CLEANUP}") + + add_warning_flags(${name}) if(NOT MSVC) target_compile_definitions(${name} PRIVATE "SNMALLOC_EXPORT=__attribute__((visibility(\"default\")))") - endif() - set_target_properties(${name} PROPERTIES CXX_VISIBILITY_PRESET hidden) + target_compile_options(${name} PRIVATE + -fomit-frame-pointer -ffunction-sections) + # Static TLS model is unsupported on Haiku. 
+ if (NOT CMAKE_SYSTEM_NAME STREQUAL "Haiku") + target_compile_options(${name} PRIVATE -ftls-model=initial-exec) + target_compile_options(${name} PRIVATE $<$:-g>) + endif() - if(EXPOSE_EXTERNAL_PAGEMAP) - if(MSVC) - target_compile_definitions(${name} PRIVATE /DSNMALLOC_EXPOSE_PAGEMAP) - else() - target_compile_definitions(${name} PRIVATE -DSNMALLOC_EXPOSE_PAGEMAP) + if(SNMALLOC_OPTIMISE_FOR_CURRENT_MACHINE) + check_cxx_compiler_flag(-march=native SUPPORT_MARCH_NATIVE) + if (SUPPORT_MARCH_NATIVE) + target_compile_options(${name} PRIVATE -march=native) + else() + message(WARNING "Compiler does not support `-march=native` required by SNMALLOC_OPTIMISE_FOR_CURRENT_MACHINE") + endif() endif() - endif() - if(EXPOSE_EXTERNAL_RESERVE) - if(MSVC) - target_compile_definitions(${name} PRIVATE /DSNMALLOC_EXPOSE_RESERVE) - else() - target_compile_definitions(${name} PRIVATE -DSNMALLOC_EXPOSE_RESERVE) + # Ensure that we do not link against C++ stdlib when compiling shims. + # If the compiler supports excluding the C++ stdlib implementation, use + # it. Otherwise, fall back to linking the library as if it were C, which + # has roughly the same effect. + if (NOT ${SNMALLOC_CLEANUP} STREQUAL CXX11_DESTRUCTORS) + check_linker_flag(CXX "-nostdlib++" SNMALLOC_LINKER_SUPPORT_NOSTDLIBXX) + if (SNMALLOC_LINKER_SUPPORT_NOSTDLIBXX) + target_link_options(${name} PRIVATE -nostdlib++) + else() + set_target_properties(${name} PROPERTIES LINKER_LANGUAGE C) + endif() endif() + # Remove all the duplicate new/malloc and free/delete definitions + target_link_options(${name} PRIVATE $<$:$<$:-Wl,--icf=all> -fuse-ld=lld>) endif() - # Ensure that we do not link against C++ stdlib when compiling shims. - if(NOT MSVC) - set_target_properties(${name} PROPERTIES LINKER_LANGUAGE C) - endif() + target_compile_definitions(${name} PRIVATE + SNMALLOC_CHECK_LOADS=$,true,false>) + target_compile_definitions(${name} PRIVATE + SNMALLOC_PAGEID=$,true,false>) + + install(TARGETS ${name} EXPORT snmallocConfig) + + endfunction() - endmacro() + set(SHIM_FILES src/snmalloc/override/new.cc) + set(SHIM_FILES_MEMCPY src/snmalloc/override/memcpy.cc) if (SNMALLOC_STATIC_LIBRARY) - add_shim(snmallocshim-static STATIC src/override/malloc.cc) - add_shim(snmallocshim-1mib-static STATIC src/override/malloc.cc) - add_shim(snmallocshim-16mib-static STATIC src/override/malloc.cc) - target_compile_definitions(snmallocshim-16mib-static PRIVATE SNMALLOC_USE_LARGE_CHUNKS - SNMALLOC_STATIC_LIBRARY_PREFIX=${SNMALLOC_STATIC_LIBRARY_PREFIX}) + add_shim(snmallocshim-static STATIC ${SHIM_FILES}) target_compile_definitions(snmallocshim-static PRIVATE SNMALLOC_STATIC_LIBRARY_PREFIX=${SNMALLOC_STATIC_LIBRARY_PREFIX}) - target_compile_definitions(snmallocshim-1mib-static PRIVATE - SNMALLOC_STATIC_LIBRARY_PREFIX=${SNMALLOC_STATIC_LIBRARY_PREFIX}) endif () if(NOT WIN32) - set(SHARED_FILES src/override/new.cc src/override/malloc.cc) - add_shim(snmallocshim SHARED ${SHARED_FILES}) - add_shim(snmallocshim-checks SHARED ${SHARED_FILES}) - add_shim(snmallocshim-1mib SHARED ${SHARED_FILES}) - add_shim(snmallocshim-16mib SHARED ${SHARED_FILES}) - target_compile_definitions(snmallocshim-16mib PRIVATE SNMALLOC_USE_LARGE_CHUNKS) - target_compile_definitions(snmallocshim-checks PRIVATE CHECK_CLIENT) - # Build a shim with some settings from oe. 
- add_shim(snmallocshim-oe SHARED ${SHARED_FILES}) - oe_simulate(snmallocshim-oe) + add_shim(snmallocshim SHARED ${SHIM_FILES}) + add_shim(snmallocshim-checks-memcpy-only SHARED ${SHIM_FILES} ${SHIM_FILES_MEMCPY}) + add_shim(snmallocshim-checks SHARED ${SHIM_FILES} ${SHIM_FILES_MEMCPY}) + target_compile_definitions(snmallocshim-checks PRIVATE SNMALLOC_CHECK_CLIENT) endif() if(SNMALLOC_RUST_SUPPORT) - add_shim(snmallocshim-rust STATIC src/override/rust.cc) - add_shim(snmallocshim-1mib-rust STATIC src/override/rust.cc) - add_shim(snmallocshim-16mib-rust STATIC src/override/rust.cc) - target_compile_definitions(snmallocshim-16mib-rust PRIVATE SNMALLOC_USE_LARGE_CHUNKS) + add_shim(snmallocshim-rust STATIC src/snmalloc/override/rust.cc) + add_shim(snmallocshim-checks-rust STATIC src/snmalloc/override/rust.cc) + target_compile_definitions(snmallocshim-checks-rust PRIVATE SNMALLOC_CHECK_CLIENT) endif() enable_testing() @@ -287,7 +368,13 @@ if(NOT DEFINED SNMALLOC_ONLY_HEADER_LIBRARY) set(TESTDIR ${CMAKE_CURRENT_SOURCE_DIR}/src/test) subdirlist(TEST_CATEGORIES ${TESTDIR}) list(REVERSE TEST_CATEGORIES) + if (${SNMALLOC_CLEANUP} STREQUAL THREAD_CLEANUP) + set(TEST_CLEANUP PTHREAD_DESTRUCTORS) + else () + set(TEST_CLEANUP ${SNMALLOC_CLEANUP}) + endif() foreach(TEST_CATEGORY ${TEST_CATEGORIES}) + message(STATUS "Adding ${TEST_CATEGORY} tests") subdirlist(TESTS ${TESTDIR}/${TEST_CATEGORY}) foreach(TEST ${TESTS}) if (WIN32 @@ -298,9 +385,9 @@ if(NOT DEFINED SNMALLOC_ONLY_HEADER_LIBRARY) # Windows does not support aligned allocation well enough # for pass through. # NetBSD, OpenBSD and DragonFlyBSD do not support malloc*size calls. - set(FLAVOURS 1;16;oe;check) + set(FLAVOURS fast;check) else() - set(FLAVOURS 1;16;oe;malloc;check) + set(FLAVOURS fast;check;malloc) endif() foreach(FLAVOUR ${FLAVOURS}) unset(SRC) @@ -309,58 +396,79 @@ if(NOT DEFINED SNMALLOC_ONLY_HEADER_LIBRARY) add_executable(${TESTNAME} ${SRC}) - # For all tests enable commit checking. 
- target_compile_definitions(${TESTNAME} PRIVATE -DUSE_POSIX_COMMIT_CHECKS) - - if (${FLAVOUR} EQUAL 16) - target_compile_definitions(${TESTNAME} PRIVATE SNMALLOC_USE_LARGE_CHUNKS) - endif() - if (${FLAVOUR} STREQUAL "oe") - oe_simulate(${TESTNAME}) + if(SNMALLOC_SANITIZER) + target_compile_options(${TESTNAME} PRIVATE -g -fsanitize=${SNMALLOC_SANITIZER} -fno-omit-frame-pointer) + target_link_libraries(${TESTNAME} -fsanitize=${SNMALLOC_SANITIZER}) endif() + + add_warning_flags(${TESTNAME}) + if (${FLAVOUR} STREQUAL "malloc") target_compile_definitions(${TESTNAME} PRIVATE SNMALLOC_PASS_THROUGH) endif() if (${FLAVOUR} STREQUAL "check") - target_compile_definitions(${TESTNAME} PRIVATE CHECK_CLIENT) - endif() - if(CONST_QUALIFIED_MALLOC_USABLE_SIZE) - target_compile_definitions(${TESTNAME} PRIVATE -DMALLOC_USABLE_SIZE_QUALIFIER=const) + target_compile_definitions(${TESTNAME} PRIVATE SNMALLOC_CHECK_CLIENT) endif() - target_link_libraries(${TESTNAME} snmalloc_lib) + target_link_libraries(${TESTNAME} snmalloc) + target_compile_definitions(${TESTNAME} PRIVATE "SNMALLOC_USE_${TEST_CLEANUP}") if (${TEST} MATCHES "release-.*") - message(STATUS "Adding test: ${TESTNAME} only for release configs") + message(VERBOSE "Adding test: ${TESTNAME} only for release configs") add_test(NAME ${TESTNAME} COMMAND ${TESTNAME} CONFIGURATIONS "Release") else() - message(STATUS "Adding test: ${TESTNAME}") + message(VERBOSE "Adding test: ${TESTNAME}") add_test(${TESTNAME} ${TESTNAME}) endif() if (${TEST_CATEGORY} MATCHES "perf") - message(STATUS "Single threaded test: ${TESTNAME}") + message(VERBOSE "Single threaded test: ${TESTNAME}") set_tests_properties(${TESTNAME} PROPERTIES PROCESSORS 4) endif() if(WIN32) # On Windows these tests use a lot of memory as it doesn't support # lazy commit. 
if (${TEST} MATCHES "two_alloc_types") - message(STATUS "Single threaded test: ${TESTNAME}") + message(VERBOSE "Single threaded test: ${TESTNAME}") set_tests_properties(${TESTNAME} PROPERTIES PROCESSORS 4) endif() if (${TEST} MATCHES "fixed_region") - message(STATUS "Single threaded test: ${TESTNAME}") + message(VERBOSE "Single threaded test: ${TESTNAME}") set_tests_properties(${TESTNAME} PROPERTIES PROCESSORS 4) endif() if (${TEST} MATCHES "memory") - message(STATUS "Single threaded test: ${TESTNAME}") + message(VERBOSE "Single threaded test: ${TESTNAME}") set_tests_properties(${TESTNAME} PROPERTIES PROCESSORS 4) endif() endif() - if (${TEST_CATEGORY} MATCHES "func") - target_compile_definitions(${TESTNAME} PRIVATE -DUSE_SNMALLOC_STATS) - endif () endforeach() endforeach() endforeach() clangformat_targets() endif() + +install(TARGETS snmalloc EXPORT snmallocConfig) + +install(TARGETS EXPORT snmallocConfig DESTINATION ${CMAKE_INSTALL_LIBDIR} + PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/snmalloc) + +install(DIRECTORY src/snmalloc/aal DESTINATION include/snmalloc) +install(DIRECTORY src/snmalloc/ds DESTINATION include/snmalloc) +install(DIRECTORY src/snmalloc/override DESTINATION include/snmalloc) +install(DIRECTORY src/snmalloc/backend DESTINATION include/snmalloc) +install(DIRECTORY src/snmalloc/mem DESTINATION include/snmalloc) +install(DIRECTORY src/snmalloc/pal DESTINATION include/snmalloc) +install(FILES + src/test/measuretime.h + src/test/opt.h + src/test/setup.h + src/test/usage.h + src/test/xoroshiro.h + DESTINATION include/snmalloc/test + ) +install(FILES src/snmalloc/snmalloc.h;src/snmalloc/snmalloc_core.h;src/snmalloc/snmalloc_front.h DESTINATION include/snmalloc) + +install(EXPORT snmallocConfig + FILE snmalloc-config.cmake + NAMESPACE snmalloc:: + DESTINATION "share/snmalloc" +) + diff --git a/README.md b/README.md index 96c6311d7..8dcd9d119 100644 --- a/README.md +++ b/README.md @@ -29,13 +29,24 @@ scenarios that can be problematic for other allocators: Both of these can cause massive reductions in performance of other allocators, but do not for snmalloc. -Comprehensive details about snmalloc's design can be found in the -[accompanying paper](snmalloc.pdf), and differences between the paper and the -current implementation are [described here](difference.md). -Since writing the paper, the performance of snmalloc has improved considerably. +The implementation of snmalloc has evolved significantly since the [initial paper](snmalloc.pdf). +The mechanism for returning memory to remote threads has remained, but most of the meta-data layout has changed. +We recommend you read [docs/security](./docs/security/README.md) to find out about the current design, and +if you want to dive into the code [docs/AddressSpace.md](./docs/AddressSpace.md) provides a good overview of the allocation and deallocation paths. [![snmalloc CI](https://github.com/microsoft/snmalloc/actions/workflows/main.yml/badge.svg?branch=master)](https://github.com/microsoft/snmalloc/actions/workflows/main.yml) +# Hardening + +There is a hardened version of snmalloc, it contains + +* Randomisation of the allocations' relative locations, +* Most meta-data is stored separately from allocations, and is protected with guard pages, +* All in-band meta-data is protected with a novel encoding that can detect corruption, and +* Provides a `memcpy` that automatically checks the bounds relative to the underlying malloc. + +A more comprehensive write up is in [docs/security](./docs/security/README.md). 
+ # Further documentation - [Instructions for building snmalloc](docs/BUILDING.md) diff --git a/ci/Toolchain.cmake b/ci/Toolchain.cmake index e3e041c83..2b5613c47 100644 --- a/ci/Toolchain.cmake +++ b/ci/Toolchain.cmake @@ -7,7 +7,11 @@ set(CMAKE_C_COMPILER clang-13) set(CMAKE_C_COMPILER_TARGET ${triple}) set(CMAKE_CXX_COMPILER clang++-13) set(CMAKE_CXX_COMPILER_TARGET ${triple}) -set(CROSS_LINKER_FLAGS "-fuse-ld=lld -Wl,--dynamic-linker=/usr/${triple}/lib/$ENV{RTLD_NAME},-rpath,/usr/${triple}/lib") + +set(CROSS_LINKER_FLAGS "-fuse-ld=${SNMALLOC_LINKER_FLAVOUR} -Wl,--dynamic-linker=/usr/${triple}/lib/$ENV{RTLD_NAME},-rpath,/usr/${triple}/lib") +if((DEFINED SNMALLOC_LINKER) AND NOT ("${SNMALLOC_LINKER}" MATCHES "^$")) + string(APPEND CROSS_LINKER_FLAGS " --ld-path=${SNMALLOC_LINKER}") +endif() set(CMAKE_EXE_LINKER_FLAGS ${CROSS_LINKER_FLAGS}) set(CMAKE_SHARED_LINKER_FLAGS ${CROSS_LINKER_FLAGS}) set(CMAKE_MODULE_LINKER_FLAGS ${CROSS_LINKER_FLAGS}) diff --git a/difference.md b/difference.md deleted file mode 100644 index 273a74ca6..000000000 --- a/difference.md +++ /dev/null @@ -1,42 +0,0 @@ -# Difference from published paper - -This document outlines the changes that have diverged from -[the published paper](snmalloc.pdf) on `snmalloc`. - - 1. Link no longer terminates the bump-free list. The paper describes a - complex invariant for how the final element of the bump-free list can - also be the link node. - - We now have a much simpler invariant. The link is either 1, signifying - the block is completely full. Or not 1, signifying it has at least one - free element at the offset contained in link, and that contains the DLL - node for this sizeclass. - - The bump-free list contains additional free elements, and the remaining - bump allocated space. - - The value 1, is never a valid bump allocation value, as we initially - allocate the first entry as the link, so we can use 1 as the no more bump - space value. - - 2. Separate Bump/Free list. We have separate bump ptr and free list. This - is required to have a "fast free list" in each allocator for each - sizeclass. We bump allocate a whole os page (4KiB) worth of allocations - in one go, so that the CPU predicts the free list path for the fast - path. - - 3. Per allocator per sizeclass fast free list. Each allocator has an array - for each small size class that contains a free list of some elements for - that sizeclass. This enables a very compressed path for the common - allocation case. - - 4. We now store a direct pointer to the next element in each slabs free list - rather than a relative offset into the slab. This enables list - calculation on the fast path. - - 5. There is a single bump-ptr per size class that is part of the - allocator structure. The per size class slab list now only contains slabs - with free list, and not if it only has a bump ptr. - -[2-4] Are changes that are directly inspired by -(mimalloc)[http://github.com/microsoft/mimalloc]. \ No newline at end of file diff --git a/docs/AddressSpace.md b/docs/AddressSpace.md new file mode 100644 index 000000000..95f91ef43 --- /dev/null +++ b/docs/AddressSpace.md @@ -0,0 +1,157 @@ +# How snmalloc Manages Address Space + +Like any modern, high-performance allocator, `snmalloc` contains multiple layers of allocation. +We give here some notes on the internal orchestration. + +## From platform to malloc + +Consider a first, "small" allocation (typically less than a platform page); such allocations showcase more of the machinery. 
+For simplicity, we assume that + +- this is not an `OPEN_ENCLAVE` build, +- the `BackendAllocator` has not been told to use a `fixed_range`, +- this is not a `SNMALLOC_CHECK_CLIENT` build, and +- (as a consequence of the above) `SNMALLOC_META_PROTECTED` is not `#define`-d. + +Since this is the first allocation, all the internal caches will be empty, and so we will hit all the slow paths. +For simplicity, we gloss over much of the "lazy initialization" that would actually be implied by a first allocation. + +1. The `LocalAlloc::small_alloc` finds that it cannot satisfy the request because its `LocalCache` lacks a free list for this size class. + The request is delegated, unchanged, to `CoreAllocator::small_alloc`. + +2. The `CoreAllocator` has no active slab for this sizeclass, so `CoreAllocator::small_alloc_slow` delegates to `BackendAllocator::alloc_chunk`. + At this point, the allocation request is enlarged to one or a few chunks (a small counting number multiple of `MIN_CHUNK_SIZE`, which is typically 16KiB); see `sizeclass_to_slab_size`. + +3. `BackendAllocator::alloc_chunk` at this point splits the allocation request in two, allocating both the chunk's metadata structure (of size `PAGEMAP_METADATA_STRUCT_SIZE`) and the chunk itself (a multiple of `MIN_CHUNK_SIZE`). + Because the two exercise similar bits of machinery, we now track them in parallel in prose despite their sequential nature. + +4. The `BackendAllocator` has a chain of "range" types that it uses to manage address space. + By default (and in the case we are considering), that chain begins with a per-thread "small buddy allocator range". + + 1. For the metadata allocation, the size is (well) below `MIN_CHUNK_SIZE` and so this allocator, which by supposition is empty, attempts to `refill` itself from its parent. + This results in a request for a `MIN_CHUNK_SIZE` chunk from the parent allocator. + + 2. For the chunk allocation, the size is `MIN_CHUNK_SIZE` or larger, so this allocator immediately forwards the request to its parent. + +5. The next range allocator in the chain is a per-thread *large* buddy allocator that refills in 2 MiB granules. + (2 MiB chosen because it is a typical superpage size.) + At this point, both requests are for at least one and no more than a few times `MIN_CHUNK_SIZE` bytes. + + 1. The first request will `refill` this empty allocator by making a request for 2 MiB to its parent. + + 2. The second request will stop here, as the allocator will no longer be empty. + +6. The chain continues with a `CommitRange`, which simply forwards all allocation requests and (upon unwinding) ensures that the address space is mapped. + +7. The chain now transitions from thread-local to global; the `GlobalRange` simply serves to acquire a lock around the rest of the chain. + +8. The next entry in the chain is a `StatsRange` which serves to accumulate statistics. + We ignore this stage and continue onwards. + +9. The next entry in the chain is another *large* buddy allocator which refills at 16 MiB but can hold regions + of any size up to the entire address space. + The first request triggers a `refill`, continuing along the chain as a 16 MiB request. + (Recall that the second allocation will be handled at an earlier point on the chain.) + +10. The penultimate entry in the chain is a `PagemapRegisterRange`, which always forwards allocations along the chain. + +11. At long last, we have arrived at the last entry in the chain, a `PalRange`. 
+ This delegates the actual allocation, of 16 MiB, to either the `reserve_aligned` or `reserve` method of the Platform Abstraction Layer (PAL). + +12. Having wound the chain onto our stack, we now unwind! + The `PagemapRegisterRange` ensures that the Pagemap entries for allocations passing through it are mapped and returns the allocation unaltered. + +13. The global large buddy allocator splits the 16 MiB refill into 8, 4, and 2 MiB regions it retains as well as returning the remaining 2 MiB back along the chain. + +14. The `StatsRange` makes its observations, the `GlobalRange` now unlocks the global component of the chain, and the `CommitRange` ensures that the allocation is mapped. + Aside from these side effects, these propagate the allocation along the chain unaltered. + +15. We now arrive back at the thread-local large buddy allocator, which takes its 2 MiB refill and breaks it down into powers of two down to the requested `MIN_CHUNK_SIZE`. + The second allocation (of the chunk), will either return or again break down one of these intermediate chunks. + +16. For the first (metadata) allocation, the thread-local *small* allocator breaks the `MIN_CHUNK_SIZE` allocation down into powers of two down to `PAGEMAP_METADATA_STRUCT_SIZE` and returns one of that size. + The second allocation will have been forwarded and so is not additionally handled here. + +Exciting, no? + +## What Can I Learn from the Pagemap? + +### Decoding a MetaEntry + +The centerpiece of `snmalloc`'s metadata is its `Pagemap`, which associates each "chunk" of the address space (~16KiB; see `MIN_CHUNK_BITS`) with a `MetaEntry`. +A `MetaEntry` is a pair of pointers, suggestively named `meta` and `remote_and_sizeclass`. +In more detail, `MetaEntry`s are better represented by Sigma and Pi types, all packed into two pointer-sized words in ways that preserve pointer provenance on CHERI. + +To begin decoding, a bit (`REMOTE_BACKEND_MARKER`) in `remote_and_sizeclass` distinguishes chunks owned by frontend and backend allocators. + +For chunks owned by the *frontend* (`REMOTE_BACKEND_MARKER` not asserted), + +1. The `remote_and_sizeclass` field is a product of + + 1. A `RemoteAllocator*` indicating the `LocalAlloc` that owns the region of memory. + + 2. A "full sizeclass" value (itself a tagged sum type between large and small sizeclasses). + +2. The `meta` pointer is a bit-stuffed pair of + + 1. A pointer to a larger metadata structure with type dependent on the role of this chunk + + 2. A bit (`META_BOUNDARY_BIT`) that serves to limit chunk coalescing on platforms where that may not be possible, such as CHERI. + +See `src/backend/metatypes.h` and `src/mem/metaslab.h`. + +For chunks owned by a *backend* (`REMOTE_BACKEND_MARKER` asserted), there are again multiple possibilities. + +For chunks owned by a *small buddy allocator*, the remainder of the `MetaEntry` is zero. +That is, it appears to have small sizeclass 0 and an implausible `RemoteAllocator*`. + +For chunks owned by a *large buddy allocator*, the `MetaEntry` is instead a node in a red-black tree of all such chunks. +Its contents can be decoded as follows: + +1. The `meta` field's `META_BOUNDARY_BIT` is preserved, with the same meaning as in the frontend case, above. + +2. `meta` (resp. `remote_and_sizeclass`) includes a pointer to the left (resp. right) *chunk* of address space. + (The corresponding child *node* in this tree is found by taking the *address* of this chunk and looking up the `MetaEntry` in the Pagemap. 
+ This trick of pointing at the child's chunk rather than at the child `MetaEntry` is particularly useful on CHERI: + it allows us to capture the authority to the chunk without needing another pointer and costs just a shift and add.) + +3. The `meta` field's `LargeBuddyRep::RED_BIT` is used to carry the red/black color of this node. + +See `src/backend/largebuddyrange.h`. + +### Encoding a MetaEntry + +We can also consider the process for generating a MetaEntry for a chunk of the address space given its state. +The following cases apply: + +1. The address is not associated with `snmalloc`: + Here, the `MetaEntry`, if it is mapped, is all zeros and so it... + * has `REMOTE_BACKEND_MARKER` clear in `remote_and_sizeclass`. + * appears to be owned by a frontend RemoteAllocator at address 0 (probably, but not certainly, `nullptr`). + * has "small" sizeclass 0, which has size 0. + * has no associated metadata structure. + +2. The address is part of a free chunk in a backend's Large Buddy Allocator: + The `MetaEntry`... + * has `REMOTE_BACKEND_MARKER` asserted in `remote_and_sizeclass`. + * has "small" sizeclass 0, which has size 0. + * the remainder of its `MetaEntry` structure will be a Large Buddy Allocator rbtree node. + * has no associated metadata structure. + +3. The address is part of a free chunk inside a backend's Small Buddy Allocator: + Here, the `MetaEntry` is zero aside from the asserted `REMOTE_BACKEND_MARKER` bit, and so it... + * has "small" sizeclass 0, which has size 0. + * has no associated metadata structure. + +4. The address is part of a live large allocation (spanning one or more 16KiB chunks): + Here, the `MetaEntry`... + * has `REMOTE_BACKEND_MARKER` clear in `remote_and_sizeclass`. + * has a *large* sizeclass value. + * has an associated `RemoteAllocator*` and `Metaslab*` metadata structure + (holding just the original chunk pointer in its `MetaCommon` substructure; + it is configured to always trigger the deallocation slow-path to skip the logic when a chunk is in use as a slab). + +5. The address, whether or not it is presently within an allocated object, is part of an active slab. Here, the `MetaEntry`.... + * encodes the *small* sizeclass of all objects in the slab. + * has a `RemoteAllocator*` referencing the owning `LocalAlloc`'s message queue. + * points to the slab's `Metaslab` structure containing additional metadata (e.g., free list). diff --git a/docs/BUILDING.md b/docs/BUILDING.md index 0a550ffc6..849a4c839 100644 --- a/docs/BUILDING.md +++ b/docs/BUILDING.md @@ -110,8 +110,19 @@ target_link_libraries([lib_name] PRIVATE snmalloc_lib) You will also need to compile the relevant parts of snmalloc itself. Create a new file with the following contents and compile it with the rest of your application. ```c++ -#define NO_BOOTSTRAP_ALLOCATOR - #include "snmalloc/src/override/malloc.cc" #include "snmalloc/src/override/new.cc" ``` + +To enable the `reallocarray` symbol export, this can be added to your cmake command line. + +``` +-DSNMALLOC_NO_REALLOCARRAY=OFF +``` + +likewise for `reallocarr`. + +``` +-DSNMALLOC_NO_REALLOCARR=OFF +``` + diff --git a/docs/PORTING.md b/docs/PORTING.md index 8238bf1cc..0f1f2f0a6 100644 --- a/docs/PORTING.md +++ b/docs/PORTING.md @@ -16,22 +16,37 @@ The PAL must implement the following methods: ``` Report a fatal error and exit. 
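For orientation only, a minimal POSIX-flavoured version of this hook might look like the sketch below. The struct name `MyPal` is invented for illustration, and the exact signature and qualifiers should be copied from the existing PALs under `src/snmalloc/pal` rather than from here.

```c++
#include <cstdio>
#include <cstdlib>

struct MyPal
{
  // Report a fatal error and exit; returning from this hook is not an option.
  [[noreturn]] static void error(const char* const str) noexcept
  {
    std::fputs(str, stderr);
    std::fputc('\n', stderr);
    std::abort();
  }
};
```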
+The memory that snmalloc is supplied by the Pal is considered to be in one of three
+states:
+
+* `using`
+* `using readonly`
+* `not using`
+
+Before accessing the memory for a read, `snmalloc` will change the state to
+either `using` or `using readonly`,
+and before a write it will change the state to `using`.
+If memory is not required any more, then `snmalloc` will change the state to
+`not using`, and will ensure that it notifies the `Pal`
+before it ever accesses that memory again.
+The `not using` state allows the `Pal` to recycle the memory for other purposes.
+If `PalEnforceAccess` is set to true, then accessing memory that has not been notified
+correctly should trigger an exception/segfault.
+
+The state for a particular region of memory is set with
 ```c++
 static void notify_not_using(void* p, size_t size) noexcept;
-```
-Notify the system that the range of memory from `p` to `p` + `size` is no
-longer in use, allowing the underlying physical pages to recycled for other
-purposes.
-```c++
 template
 static void notify_using(void* p, size_t size) noexcept;
+
+static void notify_using_readonly(void* p, size_t size) noexcept;
 ```
-Notify the system that the range of memory from `p` to `p` + `size` is now in use.
-On systems that lazily provide physical memory to virtual mappings, this
-function may not be required to do anything.
-If the template parameter is set to `YesZero` then this function is also
-responsible for ensuring that the newly requested memory is full of zeros.
+These functions notify the system that the range of memory from `p` to `p` +
+`size` is in the relevant state.
+
+If the template parameter is set to `YesZero` then `notify_using` must ensure
+the range is full of zeros.
 
 ```c++
 template
@@ -45,17 +60,17 @@ efficient to request that the operating system provides background-zeroed
 pages, rather than zeroing them synchronously in this call
 
 ```c++
-template
+template
 static void* reserve_aligned(size_t size) noexcept;
-static std::pair reserve_at_least(size_t size) noexcept;
+static void* reserve(size_t size) noexcept;
 ```
-Only one of these needs to be implemented, depending on whether the underlying
-system can provide strongly aligned memory regions.
-If the system guarantees only page alignment, implement the second. The Pal is
-free to overallocate based on the platform's desire and snmalloc
-will find suitably aligned blocks inside the region. `reserve_at_least` should
-not commit memory as snmalloc will commit the range of memory it requires of what
-is returned.
+All platforms should provide `reserve` and can optionally provide
+`reserve_aligned` if the underlying system can provide strongly aligned
+memory regions.
+If the system guarantees only page alignment, implement only `reserve`. `snmalloc` will
+overallocate to ensure it can find suitably aligned blocks inside the region.
+`reserve` should consider memory initially as `not using`, and `snmalloc` will notify when it
+needs the range of memory.
 
 If the system provides strong alignment, implement the first to return memory
 at the desired alignment.
 If providing the first, then the `Pal` should also
diff --git a/docs/StrictProvenance.md b/docs/StrictProvenance.md
index b2b6317cd..0678700fc 100644
--- a/docs/StrictProvenance.md
+++ b/docs/StrictProvenance.md
@@ -1,229 +1,255 @@
 # StrictProvenance Architectures
 
-To aid support of novel architectures, such as CHERI, which explicitly track pointer *provenance* and *bounds*, `snmalloc` makes heavy use of a `CapPtr` wrapper type around `T*` values.
-You can view the annotation `B` on a `CapPtr` as characterising the set of operations that are supported on this pointer, such as +## What is Strict Provenance? -* address arithmetic within a certain range (e.g, a `Superslab` chunk) +Some architectures, such as CHERI (including Arm's Morello), explicitly consider pointer *provenance* and *bounds* in addition to their target *addresses*. +Adding these considerations to the architecture enables software to constrain uses of *particular pointers* in ways that are not available with traditional protection mechanisms. +For example, while code may *have* a pointer that spans its entire C stack, it may construct a pointer that authorizes access only to a particular stack allocation (e.g., a buffer) and use this latter pointer while copying data. +Even if an attacker is able to control the length of the copy, the bounds imposed upon pointers involved can ensure that an overflow is impossible. +(On the other hand, if the attacker can influence both the *bounds* and the copy length, an overflow may still be possible; in practice, however, the two concerns are often sufficiently separated.) +For `malloc()` in particular, it is enormously beneficial to be able to impose bounds on returned pointers: it becomes impossible for allocator clients to use a pointer from `malloc()` to access adjacent allocations! +(*Temporal* concerns still apply, in that live allocations can overlap prior, now-dead allocations. +Stochastic defenses are employed within `snmalloc` and deterministic defenses are ongoing research at MSR.) + +Borrowing terminology from CHERI, we speak of the **authority** (to a subset of the address space) held by a pointer and will justify actions in terms of this authority.[^mmu-perms] +While many kinds of authority can be envisioned, herein we will mean either + +* *spatial* authority to read/write/execute within a single *interval* within the address space, or +* *vmem* authority to request modification of the virtual page mappings for a given range of addresses. + +We may **bound** the authority of a pointer, deriving a new pointer with a subset of its progenitor's authority; this is assumed to be an ambient action requiring no additional authority. +Dually, given two pointers, one with a subset of the other's authority, we may **amplify** the less-authorized, constructing a pointer with the same address but with increased authority (up to the held superset authority).[^amplifier-state] + +## snmalloc Support For Strict Provenance + +### Static Annotations With CapPtr + +To aid code auditing, `snmalloc` makes heavy use of a `CapPtr` wrapper type around `T*` values. +You can think of the annotation `B` on a `CapPtr` as capturing something about the role of the pointer, e.g.: + +* A pointer to a whole chunk or slab, derived from an internal `void*`. +* A pointer to a particular allocation, destined for the user program +* A putative pointer returned from the user program + +You can also view the annotation `B` as characterising the set of operations that are supported on this pointer, such as + +* nothing (because we haven't checked that it's actually a valid pointer) +* memory access within a certain range (e.g, a chunk or an allocation) * requesting manipulation of the virtual memory mappings -Most architectures and platforms cannot enforce these restrictions, but CHERI enables software to constrain its future use of particular pointers and `snmalloc` imposes strong constraints on its *client(s)* use of memory it manages. 
+Most architectures and platforms cannot enforce these restrictions outside of static constraints, but CHERI enables software to constrain its future use of particular pointers and `snmalloc` imposes strong constraints on its *client(s)* use of memory it manages.
 
 The remainder of this document...
 
 * gives a "quick start" guide,
 * provides a summary of the constraints imposed on clients,
-* motivates and introduces the internal `ArenaMap` structure and the `capptr_amplify` function, and
-* describes the `StrictProvenance` `capptr_*` functions provided by the Architecture Abstraction Layer (AAL) and Platform Abstraction Layer (PAL).
+* describes the `StrictProvenance` `capptr_*` functions provided by `ds/ptrwrap.h`, the Architecture Abstraction Layer (AAL), and the Platform Abstraction Layer (PAL).
 
-## Preface
+### Limitations
 
 The `CapPtr` and `capptr_*` primitives and derived functions are intended to guide developers in useful directions; they are not security mechanisms in and of themselves.
-For non-CHERI architectures, the whole edifice crumbles in the face of an overzealous `reinterpret_cast<>` or `unsafe_capptr` member access.
-On CHERI, these are likely to elicit capability violations, but may not if all subsequent access happen to be within bounds.
+For non-CHERI architectures, the whole edifice crumbles in the face of an overzealous `reinterpret_cast<>` or `unsafe_*ptr` call.
+On CHERI, these are likely to elicit capability violations, but may not if all subsequent accesses happen to be within the architecturally-enforced bounds.
 
 ## Quick Start Guide
 
-### How do I safely get an ordinary pointer to reveal to the client?
-
-If you are...
+### What will I see?
 
-* Adding an interface like `external_pointer`, and so you have a `CapPtr`, `e`, whose bounds you want to *inherit* when revealing some other `CapPtr` `p`, use `capptr_rebound(e, p)` to obtain another `CapPtr` with address from `p`, then go to the last step here.
+In practice, `CapPtr` and the details of `B` overtly show themselves only in primitive operations or when polymorphism across `B` is required.
+(Or, sadly, when looking at compilation errors or demangled names in a debugger.)
+All the concrete forms we have found useful have layers of aliasing to keep the verbosity down: `capptr::B` is a `CapPtr` with `capptr::bounds::B` itself an alias for a `capptr::bound<...>` type-level object.
+This trend of aliasing continues into higher-level abstractions, such as the freelist, wherein one finds, for example, `freelist::HeadPtr`, which expands to a type involving several `CapPtr`s and associated annotations.
 
-* Otherwise, if your object is...
+### How do I safely get an ordinary pointer to reveal to the client?
 
-  * an entire `SUPERSLAB_SIZE` chunk or bigger, you should have in hand a `CapPtr` from the large allocator. Use `capptr_export` to make a `CapPtr`, then use `capptr_chunk_is_alloc` to convert that to a `CapPtr`, and then proceed. (If, instead, you find yourself holding a `CapPtr`, use `capptr_chunk_from_chunkd` first.)
+Almost all memory manipulated by `snmalloc` frontends comes via the backend's `alloc_chunk` method.
+This returns a `capptr::Chunk`; the pointer is spatially bounded to the returned region (which is at least as big as requested).
+This pointer is, however, not restricted in its ability to manipulate the address space within its bounds; this permits the frontend to call `mmap` and `madvise` on pages therein.
- * of size `sz` and smaller than such a chunk, +To derive a pointer that is suitable for client use, we must - * and have a `CapPtr p` in hand, use `Aal::capptr_bound(p, sz)` to get a `CapPtr`, and then proceed. +* further spatially refine the pointer: adjust its offset with `pointer_offset` and use `capptr_bound` and +* shed address space control: use `PAL::capptr_to_user_address_control()` to convert `AllocFull` to `Alloc`. - * an have a `CapPtr p`, `CapPtr p`, or `CapPtr p` in hand, use `Aal::capptr_bound(p, sz)` to get a `CapPtr`, and then proceed. +If no additional spatial refinement is required, because the entire chunk is intended for client use, -* If the above steps left you with a `CapPtr`, apply any platform constraints for its export with `Pal::capptr_export(p)` to obtain a `CapPtr`. +* shed address space control: use `PAL::capptr_to_user_address_control()` to obtain a `ChunkUser`-bounded pointer, then +* use `capptr_chunk_is_alloc` to capture intent, converting `ChunkUser` to `Alloc` without architectural consequence. -* Use `capptr_reveal` to safely convert a `CapPtr` to a `T*` for the client. +At this point, we hold a `capptr::Alloc`; use `capptr_reveal()` to obtain the underlying `T*`. ### How do I safely ingest an ordinary pointer from the client? -For all its majesty, `CapPtr`'s coverage is merely an impediment to, rather than a complete defense against, malicious client behavior even on CHERI-enabled architectures. +First, we must admit that, for all its majesty, `CapPtr`'s coverage is merely an impediment to, rather than a complete defense against, malicious client behavior even on CHERI-enabled architectures. Further protection is an open research project at MSR. -Nevertheless, if adding a new kind of deallocation, we suggest following the existing flows: +Nevertheless, if adding a new kind of deallocation, we suggest following the existing flows when given a `void* p_raw` from the client: -* Begin by wrapping it with `CapPtr` and avoid using the raw `T*` thereafter. +* Begin by calling `p_wild = capptr_from_client(p_raw)` to annotate it as `AllocWild` and avoid using the raw form thereafter. -* An `CapPtr` can be obtained using `large_allocator.capptr_amplify()`. -Note that this pointer and its progeny are *unsafe* beyond merely having elevated authority: it is possible to construct and dereference pointers with types that do not match memory, resulting in **undefined behavior**. +* Check the `Wild` pointer for domestication with `p_tame = capptr_domesticate(state_ptr, p_wild)`; `p_tame` will be a `capptr::Alloc` and will alias `p_wild` or will be `nullptr`. + At this point, we have no more use for `p_wild`. -* Derive the `ChunkMapSuperslabKind` associated with the putative pointer from the client, by reading the `ChunkMap`. -In some flows, the client will have made a *claim* as to the size (class) of the object which may be tentatively used, but should be validated (unless the client is trusted). +* We may now probe the Pagemap; either `p_tame` is a pointer we have given out or `nullptr`, or this access may trap (especially on platforms where domestication is just a rubber stamp). + This will give us access to the associated `MetaEntry` and, in general, a (path to a) `Chunk`-bounded pointer to the entire backing region. -* Based on the above, for non-Large objects, `::get()` the appropriate header structure (`Superslab` or `Mediumslab`). +* If desired, we can now validate other attributes of the provided capability, including its length, base, and permissions. 
+ In fact, we can even go further and *reconstruct* the capability we would have given out for the indicated allocation, allowing for exact comparison. Eventually we would like to reliably detect references to free objects as part of these flows, especially as frees can change the type of metadata found at the head of a chunk. When that is possible, we will add guidance that only reads of non-pointer scalar types are to be performed until after such tests have confirmed the object's liveness. Until then, we have stochastic defenses (e.g., `encode` in `src/mem/freelist.h`) later on. -As alluded to above, `capptr_rebound` can be used to ensure that pointers manipulated within `snmalloc` inherit bounds from client-provided pointers. -In the future, these derived pointers will inherit *temporal bounds* as well as the spatial ones described herein. - ### What happened to my cast operators? Because `CapPtr` are not the kinds of pointers C++ expects to manipulate, `static_cast<>` and `reinterpret_cast<>` are not applicable. Instead, `CapPtr` exposes `as_void()`, `template as_static()`, and `template as_reinterpret()` to perform `static_cast`, `static_cast`, and `reinterpret_cast` (respectively). Please use the first viable option from this list, reserving `reinterpret_cast` for more exciting circumstances. -## StrictProvenance in More Detail - -Tracking pointer *provenance* and *bounds* enables software to constrain uses of *particular pointers* in ways that are not available with traditional protection mechanisms. -For example, while code my *have* a pointer that spans its entire C stack, it may construct a pointer that authorizes access only to a particular stack allocation (e.g., a buffer) and use this latter pointer while copying data. -Even if an attacker is able to control the length of the copy, the bounds imposed upon pointers involved can ensure that an overflow is impossible. -(Of course, if the attacker can influence both the *bounds* and the copy length, an overflow may still be possible; in practice, however, the two concerns are often sufficiently separated.) -For `malloc()` in particular, it is enormously beneficial to be able to impose bounds on returned pointers: it becomes impossible for allocator clients to use a pointer from `malloc()` to access adjacent allocations! - -Borrowing terminology from CHERI, we speak of the **authority** (to a subset of the address space) held by a pointer and will justify actions in terms of this authority. -While many kinds of authority can be envisioned, herein we will mean either - -* *spatial* authority to read/write/execute within a single *interval* within the address space, or -* *vmmap* authority to request modification of the virtual page mappings for a given range of addresses. - -We may **bound** the authority of a pointer, deriving a new pointer with a subset of its progenitor's authority; this is assumed to be an ambient action requiring no additional authority. -Dually, given two pointers, one with a subset of the other's authority, we may **amplify** the less-authorized, constructing a pointer with the same address but with increased authority (up to the held superset authority).[^amplifier-state] - ## Constraints Imposed Upon Allocations `snmalloc` ensures that returned pointers are bounded to no more than the slab entry used to back each allocation. +That is, **no two live allocations will have overlapping bounds**. 
 It may be useful, mostly for debugging, to more precisely bound returned pointers to the actual allocation size,[^bounds-precision] but this is not required for security.
-The pointers returned from `alloc()` will be stripped of their *vmmap* authority, if supported by the platform, ensuring that clients cannot manipulate the page mapping underlying `snmalloc`'s address space.
+The pointers returned from `alloc()` will also be stripped of their *vmem* authority, if supported by the platform, ensuring that clients cannot manipulate the page mapping underlying `snmalloc`'s address space.
 
 `realloc()`-ation has several policies that may be sensible.
-We choose a fairly simple one for the moment: resizing in ways that do not change the backing allocation's `snmalloc` size class are left in place, while any change to the size class triggers an allocate-copy-deallocate sequence.
-Even if `realloc()` leaves the object in place, the returned pointer should have its authority bounded as if this were a new allocation (and so may have less authority than `realloc()`'s pointer argument if sub-slab-entry bounds are being applied).
-(Notably, this policy is compatible with the existence of size-parameterized deallocation functions: the result of `realloc()` is always associated with the size class corresponding to the requested size.
-By contrast, shrinking in place in ways that changed the size class would require tracking the largest size ever associated with the allocation.)
+As a holdover from snmalloc v1, we have a fairly simple policy: resizes that do not change the backing allocation's `snmalloc` size class are handled in place, while any change to the size class triggers an allocate-copy-deallocate sequence.
+Even if `realloc()` leaves the object in place, the returned pointer should have its authority bounded as if this were a new allocation (and so the result may be a subset of the input to `realloc()`).
 
-## Impact of Constraints On Deallocation, or Introducing the ArenaMap
+Because snmalloc v2 no longer benefits from being provided the size of an allocated object (in, for example, dealloc), we may wish to adopt policies that allow objects to shrink in place beyond the lower bound of their sizeclass.
 
-Strict provenance and bounded returns from `alloc()` imply that we cannot expect things like
+## Impact of Constraints On Deallocation
 
-```c++
-void dealloc(void *p)
-{
-  Superslab *super = Superslab::get(p);
-  ... super->foo ...
-}
-```
+Previous editions of `snmalloc` stored metadata at "superslab" boundaries in the address space and relied on address arithmetic to map from small allocations to their associated metadata.
+These operations relied on being able to take pointers out of bounds, and so posed challenges for `StrictProvenance` architectures.
+The current edition of `snmalloc` instead follows pointers (starting from TLS or global roots), using address arithmetic only to derive indices into these metadata pointers.
 
-to work (using the existing `Superslab::get()` implementation).
-Architecturally, `dealloc` is no different from any *allocator client* code and `Superslab::get()` is merely some pointer math.
-As such, `Superslab::get()` must either fail to construct its return value (e.g., by trapping) or construct a useless return value (e.g., one that traps on dereference).
-To proceed, we must take advantage of the fact that `snmalloc` has separate authority to the memory underlying its allocations.
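+As a rough illustration (not the real API), the lookup is an index computation on an address, reached from a global root, rather than pointer arithmetic on the client's capability:
+
+```c++
+#include <cstddef>
+#include <cstdint>
+
+using address_t = uintptr_t;                 // as elsewhere in snmalloc
+static constexpr size_t MIN_CHUNK_BITS = 14; // 16 KiB chunks (illustrative)
+
+// Stand-ins for the real Pagemap types; see the backend for the actual code.
+struct MetaEntry
+{
+  void* meta;
+  uintptr_t remote_and_sizeclass;
+};
+extern MetaEntry pagemap_body[];
+
+// Metadata is found by *indexing* a structure reachable from a global,
+// never by taking the client's pointer out of its bounds.
+inline MetaEntry& entry_for(address_t a)
+{
+  return pagemap_body[a >> MIN_CHUNK_BITS];
+}
+```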
+When the allocator client returns memory (or otherwise refers to an allocation), we will be careful to use the *lower bound* address, not the indicated address per se, for looking up the allocation. +The indicated address may be out of bounds, while `StrictProvenance` architectures should ensure that bounds are monotonically non-increasing, and so either -Ultimately, all address space manipulated by `snmalloc` comes from its Platform's primitive allocator. -An **arena** is a region returned by that provider. -The `AddressSpaceManager` divides arenas into large allocations and manages their life cycles. -On `StrictProvenance` architectures, the ASM further maintains a map of all PAL-provided memory, called the `ArenaMap`, and uses this to implement `capptr_amplify`, copying the address of a low-authority pointer into a copy of the high-authority pointer provided by the PAL. -The resulting pointer can then be used much as on non-`StrictProvenance` architectures, with integer arithmetic being used to make it point anywhere within an arena. -`snmalloc`'s heap layouts ensure that metadata associated with any object are spread across globals and within the same arena as the object itself, and so, assuming access to globals as given, a single amplification suffices. +* the lower bound will always be within the original allocation. +* the pointer provided by the user will have zero length. + +If we must detach an address from the pointer, as in deallocation, we will generally reject zero-length pointers, as if they were nullptr. + +At the moment, **we permit a pointer to any part of an object to deallocate that object**. +`snmalloc`'s design ensures that we will not create a new, free "object" at an interior pointer but will, instead, always be able to find the beginning and end of the object in question. +In the future we are likely to be more strict, requiring authority to *the entire object* or at least *its lowest-address pointer-sized word* to free it. ## Object Lookup `snmalloc` extends the traditional allocator interface with the `template void* external_pointer(void*)` family of functions, which generate additional pointers to live allocations. -To ensure that this function is not used as an amplification oracle, it must construct a return pointer with the same validity as its input even as it internally amplifies to access metadata; see `capptr_rebound`. - -XXX It may be worth requiring that the input pointer authorize the entire object? -What are the desired security properties here? +To ensure that this function is not used as an amplification oracle, it must construct a return pointer with the same validity as its input even as it internally accesses metadata. +We make `external_pointer` use `pointer_offset` on the user-provided pointer, ensuring that the result has no more authority than the client already held. # Adapting the Implementation ## Design Overview -As mentioned, the `AddressSpaceManager` maintains an `ArenaMap`, a cache of pointers that span the entire heap managed by `snmalloc`. -To keep this cache small, we request very large swaths (GiB-scale on >48-bit ASes) of address space at a time, even if we only populate those regions very slowly. - -Within `snmalloc`, there are several data structures that hold free memory: - -* the `LargeAlloc` holds all regions too big to be managed by `MediumSlab`s - -* `MediumSlab`s hold free lists +For the majority of operations, no `StrictProvenance`-specific reasoning, beyond applying bounds, need be entertained. 
+However, as regions of memory move out of (and back into) the allocator's client and fast free lists, care must be taken to recover (and preserve) the internal, *vmem*-authorizing pointers from the user's much more tightly bounded pointers. -* `Slab`s hold free lists. +We store these internal pointers inside metadata, at different locations for each state: -* `Slab`s have associated "bump pointer" regions of address space not yet used (facilitating lazy construction of free lists) +* For free chunks in the "large `Range`" `Pipe`, we expect the `Range` objects themselves to work with these pointers. + In practice, these ranges place these pointers in global state or the `Pagemap` `MetaEntry`s directly. -* `Alloc`s themselves also hold, per small size class, up to one free list and up to one bump pointer (so that the complexity of `Slab` manipulation is amortized across many allocations) +* Once outside the "large `Range`" `Pipe`, chunks holding heap objects will have a `SlabMetadata` structure associated with them, and we can store these high-authority pointers therein. + (Specifically, the `StrictProvenanceBackendSlabMetadata` class adds an `arena` member to the `SlabMetadata` in use.) -* `Alloc`s have or point to `RemoteAllocator`s, which contain queues of `Remote` objects formed from deallocated memory. - -* `Alloc`s have `RemoteCaches` that also hold `Remote`s. +* Metadata chunks themselves, however, do not have `SlabMetadata` structures and are managed using a "small `Range`" `Pipe`. + These require special handling, considered below. +Within each (data) slab, there is (at least) one free list of objects. We take the position that free list entries should be suitable for return, i.e., with authority bounded to their backing slab entry. (However, the *contents* of free memory may be dangerous to expose to the user and require clearing prior to handing out.) -This means that allocation fast paths are unaffected by the requirement to bound return pointers, but that deallocation paths may need to amplify twice, once on receipt of the pointer from the application and again on receipt of the pointer from another `Allocator` through the `Remote` mechanism. ## Static Pointer Bound Taxonomy -At the moment, we introduce six possible annotations, though the taxonomy is imperfect: +We introduce a multi-dimensional space of bounds. The facets are `enum class`-es in `snmalloc::capptr::dimension`. + +* `Spatial` captures the intended spatial extent / role of the pointer: `Alloc`-ation, `Chunk`, or an entire `Arena`. + +* `AddressSpaceControl` captures whether the pointer conveys control of its address space. -* bounded only to an underlying arena without platform constraints, `CBArena`; -* bounded to a `SUPERSLAB_SIZE` or larger chunk without platform constraints, `CBChunk`; -* bounded to a `SUPERSLAB_SIZE` or larger chunk with platform constraints, `CBChunkE`; -* bounded *on debug builds* to a `SUPERSLAB_SIZE` or larger chunk without platform constraints, `CBChunkD`; -* bounded to an allocation but without platform constraints yet applied, `CBAlloc`; -* bounded to an allocation and with platform constraints, `CBAllocE`; +* `Wildness` captures whether the pointer has been checked to belong to this allocator. -By "platform constraints" we mean, for example, CheriBSD's ability to remove the authority to manage the VM mappings underlying a pointer. -Clients of malloc have no business attempting to manage the backing pages. 
+These `dimension`s are composited using a `capptr::bound<>` type that we use as `B` in `CapPtr`. +This is enforced (loosely) using the `IsBound` C++20 concept. -In practice, we use the pair of the type `T` and the bounds annotation for additional light-weight verification. -For example, we differentiate `CapPtr` from `CapPtr`, with the former being offset (if cache-friendly offsets are in effect) and the latter almost always pointing to the start of the object. -While it is possible to write code which subverts the annotation scheme, in general method signatures should provide the correct affordance. +The namespace `snmalloc::capptr::bounds` contains particular points in the space of `capptr::bound<>` types: + +* bounded to a large region of the address space with address space control, `Arena`; +* bounded to at least `MIN_CHUNK_SIZE` bytes with address space control, `Chunk`; +* bounded to at least `MIN_CHUNK_SIZE` bytes without address space control, `ChunkUser`; +* bounded to a smaller region but with address space control, `AllocFull`; +* bounded to a smaller region and without address space control, `Alloc`; +* unverified but presumed to be to an `Alloc`-ation, `AllocWild`. ## Primitive Architectural Operations Several new functions are introduced to AALs to capture primitives of the Architecture. -* `CapPtr capptr_bound(CapPtr a, size_t sz)` - spatially bounds the pointer `a` to have authority ranging only from its current target to its current target plus `sz` bytes (which must be within `a`'s authority). +* `CapPtr capptr_bound(CapPtr a, size_t sz)` spatially bounds the pointer `a` to have authority ranging only from its current target to its current target plus `sz` bytes (which must be within `a`'s authority). No imprecision in authority is permitted. - The `obounds` annotation is required to be either strictly higher authority than `CBAlloc` or `CBChunkE`, and the bounds annotations must obey `capptr_is_bounds_refinement`. + The bounds annotations must obey `capptr_is_spatial_refinement`: the spatial dimension may change, but the others must be constant. -* `CapPtr capptr_rebound(CapPtr a, CapPtr p)` is the *architectural primitive* enabling the software amplification mechanism. - It combines the authority of `a` and the current target of `p`. - The result may be safely dereferenced iff `a` authorizes access to `p`'s target. - The simplest sufficient (but not necessary) condition to ensure safety is that authority of `a` is a superset of the authority of `p` and `p` points within its authority. +Ultimately, all address space manipulated by `snmalloc` comes from its Platform's primitive allocator. +An **arena** is a region returned by that provider. +The "large `Range`" `Pipe` serves to carve up `Arena`s; `Arena` pointers become `Chunk`s in the `backend`'s `alloc_chunk`. +`snmalloc`'s (new, as of `snmalloc2`) heap layouts ensure that metadata associated with any object are reachable through globals, meaning no explicit amplification is required. ## Primitive Platform Operations -* `CapPtr capptr_export(CapPtr f)` applies any additional platform constraints required before handing permissions out to the client. -On CheriBSD, specifically, this strips the `VMMAP` software permission, ensuring that clients cannot have the kernel manipulate heap pages. +* `CapPtr capptr_to_user_address_control(CapPtr f)` sheds authority over the address space from the `CapPtr`, on platforms where that is possible. 
+On CheriBSD, specifically, this strips the `VMAP` software permission, ensuring that clients cannot have the kernel manipulate heap pages. +The annotation `Bout` is *computed* as a function of `Bin`. In future architectures, this is increasingly likely to be a no-op. -The annotation `BO` is *computed* as a function of `BI`, which must be `CBChunk` or `CBAlloc`. + +## Backend-Provided Operations + +* `CapPtr capptr_domesticate(LocalState *, CapPtr ptr)` allows the backend to test whether `ptr` is sensible, by some definition thereof. +The annotation `Bout` is *computed* as a function of `Bin`. +`Bin` is required to be `Wild`, and `Bout` is `Tame` but otherwise identical. ## Constructed Operators -* `capptr_bound_chunkd` and `capptr_chunk_from_chunkd` manage the construction and elimination of `CapPtr` pointers. +* `capptr_from_client` wraps a `void *` as a `capptr::AllocWild` to mark it as unverified. + +* `capptr_chunk_is_alloc` converts a `capptr::ChunkUser` to a `capptr::Alloc` without computational effect; it is intended to ease auditing. + +* `capptr_reveal` converts a `capptr::Alloc` to a `void*`, annotating where we mean to return a pointer to the user. -* `capptr_chunk_is_alloc` converts a `CapPtr` to a `CapPtr` unsafely; it is intended to ease auditing. +* `capptr_reveal_wild` converts a `capptr::AllocWild` to a `void*`, annotating where we mean to return a *wild* pointer to the user (in `external_pointer`, e.g., where the result is just an offset of the user's pointer). -* `capptr_reveal` converts a `CapPtr` to a `void*`. +## Metadata Bounds Handling -## Amplification +We presently envision three policies for handling metadata: -The `AddressSpaceManager` now exposes a method with signature `CapPtr capptr_amplify(CapPtr p)` which uses `capptr_rebound` to construct a pointer targeting `p`'s target but bearing the authority of the primordial allocation granule (as provided by the kernel) containing this address. -This pointer can be used to reach the `Allocslab` metadata associated with `p` (and a good bit more, besides!). +1. Metadata kept within `snmalloc` is always `Arena`-bounded; metadata handed to the user (the `Allocator`s themselves) are exactly bounded as `Alloc`. + Recycling internal metadata needs no amplification, and, as `Allocator`s are never deallocated, there is never a need to amplify an `Allocator*` back to a `Chunk`- or `Arena`-bounded pointer. + +2. All metadata is exactly bounded as `Alloc`. + Here, the "small `Range`" `Pipe` will require the ability to amplify back to `Chunk`- or `Arena`-bounded pointers when internal metadata is recycled. + We believe this is straightforwardly possible using a "provenance capturing `Range`" above the outermost `Range` in the small `Range` `Pipe`. + +3. No metadata is handed to the user; instead, opaque handles to `Allocator`s are given out. + (CHERI sealed capabilities are an excellent candidate for such handles, but are beyond the scope of this document.) + Here, it is no longer essential to bound any metadata pointers at all, though we may find it useful as a defence in depth. + + +**At the time of this writing, policy 1 is in effect**; pointers to `Allocator`s are bounded only at the periphery of `snmalloc`. # Endnotes -[^mmu-perms] Pointer authority generally *intersects* with MMU-based authorization. +[^mmu-perms]: Pointer authority generally *intersects* with MMU-based authorization. 
 For example, software using a pointer with both write and execute authority will still find that it cannot write to pages considered read-only by the MMU nor will it be able to execute non-executable pages.
-Generally speaking, `snmalloc` requires only read-write access to memory it manages and merely passes through other permissions, with the exception of *vmmap*, which it removes from any pointer it returns.
+Generally speaking, `snmalloc` requires only read-write access to memory it manages and merely passes through other permissions, with the exception of *vmem*, which it removes from any pointer it returns.
 
-[^amplifier-state] As we are largely following the fat pointer model and its evolution into CHERI capabilities, we achieve amplification through a *stateful*, *software* mechanism, rather than an architectural mechanism.
+[^amplifier-state]: As we are largely following the fat pointer model and its evolution into CHERI capabilities, we achieve amplification through a *stateful*, *software* mechanism, rather than an architectural mechanism.
 Specifically, the amplification mechanism will retain a superset of any authority it may be asked to reconstruct.
 There have, in times past, been capability systems with architectural amplification (e.g., HYDRA's type-directed amplification), but we believe that future systems are unlikely to adopt this latter approach, necessitating the changes we propose below.
 
-[^bounds-precision] `StrictProvenance` architectures have historically differed in the precision with which authority can be represented.
+[^bounds-precision]: `StrictProvenance` architectures have historically differed in the precision with which authority can be represented.
 Notably, it may not be possible to achieve byte-granular authority boundaries at every size scale.
 In the case of CHERI specifically, `snmalloc`'s size classes and its alignment policies are already much coarser than existing architectural requirements for representable authority on all existing implementations.
-
-
diff --git a/docs/security/FreelistProtection.md b/docs/security/FreelistProtection.md
new file mode 100644
index 000000000..9263100b3
--- /dev/null
+++ b/docs/security/FreelistProtection.md
@@ -0,0 +1,130 @@
+# Protecting meta-data
+
+Corrupting an allocator's meta-data is a common pattern for increasing the power of use-after-free or out-of-bounds write vulnerabilities.
+If you can corrupt the allocator's meta-data, then you can take a control gadget in one part of a system, and use it to affect other parts of the system.
+There are various approaches to protecting allocator meta-data; the most common are:
+
+* make the allocator meta-data hard to find through randomisation
+* use completely separate ranges of memory for meta-data and allocations
+* surround meta-data with guard pages
+* add some level of encryption/checksumming
+
+With the refactoring of the page table ([described earlier](./VariableSizedChunks.md)), we can put all the slab meta-data in completely separate regions of memory from the allocations.
+We maintain this separation over time, and never allow memory that has been used for allocations to become meta-data and vice versa.
+Within the meta-data regions, we add randomisation to make the data hard to find, and add large guard regions around the meta-data.
+By using completely separate regions of memory for allocations and meta-data we ensure that no dangling allocation can refer to current meta-data.
+This is particularly important for CHERI as it means a UAF cannot be used to corrupt allocator meta-data.
+
+But there is one super important bit that still remains: free lists.
+
+## What are free lists?
+
+Many allocators chain together unused allocations into a linked list.
+This is remarkably space efficient, as it doesn't require meta-data proportional to the number of allocations on a slab.
+The disused objects can be used in either a linked stack or queue.
+However, the key problem is that neither randomisation nor guard pages can be used to protect this _in-band_ meta-data.
+
+In snmalloc, we have introduced a novel technique for protecting this data.
+
+## Protecting a free queue.
+
+The idea is remarkably simple: a doubly linked list is far harder to corrupt than a singly linked list, because you can check its invariant:
+```
+  x.next.prev == x
+```
+In every kind of free list in snmalloc, we encode both the forward and backward pointers in our lists.
+For the forward direction, we use an [involution](https://en.wikipedia.org/wiki/Involution_(mathematics)), `f`, such as XORing a randomly chosen value:
+```
+  f(a) = a XOR k0
+```
+For the backward direction, we use a more complex, two-argument function
+```
+  g(a, b) = (a XOR k1) * (b XOR k2)
+```
+where `k1` and `k2` are two randomly chosen 64-bit values.
+The encoded back pointer of the node after `x` in the list is `g(x, f(x.next))`, which gives a value that is hard to forge and still encodes the back edge relationship.
+
+As we build the list, we add this value to the disused object, and when we consume the free list later, we check that the value is correct.
+Importantly, the order of construction and consumption has to be the same, which means we can only use queues, and not stacks.
+
+The checks give us a way to detect that the list has not been corrupted.
+In particular, use-after-free or out-of-bounds writes to either the `next` or `prev` value are highly likely to be detected later.
+
+## Double free protection
+
+This encoding also provides great double free protection.
+If you free twice, it will corrupt the `prev` pointer, and thus when we come to reallocate that object later, we will detect the double free.
+The following animation shows the effect of a double free:
+
+![Double free protection example](./data/doublefreeprotection.gif)
+
+This is a weak protection as it is lazy, in that only when the object is reused will snmalloc raise an error, so a `malloc` can fail due to a double free, but we are only aiming to make exploits harder; this is not a bug finding tool.
+
+
+## Where do we use this?
+
+Everywhere we link disused objects, so (1) per-slab free queues and (2) per-allocator message queues for returning freed allocations to other threads.
+Originally, snmalloc used queues for returning memory to other threads.
+We had to refactor the per-slab free lists to be queues rather than stacks, but that is fairly straightforward.
+The code for the free lists can be found here:
+
+[Code](https://github.com/microsoft/snmalloc/blob/main/src/snmalloc/mem/freelist.h)
+
+The idea could easily be applied to other allocators, and we're happy to discuss this.
+
+## Finished assembly
+
+So let's look at what costs we incur from this.
+There are bits that are added to both creating the queues and taking elements from the queues.
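+Before looking at the generated code, here is a simplified C++ sketch of the scheme described above; names and layout are illustrative, and the real implementation lives in `src/snmalloc/mem/freelist.h`:
+
+```C++
+#include <cstdint>
+#include <cstdlib>
+
+using address_t = uintptr_t;
+
+// Illustrative only: k0, k1 and k2 stand for the randomly chosen keys.
+struct FreeObject
+{
+  address_t next_encoded; // f(next) = next XOR k0
+  address_t signed_prev;  // g(prev, f(prev.next))
+};
+
+struct FreeQueue
+{
+  address_t k0, k1, k2;
+
+  address_t f(address_t a) const { return a ^ k0; }
+  address_t g(address_t a, address_t b) const { return (a ^ k1) * (b ^ k2); }
+
+  // Building the queue: link `next` after `cur` and sign the back edge.
+  void enqueue(FreeObject* cur, FreeObject* next)
+  {
+    cur->next_encoded = f(reinterpret_cast<address_t>(next));
+    next->signed_prev = g(reinterpret_cast<address_t>(cur), cur->next_encoded);
+  }
+
+  // Consuming the queue: decode `next` and check its back edge.
+  FreeObject* dequeue_next(FreeObject* cur)
+  {
+    auto* next = reinterpret_cast<FreeObject*>(f(cur->next_encoded));
+    if (next->signed_prev !=
+        g(reinterpret_cast<address_t>(cur), cur->next_encoded))
+      abort(); // corruption or double free detected
+    return next;
+  }
+};
+```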
+Here we show the assembly for taking from a per-slab free list, which is integrated into the fast path of allocation: +```x86asm +: + lea rax,[rdi-0x1] # Check for small size class + cmp rax,0xdfff # | zero is considered a large size + ja SLOW_SIZE # | to remove from fast path. + shr rax,0x4 # Lookup size class in table + lea rcx,[size_table] # | + movzx edx,BYTE PTR [rax+rcx*1] # | + mov rdi,rdx #+Caclulate index into free lists + shl rdi,0x4 #+| (without checks this is a shift by + # | 0x3, and can be fused into an lea) + mov r8,QWORD PTR [rip+0xab9b] # Find thread local allocator state + mov rcx,QWORD PTR fs:0x0 # | + add rcx,r8 # | + add rcx,rdi # Load head of free list for size class + mov rax,QWORD PTR fs:[r8+rdi*1] # | + test rax,rax # Check if free list is empty + je SLOW_PATH_REFILL # | + mov rsi,QWORD PTR fs:0x0 # Calculate location of free list structure + add rsi,r8 # | rsi = fs:[r8] + mov rdx,QWORD PTR fs:[r8+0x2e8] #+Load next pointer key + xor rdx,QWORD PTR [rax] # Load next pointer + prefetcht0 BYTE PTR [rdx] # Prefetch next object + mov QWORD PTR [rcx],rdx # Update head of free list + mov rcx,QWORD PTR [rax+0x8] #+Check signed_prev value is correct + cmp rcx,QWORD PTR fs:[r8+rdi*1+0x8] #+| + jne CORRUPTION_ERROR #+| + lea rcx,[rdi+rsi*1] #+Calculate signed_prev location + add rcx,0x8 #+| rcx = fs:[r8+rdi*1+0x8] + mov rsi,QWORD PTR fs:[r8+0x2d8] #+Calculate next signed_prev value + add rsi,rax #+| + add rdx,QWORD PTR fs:[r8+0x2e0] #+| + imul rdx,rsi #+| + mov QWORD PTR [rcx],rdx #+Store signed_prev for next entry. + ret +``` +The extra instructions specific to handling the checks are marked with `+`. +As you can see the fast path is about twice the length of the fast path without protection, but only adds a single branch to the fast path, one multiplication, five additional loads, and one store. +The loads only involve one additional cache line for key material. +Overall, the cost is surprisingly low. + +Note: the free list header now contains the value that `prev` should contain, which leads to slightly worse x86 codegen. +For instance the checks introduce `shl rdi,0x4`, which was previously fused with an `lea` instruction without the checks. + +## Conclusion + +This approach provides a strong defense against corruption of the free lists used in snmalloc. +This means all inline meta-data has corruption detection. +The check is remarkably simple for building double free detection, and has far lower memory overhead compared to using an allocation bitmap. + +[Next we show how to randomise the layout of memory in snmalloc, and thus make it harder to guess relative address of a pair of allocations.](./Randomisation.md) diff --git a/docs/security/GuardedMemcpy.md b/docs/security/GuardedMemcpy.md new file mode 100644 index 000000000..a871042b8 --- /dev/null +++ b/docs/security/GuardedMemcpy.md @@ -0,0 +1,151 @@ +# Providing a guarded memcpy + +Out of bounds errors are a serious problem for systems. +We did some analysis of the Microsoft Security Response Center data to look at the out-of-bounds heap corruption, and found a common culprit: `memcpy`. +Of the OOB writes that were categorised as leading to remote code execution (RCE), 1/3 of them had a block copy operation like memcpy as the initial source of corruption. +This makes any mitigation to `memcpy` extremely high-value. + +Now, if a `memcpy` crosses a boundary of a `malloc` allocation, then we have a well-defined error in the semantics of the program. +No sensible program should do this. 
+So let's see how we detect this with snmalloc.
+
+
+## What is `memcpy`?
+
+So `memcpy(dst, src, len)` copies `len` bytes from `src` to `dst`.
+For this to be valid, we can check:
+```
+  if (src is managed by snmalloc)
+    check(remaining_bytes(src) >= len)
+  if (dst is managed by snmalloc)
+    check(remaining_bytes(dst) >= len)
+```
+Now, the first `if` is checking for reading beyond the end of the source object, and the second is checking for writing beyond the end of the destination object.
+By default, for release checks we only check that `dst` is big enough.
+
+
+## How can we implement `remaining_bytes`?
+
+In the previous [page](./VariableSizedChunks.md), we discussed how we enable variable sized slabs.
+Let's consider how that representation enables us to quickly find the start/end of any object.
+
+All slab sizes are powers of two, and a given slab's lowest address will be naturally aligned for the slab's size.
+(For brevity, slabs are sometimes said to be "naturally aligned (at) powers of two".)
+That is, if `x` is the start of a slab of size `2^n`, then `x % (2^n) == 0`.
+This means that a single mask can be used to find the offset into a slab.
+As the objects are laid out contiguously, we can also get the offset in the object with a modulus operation; that is, `remaining_bytes(p)` is effectively:
+```
+  object_size - ((p % slab_size) % object_size)
+```
+
+Well, as anyone will tell you, division/modulus on a fast path is a non-starter.
+The first modulus is easy to deal with: we can replace `% slab_size` with a bit-wise mask.
+However, as `object_size` can be a non-power-of-two value, we need to work a little harder.
+
+## Reciprocal division to the rescue
+
+When you have a finite domain, you can lower divisions into a multiply and shift.
+By pre-calculating `c = (((2^n) - 1)/size) + 1`, the division `x / size` can instead be computed by
+```
+  (x * c) >> n
+```
+The choice of `n` has to be done carefully for the possible values of `x`, but with a large enough `n` we can make this work for all slab offsets and sizes.
+
+Now, from the division, we can calculate the modulus by multiplying the result of the division
+by the size, and then subtracting that from the original value:
+```
+  x - (((x * c) >> n) * size)
+```
+and thus `remaining_bytes(x)` is:
+```
+  (((x * c) >> n) * size) + size - x
+```
+
+There is a great article that explains this in more detail by [Daniel Lemire](https://lemire.me/blog/2019/02/20/more-fun-with-fast-remainders-when-the-divisor-is-a-constant/).
+
+Making sure you have everything correct is tricky, but thankfully computers are fast enough to check all possibilities.
+In snmalloc, we have a test program that verifies, for all possible slab offsets and all object sizes, that our optimised result is equivalent to the original modulus.
+
+We build the set of constants per sizeclass using `constexpr`, which enables us to determine the end of an object in a handful of instructions.
+
+## Non-snmalloc memory.
+
+The `memcpy` function is not just called on memory that is received from `malloc`.
+This means we need our lookup to work on all memory, and in the case where it is not managed by `snmalloc` to assume it is correct.
+We ensure that the `0` value in the chunk map is interpreted as an object covering the whole of the address space.
+This works for compatibility.
+
+To achieve this nicely, we map 0 to a slab that covers the whole of the address space, and consider there to be a single object in this space.
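+Putting these pieces together, a rough sketch of the computation (names are illustrative; the real constants are generated per sizeclass with `constexpr`, and the sizeclass comes from the chunk map lookup):
+
+```C++
+#include <cstddef>
+#include <cstdint>
+
+struct SizeClassInfo
+{
+  size_t size;         // object size for this sizeclass
+  uintptr_t slab_mask; // slab_size - 1, replaces `% slab_size`
+  uint64_t mod_mult;   // c = (((2^N) - 1) / size) + 1; 0 for unmanaged memory
+};
+
+constexpr unsigned N = 54;             // illustrative shift
+extern SizeClassInfo sizeclass_info[]; // entry 0 covers memory we do not manage
+
+size_t remaining_bytes(uintptr_t p, size_t sizeclass)
+{
+  auto& info = sizeclass_info[sizeclass];
+  uint64_t offset = p & info.slab_mask; // offset within the slab
+  // A 128-bit intermediate keeps the sketch simple and overflow-free.
+  uint64_t div = (uint64_t)(((__uint128_t)offset * info.mod_mult) >> N);
+  return (size_t)(div * info.size + info.size - offset);
+}
+```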
+This works by setting the reciprocal constant to 0, and then the division term is always zero. + +There is a second complication: `memcpy` can be called before `snmalloc` has been initialised. +So we need a check for this case. + +## Finished Assembly + +The finished assembly for checking the destination length in `memcpy` is: + +```x86asm +: + mov rax,QWORD PTR [rip+0xbfa] # Load Chunk map base + test rax,rax # Check if chunk map is initialised + je DONE # | + mov rcx,rdi # Get chunk map entry + shr rcx,0xa # | + and rcx,0xfffffffffffffff0 # | + mov rax,QWORD PTR [rax+rcx*1+0x8] # Load sizeclass + and eax,0x7f # | + shl rax,0x5 # | + lea r8,[sizeclass_meta_data] # | + mov rcx,QWORD PTR [rax+r8*1] # Load object size + mov r9,QWORD PTR [rax+r8*1+0x8] # Load slab mask + and r9,rdi # Offset within slab + mov rax,QWORD PTR [rax+r8*1+0x10] # Load modulus constant + imul rax,r9 # Perform recripocal modulus + shr rax,0x36 # | + imul rax,rcx # | + sub rcx,r9 # Find distance to end of object. + add rcx,rax # | + cmp rax,rdx # Compare to length of memcpy. + jb ERROR # | +DONE: + jmp +ERROR: + ud2 # Trap +``` + +## Performance + +We measured the overhead of adding checks to various sizes of `memcpy`s. +We did a batch of 1000 `memcpy`s, and measured the time with and without checks. +The benchmark code can be found here: [Benchmark Code](../../src/test/perf/memcpy/) + +![Performance graphs](./data/memcpy_perf.png) + +As you can see, the overhead for small copies can be significant (60% on a single byte `memcpy`), but the overhead rapidly drops and is mostly in the noise once you hit 128 bytes. + +When we actually apply this to more realistic examples, we can see a small overhead, which for many examples is not significant. +We compared snmalloc (`libsnmallocshim.so`) to snmalloc with just the checks enabled for bounds of the destination of the `memcpy` (`libsnmallocshim-checks-memcpy-only`) on the applications contained in mimalloc-bench. +The results of this comparison are in the following graph: + +![Performance Graphs](./data/perfgraph-memcpy-only.png) + +The worst regression is for `redis` with a 2-3% regression relative to snmalloc running without memcpy checks. +However, given that we this benchmark runs 20% faster than jemalloc, we believe the feature is able to be switched on for production workloads. + +## Conclusion + +We have an efficient check we can add to any block memory operation to prevent corruption. +The cost on small allocations will be higher due to the number of arithmetic instructions, but as the objects grow the overhead diminishes. +The memory overhead for adding checks is almost zero as all the dynamic meta-data was already required by snmalloc to understand the memory layout, and the small cost for lookup tables in the binary is negligible. + +The idea can easily be applied to other block operations in libc, we have just done `memcpy` as a proof of concept. +If the feature is tightly coupled with libc, then an initialisation check could also be removed improving the performance. + +[Next, we look at how to defend the internal structures of snmalloc against corruption due to memory safety violations.](./FreelistProtection.md) + + +# Thanks + +The research behind this has involved a lot of discussions with a lot of people. +We are particularly grateful to Andrew Paverd, Joe Bialek, Matt Miller, Mike Macelletti, Rohit Mothe, Saar Amar and Swamy Nagaraju for countless discussions on guarded memcpy, its possible implementations and applications. 
diff --git a/docs/security/README.md b/docs/security/README.md
new file mode 100644
index 000000000..8789009e5
--- /dev/null
+++ b/docs/security/README.md
@@ -0,0 +1,39 @@
+# Hardening snmalloc
+
+The key focus of the 0.6.0 release of snmalloc is security.
+This was inspired by a few different things coming together.
+
+First, we had been discussing various research on allocator hardening with the Microsoft Security Response Center.
+Saar Amar had been categorising exploits and what features an allocator should have.
+As part of this, we both realised that the existing structures of snmalloc made certain things hard to harden but, more interestingly, we had some ideas that could advance the state of the art.
+
+Secondly, we had been experimenting with adding support to snmalloc for [CHERI](http://www.chericpu.org).
+This support illustrated many places where snmalloc (like most allocators) does pointer tricks that go against the grain of CHERI.
+There were refactorings that would make CHERI support much more natural, but those refactorings were quite involved.
+Fortunately, they were the very refactorings we needed for the other allocator hardening research we wanted to conduct.
+
+The core aim of our refactoring for 0.6.0 is to provide hardening that can be switched on all the time, even in allocation-heavy workloads.
+We have been super keen to keep fast paths fast, and not lose the awesome performance.
+Here we illustrate the performance using the application benchmarks from mimalloc-bench:
+
+![Performance graph](./data/perfgraph.png)
+
+The primary comparison point in the graphs is to show the introduced overheads of the checks by comparing `sn-0.6.0` with `sn-0.6.0-checks`.
+Here you can see that switching the hardening on leads to regressions under 5%. This was run on a 72-core VM in Azure, with each benchmark repeated 20 times.
+
+We have also included a few other allocators.
+Firstly, [jemalloc](https://github.com/jemalloc/jemalloc) v5.2.1 (labelled `je`) as a baseline for a world-class allocator, and then two secure allocators: [mimalloc](https://github.com/microsoft/mimalloc) v1.7.6 with its security features enabled (labelled `mi-secure`), and [SCUDO](https://www.llvm.org/docs/ScudoHardenedAllocator.html) (commit hash bf0bcd5e, labelled `scudo`).
+The precise hardening features in these allocators are different from those in snmalloc, hence the performance is not directly comparable.
+We present them to show that the hardenings in snmalloc hit a lower performance penalty.
+
+To really understand the performance/security trade-off, you need to understand the hardening features we have implemented. We have a series of short explanations of these mechanisms, and of the protections we get:
+
+* [Enabling variable sized slabs](./VariableSizedChunks.md)
+* [Enabling guarded `memcpy`](./GuardedMemcpy.md)
+* [Protecting free lists from user corruption](./FreelistProtection.md)
+* [Randomisation of allocations](./Randomisation.md)
+
+To try out the hardening features of snmalloc on ELF platforms (e.g., Linux, BSD), you can simply [build](../BUILDING.md) and then preload with:
+```
+LD_PRELOAD=[snmalloc_build]/libsnmalloc-checks.so ./my_app
+```
diff --git a/docs/security/Randomisation.md b/docs/security/Randomisation.md
new file mode 100644
index 000000000..dc08df947
--- /dev/null
+++ b/docs/security/Randomisation.md
@@ -0,0 +1,69 @@
+# Randomisation
+
+The relative allocation pattern of objects can also be used to increase the power of an exploit.
+This is a weak defence as spraying can defeat pretty much any randomisation, so this is just a case of doing enough to raise the bar.
+
+There are three things we randomise about the allocation pattern in snmalloc:
+
+* Initial order of allocations on a slab
+* Subsequent order of allocations on a slab
+* When we consume all allocations on a slab
+
+## Initial slab order
+
+We build the initial order of allocation using a classic algorithm for building a permutation of a set.
+When I started writing this code, I remembered my undergraduate lectures on creating a permutation using a Fisher–Yates shuffle.
+Unfortunately, I couldn't find my very old notes, so I had to use Wikipedia to refresh my knowledge.
+
+After reading Wikipedia, I realised I actually wanted Sattolo's algorithm for generating a cyclic permutation using the "inside-out" algorithm.
+This algorithm builds a cyclic permutation of a set, which is exactly what we need to build all possible free lists.
+Using the "inside-out" algorithm gives much better cache performance.
+
+The algorithm is:
+```C++
+  object[0].next = &(object[0]); // 1 element cycle
+  for (i = 1; i < n; i++)
+  {
+    auto j = random(0, i-1);   // Pick element in cycle
+    // Cyclic list insert of i after j
+    object[i].next = object[j].next;
+    object[j].next = &(object[i]);
+  }
+  auto end_index = random(0,n-1);  // Select last element of cycle.
+  auto start = object[end_index].next; // Find start
+  object[end_index].next = nullptr; // Terminate list
+```
+When this completes, you are guaranteed that `start` will be a list where `next` takes you through all the other elements.
+
+Now, to generate all possible free lists with equal probability, `random` has to be a uniform distribution, but that is prohibitively expensive.
+Here we cut a corner and approximate the distribution for performance.
+
+Another complexity is that, to build the protected free list from the previous blog post, we actually require a second pass over this list, as we cannot build the back edges until we know the order of the list.
+
+## Preserving randomness
+
+We have an amazing amount of randomness within a slab, but that becomes predictable if we can't introduce more entropy as the system runs.
+To address this, we actually build a pair of free-queues for each slab.
+
+Each slab has two free-queues: when we deallocate an object, we use a cheap coin flip to decide which queue to add the element to.
+When we want a new free-queue to start allocating from, we take the longer of the free-queues from the meta-data and use that in our thread-local allocator.
+
+## Almost full slabs
+
+Now the two randomisations above make relative addresses hard to guess, but those alone do not prevent it being easy to predict when a slab will be full.
+We use two mechanisms to handle this:
+
+* Only consider a slab for reuse when it has a certain percentage of free elements
+* If there is a single slab that can currently be used, use a random coin flip to decide whether we allocate a new slab instead of using the existing slab.
+
+These two mechanisms are aimed at making it hard to allocate an object that is, with high probability, adjacent to another allocated object.
+This is important for using the free-queue protection to catch various corruptions.
+
+
+## Improving protection
+
+Now, the free-queue protection with randomisation will make exploits considerably harder, but it will not catch all corruptions.
+## Improving protection
+
+Now, the free-queue protection with randomisation will make exploits considerably harder, but it will not catch all corruptions.
+We have been working on adding support for both CHERI and memory tagging to snmalloc, which are more comprehensive defences against memory corruption.
+Our aim with the hardening of snmalloc has been to provide something that can be always on in production.
+
+[Now that we have explained the various hardening concepts, you are better placed to judge the performance we achieve.](./README.md)
diff --git a/docs/security/VariableSizedChunks.md b/docs/security/VariableSizedChunks.md
new file mode 100644
index 000000000..001b3c831
--- /dev/null
+++ b/docs/security/VariableSizedChunks.md
@@ -0,0 +1,97 @@
+# Supporting variable sized slabs
+
+Before we explain the hardening features, we need to give a bit of background on how snmalloc is structured.
+In snmalloc, we have effectively two layers of allocation:
+
+1) an underlying allocator that returns power-of-two sized, naturally aligned blocks of memory, called chunks
+2) a slab allocator layered on top of the chunk allocator
+
+Large allocations are served directly by the chunk allocator, and small allocations through slabs.
+
+## What is a slab?
+
+A slab is a naturally aligned, power-of-two sized chunk split into a series of allocations of exactly the same size.
+For instance, a 16KiB slab could be split into 341 48-byte allocations, with 16 bytes left unused at the end.
+
+## What size should a slab be?
+
+Finding a new slab is inherently going to be a slower path than just allocating an object.
+So we want a slab size that lets all our common allocation sizes fit onto a slab multiple times.
+But making slabs larger means that we can potentially waste a lot of space for small allocations.
+
+In our redesign of snmalloc, we allow multiple slab sizes so that we can ensure a minimum number of allocations on a slab.
+The rest of the article describes how we achieve this while efficiently accessing the meta-data associated with a slab.
+
+## Finding meta-data quickly
+
+Allocators must map allocations to their associated meta-data.
+There are two common approaches for locating this meta-data:
+
+* At a specified, aligned position relative to the current pointer
+* In a global map
+
+Most allocators use some combination of both.
+In the original snmalloc design we had a concept of a superslab, where the first part represented the meta-data for all the slabs contained in the superslab.
+A superslab was initially 16MiB, with the first 64KiB treated specially as it contained meta-data.
+There was then a global map specifying whether or not memory was a superslab; that map kept one byte per 16MiB of address space.
+
+This worked well for fixed slab sizes, but the granularity was hard-coded.
+
+## Chunk map representation
+
+In snmalloc 0.6.0, we are using a two-level global map.
+The top-level entries each contain two pointers (with other fields and flags bit-packed into known-zero bits).
+For a region of memory being used as a slab, its top-level entry contains:
+
+* the sizeclass of the memory in the chunk (it may be either part of a large allocation, or a slab of small allocations)
+* which allocator is responsible for this memory
+* a pointer to the associated second-level entry of the map.
+  A given second-level entry may be pointed to by a contiguous span of one or more top-level entries.
+
+This representation allows multiple 16KiB chunks of memory to share the same meta-data.
+For instance:
+
+![ChunkMap](./data/ChunkMap.png)
+
+This illustrates how a 32KiB slab, a 64KiB slab, and a 16KiB slab would be represented.
+The first (yellow) has two contiguous entries in the chunk map, the second (blue) has four, and the final (green) has a single entry.
+
+This representation means we can find the meta-data for any slab in a handful of instructions.
+(Unlike the original design, this does not need any branching on the particular size of the slab.)
+
+```C++
+  SlabMetadata* get_slab_metadata(address_t addr)
+  {
+    return chunk_map[addr >> CHUNK_BITS].meta;
+  }
+```
+
+By having a shared `SlabMetadata` across all the entries for a slab, we can have a single free list that covers the whole slab.
+This is quite important, as it means our deallocation fast path can handle multiple slab sizes without branching, while retaining the granularity each size requires.
+
+The following annotated asm snippet covers the fast path for deallocation:
+```x86asm
+:
+ mov rax,rdi
+ mov rcx,QWORD PTR [rip+0x99a6]    # TLS OFFSET for allocator
+ mov rdx,QWORD PTR [rip+0x6df7]    # Chunk Map root
+ shr rdi,0xa                       # Calculate chunk map entry
+ and rdi,0xfffffffffffffff0        # |
+ lea rsi,[rdx+rdi*1]               # |
+ mov rdx,QWORD PTR [rdx+rdi*1+0x8] # Load owning allocator
+ mov rdi,rdx                       # |
+ and rdi,0xffffffffffffff80        # |
+ cmp QWORD PTR fs:[rcx+0x1a0],rdi  # Check if allocator is current one
+ jne REMOTE_DEALLOCATION           # Slow path remote deallocation
+ mov rdx,QWORD PTR [rsi]           # Get SlabMetadata
+ mov rdi,QWORD PTR [rdx+0x18]      # Add to free list
+ mov QWORD PTR [rdi],rax           # |
+ mov QWORD PTR [rdx+0x18],rax      # |
+ add WORD PTR [rdx+0x28],0xffff    # Decrement count to slow path
+ je SLOW_PATH                      # Check if more complex slab management is required.
+ ret
+```
+
+As you can see, this representation gives a very compact code sequence for deallocation that handles multiple slab sizes.
+It also means the majority of meta-data can be stored away from the memory space it is describing.
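+To make the chunk map representation above concrete, here is a simplified sketch of how a slab spanning several chunks could be registered, so that every address it covers resolves to the same `SlabMetadata`.
+The field and function names are illustrative assumptions rather than snmalloc's actual code, which bit-packs these fields into two pointer-sized words per entry:
+
+```C++
+#include <cstddef>
+#include <cstdint>
+
+struct SlabMetadata; // shared per-slab state (free queues, counts, ...)
+
+// Illustrative top-level entry of the two-level map.
+struct ChunkMapEntry
+{
+  size_t sizeclass;       // size class of the slab or large allocation
+  void* owning_allocator; // allocator responsible for this memory
+  SlabMetadata* meta;     // shared by every entry the slab spans
+};
+
+constexpr size_t CHUNK_BITS = 14; // 16KiB chunks (assumed)
+constexpr size_t CHUNK_SIZE = size_t(1) << CHUNK_BITS;
+
+// One entry per chunk of address space.
+extern ChunkMapEntry chunk_map[];
+
+// Register a slab covering [base, base + slab_size): every top-level
+// entry it spans points at the same SlabMetadata, so the lookup in
+// get_slab_metadata above needs no branching on the slab size.
+void register_slab(uintptr_t base, size_t slab_size, size_t sizeclass,
+                   void* owner, SlabMetadata* meta)
+{
+  for (uintptr_t a = base; a < base + slab_size; a += CHUNK_SIZE)
+  {
+    ChunkMapEntry& entry = chunk_map[a >> CHUNK_BITS];
+    entry.sizeclass = sizeclass;
+    entry.owning_allocator = owner;
+    entry.meta = meta;
+  }
+}
+```
+
+For example, the 64KiB slab (blue) in the figure above fills four consecutive top-level entries, all pointing at a single shared `SlabMetadata`.
+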
+[Next, we discuss how we can capitalise on this meta-data representation to provide an efficient checked memcpy.](./GuardedMemcpy.md) diff --git a/docs/security/data/ChunkMap.png b/docs/security/data/ChunkMap.png new file mode 100644 index 000000000..cb3a703e9 Binary files /dev/null and b/docs/security/data/ChunkMap.png differ diff --git a/docs/security/data/benchres.csv b/docs/security/data/benchres.csv new file mode 100644 index 000000000..98bec8734 --- /dev/null +++ b/docs/security/data/benchres.csv @@ -0,0 +1,1281 @@ +, +barnes, mi, 02.90, 66712, 2.88, 0.01, 0, 2461 +barnes, sn-0.6.0-full-checks, 02.91, 65716, 2.88, 0.02, 0, 2863 +barnes, sn-0.6.0, 02.87, 65508, 2.85, 0.01, 0, 2838 +barnes, sn-0.5.3, 02.87, 70068, 2.84, 0.02, 0, 2518 +barnes, sn-0.6.0-memcpy-checks, 02.89, 65608, 2.88, 0.01, 0, 2837 +barnes, je, 02.90, 76652, 2.87, 0.02, 0, 2550 +barnes, scudo, 02.99, 61892, 2.94, 0.04, 0, 4270 +barnes, smi, 03.02, 66728, 3.00, 0.01, 0, 2657 +espresso, mi, 05.27, 8220, 5.25, 0.02, 0, 174 +espresso, sn-0.6.0-full-checks, 05.35, 12640, 5.30, 0.04, 0, 744 +espresso, sn-0.6.0, 05.19, 6216, 5.16, 0.03, 0, 654 +espresso, sn-0.5.3, 05.21, 10244, 5.19, 0.01, 0, 410 +espresso, sn-0.6.0-memcpy-checks, 05.20, 6256, 5.19, 0.00, 0, 657 +espresso, je, 05.54, 12184, 5.50, 0.03, 0, 322 +espresso, scudo, 06.07, 4940, 6.05, 0.02, 0, 645 +espresso, smi, 05.50, 6620, 5.48, 0.02, 0, 288 +z3, mi, 01.19, 71272, 1.18, 0.01, 0, 458 +z3, sn-0.6.0-full-checks, 01.18, 72428, 1.17, 0.01, 0, 773 +z3, sn-0.6.0, 01.17, 66184, 1.16, 0.01, 0, 734 +z3, sn-0.5.3, 01.18, 73508, 1.16, 0.01, 0, 563 +z3, sn-0.6.0-memcpy-checks, 01.19, 66128, 1.17, 0.02, 0, 738 +z3, je, 01.20, 65792, 1.18, 0.02, 0, 2770 +z3, scudo, 01.27, 56116, 1.24, 0.03, 0, 8681 +z3, smi, 01.26, 66104, 1.23, 0.02, 0, 3836 +gs, mi, 01.17, 57476, 1.13, 0.03, 0, 1660 +gs, sn-0.6.0-full-checks, 01.21, 54304, 1.18, 0.02, 0, 2018 +gs, sn-0.6.0, 01.18, 48280, 1.17, 0.01, 0, 1999 +gs, sn-0.5.3, 01.17, 56084, 1.16, 0.01, 0, 1873 +gs, sn-0.6.0-memcpy-checks, 01.17, 48512, 1.14, 0.03, 0, 2000 +gs, je, 01.20, 53216, 1.16, 0.04, 0, 3724 +gs, scudo, 01.21, 41248, 1.16, 0.05, 0, 17117 +gs, smi, 01.20, 56372, 1.16, 0.04, 0, 4550 +redis, mi, 4.357, 35112, 1.87, 0.32, 0, 8007 +redis, sn-0.6.0-full-checks, 4.435, 33628, 1.91, 0.32, 0, 8964 +redis, sn-0.6.0, 4.216, 30440, 1.74, 0.38, 0, 8556 +redis, sn-0.5.3, 4.348, 37168, 1.90, 0.28, 0, 7518 +redis, sn-0.6.0-memcpy-checks, 4.274, 30408, 1.75, 0.40, 0, 8544 +redis, je, 5.060, 36836, 2.21, 0.32, 1, 6765 +redis, scudo, 5.252, 37900, 2.22, 0.42, 0, 9863 +redis, smi, 4.622, 35372, 1.97, 0.35, 0, 8035 +cfrac, mi, 06.37, 4508, 6.37, 0.00, 0, 184 +cfrac, sn-0.6.0-full-checks, 06.63, 3584, 6.63, 0.00, 0, 496 +cfrac, sn-0.6.0, 06.27, 3332, 6.27, 0.00, 0, 432 +cfrac, sn-0.5.3, 06.33, 8244, 6.32, 0.00, 0, 446 +cfrac, sn-0.6.0-memcpy-checks, 06.27, 3320, 6.27, 0.00, 0, 431 +cfrac, je, 06.64, 10056, 6.64, 0.00, 0, 273 +cfrac, scudo, 08.45, 4684, 8.45, 0.00, 0, 616 +cfrac, smi, 07.09, 4464, 7.09, 0.00, 0, 183 +leanN, mi, 25.62, 591256, 96.05, 1.07, 0, 200920 +leanN, sn-0.6.0-full-checks, 26.78, 681948, 102.76, 1.04, 0, 16260 +leanN, sn-0.6.0, 24.85, 545020, 95.77, 0.89, 0, 12529 +leanN, sn-0.5.3, 25.46, 546652, 96.19, 0.86, 0, 3333 +leanN, sn-0.6.0-memcpy-checks, 25.01, 545020, 96.33, 0.92, 0, 13126 +leanN, je, 26.06, 503092, 97.26, 1.13, 0, 171614 +leanN, scudo, 33.01, 593072, 120.15, 1.81, 0, 598360 +leanN, smi, 27.04, 624072, 96.26, 1.92, 0, 354731 +sed, mi, 01.74, 324400, 1.66, 0.07, 0, 402 +sed, sn-0.6.0-full-checks, 01.74, 349752, 
1.65, 0.08, 0, 1670 +sed, sn-0.6.0, 01.73, 342816, 1.64, 0.08, 0, 1449 +sed, sn-0.5.3, 01.73, 310148, 1.65, 0.08, 0, 682 +sed, sn-0.6.0-memcpy-checks, 01.72, 342792, 1.62, 0.10, 0, 1473 +sed, je, 01.74, 300292, 1.65, 0.08, 0, 8858 +sed, scudo, 01.81, 245512, 1.69, 0.11, 0, 60801 +sed, smi, 01.82, 317312, 1.71, 0.11, 0, 34437 +barnes, mi, 02.85, 66836, 2.84, 0.01, 0, 2464 +barnes, sn-0.6.0-full-checks, 02.91, 65620, 2.90, 0.01, 0, 2854 +barnes, sn-0.6.0, 02.86, 65508, 2.83, 0.02, 0, 2839 +barnes, sn-0.5.3, 02.84, 70132, 2.82, 0.01, 0, 2530 +barnes, sn-0.6.0-memcpy-checks, 02.91, 65628, 2.89, 0.02, 0, 2840 +barnes, je, 02.86, 76748, 2.84, 0.01, 0, 2547 +barnes, scudo, 02.92, 61480, 2.90, 0.02, 0, 4252 +barnes, smi, 02.91, 66940, 2.90, 0.01, 0, 2670 +espresso, mi, 05.19, 8316, 5.16, 0.03, 0, 174 +espresso, sn-0.6.0-full-checks, 05.27, 12760, 5.23, 0.04, 0, 739 +espresso, sn-0.6.0, 05.12, 6536, 5.09, 0.02, 0, 658 +espresso, sn-0.5.3, 05.13, 10240, 5.11, 0.02, 0, 411 +espresso, sn-0.6.0-memcpy-checks, 05.12, 6440, 5.09, 0.02, 0, 659 +espresso, je, 05.44, 9956, 5.44, 0.00, 0, 355 +espresso, scudo, 06.04, 4900, 6.01, 0.02, 0, 642 +espresso, smi, 05.48, 6740, 5.47, 0.01, 0, 288 +z3, mi, 01.20, 71076, 1.20, 0.00, 0, 455 +z3, sn-0.6.0-full-checks, 01.17, 70204, 1.16, 0.01, 0, 758 +z3, sn-0.6.0, 01.16, 66108, 1.14, 0.01, 0, 735 +z3, sn-0.5.3, 01.17, 73488, 1.15, 0.02, 0, 565 +z3, sn-0.6.0-memcpy-checks, 01.17, 65872, 1.16, 0.00, 0, 735 +z3, je, 01.17, 66016, 1.15, 0.02, 0, 2773 +z3, scudo, 01.26, 56052, 1.23, 0.02, 0, 8668 +z3, smi, 01.22, 65448, 1.20, 0.01, 0, 3699 +gs, mi, 01.16, 57396, 1.12, 0.03, 0, 1657 +gs, sn-0.6.0-full-checks, 01.19, 56340, 1.15, 0.03, 0, 1955 +gs, sn-0.6.0, 01.17, 48452, 1.15, 0.02, 0, 1998 +gs, sn-0.5.3, 01.19, 56076, 1.16, 0.02, 0, 1875 +gs, sn-0.6.0-memcpy-checks, 01.17, 48300, 1.14, 0.02, 0, 1997 +gs, je, 01.20, 53824, 1.16, 0.04, 0, 3729 +gs, scudo, 01.21, 41532, 1.17, 0.03, 0, 17160 +gs, smi, 01.19, 57232, 1.14, 0.05, 0, 4648 +redis, mi, 4.395, 35208, 1.88, 0.33, 0, 8001 +redis, sn-0.6.0-full-checks, 4.504, 33404, 1.72, 0.54, 0, 8904 +redis, sn-0.6.0, 4.241, 30464, 1.80, 0.34, 0, 8558 +redis, sn-0.5.3, 4.314, 37128, 1.78, 0.38, 0, 7346 +redis, sn-0.6.0-memcpy-checks, 4.317, 30544, 1.78, 0.39, 0, 8538 +redis, je, 5.109, 36816, 2.16, 0.41, 0, 6769 +redis, scudo, 5.209, 37820, 2.27, 0.34, 0, 9874 +redis, smi, 4.631, 35436, 1.95, 0.38, 0, 8036 +cfrac, mi, 06.33, 4460, 6.33, 0.00, 0, 188 +cfrac, sn-0.6.0-full-checks, 06.62, 3560, 6.62, 0.00, 0, 499 +cfrac, sn-0.6.0, 06.27, 3244, 6.27, 0.00, 0, 434 +cfrac, sn-0.5.3, 06.31, 8424, 6.30, 0.01, 0, 444 +cfrac, sn-0.6.0-memcpy-checks, 06.27, 3320, 6.27, 0.00, 0, 432 +cfrac, je, 06.64, 10072, 6.63, 0.00, 0, 273 +cfrac, scudo, 08.53, 4616, 8.53, 0.00, 0, 615 +cfrac, smi, 07.14, 4580, 7.14, 0.00, 0, 180 +leanN, mi, 25.83, 587052, 98.11, 1.28, 0, 163240 +leanN, sn-0.6.0-full-checks, 26.59, 650568, 102.61, 1.04, 0, 12623 +leanN, sn-0.6.0, 25.01, 517504, 97.71, 0.77, 0, 11406 +leanN, sn-0.5.3, 26.53, 537680, 101.52, 0.99, 0, 4183 +leanN, sn-0.6.0-memcpy-checks, 27.40, 529564, 110.40, 0.89, 0, 12793 +leanN, je, 25.99, 529828, 98.94, 1.02, 0, 106383 +leanN, scudo, 32.58, 602392, 117.53, 1.81, 0, 532564 +leanN, smi, 27.65, 631068, 99.97, 1.98, 0, 422694 +sed, mi, 01.74, 326500, 1.65, 0.08, 0, 401 +sed, sn-0.6.0-full-checks, 01.74, 347584, 1.63, 0.10, 0, 1694 +sed, sn-0.6.0, 01.73, 342784, 1.65, 0.07, 0, 1477 +sed, sn-0.5.3, 01.73, 310248, 1.64, 0.08, 0, 684 +sed, sn-0.6.0-memcpy-checks, 01.72, 342716, 1.61, 0.10, 0, 1452 +sed, je, 01.73, 
295420, 1.64, 0.08, 0, 9180 +sed, scudo, 01.82, 245464, 1.71, 0.10, 0, 60799 +sed, smi, 01.81, 315944, 1.67, 0.13, 0, 34063 +barnes, mi, 02.88, 66932, 2.86, 0.02, 0, 2462 +barnes, sn-0.6.0-full-checks, 02.86, 65748, 2.82, 0.03, 0, 2853 +barnes, sn-0.6.0, 02.90, 65604, 2.88, 0.02, 0, 2837 +barnes, sn-0.5.3, 02.85, 70108, 2.83, 0.02, 0, 2526 +barnes, sn-0.6.0-memcpy-checks, 02.86, 65628, 2.84, 0.01, 0, 2840 +barnes, je, 02.85, 76568, 2.83, 0.02, 0, 2545 +barnes, scudo, 02.94, 61888, 2.94, 0.00, 0, 4270 +barnes, smi, 02.94, 66856, 2.92, 0.02, 0, 2657 +espresso, mi, 05.17, 8328, 5.15, 0.01, 0, 176 +espresso, sn-0.6.0-full-checks, 05.28, 12604, 5.25, 0.03, 0, 730 +espresso, sn-0.6.0, 05.10, 6248, 5.06, 0.04, 0, 659 +espresso, sn-0.5.3, 05.14, 10212, 5.11, 0.02, 0, 411 +espresso, sn-0.6.0-memcpy-checks, 05.11, 6256, 5.08, 0.03, 0, 655 +espresso, je, 05.44, 9880, 5.41, 0.03, 0, 297 +espresso, scudo, 06.04, 5000, 6.02, 0.01, 0, 648 +espresso, smi, 05.48, 6640, 5.46, 0.02, 0, 288 +z3, mi, 01.21, 71316, 1.19, 0.01, 0, 462 +z3, sn-0.6.0-full-checks, 01.19, 72360, 1.18, 0.01, 0, 783 +z3, sn-0.6.0, 01.18, 66188, 1.16, 0.01, 0, 740 +z3, sn-0.5.3, 01.17, 73712, 1.16, 0.01, 0, 566 +z3, sn-0.6.0-memcpy-checks, 01.17, 66060, 1.15, 0.01, 0, 738 +z3, je, 01.19, 65880, 1.16, 0.03, 0, 2762 +z3, scudo, 01.26, 56160, 1.23, 0.02, 0, 8672 +z3, smi, 01.23, 65868, 1.21, 0.02, 0, 3826 +gs, mi, 01.17, 57272, 1.11, 0.05, 0, 1656 +gs, sn-0.6.0-full-checks, 01.18, 56336, 1.15, 0.02, 0, 2378 +gs, sn-0.6.0, 01.18, 48188, 1.15, 0.02, 0, 1998 +gs, sn-0.5.3, 01.15, 56044, 1.14, 0.01, 0, 1871 +gs, sn-0.6.0-memcpy-checks, 01.19, 48188, 1.16, 0.03, 0, 2000 +gs, je, 01.19, 53716, 1.16, 0.03, 0, 3723 +gs, scudo, 01.20, 41456, 1.16, 0.04, 0, 17153 +gs, smi, 01.20, 57060, 1.16, 0.03, 0, 4647 +redis, mi, 4.294, 35128, 1.84, 0.31, 0, 8032 +redis, sn-0.6.0-full-checks, 4.406, 33360, 1.83, 0.39, 0, 8930 +redis, sn-0.6.0, 4.254, 30504, 1.80, 0.34, 0, 8552 +redis, sn-0.5.3, 4.244, 37064, 1.74, 0.39, 0, 7619 +redis, sn-0.6.0-memcpy-checks, 4.257, 30400, 1.75, 0.39, 0, 8555 +redis, je, 5.021, 36832, 2.09, 0.43, 0, 6775 +redis, scudo, 5.238, 37884, 2.18, 0.45, 0, 9872 +redis, smi, 4.599, 35428, 1.93, 0.39, 0, 8050 +cfrac, mi, 06.33, 4568, 6.32, 0.00, 0, 186 +cfrac, sn-0.6.0-full-checks, 06.60, 3628, 6.60, 0.00, 0, 496 +cfrac, sn-0.6.0, 06.26, 3332, 6.26, 0.00, 0, 434 +cfrac, sn-0.5.3, 06.31, 8408, 6.31, 0.00, 0, 447 +cfrac, sn-0.6.0-memcpy-checks, 06.31, 3320, 6.31, 0.00, 0, 432 +cfrac, je, 06.72, 10136, 6.72, 0.00, 0, 270 +cfrac, scudo, 08.44, 4696, 8.44, 0.00, 0, 610 +cfrac, smi, 07.03, 4572, 7.02, 0.00, 0, 183 +leanN, mi, 25.89, 588932, 98.21, 1.16, 0, 165165 +leanN, sn-0.6.0-full-checks, 26.99, 655148, 105.01, 1.00, 0, 13196 +leanN, sn-0.6.0, 27.54, 544660, 111.08, 1.01, 0, 13274 +leanN, sn-0.5.3, 25.44, 547264, 96.18, 0.87, 0, 3596 +leanN, sn-0.6.0-memcpy-checks, 26.73, 542128, 106.45, 0.90, 0, 11923 +leanN, je, 26.89, 529516, 103.02, 1.26, 0, 157578 +leanN, scudo, 33.36, 589768, 121.28, 2.00, 0, 601688 +leanN, smi, 26.96, 620428, 96.80, 1.63, 0, 326027 +sed, mi, 01.73, 330532, 1.65, 0.08, 0, 405 +sed, sn-0.6.0-full-checks, 01.73, 349380, 1.65, 0.08, 0, 1545 +sed, sn-0.6.0, 01.72, 342796, 1.60, 0.11, 0, 1474 +sed, sn-0.5.3, 01.72, 310392, 1.65, 0.07, 0, 683 +sed, sn-0.6.0-memcpy-checks, 01.72, 342820, 1.65, 0.07, 0, 1451 +sed, je, 01.73, 293900, 1.64, 0.08, 0, 8280 +sed, scudo, 01.81, 245412, 1.70, 0.10, 0, 60796 +sed, smi, 01.82, 316752, 1.69, 0.12, 0, 34265 +barnes, mi, 02.85, 66912, 2.84, 0.00, 0, 2466 +barnes, sn-0.6.0-full-checks, 
02.87, 65740, 2.85, 0.02, 0, 2853 +barnes, sn-0.6.0, 02.86, 65672, 2.84, 0.02, 0, 2839 +barnes, sn-0.5.3, 02.86, 69948, 2.84, 0.01, 0, 2523 +barnes, sn-0.6.0-memcpy-checks, 02.86, 65628, 2.84, 0.01, 0, 2837 +barnes, je, 02.86, 76616, 2.84, 0.01, 0, 2548 +barnes, scudo, 02.92, 61692, 2.91, 0.01, 0, 4279 +barnes, smi, 02.95, 66820, 2.93, 0.02, 0, 2655 +espresso, mi, 05.17, 8276, 5.14, 0.02, 0, 177 +espresso, sn-0.6.0-full-checks, 05.27, 12648, 5.20, 0.06, 0, 740 +espresso, sn-0.6.0, 05.11, 6232, 5.09, 0.02, 0, 654 +espresso, sn-0.5.3, 05.11, 10280, 5.09, 0.02, 0, 406 +espresso, sn-0.6.0-memcpy-checks, 05.15, 6216, 5.10, 0.04, 0, 656 +espresso, je, 05.55, 9940, 5.52, 0.02, 0, 388 +espresso, scudo, 06.08, 4876, 6.03, 0.05, 0, 635 +espresso, smi, 05.49, 6684, 5.45, 0.03, 0, 285 +z3, mi, 01.19, 71344, 1.16, 0.02, 0, 460 +z3, sn-0.6.0-full-checks, 01.17, 70140, 1.14, 0.02, 0, 761 +z3, sn-0.6.0, 01.17, 66068, 1.15, 0.01, 0, 741 +z3, sn-0.5.3, 01.19, 73804, 1.17, 0.02, 0, 564 +z3, sn-0.6.0-memcpy-checks, 01.17, 66232, 1.15, 0.01, 0, 741 +z3, je, 01.18, 67784, 1.16, 0.02, 0, 2757 +z3, scudo, 01.26, 56312, 1.24, 0.01, 0, 8163 +z3, smi, 01.23, 66204, 1.21, 0.01, 0, 3834 +gs, mi, 01.16, 57488, 1.12, 0.04, 0, 1652 +gs, sn-0.6.0-full-checks, 01.18, 54004, 1.16, 0.02, 0, 1961 +gs, sn-0.6.0, 01.18, 48352, 1.14, 0.03, 0, 2001 +gs, sn-0.5.3, 01.16, 56260, 1.11, 0.04, 0, 1875 +gs, sn-0.6.0-memcpy-checks, 01.18, 48336, 1.15, 0.02, 0, 2000 +gs, je, 01.19, 55668, 1.15, 0.03, 0, 3715 +gs, scudo, 01.20, 41428, 1.15, 0.05, 0, 17159 +gs, smi, 01.20, 56748, 1.18, 0.01, 0, 4635 +redis, mi, 4.675, 35124, 1.81, 0.54, 0, 7996 +redis, sn-0.6.0-full-checks, 4.357, 33356, 1.85, 0.34, 0, 8977 +redis, sn-0.6.0, 4.199, 30520, 1.74, 0.36, 0, 8579 +redis, sn-0.5.3, 4.254, 37024, 1.80, 0.33, 0, 7116 +redis, sn-0.6.0-memcpy-checks, 4.236, 30356, 1.68, 0.45, 0, 8560 +redis, je, 5.026, 36816, 2.20, 0.32, 0, 6771 +redis, scudo, 5.156, 38016, 2.25, 0.34, 0, 9884 +redis, smi, 4.564, 35472, 1.90, 0.40, 0, 8051 +cfrac, mi, 06.32, 4460, 6.31, 0.00, 0, 179 +cfrac, sn-0.6.0-full-checks, 06.61, 3584, 6.61, 0.00, 0, 495 +cfrac, sn-0.6.0, 06.28, 3300, 6.28, 0.00, 0, 433 +cfrac, sn-0.5.3, 06.38, 8424, 6.37, 0.00, 0, 446 +cfrac, sn-0.6.0-memcpy-checks, 06.24, 3336, 6.24, 0.00, 0, 434 +cfrac, je, 06.62, 10056, 6.62, 0.00, 0, 271 +cfrac, scudo, 08.40, 4592, 8.39, 0.00, 0, 611 +cfrac, smi, 07.04, 4596, 7.03, 0.00, 0, 182 +leanN, mi, 26.06, 592944, 99.54, 1.21, 0, 241980 +leanN, sn-0.6.0-full-checks, 26.66, 675748, 101.94, 1.01, 0, 13980 +leanN, sn-0.6.0, 25.55, 535144, 99.90, 0.83, 0, 12792 +leanN, sn-0.5.3, 25.91, 536216, 98.51, 0.89, 0, 3475 +leanN, sn-0.6.0-memcpy-checks, 25.17, 533080, 97.46, 0.95, 0, 13412 +leanN, je, 26.95, 514740, 103.52, 1.21, 0, 204418 +leanN, scudo, 32.10, 628432, 114.89, 1.64, 0, 568793 +leanN, smi, 27.59, 635276, 100.36, 1.84, 2, 343670 +sed, mi, 01.72, 326452, 1.63, 0.09, 0, 404 +sed, sn-0.6.0-full-checks, 01.73, 347264, 1.64, 0.09, 0, 1571 +sed, sn-0.6.0, 01.73, 342908, 1.63, 0.09, 0, 1480 +sed, sn-0.5.3, 01.71, 310356, 1.63, 0.08, 0, 684 +sed, sn-0.6.0-memcpy-checks, 01.72, 342824, 1.62, 0.09, 0, 1453 +sed, je, 01.73, 295488, 1.67, 0.06, 0, 9192 +sed, scudo, 01.81, 245284, 1.67, 0.13, 0, 60797 +sed, smi, 01.80, 316116, 1.69, 0.11, 0, 34107 +barnes, mi, 02.85, 66800, 2.84, 0.01, 0, 2464 +barnes, sn-0.6.0-full-checks, 02.87, 65632, 2.86, 0.01, 0, 2853 +barnes, sn-0.6.0, 02.86, 65552, 2.83, 0.02, 0, 2838 +barnes, sn-0.5.3, 02.84, 70108, 2.83, 0.00, 0, 2520 +barnes, sn-0.6.0-memcpy-checks, 02.86, 65608, 2.82, 0.03, 0, 
2834 +barnes, je, 02.85, 76636, 2.83, 0.02, 0, 2547 +barnes, scudo, 02.89, 64112, 2.86, 0.02, 0, 3516 +barnes, smi, 02.95, 66948, 2.93, 0.01, 0, 2662 +espresso, mi, 05.16, 8228, 5.13, 0.02, 0, 176 +espresso, sn-0.6.0-full-checks, 05.24, 12608, 5.22, 0.02, 0, 748 +espresso, sn-0.6.0, 05.11, 6328, 5.07, 0.03, 0, 661 +espresso, sn-0.5.3, 05.14, 10392, 5.10, 0.03, 0, 414 +espresso, sn-0.6.0-memcpy-checks, 05.14, 6412, 5.11, 0.02, 0, 659 +espresso, je, 05.53, 11876, 5.49, 0.04, 0, 334 +espresso, scudo, 06.10, 5000, 6.07, 0.03, 0, 648 +espresso, smi, 05.48, 6748, 5.46, 0.02, 0, 289 +z3, mi, 01.18, 71260, 1.16, 0.02, 0, 458 +z3, sn-0.6.0-full-checks, 01.18, 72220, 1.17, 0.01, 0, 772 +z3, sn-0.6.0, 01.17, 66052, 1.17, 0.00, 0, 734 +z3, sn-0.5.3, 01.16, 73748, 1.15, 0.01, 0, 566 +z3, sn-0.6.0-memcpy-checks, 01.17, 66104, 1.15, 0.01, 0, 738 +z3, je, 01.17, 65920, 1.16, 0.01, 0, 2771 +z3, scudo, 01.27, 55960, 1.24, 0.02, 0, 8678 +z3, smi, 01.23, 65724, 1.21, 0.02, 0, 3728 +gs, mi, 01.15, 57376, 1.12, 0.03, 0, 1655 +gs, sn-0.6.0-full-checks, 01.19, 56208, 1.15, 0.03, 0, 1961 +gs, sn-0.6.0, 01.17, 48216, 1.14, 0.01, 0, 1999 +gs, sn-0.5.3, 01.16, 56052, 1.15, 0.01, 0, 1871 +gs, sn-0.6.0-memcpy-checks, 01.17, 48108, 1.13, 0.04, 0, 1997 +gs, je, 01.18, 53792, 1.16, 0.01, 0, 3727 +gs, scudo, 01.20, 41180, 1.17, 0.03, 0, 17107 +gs, smi, 01.18, 56612, 1.15, 0.03, 0, 4610 +redis, mi, 4.314, 35176, 1.85, 0.31, 0, 8029 +redis, sn-0.6.0-full-checks, 4.369, 33168, 1.79, 0.40, 0, 8840 +redis, sn-0.6.0, 4.248, 30392, 1.74, 0.40, 0, 8556 +redis, sn-0.5.3, 4.271, 37064, 1.74, 0.41, 0, 6995 +redis, sn-0.6.0-memcpy-checks, 4.265, 30460, 1.72, 0.42, 0, 8543 +redis, je, 4.991, 36876, 2.10, 0.40, 0, 6772 +redis, scudo, 5.188, 38000, 2.16, 0.45, 0, 9878 +redis, smi, 4.662, 35452, 2.03, 0.31, 0, 8040 +cfrac, mi, 06.52, 4588, 6.52, 0.00, 0, 185 +cfrac, sn-0.6.0-full-checks, 06.76, 3580, 6.76, 0.00, 0, 497 +cfrac, sn-0.6.0, 06.39, 3272, 6.39, 0.00, 0, 429 +cfrac, sn-0.5.3, 06.46, 8348, 6.45, 0.00, 0, 445 +cfrac, sn-0.6.0-memcpy-checks, 06.28, 3336, 6.27, 0.00, 0, 432 +cfrac, je, 06.63, 10056, 6.63, 0.00, 0, 271 +cfrac, scudo, 08.39, 4564, 8.39, 0.00, 0, 608 +cfrac, smi, 07.07, 4604, 7.07, 0.00, 0, 182 +leanN, mi, 25.77, 579000, 97.39, 1.21, 0, 226057 +leanN, sn-0.6.0-full-checks, 26.67, 671232, 102.70, 1.00, 0, 13239 +leanN, sn-0.6.0, 26.24, 529888, 103.74, 0.90, 0, 10606 +leanN, sn-0.5.3, 25.31, 555048, 95.87, 0.90, 0, 3509 +leanN, sn-0.6.0-memcpy-checks, 25.95, 534752, 101.70, 0.95, 0, 11941 +leanN, je, 25.51, 520756, 95.50, 1.06, 0, 164558 +leanN, scudo, 32.49, 589876, 116.58, 1.64, 0, 607090 +leanN, smi, 27.65, 627320, 100.04, 2.02, 0, 363186 +sed, mi, 01.73, 326428, 1.67, 0.06, 0, 403 +sed, sn-0.6.0-full-checks, 01.75, 351632, 1.63, 0.11, 0, 1713 +sed, sn-0.6.0, 01.73, 342732, 1.66, 0.06, 0, 1477 +sed, sn-0.5.3, 01.72, 310300, 1.65, 0.07, 0, 685 +sed, sn-0.6.0-memcpy-checks, 01.73, 342804, 1.64, 0.08, 0, 1476 +sed, je, 01.73, 295712, 1.65, 0.08, 0, 9184 +sed, scudo, 01.82, 245648, 1.69, 0.11, 0, 60799 +sed, smi, 01.82, 316576, 1.71, 0.11, 0, 34234 +barnes, mi, 02.84, 66972, 2.83, 0.01, 0, 2465 +barnes, sn-0.6.0-full-checks, 02.88, 65700, 2.86, 0.02, 0, 2854 +barnes, sn-0.6.0, 02.86, 65660, 2.85, 0.01, 0, 2839 +barnes, sn-0.5.3, 02.94, 70008, 2.93, 0.00, 0, 2522 +barnes, sn-0.6.0-memcpy-checks, 02.86, 65604, 2.84, 0.01, 0, 2841 +barnes, je, 02.86, 76672, 2.84, 0.01, 0, 2550 +barnes, scudo, 02.90, 64096, 2.88, 0.01, 0, 3531 +barnes, smi, 02.92, 66776, 2.90, 0.01, 0, 2661 +espresso, mi, 05.19, 8220, 5.17, 0.02, 0, 172 
+espresso, sn-0.6.0-full-checks, 05.33, 12760, 5.29, 0.04, 0, 726 +espresso, sn-0.6.0, 05.21, 6272, 5.18, 0.03, 0, 660 +espresso, sn-0.5.3, 05.15, 10252, 5.13, 0.01, 0, 413 +espresso, sn-0.6.0-memcpy-checks, 05.14, 6440, 5.12, 0.02, 0, 661 +espresso, je, 05.43, 9908, 5.39, 0.04, 0, 332 +espresso, scudo, 06.03, 4884, 6.00, 0.02, 0, 642 +espresso, smi, 05.48, 6680, 5.44, 0.03, 0, 287 +z3, mi, 01.18, 71236, 1.17, 0.01, 0, 458 +z3, sn-0.6.0-full-checks, 01.17, 70276, 1.16, 0.00, 0, 767 +z3, sn-0.6.0, 01.17, 65988, 1.14, 0.02, 0, 733 +z3, sn-0.5.3, 01.17, 73808, 1.15, 0.01, 0, 571 +z3, sn-0.6.0-memcpy-checks, 01.16, 66264, 1.14, 0.02, 0, 740 +z3, je, 01.18, 65916, 1.17, 0.01, 0, 2774 +z3, scudo, 01.25, 56064, 1.23, 0.02, 0, 8664 +z3, smi, 01.23, 66032, 1.21, 0.01, 0, 3809 +gs, mi, 01.17, 57300, 1.15, 0.01, 0, 1658 +gs, sn-0.6.0-full-checks, 01.19, 54364, 1.15, 0.03, 0, 2014 +gs, sn-0.6.0, 01.16, 48528, 1.13, 0.03, 0, 1997 +gs, sn-0.5.3, 01.16, 56216, 1.14, 0.02, 0, 1872 +gs, sn-0.6.0-memcpy-checks, 01.18, 48464, 1.14, 0.04, 0, 1999 +gs, je, 01.18, 53628, 1.16, 0.02, 0, 3712 +gs, scudo, 01.20, 41440, 1.14, 0.06, 0, 17166 +gs, smi, 01.18, 57068, 1.15, 0.03, 0, 4675 +redis, mi, 4.288, 35248, 1.74, 0.42, 0, 8036 +redis, sn-0.6.0-full-checks, 4.343, 33512, 1.90, 0.29, 0, 9002 +redis, sn-0.6.0, 4.203, 30428, 1.78, 0.34, 0, 8564 +redis, sn-0.5.3, 4.224, 37128, 1.74, 0.38, 0, 7520 +redis, sn-0.6.0-memcpy-checks, 4.229, 30404, 1.71, 0.42, 0, 8564 +redis, je, 5.033, 36940, 2.16, 0.36, 0, 6761 +redis, scudo, 5.361, 37964, 2.31, 0.38, 0, 9853 +redis, smi, 4.665, 35436, 1.91, 0.43, 0, 8032 +cfrac, mi, 06.31, 4460, 6.30, 0.00, 0, 180 +cfrac, sn-0.6.0-full-checks, 06.60, 3596, 6.60, 0.00, 0, 507 +cfrac, sn-0.6.0, 06.23, 3276, 6.23, 0.00, 0, 431 +cfrac, sn-0.5.3, 06.30, 8292, 6.30, 0.00, 0, 445 +cfrac, sn-0.6.0-memcpy-checks, 06.24, 3336, 6.24, 0.00, 0, 430 +cfrac, je, 06.67, 9996, 6.66, 0.00, 0, 270 +cfrac, scudo, 08.64, 4756, 8.63, 0.00, 0, 613 +cfrac, smi, 07.18, 4572, 7.18, 0.00, 0, 184 +leanN, mi, 26.23, 583088, 100.11, 1.16, 2, 161724 +leanN, sn-0.6.0-full-checks, 26.23, 673484, 100.36, 0.96, 0, 12775 +leanN, sn-0.6.0, 27.12, 530604, 108.73, 1.00, 0, 12559 +leanN, sn-0.5.3, 27.30, 526460, 106.32, 0.96, 0, 2879 +leanN, sn-0.6.0-memcpy-checks, 26.90, 518652, 106.80, 0.83, 0, 11945 +leanN, je, 26.70, 513676, 103.21, 0.97, 0, 78470 +leanN, scudo, 33.24, 625584, 121.33, 1.88, 0, 656737 +leanN, smi, 27.25, 617944, 97.77, 2.02, 0, 449513 +sed, mi, 01.75, 324396, 1.69, 0.05, 0, 402 +sed, sn-0.6.0-full-checks, 01.74, 349496, 1.65, 0.09, 0, 1693 +sed, sn-0.6.0, 01.73, 342848, 1.64, 0.09, 0, 1474 +sed, sn-0.5.3, 01.72, 310416, 1.64, 0.07, 0, 686 +sed, sn-0.6.0-memcpy-checks, 01.74, 342748, 1.66, 0.07, 0, 1482 +sed, je, 01.74, 295408, 1.65, 0.09, 0, 9194 +sed, scudo, 01.81, 245432, 1.69, 0.12, 0, 60798 +sed, smi, 01.80, 316564, 1.69, 0.11, 0, 34217 +barnes, mi, 02.90, 66824, 2.87, 0.02, 0, 2463 +barnes, sn-0.6.0-full-checks, 02.94, 65752, 2.90, 0.03, 0, 2856 +barnes, sn-0.6.0, 02.93, 65796, 2.92, 0.01, 0, 2844 +barnes, sn-0.5.3, 02.87, 70156, 2.85, 0.02, 0, 2519 +barnes, sn-0.6.0-memcpy-checks, 02.85, 65648, 2.82, 0.02, 0, 2844 +barnes, je, 02.85, 76616, 2.83, 0.01, 0, 2551 +barnes, scudo, 02.92, 64456, 2.89, 0.03, 0, 3607 +barnes, smi, 02.91, 66984, 2.89, 0.02, 0, 2659 +espresso, mi, 05.17, 8224, 5.15, 0.01, 0, 176 +espresso, sn-0.6.0-full-checks, 05.25, 12740, 5.22, 0.02, 0, 734 +espresso, sn-0.6.0, 05.11, 6256, 5.11, 0.00, 0, 658 +espresso, sn-0.5.3, 05.13, 10420, 5.09, 0.03, 0, 414 +espresso, 
sn-0.6.0-memcpy-checks, 05.12, 6548, 5.08, 0.04, 0, 663 +espresso, je, 05.44, 10148, 5.42, 0.02, 0, 307 +espresso, scudo, 06.05, 4992, 6.03, 0.02, 0, 642 +espresso, smi, 05.45, 6684, 5.43, 0.02, 0, 289 +z3, mi, 01.18, 71336, 1.16, 0.02, 0, 456 +z3, sn-0.6.0-full-checks, 01.18, 70424, 1.16, 0.01, 0, 759 +z3, sn-0.6.0, 01.16, 66164, 1.15, 0.01, 0, 739 +z3, sn-0.5.3, 01.16, 73580, 1.14, 0.02, 0, 565 +z3, sn-0.6.0-memcpy-checks, 01.17, 66016, 1.15, 0.01, 0, 742 +z3, je, 01.19, 66028, 1.16, 0.02, 0, 2773 +z3, scudo, 01.24, 56172, 1.22, 0.01, 0, 8145 +z3, smi, 01.23, 65560, 1.20, 0.03, 0, 3753 +gs, mi, 01.15, 57368, 1.13, 0.02, 0, 1657 +gs, sn-0.6.0-full-checks, 01.19, 53812, 1.17, 0.01, 0, 1939 +gs, sn-0.6.0, 01.17, 48532, 1.15, 0.01, 0, 1993 +gs, sn-0.5.3, 01.15, 56056, 1.12, 0.02, 0, 1876 +gs, sn-0.6.0-memcpy-checks, 01.17, 48452, 1.13, 0.03, 0, 1998 +gs, je, 01.18, 53568, 1.13, 0.05, 0, 3723 +gs, scudo, 01.20, 41540, 1.16, 0.03, 0, 17162 +gs, smi, 01.19, 57140, 1.16, 0.02, 0, 4643 +redis, mi, 4.308, 35204, 1.77, 0.39, 0, 8019 +redis, sn-0.6.0-full-checks, 4.392, 33984, 1.86, 0.35, 0, 9164 +redis, sn-0.6.0, 4.196, 30348, 1.74, 0.37, 0, 8562 +redis, sn-0.5.3, 4.389, 37100, 1.73, 0.47, 0, 7100 +redis, sn-0.6.0-memcpy-checks, 4.328, 30368, 1.83, 0.34, 0, 8531 +redis, je, 4.989, 36816, 2.17, 0.33, 0, 6756 +redis, scudo, 5.182, 37880, 2.22, 0.39, 0, 9882 +redis, smi, 4.593, 35444, 1.88, 0.43, 0, 8057 +cfrac, mi, 06.29, 4480, 6.29, 0.00, 0, 183 +cfrac, sn-0.6.0-full-checks, 06.61, 3664, 6.61, 0.00, 0, 512 +cfrac, sn-0.6.0, 06.25, 3328, 6.25, 0.00, 0, 431 +cfrac, sn-0.5.3, 06.36, 8348, 6.36, 0.00, 0, 445 +cfrac, sn-0.6.0-memcpy-checks, 06.25, 3340, 6.25, 0.00, 0, 430 +cfrac, je, 06.63, 10000, 6.62, 0.00, 0, 270 +cfrac, scudo, 08.40, 4564, 8.39, 0.00, 0, 610 +cfrac, smi, 07.00, 4604, 7.00, 0.00, 0, 183 +leanN, mi, 26.14, 585036, 100.89, 1.35, 0, 185556 +leanN, sn-0.6.0-full-checks, 27.06, 681168, 105.03, 1.04, 0, 11898 +leanN, sn-0.6.0, 26.39, 518364, 104.78, 0.84, 0, 11801 +leanN, sn-0.5.3, 26.22, 540668, 102.18, 0.90, 0, 3315 +leanN, sn-0.6.0-memcpy-checks, 25.73, 544584, 100.10, 0.90, 0, 13151 +leanN, je, 26.63, 489512, 103.36, 1.12, 0, 178461 +leanN, scudo, 32.24, 586964, 116.42, 1.71, 4, 656609 +leanN, smi, 27.36, 612228, 98.39, 1.86, 3, 411598 +sed, mi, 01.75, 326444, 1.66, 0.08, 0, 398 +sed, sn-0.6.0-full-checks, 01.79, 347472, 1.72, 0.06, 0, 1697 +sed, sn-0.6.0, 01.76, 342848, 1.68, 0.07, 0, 1478 +sed, sn-0.5.3, 01.78, 310088, 1.68, 0.09, 0, 679 +sed, sn-0.6.0-memcpy-checks, 01.74, 342872, 1.63, 0.10, 0, 1479 +sed, je, 01.76, 295512, 1.69, 0.07, 0, 9198 +sed, scudo, 01.83, 245376, 1.73, 0.09, 0, 60796 +sed, smi, 01.81, 315884, 1.69, 0.11, 0, 34061 +barnes, mi, 02.88, 66716, 2.86, 0.02, 0, 2461 +barnes, sn-0.6.0-full-checks, 02.91, 65776, 2.88, 0.02, 0, 2856 +barnes, sn-0.6.0, 02.86, 65636, 2.85, 0.01, 0, 2847 +barnes, sn-0.5.3, 02.88, 69948, 2.85, 0.02, 0, 2519 +barnes, sn-0.6.0-memcpy-checks, 02.86, 65648, 2.84, 0.02, 0, 2848 +barnes, je, 02.89, 76676, 2.87, 0.01, 0, 2546 +barnes, scudo, 02.91, 61900, 2.89, 0.01, 0, 3644 +barnes, smi, 02.95, 66896, 2.94, 0.00, 0, 2657 +espresso, mi, 05.20, 8260, 5.18, 0.01, 0, 174 +espresso, sn-0.6.0-full-checks, 05.28, 12608, 5.27, 0.01, 0, 736 +espresso, sn-0.6.0, 05.12, 6268, 5.09, 0.03, 0, 659 +espresso, sn-0.5.3, 05.16, 10328, 5.14, 0.01, 0, 413 +espresso, sn-0.6.0-memcpy-checks, 05.12, 6428, 5.10, 0.02, 0, 657 +espresso, je, 05.49, 9904, 5.47, 0.01, 0, 346 +espresso, scudo, 06.05, 4884, 6.02, 0.02, 0, 632 +espresso, smi, 05.50, 6684, 5.46, 0.03, 0, 284 
+z3, mi, 01.19, 71380, 1.17, 0.02, 0, 459 +z3, sn-0.6.0-full-checks, 01.18, 70388, 1.15, 0.02, 0, 768 +z3, sn-0.6.0, 01.18, 66116, 1.17, 0.00, 0, 738 +z3, sn-0.5.3, 01.16, 73572, 1.14, 0.01, 0, 562 +z3, sn-0.6.0-memcpy-checks, 01.17, 66184, 1.15, 0.01, 0, 741 +z3, je, 01.19, 65840, 1.16, 0.02, 0, 2772 +z3, scudo, 01.26, 56180, 1.25, 0.01, 0, 8670 +z3, smi, 01.25, 65808, 1.23, 0.01, 0, 3798 +gs, mi, 01.16, 57188, 1.14, 0.02, 0, 1655 +gs, sn-0.6.0-full-checks, 01.18, 54236, 1.16, 0.02, 0, 1958 +gs, sn-0.6.0, 01.18, 48488, 1.16, 0.02, 0, 2000 +gs, sn-0.5.3, 01.16, 56172, 1.13, 0.03, 0, 1872 +gs, sn-0.6.0-memcpy-checks, 01.19, 48188, 1.15, 0.04, 0, 2000 +gs, je, 01.20, 53220, 1.15, 0.04, 0, 3711 +gs, scudo, 01.21, 41180, 1.15, 0.06, 0, 17153 +gs, smi, 01.20, 57036, 1.16, 0.04, 0, 4612 +redis, mi, 4.340, 35184, 1.79, 0.39, 0, 8010 +redis, sn-0.6.0-full-checks, 4.403, 33604, 1.88, 0.33, 0, 9014 +redis, sn-0.6.0, 4.320, 30360, 1.80, 0.37, 0, 8539 +redis, sn-0.5.3, 4.348, 36980, 1.82, 0.36, 0, 7483 +redis, sn-0.6.0-memcpy-checks, 4.306, 30440, 1.81, 0.35, 0, 8542 +redis, je, 5.087, 36972, 2.19, 0.36, 0, 6784 +redis, scudo, 5.234, 37924, 2.22, 0.41, 0, 9880 +redis, smi, 4.690, 35696, 1.92, 0.44, 0, 8085 +cfrac, mi, 06.33, 4496, 6.33, 0.00, 0, 186 +cfrac, sn-0.6.0-full-checks, 06.62, 3576, 6.62, 0.00, 0, 495 +cfrac, sn-0.6.0, 06.27, 3336, 6.27, 0.00, 0, 435 +cfrac, sn-0.5.3, 06.32, 8408, 6.32, 0.00, 0, 448 +cfrac, sn-0.6.0-memcpy-checks, 06.27, 3376, 6.27, 0.00, 0, 434 +cfrac, je, 06.66, 10104, 6.65, 0.00, 0, 271 +cfrac, scudo, 08.43, 4656, 8.42, 0.00, 0, 611 +cfrac, smi, 07.06, 4636, 7.06, 0.00, 0, 183 +leanN, mi, 26.72, 595404, 103.20, 1.25, 3, 232568 +leanN, sn-0.6.0-full-checks, 26.55, 655872, 101.70, 1.10, 0, 13236 +leanN, sn-0.6.0, 25.36, 534340, 98.64, 0.81, 0, 12637 +leanN, sn-0.5.3, 25.75, 549084, 98.06, 0.79, 0, 3423 +leanN, sn-0.6.0-memcpy-checks, 26.59, 534420, 106.42, 0.76, 0, 12479 +leanN, je, 26.02, 524788, 98.90, 1.07, 0, 140108 +leanN, scudo, 32.32, 617108, 115.04, 1.95, 3, 560579 +leanN, smi, 27.69, 626448, 100.56, 2.05, 0, 453730 +sed, mi, 01.75, 326400, 1.67, 0.07, 0, 404 +sed, sn-0.6.0-full-checks, 01.79, 347480, 1.70, 0.09, 0, 1697 +sed, sn-0.6.0, 01.76, 342736, 1.66, 0.09, 0, 1478 +sed, sn-0.5.3, 01.76, 310376, 1.70, 0.05, 0, 681 +sed, sn-0.6.0-memcpy-checks, 01.79, 342792, 1.69, 0.10, 0, 1477 +sed, je, 01.75, 294172, 1.65, 0.09, 0, 8283 +sed, scudo, 01.81, 245416, 1.69, 0.11, 0, 60794 +sed, smi, 01.82, 312476, 1.68, 0.13, 0, 33703 +barnes, mi, 02.86, 66764, 2.85, 0.01, 0, 2461 +barnes, sn-0.6.0-full-checks, 02.87, 65632, 2.85, 0.02, 0, 2855 +barnes, sn-0.6.0, 02.87, 65668, 2.84, 0.03, 0, 2837 +barnes, sn-0.5.3, 02.90, 70064, 2.87, 0.02, 0, 2525 +barnes, sn-0.6.0-memcpy-checks, 02.88, 65616, 2.87, 0.01, 0, 2841 +barnes, je, 02.85, 76744, 2.85, 0.00, 0, 2549 +barnes, scudo, 02.90, 63144, 2.88, 0.02, 0, 3502 +barnes, smi, 02.94, 66808, 2.90, 0.03, 0, 2660 +espresso, mi, 05.19, 8308, 5.17, 0.02, 0, 176 +espresso, sn-0.6.0-full-checks, 05.28, 12608, 5.25, 0.02, 0, 758 +espresso, sn-0.6.0, 05.13, 6236, 5.10, 0.02, 0, 656 +espresso, sn-0.5.3, 05.14, 10240, 5.10, 0.04, 0, 411 +espresso, sn-0.6.0-memcpy-checks, 05.12, 6240, 5.10, 0.02, 0, 655 +espresso, je, 05.45, 9860, 5.45, 0.00, 0, 336 +espresso, scudo, 06.02, 4804, 6.02, 0.00, 0, 640 +espresso, smi, 05.48, 6756, 5.45, 0.03, 0, 290 +z3, mi, 01.20, 71272, 1.18, 0.01, 0, 456 +z3, sn-0.6.0-full-checks, 01.19, 70156, 1.17, 0.01, 0, 765 +z3, sn-0.6.0, 01.17, 66192, 1.16, 0.00, 0, 737 +z3, sn-0.5.3, 01.17, 73556, 1.15, 0.01, 0, 562 +z3, 
sn-0.6.0-memcpy-checks, 01.18, 66120, 1.16, 0.02, 0, 731 +z3, je, 01.18, 65900, 1.17, 0.01, 0, 2771 +z3, scudo, 01.27, 56036, 1.26, 0.01, 0, 8678 +z3, smi, 01.23, 65884, 1.20, 0.02, 0, 3786 +gs, mi, 01.17, 57512, 1.15, 0.01, 0, 1655 +gs, sn-0.6.0-full-checks, 01.19, 54220, 1.15, 0.04, 0, 1945 +gs, sn-0.6.0, 01.17, 48364, 1.16, 0.01, 0, 1998 +gs, sn-0.5.3, 01.16, 56032, 1.14, 0.02, 0, 1876 +gs, sn-0.6.0-memcpy-checks, 01.19, 47984, 1.15, 0.03, 0, 1996 +gs, je, 01.20, 53552, 1.17, 0.03, 0, 3733 +gs, scudo, 01.22, 41524, 1.15, 0.06, 0, 17151 +gs, smi, 01.22, 56648, 1.18, 0.03, 0, 4597 +redis, mi, 4.380, 35048, 1.87, 0.33, 0, 8004 +redis, sn-0.6.0-full-checks, 4.406, 33720, 1.87, 0.35, 0, 9048 +redis, sn-0.6.0, 4.182, 30384, 1.72, 0.39, 0, 8574 +redis, sn-0.5.3, 4.314, 36988, 1.81, 0.36, 0, 7349 +redis, sn-0.6.0-memcpy-checks, 4.492, 30384, 1.88, 0.38, 0, 8479 +redis, je, 5.061, 36840, 2.14, 0.40, 0, 6776 +redis, scudo, 5.222, 37976, 2.24, 0.39, 0, 9876 +redis, smi, 4.618, 35384, 1.88, 0.44, 0, 8039 +cfrac, mi, 06.36, 4496, 6.35, 0.00, 0, 183 +cfrac, sn-0.6.0-full-checks, 06.62, 3672, 6.62, 0.00, 0, 512 +cfrac, sn-0.6.0, 06.27, 3332, 6.27, 0.00, 0, 436 +cfrac, sn-0.5.3, 06.34, 8424, 6.33, 0.00, 0, 446 +cfrac, sn-0.6.0-memcpy-checks, 06.28, 3376, 6.28, 0.00, 0, 434 +cfrac, je, 06.64, 10108, 6.64, 0.00, 0, 271 +cfrac, scudo, 08.56, 4644, 8.56, 0.00, 0, 610 +cfrac, smi, 07.06, 4496, 7.06, 0.00, 0, 184 +leanN, mi, 25.91, 597148, 98.60, 1.04, 0, 190255 +leanN, sn-0.6.0-full-checks, 27.77, 662424, 108.83, 0.93, 0, 13224 +leanN, sn-0.6.0, 24.73, 534340, 94.63, 0.75, 0, 13335 +leanN, sn-0.5.3, 26.49, 539092, 101.33, 0.99, 3, 3592 +leanN, sn-0.6.0-memcpy-checks, 26.56, 530728, 106.76, 0.88, 0, 12442 +leanN, je, 27.05, 504264, 103.22, 1.17, 0, 156329 +leanN, scudo, 32.94, 612364, 117.66, 2.10, 5, 805633 +leanN, smi, 27.56, 625856, 99.23, 1.87, 1, 381154 +sed, mi, 01.74, 326504, 1.66, 0.08, 0, 403 +sed, sn-0.6.0-full-checks, 01.74, 349428, 1.65, 0.08, 0, 1596 +sed, sn-0.6.0, 01.73, 342912, 1.64, 0.08, 0, 1475 +sed, sn-0.5.3, 01.72, 310300, 1.65, 0.07, 0, 679 +sed, sn-0.6.0-memcpy-checks, 01.73, 342796, 1.64, 0.09, 0, 1474 +sed, je, 01.74, 296120, 1.65, 0.09, 0, 8271 +sed, scudo, 01.82, 245408, 1.70, 0.12, 0, 60794 +sed, smi, 01.82, 312548, 1.70, 0.11, 0, 33757 +barnes, mi, 02.86, 66808, 2.84, 0.01, 0, 2465 +barnes, sn-0.6.0-full-checks, 02.86, 65732, 2.84, 0.02, 0, 2854 +barnes, sn-0.6.0, 02.86, 65640, 2.84, 0.02, 0, 2848 +barnes, sn-0.5.3, 02.86, 70068, 2.85, 0.01, 0, 2523 +barnes, sn-0.6.0-memcpy-checks, 02.92, 65608, 2.90, 0.02, 0, 2837 +barnes, je, 02.91, 76640, 2.88, 0.02, 0, 2547 +barnes, scudo, 02.92, 63716, 2.91, 0.00, 0, 3472 +barnes, smi, 02.93, 66852, 2.89, 0.03, 0, 2658 +espresso, mi, 05.19, 8316, 5.15, 0.03, 0, 172 +espresso, sn-0.6.0-full-checks, 05.27, 12808, 5.26, 0.00, 0, 733 +espresso, sn-0.6.0, 05.14, 6296, 5.10, 0.03, 0, 658 +espresso, sn-0.5.3, 05.15, 10352, 5.14, 0.01, 0, 415 +espresso, sn-0.6.0-memcpy-checks, 05.13, 6548, 5.11, 0.01, 0, 660 +espresso, je, 05.47, 9932, 5.44, 0.02, 0, 355 +espresso, scudo, 06.13, 4804, 6.12, 0.01, 0, 633 +espresso, smi, 05.50, 6692, 5.46, 0.04, 0, 288 +z3, mi, 01.20, 71152, 1.18, 0.01, 0, 456 +z3, sn-0.6.0-full-checks, 01.17, 70276, 1.15, 0.02, 0, 767 +z3, sn-0.6.0, 01.17, 66000, 1.14, 0.02, 0, 732 +z3, sn-0.5.3, 01.17, 73528, 1.15, 0.01, 0, 564 +z3, sn-0.6.0-memcpy-checks, 01.16, 65980, 1.16, 0.00, 0, 736 +z3, je, 01.19, 65772, 1.17, 0.01, 0, 2774 +z3, scudo, 01.26, 56280, 1.24, 0.01, 0, 8661 +z3, smi, 01.24, 65928, 1.22, 0.02, 0, 3817 +gs, mi, 
01.17, 57516, 1.17, 0.00, 0, 1654 +gs, sn-0.6.0-full-checks, 01.19, 54336, 1.15, 0.03, 0, 1947 +gs, sn-0.6.0, 01.18, 48316, 1.14, 0.03, 0, 1996 +gs, sn-0.5.3, 01.17, 56208, 1.15, 0.02, 0, 1872 +gs, sn-0.6.0-memcpy-checks, 01.18, 48384, 1.16, 0.02, 0, 1997 +gs, je, 01.20, 53652, 1.18, 0.02, 0, 3718 +gs, scudo, 01.21, 41432, 1.18, 0.02, 0, 17155 +gs, smi, 01.19, 56816, 1.12, 0.06, 0, 4649 +redis, mi, 4.314, 35128, 1.80, 0.36, 0, 8009 +redis, sn-0.6.0-full-checks, 4.415, 33776, 1.86, 0.36, 0, 9147 +redis, sn-0.6.0, 4.239, 30492, 1.74, 0.39, 0, 8547 +redis, sn-0.5.3, 4.277, 36992, 1.79, 0.36, 0, 7345 +redis, sn-0.6.0-memcpy-checks, 4.320, 30388, 1.80, 0.37, 0, 8544 +redis, je, 5.188, 36876, 2.28, 0.32, 0, 6771 +redis, scudo, 5.231, 37984, 2.27, 0.36, 0, 9869 +redis, smi, 4.599, 35488, 1.92, 0.39, 0, 8062 +cfrac, mi, 06.39, 4496, 6.38, 0.00, 0, 182 +cfrac, sn-0.6.0-full-checks, 06.73, 3572, 6.73, 0.00, 0, 494 +cfrac, sn-0.6.0, 06.27, 3264, 6.26, 0.00, 0, 430 +cfrac, sn-0.5.3, 06.32, 8368, 6.32, 0.00, 0, 447 +cfrac, sn-0.6.0-memcpy-checks, 06.27, 3308, 6.27, 0.00, 0, 433 +cfrac, je, 06.68, 10028, 6.68, 0.00, 0, 271 +cfrac, scudo, 08.43, 4624, 8.43, 0.00, 0, 611 +cfrac, smi, 07.06, 4468, 7.06, 0.00, 0, 181 +leanN, mi, 25.73, 599228, 96.25, 1.11, 0, 222805 +leanN, sn-0.6.0-full-checks, 27.00, 657728, 104.05, 0.79, 0, 14663 +leanN, sn-0.6.0, 25.04, 538884, 96.63, 0.76, 0, 13205 +leanN, sn-0.5.3, 25.42, 542936, 96.17, 0.85, 0, 3418 +leanN, sn-0.6.0-memcpy-checks, 25.50, 536352, 99.48, 0.93, 0, 13333 +leanN, je, 26.47, 507228, 100.33, 0.96, 0, 126259 +leanN, scudo, 33.81, 608676, 124.38, 1.69, 0, 497025 +leanN, smi, 27.40, 612612, 100.31, 1.90, 0, 403445 +sed, mi, 01.74, 330548, 1.67, 0.07, 0, 406 +sed, sn-0.6.0-full-checks, 01.76, 347356, 1.66, 0.09, 0, 1590 +sed, sn-0.6.0, 01.73, 342812, 1.65, 0.08, 0, 1454 +sed, sn-0.5.3, 01.73, 310336, 1.66, 0.07, 0, 680 +sed, sn-0.6.0-memcpy-checks, 01.73, 342988, 1.64, 0.09, 0, 1481 +sed, je, 01.73, 295408, 1.63, 0.09, 0, 9187 +sed, scudo, 01.81, 245452, 1.70, 0.10, 0, 60797 +sed, smi, 01.81, 316720, 1.69, 0.11, 0, 34280 +barnes, mi, 02.85, 66840, 2.83, 0.01, 0, 2463 +barnes, sn-0.6.0-full-checks, 02.86, 65688, 2.84, 0.02, 0, 2855 +barnes, sn-0.6.0, 02.90, 65508, 2.89, 0.00, 0, 2839 +barnes, sn-0.5.3, 02.88, 70236, 2.87, 0.01, 0, 2521 +barnes, sn-0.6.0-memcpy-checks, 02.87, 65556, 2.85, 0.02, 0, 2842 +barnes, je, 02.87, 76584, 2.87, 0.00, 0, 2549 +barnes, scudo, 02.94, 61456, 2.92, 0.02, 0, 4264 +barnes, smi, 02.94, 66808, 2.92, 0.02, 0, 2657 +espresso, mi, 05.24, 8276, 5.20, 0.03, 0, 177 +espresso, sn-0.6.0-full-checks, 05.28, 12940, 5.26, 0.02, 0, 736 +espresso, sn-0.6.0, 05.13, 6432, 5.12, 0.01, 0, 659 +espresso, sn-0.5.3, 05.27, 10284, 5.25, 0.01, 0, 413 +espresso, sn-0.6.0-memcpy-checks, 05.26, 6440, 5.23, 0.02, 0, 659 +espresso, je, 05.47, 10276, 5.46, 0.01, 0, 295 +espresso, scudo, 06.03, 4980, 6.00, 0.03, 0, 639 +espresso, smi, 05.49, 6728, 5.47, 0.02, 0, 292 +z3, mi, 01.20, 71076, 1.18, 0.01, 0, 453 +z3, sn-0.6.0-full-checks, 01.18, 70316, 1.17, 0.01, 0, 764 +z3, sn-0.6.0, 01.17, 66128, 1.16, 0.01, 0, 734 +z3, sn-0.5.3, 01.16, 73644, 1.14, 0.02, 0, 564 +z3, sn-0.6.0-memcpy-checks, 01.18, 66176, 1.17, 0.01, 0, 738 +z3, je, 01.18, 65908, 1.17, 0.01, 0, 2770 +z3, scudo, 01.25, 56216, 1.23, 0.02, 0, 8662 +z3, smi, 01.23, 66256, 1.21, 0.02, 0, 3864 +gs, mi, 01.16, 57492, 1.13, 0.02, 0, 1655 +gs, sn-0.6.0-full-checks, 01.20, 54284, 1.17, 0.02, 0, 1957 +gs, sn-0.6.0, 01.18, 48284, 1.15, 0.02, 0, 1997 +gs, sn-0.5.3, 01.17, 55884, 1.13, 0.03, 0, 1869 +gs, 
sn-0.6.0-memcpy-checks, 01.18, 48476, 1.15, 0.02, 0, 2006 +gs, je, 01.19, 53308, 1.16, 0.02, 0, 3724 +gs, scudo, 01.20, 41576, 1.13, 0.06, 0, 17165 +gs, smi, 01.20, 57036, 1.17, 0.03, 0, 4615 +redis, mi, 4.314, 35236, 1.84, 0.33, 0, 8029 +redis, sn-0.6.0-full-checks, 4.645, 33244, 1.93, 0.41, 0, 8835 +redis, sn-0.6.0, 4.231, 30396, 1.68, 0.44, 0, 8561 +redis, sn-0.5.3, 4.289, 37040, 1.77, 0.38, 0, 6881 +redis, sn-0.6.0-memcpy-checks, 4.328, 30440, 1.79, 0.38, 0, 8532 +redis, je, 5.107, 36840, 2.19, 0.37, 0, 6749 +redis, scudo, 5.297, 37972, 2.29, 0.37, 0, 9852 +redis, smi, 4.717, 35384, 1.97, 0.40, 0, 8022 +cfrac, mi, 06.34, 4460, 6.34, 0.00, 0, 184 +cfrac, sn-0.6.0-full-checks, 06.62, 3600, 6.61, 0.00, 0, 494 +cfrac, sn-0.6.0, 06.26, 3312, 6.26, 0.00, 0, 432 +cfrac, sn-0.5.3, 06.36, 8408, 6.36, 0.00, 0, 447 +cfrac, sn-0.6.0-memcpy-checks, 06.27, 3336, 6.27, 0.00, 0, 429 +cfrac, je, 06.64, 10028, 6.64, 0.00, 0, 269 +cfrac, scudo, 08.45, 4648, 8.45, 0.00, 0, 611 +cfrac, smi, 07.02, 4488, 7.02, 0.00, 0, 177 +leanN, mi, 25.88, 585348, 97.10, 1.24, 4, 281872 +leanN, sn-0.6.0-full-checks, 26.70, 667524, 101.43, 0.87, 0, 12013 +leanN, sn-0.6.0, 24.98, 530168, 95.53, 0.83, 0, 12350 +leanN, sn-0.5.3, 25.63, 538804, 97.85, 0.82, 0, 3326 +leanN, sn-0.6.0-memcpy-checks, 25.23, 533980, 96.62, 0.83, 0, 12236 +leanN, je, 25.87, 521736, 96.91, 1.09, 0, 178770 +leanN, scudo, 32.13, 589304, 114.72, 1.66, 0, 541686 +leanN, smi, 27.63, 611432, 101.04, 1.79, 0, 402887 +sed, mi, 01.74, 324444, 1.63, 0.11, 0, 402 +sed, sn-0.6.0-full-checks, 01.74, 347576, 1.67, 0.07, 0, 1696 +sed, sn-0.6.0, 01.73, 342840, 1.63, 0.10, 0, 1475 +sed, sn-0.5.3, 01.74, 310300, 1.67, 0.06, 0, 679 +sed, sn-0.6.0-memcpy-checks, 01.73, 342752, 1.64, 0.09, 0, 1452 +sed, je, 01.74, 293788, 1.67, 0.07, 0, 8269 +sed, scudo, 01.83, 245484, 1.70, 0.12, 0, 60795 +sed, smi, 01.81, 316640, 1.71, 0.09, 0, 34248 +barnes, mi, 02.90, 66716, 2.88, 0.01, 0, 2461 +barnes, sn-0.6.0-full-checks, 02.87, 65668, 2.84, 0.03, 0, 2853 +barnes, sn-0.6.0, 02.86, 65600, 2.86, 0.00, 0, 2837 +barnes, sn-0.5.3, 02.85, 70236, 2.82, 0.02, 0, 2516 +barnes, sn-0.6.0-memcpy-checks, 02.87, 65572, 2.84, 0.02, 0, 2839 +barnes, je, 02.88, 76748, 2.86, 0.01, 0, 2547 +barnes, scudo, 02.95, 61908, 2.93, 0.01, 0, 4305 +barnes, smi, 02.94, 66940, 2.91, 0.02, 0, 2660 +espresso, mi, 05.18, 8312, 5.15, 0.03, 0, 174 +espresso, sn-0.6.0-full-checks, 05.28, 12644, 5.25, 0.02, 0, 736 +espresso, sn-0.6.0, 05.22, 6232, 5.19, 0.02, 0, 655 +espresso, sn-0.5.3, 05.26, 10352, 5.21, 0.04, 0, 414 +espresso, sn-0.6.0-memcpy-checks, 05.20, 6260, 5.17, 0.02, 0, 656 +espresso, je, 05.47, 9972, 5.45, 0.02, 0, 307 +espresso, scudo, 06.04, 4980, 5.99, 0.04, 0, 644 +espresso, smi, 05.49, 6640, 5.47, 0.02, 0, 286 +z3, mi, 01.20, 71316, 1.19, 0.00, 0, 460 +z3, sn-0.6.0-full-checks, 01.18, 72472, 1.16, 0.02, 0, 780 +z3, sn-0.6.0, 01.16, 66120, 1.14, 0.02, 0, 733 +z3, sn-0.5.3, 01.17, 73692, 1.16, 0.01, 0, 563 +z3, sn-0.6.0-memcpy-checks, 01.17, 66212, 1.15, 0.01, 0, 737 +z3, je, 01.19, 65768, 1.17, 0.01, 0, 2772 +z3, scudo, 01.25, 56092, 1.23, 0.02, 0, 8666 +z3, smi, 01.24, 65416, 1.21, 0.02, 0, 3713 +gs, mi, 01.18, 57324, 1.14, 0.03, 0, 1650 +gs, sn-0.6.0-full-checks, 01.18, 54196, 1.17, 0.01, 0, 1955 +gs, sn-0.6.0, 01.17, 48504, 1.15, 0.02, 0, 1996 +gs, sn-0.5.3, 01.17, 56216, 1.14, 0.02, 0, 1869 +gs, sn-0.6.0-memcpy-checks, 01.17, 48144, 1.15, 0.02, 0, 1997 +gs, je, 01.20, 53332, 1.17, 0.02, 0, 3721 +gs, scudo, 01.21, 41008, 1.17, 0.04, 0, 17108 +gs, smi, 01.19, 57232, 1.16, 0.03, 0, 4622 +redis, mi, 
4.669, 35204, 1.98, 0.37, 0, 7916 +redis, sn-0.6.0-full-checks, 4.429, 33292, 1.83, 0.40, 0, 8884 +redis, sn-0.6.0, 4.189, 30384, 1.69, 0.41, 0, 8580 +redis, sn-0.5.3, 4.263, 36996, 1.75, 0.39, 0, 7261 +redis, sn-0.6.0-memcpy-checks, 4.289, 30472, 1.78, 0.37, 0, 8542 +redis, je, 5.073, 36820, 2.24, 0.31, 0, 6761 +redis, scudo, 5.271, 37924, 2.22, 0.42, 0, 9862 +redis, smi, 4.612, 35620, 1.94, 0.38, 0, 8048 +cfrac, mi, 06.36, 4408, 6.36, 0.00, 0, 181 +cfrac, sn-0.6.0-full-checks, 06.71, 3668, 6.70, 0.00, 0, 511 +cfrac, sn-0.6.0, 06.43, 3328, 6.43, 0.00, 0, 434 +cfrac, sn-0.5.3, 06.38, 8252, 6.37, 0.00, 0, 442 +cfrac, sn-0.6.0-memcpy-checks, 06.27, 3420, 6.26, 0.00, 0, 435 +cfrac, je, 06.65, 10024, 6.65, 0.00, 0, 271 +cfrac, scudo, 08.45, 4568, 8.44, 0.00, 0, 610 +cfrac, smi, 07.07, 4580, 7.07, 0.00, 0, 180 +leanN, mi, 25.42, 592852, 95.62, 1.14, 0, 239072 +leanN, sn-0.6.0-full-checks, 26.04, 680596, 99.47, 0.93, 0, 14036 +leanN, sn-0.6.0, 25.93, 546076, 101.21, 0.89, 0, 12098 +leanN, sn-0.5.3, 25.65, 530420, 97.33, 0.95, 0, 3349 +leanN, sn-0.6.0-memcpy-checks, 24.51, 522388, 93.95, 0.87, 0, 11928 +leanN, je, 25.38, 538536, 95.43, 0.90, 0, 84230 +leanN, scudo, 32.97, 591284, 118.01, 1.87, 0, 721248 +leanN, smi, 28.42, 620424, 105.04, 1.68, 0, 232111 +sed, mi, 01.73, 326448, 1.65, 0.08, 0, 404 +sed, sn-0.6.0-full-checks, 01.74, 347564, 1.63, 0.10, 0, 1649 +sed, sn-0.6.0, 01.73, 342820, 1.59, 0.13, 0, 1454 +sed, sn-0.5.3, 01.73, 310348, 1.65, 0.07, 0, 685 +sed, sn-0.6.0-memcpy-checks, 01.72, 342740, 1.64, 0.08, 0, 1472 +sed, je, 01.75, 300848, 1.66, 0.08, 0, 7482 +sed, scudo, 01.80, 245376, 1.68, 0.11, 0, 60799 +sed, smi, 01.81, 316680, 1.66, 0.14, 0, 34248 +barnes, mi, 02.87, 66904, 2.84, 0.03, 0, 2463 +barnes, sn-0.6.0-full-checks, 02.87, 65620, 2.83, 0.03, 0, 2851 +barnes, sn-0.6.0, 02.84, 65664, 2.83, 0.01, 0, 2834 +barnes, sn-0.5.3, 02.92, 70236, 2.89, 0.02, 0, 2525 +barnes, sn-0.6.0-memcpy-checks, 02.86, 65616, 2.84, 0.02, 0, 2829 +barnes, je, 02.85, 76700, 2.83, 0.01, 0, 2550 +barnes, scudo, 02.95, 61764, 2.93, 0.01, 0, 4295 +barnes, smi, 02.98, 66920, 2.95, 0.02, 0, 2657 +espresso, mi, 05.20, 8316, 5.16, 0.04, 0, 175 +espresso, sn-0.6.0-full-checks, 05.29, 12660, 5.27, 0.01, 0, 729 +espresso, sn-0.6.0, 05.24, 6204, 5.21, 0.03, 0, 658 +espresso, sn-0.5.3, 05.26, 10276, 5.23, 0.03, 0, 408 +espresso, sn-0.6.0-memcpy-checks, 05.16, 6336, 5.14, 0.01, 0, 658 +espresso, je, 05.45, 9988, 5.41, 0.03, 0, 280 +espresso, scudo, 06.04, 4780, 6.01, 0.03, 0, 641 +espresso, smi, 05.53, 6620, 5.52, 0.00, 0, 290 +z3, mi, 01.21, 71204, 1.19, 0.02, 0, 461 +z3, sn-0.6.0-full-checks, 01.19, 70084, 1.17, 0.01, 0, 759 +z3, sn-0.6.0, 01.16, 66120, 1.14, 0.01, 0, 732 +z3, sn-0.5.3, 01.17, 73648, 1.16, 0.01, 0, 569 +z3, sn-0.6.0-memcpy-checks, 01.18, 66124, 1.17, 0.01, 0, 734 +z3, je, 01.19, 65748, 1.17, 0.01, 0, 2768 +z3, scudo, 01.25, 56248, 1.23, 0.01, 0, 8657 +z3, smi, 01.23, 65568, 1.19, 0.03, 0, 3746 +gs, mi, 01.17, 57032, 1.13, 0.03, 0, 1650 +gs, sn-0.6.0-full-checks, 01.18, 54320, 1.17, 0.01, 0, 1964 +gs, sn-0.6.0, 01.18, 48436, 1.16, 0.02, 0, 1994 +gs, sn-0.5.3, 01.18, 56084, 1.14, 0.03, 0, 1874 +gs, sn-0.6.0-memcpy-checks, 01.17, 48504, 1.16, 0.01, 0, 1998 +gs, je, 01.19, 53616, 1.16, 0.03, 0, 3722 +gs, scudo, 01.20, 41332, 1.13, 0.07, 0, 17114 +gs, smi, 01.19, 56812, 1.14, 0.05, 0, 4576 +redis, mi, 4.366, 35140, 1.87, 0.32, 0, 7994 +redis, sn-0.6.0-full-checks, 4.443, 33348, 1.77, 0.46, 0, 8872 +redis, sn-0.6.0, 4.205, 30532, 1.72, 0.39, 0, 8568 +redis, sn-0.5.3, 4.324, 36948, 1.82, 0.36, 0, 7594 
+redis, sn-0.6.0-memcpy-checks, 4.270, 30452, 1.82, 0.32, 0, 8553 +redis, je, 5.081, 36804, 2.16, 0.39, 0, 6746 +redis, scudo, 5.268, 38072, 2.26, 0.38, 0, 9861 +redis, smi, 4.587, 35500, 1.88, 0.43, 0, 8049 +cfrac, mi, 06.32, 4572, 6.32, 0.00, 0, 183 +cfrac, sn-0.6.0-full-checks, 06.62, 3640, 6.62, 0.00, 0, 512 +cfrac, sn-0.6.0, 06.26, 3300, 6.26, 0.00, 0, 429 +cfrac, sn-0.5.3, 06.33, 8424, 6.33, 0.00, 0, 445 +cfrac, sn-0.6.0-memcpy-checks, 06.26, 3308, 6.26, 0.00, 0, 434 +cfrac, je, 06.65, 10136, 6.65, 0.00, 0, 270 +cfrac, scudo, 08.42, 4664, 8.41, 0.00, 0, 613 +cfrac, smi, 07.06, 4640, 7.05, 0.00, 0, 182 +leanN, mi, 25.47, 597420, 94.71, 1.27, 0, 246037 +leanN, sn-0.6.0-full-checks, 27.41, 674896, 106.67, 0.85, 0, 14026 +leanN, sn-0.6.0, 24.78, 528436, 95.18, 0.90, 0, 12185 +leanN, sn-0.5.3, 27.13, 532548, 104.80, 0.79, 0, 3409 +leanN, sn-0.6.0-memcpy-checks, 24.86, 533536, 96.02, 0.79, 0, 13301 +leanN, je, 25.86, 541932, 97.58, 0.99, 0, 98387 +leanN, scudo, 33.37, 615476, 121.70, 1.84, 0, 548507 +leanN, smi, 27.35, 620144, 98.41, 1.95, 1, 396107 +sed, mi, 01.74, 326364, 1.66, 0.07, 0, 401 +sed, sn-0.6.0-full-checks, 01.79, 349524, 1.69, 0.10, 0, 1683 +sed, sn-0.6.0, 01.73, 342816, 1.63, 0.09, 0, 1453 +sed, sn-0.5.3, 01.73, 310464, 1.66, 0.07, 0, 683 +sed, sn-0.6.0-memcpy-checks, 01.73, 342864, 1.64, 0.08, 0, 1477 +sed, je, 01.74, 295660, 1.64, 0.10, 0, 9190 +sed, scudo, 01.81, 245396, 1.70, 0.11, 0, 60801 +sed, smi, 01.80, 316024, 1.65, 0.14, 0, 34081 +barnes, mi, 02.93, 66764, 2.91, 0.01, 0, 2464 +barnes, sn-0.6.0-full-checks, 02.90, 65716, 2.89, 0.01, 0, 2870 +barnes, sn-0.6.0, 02.90, 65672, 2.88, 0.01, 0, 2838 +barnes, sn-0.5.3, 02.86, 70068, 2.84, 0.01, 0, 2525 +barnes, sn-0.6.0-memcpy-checks, 02.90, 65628, 2.88, 0.01, 0, 2840 +barnes, je, 02.93, 74592, 2.91, 0.01, 0, 2546 +barnes, scudo, 02.93, 62684, 2.90, 0.02, 0, 3959 +barnes, smi, 03.04, 66940, 3.02, 0.02, 0, 2661 +espresso, mi, 05.25, 8276, 5.23, 0.02, 0, 177 +espresso, sn-0.6.0-full-checks, 05.28, 10580, 5.26, 0.02, 0, 1111 +espresso, sn-0.6.0, 05.11, 6332, 5.08, 0.03, 0, 659 +espresso, sn-0.5.3, 05.14, 10388, 5.12, 0.02, 0, 411 +espresso, sn-0.6.0-memcpy-checks, 05.11, 6336, 5.10, 0.01, 0, 658 +espresso, je, 05.46, 9816, 5.42, 0.04, 0, 300 +espresso, scudo, 06.02, 4792, 5.99, 0.02, 0, 640 +espresso, smi, 05.50, 6648, 5.46, 0.03, 0, 287 +z3, mi, 01.19, 71124, 1.16, 0.02, 0, 458 +z3, sn-0.6.0-full-checks, 01.18, 70424, 1.17, 0.01, 0, 766 +z3, sn-0.6.0, 01.17, 66156, 1.14, 0.02, 0, 737 +z3, sn-0.5.3, 01.18, 73764, 1.16, 0.01, 0, 568 +z3, sn-0.6.0-memcpy-checks, 01.18, 66104, 1.16, 0.01, 0, 736 +z3, je, 01.20, 69616, 1.19, 0.00, 0, 2198 +z3, scudo, 01.24, 56236, 1.21, 0.02, 0, 8669 +z3, smi, 01.23, 66260, 1.20, 0.02, 0, 3863 +gs, mi, 01.16, 57296, 1.15, 0.01, 0, 1655 +gs, sn-0.6.0-full-checks, 01.18, 54272, 1.16, 0.02, 0, 1944 +gs, sn-0.6.0, 01.17, 48076, 1.14, 0.03, 0, 1993 +gs, sn-0.5.3, 01.16, 56212, 1.14, 0.01, 0, 1874 +gs, sn-0.6.0-memcpy-checks, 01.18, 47980, 1.14, 0.03, 0, 1995 +gs, je, 01.19, 53280, 1.16, 0.03, 0, 3722 +gs, scudo, 01.21, 41568, 1.16, 0.05, 0, 17154 +gs, smi, 01.18, 57304, 1.14, 0.04, 0, 4728 +redis, mi, 4.340, 35180, 1.79, 0.38, 0, 8014 +redis, sn-0.6.0-full-checks, 4.441, 33556, 1.88, 0.35, 0, 8974 +redis, sn-0.6.0, 4.241, 30492, 1.70, 0.43, 0, 8554 +redis, sn-0.5.3, 4.310, 37092, 1.80, 0.37, 0, 7027 +redis, sn-0.6.0-memcpy-checks, 4.441, 30420, 1.90, 0.33, 0, 8490 +redis, je, 5.076, 36892, 2.13, 0.41, 0, 6769 +redis, scudo, 5.196, 37996, 2.24, 0.37, 0, 9886 +redis, smi, 4.613, 35436, 1.88, 0.44, 0, 
8042 +cfrac, mi, 06.32, 4596, 6.32, 0.00, 0, 183 +cfrac, sn-0.6.0-full-checks, 06.61, 3632, 6.61, 0.00, 0, 504 +cfrac, sn-0.6.0, 06.26, 3272, 6.25, 0.00, 0, 431 +cfrac, sn-0.5.3, 06.32, 8468, 6.31, 0.00, 0, 444 +cfrac, sn-0.6.0-memcpy-checks, 06.26, 3312, 6.26, 0.00, 0, 432 +cfrac, je, 06.66, 9992, 6.66, 0.00, 0, 271 +cfrac, scudo, 08.43, 4656, 8.43, 0.00, 0, 614 +cfrac, smi, 07.04, 4640, 7.03, 0.00, 0, 182 +leanN, mi, 25.58, 577132, 96.07, 1.15, 0, 224607 +leanN, sn-0.6.0-full-checks, 26.09, 670788, 99.44, 0.85, 0, 13183 +leanN, sn-0.6.0, 25.24, 540816, 98.88, 0.86, 0, 13196 +leanN, sn-0.5.3, 26.48, 553936, 101.92, 0.96, 1, 3961 +leanN, sn-0.6.0-memcpy-checks, 24.50, 526484, 93.75, 0.78, 0, 11139 +leanN, je, 26.93, 532264, 104.61, 1.12, 0, 99886 +leanN, scudo, 32.45, 594880, 117.90, 1.73, 0, 569976 +leanN, smi, 27.77, 626188, 101.15, 1.75, 0, 326530 +sed, mi, 01.74, 326444, 1.65, 0.09, 0, 401 +sed, sn-0.6.0-full-checks, 01.74, 347336, 1.64, 0.09, 0, 1539 +sed, sn-0.6.0, 01.72, 342792, 1.62, 0.10, 0, 1473 +sed, sn-0.5.3, 01.74, 310380, 1.66, 0.07, 0, 682 +sed, sn-0.6.0-memcpy-checks, 01.74, 342876, 1.65, 0.09, 0, 1479 +sed, je, 01.74, 301688, 1.65, 0.08, 0, 5053 +sed, scudo, 01.81, 245436, 1.71, 0.09, 0, 60800 +sed, smi, 01.81, 316808, 1.72, 0.08, 0, 34267 +barnes, mi, 02.84, 66772, 2.83, 0.01, 0, 2465 +barnes, sn-0.6.0-full-checks, 02.85, 65668, 2.84, 0.01, 0, 2850 +barnes, sn-0.6.0, 02.88, 65672, 2.86, 0.01, 0, 2846 +barnes, sn-0.5.3, 02.91, 69964, 2.89, 0.02, 0, 2519 +barnes, sn-0.6.0-memcpy-checks, 02.93, 65692, 2.91, 0.01, 0, 2840 +barnes, je, 02.93, 74488, 2.90, 0.03, 0, 2544 +barnes, scudo, 02.92, 65468, 2.89, 0.02, 0, 3574 +barnes, smi, 02.98, 66852, 2.97, 0.01, 0, 2657 +espresso, mi, 05.25, 8288, 5.23, 0.02, 0, 176 +espresso, sn-0.6.0-full-checks, 05.33, 12672, 5.29, 0.04, 0, 744 +espresso, sn-0.6.0, 05.16, 6204, 5.14, 0.02, 0, 657 +espresso, sn-0.5.3, 05.22, 10328, 5.20, 0.02, 0, 413 +espresso, sn-0.6.0-memcpy-checks, 05.16, 6204, 5.13, 0.03, 0, 655 +espresso, je, 05.50, 9992, 5.46, 0.04, 0, 331 +espresso, scudo, 06.08, 4932, 6.07, 0.01, 0, 640 +espresso, smi, 05.55, 6648, 5.51, 0.04, 0, 287 +z3, mi, 01.21, 71384, 1.19, 0.01, 0, 461 +z3, sn-0.6.0-full-checks, 01.20, 70352, 1.19, 0.01, 0, 763 +z3, sn-0.6.0, 01.19, 66132, 1.16, 0.02, 0, 740 +z3, sn-0.5.3, 01.18, 73564, 1.17, 0.01, 0, 565 +z3, sn-0.6.0-memcpy-checks, 01.22, 66008, 1.20, 0.02, 0, 732 +z3, je, 01.20, 65816, 1.18, 0.02, 0, 2768 +z3, scudo, 01.27, 56472, 1.25, 0.01, 0, 8671 +z3, smi, 01.26, 65784, 1.24, 0.02, 0, 3787 +gs, mi, 01.18, 57240, 1.14, 0.03, 0, 1653 +gs, sn-0.6.0-full-checks, 01.19, 54336, 1.15, 0.03, 0, 2006 +gs, sn-0.6.0, 01.19, 48452, 1.14, 0.05, 0, 1998 +gs, sn-0.5.3, 01.18, 56072, 1.15, 0.02, 0, 1875 +gs, sn-0.6.0-memcpy-checks, 01.19, 48452, 1.15, 0.04, 0, 1998 +gs, je, 01.21, 53448, 1.19, 0.01, 0, 3717 +gs, scudo, 01.23, 41300, 1.18, 0.04, 0, 17115 +gs, smi, 01.19, 57044, 1.14, 0.05, 0, 4617 +redis, mi, 4.328, 35216, 1.77, 0.40, 0, 8017 +redis, sn-0.6.0-full-checks, 4.438, 33844, 1.91, 0.31, 0, 9062 +redis, sn-0.6.0, 4.162, 30416, 1.72, 0.37, 0, 8595 +redis, sn-0.5.3, 4.342, 36944, 1.80, 0.38, 0, 7593 +redis, sn-0.6.0-memcpy-checks, 4.367, 30356, 1.80, 0.39, 0, 8516 +redis, je, 5.139, 36888, 2.16, 0.42, 0, 6762 +redis, scudo, 5.432, 37996, 2.35, 0.38, 0, 9824 +redis, smi, 4.675, 35420, 1.96, 0.39, 0, 8044 +cfrac, mi, 06.35, 4492, 6.35, 0.00, 0, 184 +cfrac, sn-0.6.0-full-checks, 06.64, 3604, 6.64, 0.00, 0, 498 +cfrac, sn-0.6.0, 06.27, 3276, 6.27, 0.00, 0, 429 +cfrac, sn-0.5.3, 06.35, 8424, 6.34, 0.00, 0, 
444 +cfrac, sn-0.6.0-memcpy-checks, 06.29, 3336, 6.29, 0.00, 0, 430 +cfrac, je, 06.66, 10020, 6.66, 0.00, 0, 271 +cfrac, scudo, 10.05, 4680, 10.05, 0.00, 0, 612 +cfrac, smi, 07.09, 4600, 7.09, 0.00, 0, 182 +leanN, mi, 25.94, 591264, 98.57, 1.13, 0, 184057 +leanN, sn-0.6.0-full-checks, 27.52, 671932, 105.43, 0.99, 0, 15234 +leanN, sn-0.6.0, 25.06, 516652, 97.97, 0.97, 0, 11333 +leanN, sn-0.5.3, 26.71, 540632, 103.48, 0.85, 5, 3013 +leanN, sn-0.6.0-memcpy-checks, 26.06, 528052, 101.95, 0.89, 0, 11474 +leanN, je, 25.78, 527208, 96.93, 1.01, 0, 119894 +leanN, scudo, 34.30, 593504, 123.18, 2.01, 0, 558728 +leanN, smi, 27.56, 625864, 101.01, 1.84, 2, 356616 +sed, mi, 01.73, 330580, 1.65, 0.07, 0, 406 +sed, sn-0.6.0-full-checks, 01.73, 345648, 1.65, 0.07, 0, 1696 +sed, sn-0.6.0, 01.73, 342792, 1.64, 0.08, 0, 1478 +sed, sn-0.5.3, 01.73, 310388, 1.67, 0.06, 0, 683 +sed, sn-0.6.0-memcpy-checks, 01.73, 342804, 1.64, 0.08, 0, 1455 +sed, je, 01.74, 301020, 1.67, 0.06, 0, 6483 +sed, scudo, 01.82, 245464, 1.68, 0.13, 0, 60798 +sed, smi, 01.81, 315672, 1.69, 0.11, 0, 34008 +barnes, mi, 02.89, 66772, 2.87, 0.02, 0, 2459 +barnes, sn-0.6.0-full-checks, 02.93, 65632, 2.92, 0.01, 0, 2854 +barnes, sn-0.6.0, 02.92, 65664, 2.91, 0.01, 0, 2840 +barnes, sn-0.5.3, 02.90, 69944, 2.87, 0.02, 0, 2518 +barnes, sn-0.6.0-memcpy-checks, 02.85, 65772, 2.83, 0.02, 0, 2825 +barnes, je, 02.91, 76676, 2.89, 0.01, 0, 2547 +barnes, scudo, 02.90, 62304, 2.87, 0.02, 0, 3184 +barnes, smi, 02.94, 66860, 2.92, 0.02, 0, 2656 +espresso, mi, 05.18, 8196, 5.14, 0.03, 0, 172 +espresso, sn-0.6.0-full-checks, 05.26, 12632, 5.22, 0.03, 0, 727 +espresso, sn-0.6.0, 05.11, 6296, 5.09, 0.02, 0, 656 +espresso, sn-0.5.3, 05.16, 10312, 5.14, 0.02, 0, 411 +espresso, sn-0.6.0-memcpy-checks, 05.11, 6300, 5.07, 0.04, 0, 655 +espresso, je, 05.44, 9796, 5.42, 0.02, 0, 317 +espresso, scudo, 06.07, 4884, 6.05, 0.02, 0, 642 +espresso, smi, 05.48, 6700, 5.45, 0.02, 0, 287 +z3, mi, 01.20, 71148, 1.18, 0.01, 0, 457 +z3, sn-0.6.0-full-checks, 01.19, 70152, 1.15, 0.03, 0, 756 +z3, sn-0.6.0, 01.17, 66088, 1.15, 0.01, 0, 741 +z3, sn-0.5.3, 01.17, 73680, 1.16, 0.01, 0, 565 +z3, sn-0.6.0-memcpy-checks, 01.17, 66100, 1.16, 0.00, 0, 735 +z3, je, 01.18, 65736, 1.17, 0.01, 0, 2769 +z3, scudo, 01.26, 56144, 1.23, 0.02, 0, 8153 +z3, smi, 01.23, 65912, 1.21, 0.01, 0, 3802 +gs, mi, 01.17, 57300, 1.13, 0.03, 0, 1657 +gs, sn-0.6.0-full-checks, 01.19, 54004, 1.17, 0.01, 0, 1957 +gs, sn-0.6.0, 01.17, 48348, 1.15, 0.02, 0, 1996 +gs, sn-0.5.3, 01.17, 56240, 1.14, 0.02, 0, 1875 +gs, sn-0.6.0-memcpy-checks, 01.18, 48300, 1.16, 0.01, 0, 1997 +gs, je, 01.19, 53460, 1.15, 0.03, 0, 3726 +gs, scudo, 01.21, 41428, 1.18, 0.03, 0, 17120 +gs, smi, 01.19, 57004, 1.14, 0.04, 0, 4609 +redis, mi, 4.329, 35124, 1.79, 0.38, 0, 8018 +redis, sn-0.6.0-full-checks, 4.395, 33452, 1.84, 0.37, 0, 8967 +redis, sn-0.6.0, 4.202, 30528, 1.74, 0.37, 0, 8568 +redis, sn-0.5.3, 4.314, 37140, 1.73, 0.43, 0, 7378 +redis, sn-0.6.0-memcpy-checks, 4.367, 30380, 1.81, 0.38, 0, 8527 +redis, je, 5.193, 36928, 2.21, 0.40, 0, 6766 +redis, scudo, 5.236, 37972, 2.31, 0.32, 0, 9876 +redis, smi, 4.599, 35436, 1.85, 0.46, 0, 8052 +cfrac, mi, 06.33, 4496, 6.33, 0.00, 0, 182 +cfrac, sn-0.6.0-full-checks, 06.62, 3584, 6.62, 0.00, 0, 495 +cfrac, sn-0.6.0, 06.28, 3312, 6.28, 0.00, 0, 435 +cfrac, sn-0.5.3, 06.32, 8420, 6.32, 0.00, 0, 447 +cfrac, sn-0.6.0-memcpy-checks, 06.26, 3336, 6.26, 0.00, 0, 431 +cfrac, je, 06.68, 10084, 6.68, 0.00, 0, 273 +cfrac, scudo, 08.43, 4672, 8.43, 0.00, 0, 615 +cfrac, smi, 07.02, 4504, 7.01, 0.00, 0, 
182 +leanN, mi, 25.94, 578956, 98.00, 1.17, 0, 216122 +leanN, sn-0.6.0-full-checks, 25.72, 685952, 96.81, 0.91, 0, 12094 +leanN, sn-0.6.0, 24.59, 547572, 94.33, 0.82, 0, 11927 +leanN, sn-0.5.3, 26.72, 553304, 103.27, 0.74, 0, 3848 +leanN, sn-0.6.0-memcpy-checks, 25.88, 525276, 101.33, 0.80, 0, 12426 +leanN, je, 25.71, 531268, 96.34, 1.03, 1, 177553 +leanN, scudo, 32.33, 597856, 116.09, 1.80, 0, 515053 +leanN, smi, 27.63, 626168, 101.20, 1.82, 0, 328983 +sed, mi, 01.77, 326436, 1.71, 0.05, 0, 402 +sed, sn-0.6.0-full-checks, 01.79, 347652, 1.70, 0.09, 0, 1639 +sed, sn-0.6.0, 01.76, 342800, 1.69, 0.07, 0, 1456 +sed, sn-0.5.3, 01.77, 310252, 1.70, 0.07, 0, 685 +sed, sn-0.6.0-memcpy-checks, 01.78, 342860, 1.67, 0.10, 0, 1480 +sed, je, 01.79, 293892, 1.71, 0.07, 0, 8287 +sed, scudo, 01.83, 245420, 1.71, 0.12, 0, 60795 +sed, smi, 01.83, 316888, 1.69, 0.14, 0, 34282 +barnes, mi, 02.85, 66972, 2.82, 0.03, 0, 2465 +barnes, sn-0.6.0-full-checks, 02.87, 65580, 2.85, 0.01, 0, 2854 +barnes, sn-0.6.0, 02.89, 65644, 2.87, 0.01, 0, 2849 +barnes, sn-0.5.3, 02.86, 70168, 2.84, 0.02, 0, 2518 +barnes, sn-0.6.0-memcpy-checks, 02.90, 65808, 2.87, 0.02, 0, 2847 +barnes, je, 02.86, 76616, 2.84, 0.01, 0, 2550 +barnes, scudo, 02.93, 61688, 2.90, 0.02, 0, 4275 +barnes, smi, 02.94, 66828, 2.92, 0.01, 0, 2659 +espresso, mi, 05.17, 8216, 5.15, 0.02, 0, 177 +espresso, sn-0.6.0-full-checks, 05.27, 12940, 5.25, 0.02, 0, 757 +espresso, sn-0.6.0, 05.13, 6248, 5.10, 0.02, 0, 657 +espresso, sn-0.5.3, 05.12, 10276, 5.09, 0.02, 0, 408 +espresso, sn-0.6.0-memcpy-checks, 05.14, 6252, 5.09, 0.04, 0, 657 +espresso, je, 05.46, 9956, 5.45, 0.01, 0, 300 +espresso, scudo, 06.02, 4996, 5.99, 0.03, 0, 642 +espresso, smi, 05.48, 6644, 5.45, 0.02, 0, 287 +z3, mi, 01.19, 71072, 1.18, 0.01, 0, 454 +z3, sn-0.6.0-full-checks, 01.18, 70108, 1.16, 0.01, 0, 766 +z3, sn-0.6.0, 01.17, 66224, 1.15, 0.01, 0, 738 +z3, sn-0.5.3, 01.18, 73504, 1.16, 0.01, 0, 562 +z3, sn-0.6.0-memcpy-checks, 01.18, 66176, 1.15, 0.02, 0, 737 +z3, je, 01.19, 70012, 1.17, 0.02, 0, 2781 +z3, scudo, 01.26, 56208, 1.21, 0.04, 0, 8662 +z3, smi, 01.24, 65296, 1.22, 0.02, 0, 3679 +gs, mi, 01.17, 57476, 1.13, 0.03, 0, 1656 +gs, sn-0.6.0-full-checks, 01.18, 54372, 1.15, 0.02, 0, 1953 +gs, sn-0.6.0, 01.17, 48396, 1.14, 0.02, 0, 1999 +gs, sn-0.5.3, 01.16, 56036, 1.12, 0.03, 0, 1875 +gs, sn-0.6.0-memcpy-checks, 01.17, 48444, 1.14, 0.02, 0, 1997 +gs, je, 01.19, 53260, 1.16, 0.02, 0, 3722 +gs, scudo, 01.21, 41520, 1.17, 0.03, 0, 17159 +gs, smi, 01.19, 56596, 1.17, 0.02, 0, 4579 +redis, mi, 4.335, 35180, 1.80, 0.38, 0, 8010 +redis, sn-0.6.0-full-checks, 4.486, 33236, 1.84, 0.42, 0, 8862 +redis, sn-0.6.0, 4.217, 30432, 1.76, 0.36, 0, 8574 +redis, sn-0.5.3, 4.326, 37000, 1.80, 0.37, 0, 7342 +redis, sn-0.6.0-memcpy-checks, 4.263, 30432, 1.80, 0.34, 0, 8561 +redis, je, 5.044, 36848, 2.17, 0.36, 0, 6771 +redis, scudo, 5.209, 38000, 2.16, 0.45, 0, 9888 +redis, smi, 4.618, 35860, 2.01, 0.31, 0, 8127 +cfrac, mi, 06.33, 4548, 6.32, 0.00, 0, 185 +cfrac, sn-0.6.0-full-checks, 06.63, 3632, 6.63, 0.00, 0, 510 +cfrac, sn-0.6.0, 06.26, 3336, 6.26, 0.00, 0, 435 +cfrac, sn-0.5.3, 06.33, 8404, 6.32, 0.00, 0, 446 +cfrac, sn-0.6.0-memcpy-checks, 06.27, 3336, 6.27, 0.00, 0, 432 +cfrac, je, 06.68, 10080, 6.68, 0.00, 0, 272 +cfrac, scudo, 08.44, 4644, 8.43, 0.00, 0, 610 +cfrac, smi, 07.05, 4572, 7.05, 0.00, 0, 178 +leanN, mi, 26.35, 601364, 100.71, 1.07, 0, 172904 +leanN, sn-0.6.0-full-checks, 26.27, 673524, 100.89, 0.85, 0, 13522 +leanN, sn-0.6.0, 25.81, 534344, 100.53, 0.99, 0, 12584 +leanN, sn-0.5.3, 25.97, 
547068, 99.19, 0.88, 0, 3336 +leanN, sn-0.6.0-memcpy-checks, 24.65, 529484, 93.79, 0.87, 0, 12943 +leanN, je, 26.09, 530264, 98.26, 1.12, 0, 153838 +leanN, scudo, 32.48, 621548, 116.91, 1.89, 2, 604914 +leanN, smi, 26.99, 617668, 97.22, 1.82, 0, 395341 +sed, mi, 01.74, 326300, 1.65, 0.08, 0, 398 +sed, sn-0.6.0-full-checks, 01.76, 349592, 1.69, 0.07, 0, 1642 +sed, sn-0.6.0, 01.76, 342784, 1.64, 0.12, 0, 1476 +sed, sn-0.5.3, 01.77, 310252, 1.68, 0.08, 0, 683 +sed, sn-0.6.0-memcpy-checks, 01.76, 342828, 1.65, 0.10, 0, 1454 +sed, je, 01.73, 301076, 1.62, 0.10, 0, 5991 +sed, scudo, 01.81, 245472, 1.69, 0.12, 0, 60797 +sed, smi, 01.81, 317340, 1.70, 0.10, 0, 34402 +barnes, mi, 02.88, 66928, 2.85, 0.02, 0, 2466 +barnes, sn-0.6.0-full-checks, 02.88, 65744, 2.87, 0.01, 0, 2863 +barnes, sn-0.6.0, 02.88, 65636, 2.86, 0.02, 0, 2848 +barnes, sn-0.5.3, 02.87, 70164, 2.84, 0.02, 0, 2523 +barnes, sn-0.6.0-memcpy-checks, 02.87, 65672, 2.86, 0.01, 0, 2840 +barnes, je, 02.87, 76688, 2.84, 0.02, 0, 2551 +barnes, scudo, 02.93, 63700, 2.92, 0.00, 0, 3472 +barnes, smi, 02.95, 66776, 2.93, 0.01, 0, 2660 +espresso, mi, 05.18, 8220, 5.15, 0.02, 0, 173 +espresso, sn-0.6.0-full-checks, 05.27, 12588, 5.26, 0.01, 0, 725 +espresso, sn-0.6.0, 05.13, 6432, 5.11, 0.01, 0, 656 +espresso, sn-0.5.3, 05.17, 10280, 5.15, 0.02, 0, 410 +espresso, sn-0.6.0-memcpy-checks, 05.13, 6268, 5.10, 0.03, 0, 658 +espresso, je, 05.48, 9980, 5.45, 0.03, 0, 317 +espresso, scudo, 06.13, 4980, 6.10, 0.02, 0, 604 +espresso, smi, 05.57, 6644, 5.55, 0.02, 0, 285 +z3, mi, 01.21, 71192, 1.19, 0.01, 0, 459 +z3, sn-0.6.0-full-checks, 01.19, 70400, 1.19, 0.00, 0, 772 +z3, sn-0.6.0, 01.18, 65980, 1.17, 0.01, 0, 736 +z3, sn-0.5.3, 01.18, 73772, 1.16, 0.02, 0, 568 +z3, sn-0.6.0-memcpy-checks, 01.20, 65928, 1.18, 0.01, 0, 731 +z3, je, 01.21, 65892, 1.19, 0.01, 0, 2772 +z3, scudo, 01.26, 56160, 1.23, 0.03, 0, 8667 +z3, smi, 01.25, 65920, 1.22, 0.02, 0, 3847 +gs, mi, 01.18, 57096, 1.15, 0.03, 0, 1652 +gs, sn-0.6.0-full-checks, 01.22, 56300, 1.18, 0.03, 0, 1960 +gs, sn-0.6.0, 01.19, 48464, 1.15, 0.03, 0, 1999 +gs, sn-0.5.3, 01.19, 56172, 1.16, 0.02, 0, 1872 +gs, sn-0.6.0-memcpy-checks, 01.19, 47956, 1.17, 0.02, 0, 1990 +gs, je, 01.21, 53468, 1.18, 0.02, 0, 3719 +gs, scudo, 01.23, 41448, 1.15, 0.07, 0, 17161 +gs, smi, 01.21, 57184, 1.18, 0.02, 0, 4672 +redis, mi, 4.357, 35104, 1.80, 0.39, 0, 7991 +redis, sn-0.6.0-full-checks, 4.432, 33844, 1.81, 0.42, 0, 9037 +redis, sn-0.6.0, 4.178, 30424, 1.69, 0.40, 0, 8592 +redis, sn-0.5.3, 4.310, 37016, 1.79, 0.37, 0, 7484 +redis, sn-0.6.0-memcpy-checks, 4.422, 30536, 1.94, 0.28, 0, 8505 +redis, je, 5.238, 36860, 2.25, 0.37, 0, 6765 +redis, scudo, 5.234, 37960, 2.21, 0.42, 0, 9891 +redis, smi, 4.669, 35336, 1.94, 0.41, 0, 8028 +cfrac, mi, 06.35, 4408, 6.35, 0.00, 0, 181 +cfrac, sn-0.6.0-full-checks, 06.61, 3708, 6.61, 0.00, 0, 516 +cfrac, sn-0.6.0, 06.29, 3304, 6.29, 0.00, 0, 436 +cfrac, sn-0.5.3, 06.33, 8260, 6.33, 0.00, 0, 448 +cfrac, sn-0.6.0-memcpy-checks, 06.27, 3336, 6.27, 0.00, 0, 432 +cfrac, je, 06.69, 10076, 6.68, 0.00, 0, 269 +cfrac, scudo, 08.43, 4708, 8.43, 0.00, 0, 610 +cfrac, smi, 07.14, 4596, 7.13, 0.00, 0, 184 +leanN, mi, 26.06, 591428, 99.05, 1.13, 3, 226227 +leanN, sn-0.6.0-full-checks, 26.06, 653060, 99.23, 1.01, 0, 12978 +leanN, sn-0.6.0, 25.81, 539980, 100.77, 0.90, 0, 13684 +leanN, sn-0.5.3, 25.73, 533320, 97.81, 0.66, 0, 3598 +leanN, sn-0.6.0-memcpy-checks, 25.97, 551676, 101.63, 0.87, 0, 14181 +leanN, je, 26.63, 496704, 101.91, 1.14, 1, 152314 +leanN, scudo, 32.46, 581268, 116.25, 1.70, 0, 531142 
+leanN, smi, 27.36, 621904, 99.41, 2.03, 4, 392238 +sed, mi, 01.76, 326452, 1.68, 0.07, 0, 401 +sed, sn-0.6.0-full-checks, 01.75, 347260, 1.66, 0.08, 0, 1585 +sed, sn-0.6.0, 01.73, 342852, 1.63, 0.09, 0, 1479 +sed, sn-0.5.3, 01.73, 310284, 1.66, 0.07, 0, 682 +sed, sn-0.6.0-memcpy-checks, 01.73, 342800, 1.65, 0.08, 0, 1478 +sed, je, 01.74, 295456, 1.68, 0.06, 0, 9177 +sed, scudo, 01.81, 245652, 1.68, 0.13, 0, 60802 +sed, smi, 01.81, 317072, 1.70, 0.11, 0, 34381 +barnes, mi, 02.86, 66820, 2.84, 0.02, 0, 2464 +barnes, sn-0.6.0-full-checks, 02.87, 65616, 2.85, 0.01, 0, 2855 +barnes, sn-0.6.0, 02.88, 65684, 2.85, 0.02, 0, 2840 +barnes, sn-0.5.3, 02.85, 70132, 2.83, 0.01, 0, 2524 +barnes, sn-0.6.0-memcpy-checks, 02.87, 65672, 2.85, 0.02, 0, 2839 +barnes, je, 02.88, 78712, 2.87, 0.00, 0, 2546 +barnes, scudo, 02.95, 61940, 2.94, 0.01, 0, 4267 +barnes, smi, 02.98, 66836, 2.96, 0.01, 0, 2659 +espresso, mi, 05.19, 8196, 5.17, 0.01, 0, 169 +espresso, sn-0.6.0-full-checks, 05.28, 12652, 5.26, 0.02, 0, 733 +espresso, sn-0.6.0, 05.16, 6364, 5.15, 0.01, 0, 660 +espresso, sn-0.5.3, 05.13, 10292, 5.11, 0.02, 0, 411 +espresso, sn-0.6.0-memcpy-checks, 05.12, 6280, 5.10, 0.02, 0, 659 +espresso, je, 05.48, 9832, 5.46, 0.01, 0, 345 +espresso, scudo, 06.03, 4896, 6.01, 0.02, 0, 647 +espresso, smi, 05.48, 6700, 5.46, 0.02, 0, 286 +z3, mi, 01.20, 71152, 1.18, 0.02, 0, 457 +z3, sn-0.6.0-full-checks, 01.19, 70324, 1.16, 0.02, 0, 763 +z3, sn-0.6.0, 01.17, 65920, 1.15, 0.02, 0, 731 +z3, sn-0.5.3, 01.16, 73628, 1.14, 0.02, 0, 564 +z3, sn-0.6.0-memcpy-checks, 01.18, 66240, 1.15, 0.02, 0, 736 +z3, je, 01.18, 68604, 1.17, 0.01, 0, 2431 +z3, scudo, 01.26, 56284, 1.25, 0.01, 0, 8161 +z3, smi, 01.23, 65772, 1.21, 0.01, 0, 3727 +gs, mi, 01.17, 57540, 1.15, 0.02, 0, 1657 +gs, sn-0.6.0-full-checks, 01.19, 54320, 1.15, 0.03, 0, 1961 +gs, sn-0.6.0, 01.17, 48424, 1.14, 0.02, 0, 1998 +gs, sn-0.5.3, 01.18, 55808, 1.13, 0.03, 0, 1871 +gs, sn-0.6.0-memcpy-checks, 01.18, 48492, 1.17, 0.01, 0, 2005 +gs, je, 01.19, 53572, 1.15, 0.04, 0, 3728 +gs, scudo, 01.21, 41320, 1.15, 0.05, 0, 17111 +gs, smi, 01.22, 56964, 1.17, 0.04, 0, 4612 +redis, mi, 4.489, 35192, 1.81, 0.44, 0, 7969 +redis, sn-0.6.0-full-checks, 4.408, 33576, 1.89, 0.32, 0, 9014 +redis, sn-0.6.0, 4.193, 30440, 1.71, 0.40, 0, 8578 +redis, sn-0.5.3, 4.289, 37084, 1.80, 0.35, 0, 7347 +redis, sn-0.6.0-memcpy-checks, 4.294, 30464, 1.79, 0.37, 0, 8546 +redis, je, 5.052, 38772, 2.20, 0.34, 0, 6761 +redis, scudo, 5.244, 38000, 2.19, 0.44, 0, 9869 +redis, smi, 4.599, 35432, 1.87, 0.44, 0, 8057 +cfrac, mi, 06.34, 4596, 6.34, 0.00, 0, 185 +cfrac, sn-0.6.0-full-checks, 06.64, 3660, 6.64, 0.00, 0, 508 +cfrac, sn-0.6.0, 06.26, 3328, 6.26, 0.00, 0, 428 +cfrac, sn-0.5.3, 06.31, 8308, 6.30, 0.00, 0, 448 +cfrac, sn-0.6.0-memcpy-checks, 06.28, 3336, 6.28, 0.00, 0, 432 +cfrac, je, 06.72, 10056, 6.71, 0.00, 0, 271 +cfrac, scudo, 08.45, 4568, 8.44, 0.00, 0, 615 +cfrac, smi, 07.03, 4584, 7.03, 0.00, 0, 181 +leanN, mi, 26.55, 593452, 101.72, 1.13, 0, 230978 +leanN, sn-0.6.0-full-checks, 26.62, 676740, 102.76, 0.92, 0, 13834 +leanN, sn-0.6.0, 24.98, 545136, 96.54, 0.80, 0, 12708 +leanN, sn-0.5.3, 25.65, 551156, 97.81, 0.78, 4, 4057 +leanN, sn-0.6.0-memcpy-checks, 25.64, 536612, 100.11, 0.84, 0, 13271 +leanN, je, 26.16, 515292, 100.02, 0.99, 3, 110034 +leanN, scudo, 31.78, 599964, 114.04, 1.60, 4, 654494 +leanN, smi, 27.40, 619364, 99.87, 1.82, 0, 362050 +sed, mi, 01.75, 330536, 1.64, 0.11, 0, 404 +sed, sn-0.6.0-full-checks, 01.74, 347536, 1.62, 0.11, 0, 1703 +sed, sn-0.6.0, 01.74, 342788, 1.64, 0.10, 0, 
1477 +sed, sn-0.5.3, 01.72, 310240, 1.64, 0.08, 0, 682 +sed, sn-0.6.0-memcpy-checks, 01.72, 342916, 1.64, 0.08, 0, 1477 +sed, je, 01.74, 295444, 1.66, 0.07, 0, 9184 +sed, scudo, 01.80, 245660, 1.70, 0.10, 0, 60801 +sed, smi, 01.82, 316496, 1.70, 0.12, 0, 34208 +barnes, mi, 02.86, 66908, 2.83, 0.01, 0, 2465 +barnes, sn-0.6.0-full-checks, 02.85, 65632, 2.83, 0.01, 0, 2853 +barnes, sn-0.6.0, 02.85, 65604, 2.84, 0.01, 0, 2829 +barnes, sn-0.5.3, 02.86, 69948, 2.84, 0.01, 0, 2525 +barnes, sn-0.6.0-memcpy-checks, 02.86, 65692, 2.85, 0.00, 0, 2843 +barnes, je, 02.90, 74568, 2.88, 0.01, 0, 2547 +barnes, scudo, 02.86, 63084, 2.84, 0.01, 0, 3492 +barnes, smi, 02.94, 66784, 2.92, 0.02, 0, 2661 +espresso, mi, 05.19, 8288, 5.15, 0.04, 0, 177 +espresso, sn-0.6.0-full-checks, 05.27, 12648, 5.24, 0.03, 0, 737 +espresso, sn-0.6.0, 05.09, 6296, 5.07, 0.02, 0, 656 +espresso, sn-0.5.3, 05.14, 10328, 5.11, 0.02, 0, 412 +espresso, sn-0.6.0-memcpy-checks, 05.12, 6304, 5.09, 0.02, 0, 655 +espresso, je, 05.44, 9864, 5.41, 0.03, 0, 304 +espresso, scudo, 06.03, 4796, 6.01, 0.02, 0, 642 +espresso, smi, 05.48, 6708, 5.46, 0.02, 0, 291 +z3, mi, 01.18, 71348, 1.15, 0.03, 0, 459 +z3, sn-0.6.0-full-checks, 01.17, 70352, 1.15, 0.01, 0, 769 +z3, sn-0.6.0, 01.17, 66064, 1.15, 0.01, 0, 732 +z3, sn-0.5.3, 01.17, 73444, 1.16, 0.01, 0, 564 +z3, sn-0.6.0-memcpy-checks, 01.18, 66208, 1.17, 0.01, 0, 736 +z3, je, 01.18, 65876, 1.16, 0.01, 0, 2782 +z3, scudo, 01.25, 56096, 1.22, 0.02, 0, 8667 +z3, smi, 01.24, 65752, 1.21, 0.02, 0, 3822 +gs, mi, 01.17, 57476, 1.14, 0.02, 0, 1655 +gs, sn-0.6.0-full-checks, 01.19, 53988, 1.16, 0.02, 0, 2022 +gs, sn-0.6.0, 01.19, 48128, 1.17, 0.01, 0, 1997 +gs, sn-0.5.3, 01.17, 55792, 1.13, 0.03, 0, 1869 +gs, sn-0.6.0-memcpy-checks, 01.19, 48212, 1.16, 0.02, 0, 2001 +gs, je, 01.21, 53748, 1.18, 0.02, 0, 3721 +gs, scudo, 01.20, 41096, 1.16, 0.04, 0, 17116 +gs, smi, 01.19, 57064, 1.15, 0.04, 0, 4646 +redis, mi, 4.386, 35048, 1.86, 0.34, 0, 7998 +redis, sn-0.6.0-full-checks, 4.478, 33468, 1.91, 0.34, 0, 8968 +redis, sn-0.6.0, 4.199, 30328, 1.78, 0.33, 0, 8578 +redis, sn-0.5.3, 4.262, 37176, 1.76, 0.38, 0, 6883 +redis, sn-0.6.0-memcpy-checks, 4.283, 30304, 1.82, 0.34, 0, 8541 +redis, je, 5.047, 36796, 2.15, 0.39, 0, 6770 +redis, scudo, 5.271, 38052, 2.23, 0.41, 0, 9871 +redis, smi, 4.596, 35492, 1.89, 0.42, 0, 8051 +cfrac, mi, 06.34, 4460, 6.34, 0.00, 0, 182 +cfrac, sn-0.6.0-full-checks, 06.61, 3604, 6.60, 0.00, 0, 494 +cfrac, sn-0.6.0, 06.27, 3312, 6.27, 0.00, 0, 430 +cfrac, sn-0.5.3, 06.33, 8404, 6.32, 0.00, 0, 442 +cfrac, sn-0.6.0-memcpy-checks, 06.26, 3320, 6.26, 0.00, 0, 428 +cfrac, je, 06.65, 10052, 6.65, 0.00, 0, 271 +cfrac, scudo, 08.43, 4660, 8.42, 0.00, 0, 613 +cfrac, smi, 07.14, 4500, 7.13, 0.00, 0, 181 +leanN, mi, 27.12, 578536, 105.64, 1.15, 2, 163365 +leanN, sn-0.6.0-full-checks, 26.12, 671828, 98.39, 0.91, 0, 13146 +leanN, sn-0.6.0, 24.71, 546284, 94.03, 0.79, 0, 12227 +leanN, sn-0.5.3, 25.67, 549420, 97.34, 0.76, 0, 3992 +leanN, sn-0.6.0-memcpy-checks, 24.79, 534008, 95.51, 0.88, 0, 12152 +leanN, je, 25.97, 501392, 97.88, 1.12, 4, 154360 +leanN, scudo, 34.44, 587232, 125.79, 1.89, 1, 484336 +leanN, smi, 29.18, 626364, 109.19, 1.87, 0, 255797 +sed, mi, 01.74, 326500, 1.67, 0.07, 0, 402 +sed, sn-0.6.0-full-checks, 01.74, 349584, 1.65, 0.09, 0, 1713 +sed, sn-0.6.0, 01.73, 342852, 1.65, 0.08, 0, 1476 +sed, sn-0.5.3, 01.73, 310336, 1.67, 0.06, 0, 677 +sed, sn-0.6.0-memcpy-checks, 01.73, 342800, 1.65, 0.08, 0, 1477 +sed, je, 01.72, 301980, 1.65, 0.07, 0, 3141 +sed, scudo, 01.82, 245416, 1.72, 0.10, 
0, 60795
+sed, smi, 01.81, 317056, 1.69, 0.11, 0, 34329
diff --git a/docs/security/data/doublefreeprotection.gif b/docs/security/data/doublefreeprotection.gif
new file mode 100644
index 000000000..1ab646a99
Binary files /dev/null and b/docs/security/data/doublefreeprotection.gif differ
diff --git a/docs/security/data/memcpy_perf.png b/docs/security/data/memcpy_perf.png
new file mode 100644
index 000000000..cba91a725
Binary files /dev/null and b/docs/security/data/memcpy_perf.png differ
diff --git a/docs/security/data/perfgraph-memcpy-only.png b/docs/security/data/perfgraph-memcpy-only.png
new file mode 100644
index 000000000..719f721a4
Binary files /dev/null and b/docs/security/data/perfgraph-memcpy-only.png differ
diff --git a/docs/security/data/perfgraph.png b/docs/security/data/perfgraph.png
new file mode 100644
index 000000000..da8f7cce3
Binary files /dev/null and b/docs/security/data/perfgraph.png differ
diff --git a/docs/security/data/res_je.csv b/docs/security/data/res_je.csv
new file mode 100644
index 000000000..08ae80edf
--- /dev/null
+++ b/docs/security/data/res_je.csv
@@ -0,0 +1,160 @@
+barnes, je, 02.90, 76652, 2.87, 0.02, 0, 2550
+espresso, je, 05.54, 12184, 5.50, 0.03, 0, 322
+z3, je, 01.20, 65792, 1.18, 0.02, 0, 2770
+gs, je, 01.20, 53216, 1.16, 0.04, 0, 3724
+redis, je, 5.060, 36836, 2.21, 0.32, 1, 6765
+cfrac, je, 06.64, 10056, 6.64, 0.00, 0, 273
+leanN, je, 26.06, 503092, 97.26, 1.13, 0, 171614
+sed, je, 01.74, 300292, 1.65, 0.08, 0, 8858
+barnes, je, 02.86, 76748, 2.84, 0.01, 0, 2547
+espresso, je, 05.44, 9956, 5.44, 0.00, 0, 355
+z3, je, 01.17, 66016, 1.15, 0.02, 0, 2773
+gs, je, 01.20, 53824, 1.16, 0.04, 0, 3729
+redis, je, 5.109, 36816, 2.16, 0.41, 0, 6769
+cfrac, je, 06.64, 10072, 6.63, 0.00, 0, 273
+leanN, je, 25.99, 529828, 98.94, 1.02, 0, 106383
+sed, je, 01.73, 295420, 1.64, 0.08, 0, 9180
+barnes, je, 02.85, 76568, 2.83, 0.02, 0, 2545
+espresso, je, 05.44, 9880, 5.41, 0.03, 0, 297
+z3, je, 01.19, 65880, 1.16, 0.03, 0, 2762
+gs, je, 01.19, 53716, 1.16, 0.03, 0, 3723
+redis, je, 5.021, 36832, 2.09, 0.43, 0, 6775
+cfrac, je, 06.72, 10136, 6.72, 0.00, 0, 270
+leanN, je, 26.89, 529516, 103.02, 1.26, 0, 157578
+sed, je, 01.73, 293900, 1.64, 0.08, 0, 8280
+barnes, je, 02.86, 76616, 2.84, 0.01, 0, 2548
+espresso, je, 05.55, 9940, 5.52, 0.02, 0, 388
+z3, je, 01.18, 67784, 1.16, 0.02, 0, 2757
+gs, je, 01.19, 55668, 1.15, 0.03, 0, 3715
+redis, je, 5.026, 36816, 2.20, 0.32, 0, 6771
+cfrac, je, 06.62, 10056, 6.62, 0.00, 0, 271
+leanN, je, 26.95, 514740, 103.52, 1.21, 0, 204418
+sed, je, 01.73, 295488, 1.67, 0.06, 0, 9192
+barnes, je, 02.85, 76636, 2.83, 0.02, 0, 2547
+espresso, je, 05.53, 11876, 5.49, 0.04, 0, 334
+z3, je, 01.17, 65920, 1.16, 0.01, 0, 2771
+gs, je, 01.18, 53792, 1.16, 0.01, 0, 3727
+redis, je, 4.991, 36876, 2.10, 0.40, 0, 6772
+cfrac, je, 06.63, 10056, 6.63, 0.00, 0, 271
+leanN, je, 25.51, 520756, 95.50, 1.06, 0, 164558
+sed, je, 01.73, 295712, 1.65, 0.08, 0, 9184
+barnes, je, 02.86, 76672, 2.84, 0.01, 0, 2550
+espresso, je, 05.43, 9908, 5.39, 0.04, 0, 332
+z3, je, 01.18, 65916, 1.17, 0.01, 0, 2774
+gs, je, 01.18, 53628, 1.16, 0.02, 0, 3712
+redis, je, 5.033, 36940, 2.16, 0.36, 0, 6761
+cfrac, je, 06.67, 9996, 6.66, 0.00, 0, 270
+leanN, je, 26.70, 513676, 103.21, 0.97, 0, 78470
+sed, je, 01.74, 295408, 1.65, 0.09, 0, 9194
+barnes, je, 02.85, 76616, 2.83, 0.01, 0, 2551
+espresso, je, 05.44, 10148, 5.42, 0.02, 0, 307
+z3, je, 01.19, 66028, 1.16, 0.02, 0, 2773
+gs, je, 01.18, 53568, 1.13, 0.05, 0, 3723
+redis, je, 4.989, 36816, 2.17, 0.33, 0, 6756
+cfrac, je,
06.63, 10000, 6.62, 0.00, 0, 270 +leanN, je, 26.63, 489512, 103.36, 1.12, 0, 178461 +sed, je, 01.76, 295512, 1.69, 0.07, 0, 9198 +barnes, je, 02.89, 76676, 2.87, 0.01, 0, 2546 +espresso, je, 05.49, 9904, 5.47, 0.01, 0, 346 +z3, je, 01.19, 65840, 1.16, 0.02, 0, 2772 +gs, je, 01.20, 53220, 1.15, 0.04, 0, 3711 +redis, je, 5.087, 36972, 2.19, 0.36, 0, 6784 +cfrac, je, 06.66, 10104, 6.65, 0.00, 0, 271 +leanN, je, 26.02, 524788, 98.90, 1.07, 0, 140108 +sed, je, 01.75, 294172, 1.65, 0.09, 0, 8283 +barnes, je, 02.85, 76744, 2.85, 0.00, 0, 2549 +espresso, je, 05.45, 9860, 5.45, 0.00, 0, 336 +z3, je, 01.18, 65900, 1.17, 0.01, 0, 2771 +gs, je, 01.20, 53552, 1.17, 0.03, 0, 3733 +redis, je, 5.061, 36840, 2.14, 0.40, 0, 6776 +cfrac, je, 06.64, 10108, 6.64, 0.00, 0, 271 +leanN, je, 27.05, 504264, 103.22, 1.17, 0, 156329 +sed, je, 01.74, 296120, 1.65, 0.09, 0, 8271 +barnes, je, 02.91, 76640, 2.88, 0.02, 0, 2547 +espresso, je, 05.47, 9932, 5.44, 0.02, 0, 355 +z3, je, 01.19, 65772, 1.17, 0.01, 0, 2774 +gs, je, 01.20, 53652, 1.18, 0.02, 0, 3718 +redis, je, 5.188, 36876, 2.28, 0.32, 0, 6771 +cfrac, je, 06.68, 10028, 6.68, 0.00, 0, 271 +leanN, je, 26.47, 507228, 100.33, 0.96, 0, 126259 +sed, je, 01.73, 295408, 1.63, 0.09, 0, 9187 +barnes, je, 02.87, 76584, 2.87, 0.00, 0, 2549 +espresso, je, 05.47, 10276, 5.46, 0.01, 0, 295 +z3, je, 01.18, 65908, 1.17, 0.01, 0, 2770 +gs, je, 01.19, 53308, 1.16, 0.02, 0, 3724 +redis, je, 5.107, 36840, 2.19, 0.37, 0, 6749 +cfrac, je, 06.64, 10028, 6.64, 0.00, 0, 269 +leanN, je, 25.87, 521736, 96.91, 1.09, 0, 178770 +sed, je, 01.74, 293788, 1.67, 0.07, 0, 8269 +barnes, je, 02.88, 76748, 2.86, 0.01, 0, 2547 +espresso, je, 05.47, 9972, 5.45, 0.02, 0, 307 +z3, je, 01.19, 65768, 1.17, 0.01, 0, 2772 +gs, je, 01.20, 53332, 1.17, 0.02, 0, 3721 +redis, je, 5.073, 36820, 2.24, 0.31, 0, 6761 +cfrac, je, 06.65, 10024, 6.65, 0.00, 0, 271 +leanN, je, 25.38, 538536, 95.43, 0.90, 0, 84230 +sed, je, 01.75, 300848, 1.66, 0.08, 0, 7482 +barnes, je, 02.85, 76700, 2.83, 0.01, 0, 2550 +espresso, je, 05.45, 9988, 5.41, 0.03, 0, 280 +z3, je, 01.19, 65748, 1.17, 0.01, 0, 2768 +gs, je, 01.19, 53616, 1.16, 0.03, 0, 3722 +redis, je, 5.081, 36804, 2.16, 0.39, 0, 6746 +cfrac, je, 06.65, 10136, 6.65, 0.00, 0, 270 +leanN, je, 25.86, 541932, 97.58, 0.99, 0, 98387 +sed, je, 01.74, 295660, 1.64, 0.10, 0, 9190 +barnes, je, 02.93, 74592, 2.91, 0.01, 0, 2546 +espresso, je, 05.46, 9816, 5.42, 0.04, 0, 300 +z3, je, 01.20, 69616, 1.19, 0.00, 0, 2198 +gs, je, 01.19, 53280, 1.16, 0.03, 0, 3722 +redis, je, 5.076, 36892, 2.13, 0.41, 0, 6769 +cfrac, je, 06.66, 9992, 6.66, 0.00, 0, 271 +leanN, je, 26.93, 532264, 104.61, 1.12, 0, 99886 +sed, je, 01.74, 301688, 1.65, 0.08, 0, 5053 +barnes, je, 02.93, 74488, 2.90, 0.03, 0, 2544 +espresso, je, 05.50, 9992, 5.46, 0.04, 0, 331 +z3, je, 01.20, 65816, 1.18, 0.02, 0, 2768 +gs, je, 01.21, 53448, 1.19, 0.01, 0, 3717 +redis, je, 5.139, 36888, 2.16, 0.42, 0, 6762 +cfrac, je, 06.66, 10020, 6.66, 0.00, 0, 271 +leanN, je, 25.78, 527208, 96.93, 1.01, 0, 119894 +sed, je, 01.74, 301020, 1.67, 0.06, 0, 6483 +barnes, je, 02.91, 76676, 2.89, 0.01, 0, 2547 +espresso, je, 05.44, 9796, 5.42, 0.02, 0, 317 +z3, je, 01.18, 65736, 1.17, 0.01, 0, 2769 +gs, je, 01.19, 53460, 1.15, 0.03, 0, 3726 +redis, je, 5.193, 36928, 2.21, 0.40, 0, 6766 +cfrac, je, 06.68, 10084, 6.68, 0.00, 0, 273 +leanN, je, 25.71, 531268, 96.34, 1.03, 1, 177553 +sed, je, 01.79, 293892, 1.71, 0.07, 0, 8287 +barnes, je, 02.86, 76616, 2.84, 0.01, 0, 2550 +espresso, je, 05.46, 9956, 5.45, 0.01, 0, 300 +z3, je, 01.19, 70012, 1.17, 0.02, 0, 
2781 +gs, je, 01.19, 53260, 1.16, 0.02, 0, 3722 +redis, je, 5.044, 36848, 2.17, 0.36, 0, 6771 +cfrac, je, 06.68, 10080, 6.68, 0.00, 0, 272 +leanN, je, 26.09, 530264, 98.26, 1.12, 0, 153838 +sed, je, 01.73, 301076, 1.62, 0.10, 0, 5991 +barnes, je, 02.87, 76688, 2.84, 0.02, 0, 2551 +espresso, je, 05.48, 9980, 5.45, 0.03, 0, 317 +z3, je, 01.21, 65892, 1.19, 0.01, 0, 2772 +gs, je, 01.21, 53468, 1.18, 0.02, 0, 3719 +redis, je, 5.238, 36860, 2.25, 0.37, 0, 6765 +cfrac, je, 06.69, 10076, 6.68, 0.00, 0, 269 +leanN, je, 26.63, 496704, 101.91, 1.14, 1, 152314 +sed, je, 01.74, 295456, 1.68, 0.06, 0, 9177 +barnes, je, 02.88, 78712, 2.87, 0.00, 0, 2546 +espresso, je, 05.48, 9832, 5.46, 0.01, 0, 345 +z3, je, 01.18, 68604, 1.17, 0.01, 0, 2431 +gs, je, 01.19, 53572, 1.15, 0.04, 0, 3728 +redis, je, 5.052, 38772, 2.20, 0.34, 0, 6761 +cfrac, je, 06.72, 10056, 6.71, 0.00, 0, 271 +leanN, je, 26.16, 515292, 100.02, 0.99, 3, 110034 +sed, je, 01.74, 295444, 1.66, 0.07, 0, 9184 +barnes, je, 02.90, 74568, 2.88, 0.01, 0, 2547 +espresso, je, 05.44, 9864, 5.41, 0.03, 0, 304 +z3, je, 01.18, 65876, 1.16, 0.01, 0, 2782 +gs, je, 01.21, 53748, 1.18, 0.02, 0, 3721 +redis, je, 5.047, 36796, 2.15, 0.39, 0, 6770 +cfrac, je, 06.65, 10052, 6.65, 0.00, 0, 271 +leanN, je, 25.97, 501392, 97.88, 1.12, 4, 154360 +sed, je, 01.72, 301980, 1.65, 0.07, 0, 3141 diff --git a/docs/security/data/res_mi.csv b/docs/security/data/res_mi.csv new file mode 100644 index 000000000..181d2f17b --- /dev/null +++ b/docs/security/data/res_mi.csv @@ -0,0 +1,160 @@ +barnes, mi, 02.90, 66712, 2.88, 0.01, 0, 2461 +espresso, mi, 05.27, 8220, 5.25, 0.02, 0, 174 +z3, mi, 01.19, 71272, 1.18, 0.01, 0, 458 +gs, mi, 01.17, 57476, 1.13, 0.03, 0, 1660 +redis, mi, 4.357, 35112, 1.87, 0.32, 0, 8007 +cfrac, mi, 06.37, 4508, 6.37, 0.00, 0, 184 +leanN, mi, 25.62, 591256, 96.05, 1.07, 0, 200920 +sed, mi, 01.74, 324400, 1.66, 0.07, 0, 402 +barnes, mi, 02.85, 66836, 2.84, 0.01, 0, 2464 +espresso, mi, 05.19, 8316, 5.16, 0.03, 0, 174 +z3, mi, 01.20, 71076, 1.20, 0.00, 0, 455 +gs, mi, 01.16, 57396, 1.12, 0.03, 0, 1657 +redis, mi, 4.395, 35208, 1.88, 0.33, 0, 8001 +cfrac, mi, 06.33, 4460, 6.33, 0.00, 0, 188 +leanN, mi, 25.83, 587052, 98.11, 1.28, 0, 163240 +sed, mi, 01.74, 326500, 1.65, 0.08, 0, 401 +barnes, mi, 02.88, 66932, 2.86, 0.02, 0, 2462 +espresso, mi, 05.17, 8328, 5.15, 0.01, 0, 176 +z3, mi, 01.21, 71316, 1.19, 0.01, 0, 462 +gs, mi, 01.17, 57272, 1.11, 0.05, 0, 1656 +redis, mi, 4.294, 35128, 1.84, 0.31, 0, 8032 +cfrac, mi, 06.33, 4568, 6.32, 0.00, 0, 186 +leanN, mi, 25.89, 588932, 98.21, 1.16, 0, 165165 +sed, mi, 01.73, 330532, 1.65, 0.08, 0, 405 +barnes, mi, 02.85, 66912, 2.84, 0.00, 0, 2466 +espresso, mi, 05.17, 8276, 5.14, 0.02, 0, 177 +z3, mi, 01.19, 71344, 1.16, 0.02, 0, 460 +gs, mi, 01.16, 57488, 1.12, 0.04, 0, 1652 +redis, mi, 4.675, 35124, 1.81, 0.54, 0, 7996 +cfrac, mi, 06.32, 4460, 6.31, 0.00, 0, 179 +leanN, mi, 26.06, 592944, 99.54, 1.21, 0, 241980 +sed, mi, 01.72, 326452, 1.63, 0.09, 0, 404 +barnes, mi, 02.85, 66800, 2.84, 0.01, 0, 2464 +espresso, mi, 05.16, 8228, 5.13, 0.02, 0, 176 +z3, mi, 01.18, 71260, 1.16, 0.02, 0, 458 +gs, mi, 01.15, 57376, 1.12, 0.03, 0, 1655 +redis, mi, 4.314, 35176, 1.85, 0.31, 0, 8029 +cfrac, mi, 06.52, 4588, 6.52, 0.00, 0, 185 +leanN, mi, 25.77, 579000, 97.39, 1.21, 0, 226057 +sed, mi, 01.73, 326428, 1.67, 0.06, 0, 403 +barnes, mi, 02.84, 66972, 2.83, 0.01, 0, 2465 +espresso, mi, 05.19, 8220, 5.17, 0.02, 0, 172 +z3, mi, 01.18, 71236, 1.17, 0.01, 0, 458 +gs, mi, 01.17, 57300, 1.15, 0.01, 0, 1658 +redis, mi, 4.288, 35248, 1.74, 
0.42, 0, 8036 +cfrac, mi, 06.31, 4460, 6.30, 0.00, 0, 180 +leanN, mi, 26.23, 583088, 100.11, 1.16, 2, 161724 +sed, mi, 01.75, 324396, 1.69, 0.05, 0, 402 +barnes, mi, 02.90, 66824, 2.87, 0.02, 0, 2463 +espresso, mi, 05.17, 8224, 5.15, 0.01, 0, 176 +z3, mi, 01.18, 71336, 1.16, 0.02, 0, 456 +gs, mi, 01.15, 57368, 1.13, 0.02, 0, 1657 +redis, mi, 4.308, 35204, 1.77, 0.39, 0, 8019 +cfrac, mi, 06.29, 4480, 6.29, 0.00, 0, 183 +leanN, mi, 26.14, 585036, 100.89, 1.35, 0, 185556 +sed, mi, 01.75, 326444, 1.66, 0.08, 0, 398 +barnes, mi, 02.88, 66716, 2.86, 0.02, 0, 2461 +espresso, mi, 05.20, 8260, 5.18, 0.01, 0, 174 +z3, mi, 01.19, 71380, 1.17, 0.02, 0, 459 +gs, mi, 01.16, 57188, 1.14, 0.02, 0, 1655 +redis, mi, 4.340, 35184, 1.79, 0.39, 0, 8010 +cfrac, mi, 06.33, 4496, 6.33, 0.00, 0, 186 +leanN, mi, 26.72, 595404, 103.20, 1.25, 3, 232568 +sed, mi, 01.75, 326400, 1.67, 0.07, 0, 404 +barnes, mi, 02.86, 66764, 2.85, 0.01, 0, 2461 +espresso, mi, 05.19, 8308, 5.17, 0.02, 0, 176 +z3, mi, 01.20, 71272, 1.18, 0.01, 0, 456 +gs, mi, 01.17, 57512, 1.15, 0.01, 0, 1655 +redis, mi, 4.380, 35048, 1.87, 0.33, 0, 8004 +cfrac, mi, 06.36, 4496, 6.35, 0.00, 0, 183 +leanN, mi, 25.91, 597148, 98.60, 1.04, 0, 190255 +sed, mi, 01.74, 326504, 1.66, 0.08, 0, 403 +barnes, mi, 02.86, 66808, 2.84, 0.01, 0, 2465 +espresso, mi, 05.19, 8316, 5.15, 0.03, 0, 172 +z3, mi, 01.20, 71152, 1.18, 0.01, 0, 456 +gs, mi, 01.17, 57516, 1.17, 0.00, 0, 1654 +redis, mi, 4.314, 35128, 1.80, 0.36, 0, 8009 +cfrac, mi, 06.39, 4496, 6.38, 0.00, 0, 182 +leanN, mi, 25.73, 599228, 96.25, 1.11, 0, 222805 +sed, mi, 01.74, 330548, 1.67, 0.07, 0, 406 +barnes, mi, 02.85, 66840, 2.83, 0.01, 0, 2463 +espresso, mi, 05.24, 8276, 5.20, 0.03, 0, 177 +z3, mi, 01.20, 71076, 1.18, 0.01, 0, 453 +gs, mi, 01.16, 57492, 1.13, 0.02, 0, 1655 +redis, mi, 4.314, 35236, 1.84, 0.33, 0, 8029 +cfrac, mi, 06.34, 4460, 6.34, 0.00, 0, 184 +leanN, mi, 25.88, 585348, 97.10, 1.24, 4, 281872 +sed, mi, 01.74, 324444, 1.63, 0.11, 0, 402 +barnes, mi, 02.90, 66716, 2.88, 0.01, 0, 2461 +espresso, mi, 05.18, 8312, 5.15, 0.03, 0, 174 +z3, mi, 01.20, 71316, 1.19, 0.00, 0, 460 +gs, mi, 01.18, 57324, 1.14, 0.03, 0, 1650 +redis, mi, 4.669, 35204, 1.98, 0.37, 0, 7916 +cfrac, mi, 06.36, 4408, 6.36, 0.00, 0, 181 +leanN, mi, 25.42, 592852, 95.62, 1.14, 0, 239072 +sed, mi, 01.73, 326448, 1.65, 0.08, 0, 404 +barnes, mi, 02.87, 66904, 2.84, 0.03, 0, 2463 +espresso, mi, 05.20, 8316, 5.16, 0.04, 0, 175 +z3, mi, 01.21, 71204, 1.19, 0.02, 0, 461 +gs, mi, 01.17, 57032, 1.13, 0.03, 0, 1650 +redis, mi, 4.366, 35140, 1.87, 0.32, 0, 7994 +cfrac, mi, 06.32, 4572, 6.32, 0.00, 0, 183 +leanN, mi, 25.47, 597420, 94.71, 1.27, 0, 246037 +sed, mi, 01.74, 326364, 1.66, 0.07, 0, 401 +barnes, mi, 02.93, 66764, 2.91, 0.01, 0, 2464 +espresso, mi, 05.25, 8276, 5.23, 0.02, 0, 177 +z3, mi, 01.19, 71124, 1.16, 0.02, 0, 458 +gs, mi, 01.16, 57296, 1.15, 0.01, 0, 1655 +redis, mi, 4.340, 35180, 1.79, 0.38, 0, 8014 +cfrac, mi, 06.32, 4596, 6.32, 0.00, 0, 183 +leanN, mi, 25.58, 577132, 96.07, 1.15, 0, 224607 +sed, mi, 01.74, 326444, 1.65, 0.09, 0, 401 +barnes, mi, 02.84, 66772, 2.83, 0.01, 0, 2465 +espresso, mi, 05.25, 8288, 5.23, 0.02, 0, 176 +z3, mi, 01.21, 71384, 1.19, 0.01, 0, 461 +gs, mi, 01.18, 57240, 1.14, 0.03, 0, 1653 +redis, mi, 4.328, 35216, 1.77, 0.40, 0, 8017 +cfrac, mi, 06.35, 4492, 6.35, 0.00, 0, 184 +leanN, mi, 25.94, 591264, 98.57, 1.13, 0, 184057 +sed, mi, 01.73, 330580, 1.65, 0.07, 0, 406 +barnes, mi, 02.89, 66772, 2.87, 0.02, 0, 2459 +espresso, mi, 05.18, 8196, 5.14, 0.03, 0, 172 +z3, mi, 01.20, 71148, 1.18, 0.01, 0, 
457 +gs, mi, 01.17, 57300, 1.13, 0.03, 0, 1657 +redis, mi, 4.329, 35124, 1.79, 0.38, 0, 8018 +cfrac, mi, 06.33, 4496, 6.33, 0.00, 0, 182 +leanN, mi, 25.94, 578956, 98.00, 1.17, 0, 216122 +sed, mi, 01.77, 326436, 1.71, 0.05, 0, 402 +barnes, mi, 02.85, 66972, 2.82, 0.03, 0, 2465 +espresso, mi, 05.17, 8216, 5.15, 0.02, 0, 177 +z3, mi, 01.19, 71072, 1.18, 0.01, 0, 454 +gs, mi, 01.17, 57476, 1.13, 0.03, 0, 1656 +redis, mi, 4.335, 35180, 1.80, 0.38, 0, 8010 +cfrac, mi, 06.33, 4548, 6.32, 0.00, 0, 185 +leanN, mi, 26.35, 601364, 100.71, 1.07, 0, 172904 +sed, mi, 01.74, 326300, 1.65, 0.08, 0, 398 +barnes, mi, 02.88, 66928, 2.85, 0.02, 0, 2466 +espresso, mi, 05.18, 8220, 5.15, 0.02, 0, 173 +z3, mi, 01.21, 71192, 1.19, 0.01, 0, 459 +gs, mi, 01.18, 57096, 1.15, 0.03, 0, 1652 +redis, mi, 4.357, 35104, 1.80, 0.39, 0, 7991 +cfrac, mi, 06.35, 4408, 6.35, 0.00, 0, 181 +leanN, mi, 26.06, 591428, 99.05, 1.13, 3, 226227 +sed, mi, 01.76, 326452, 1.68, 0.07, 0, 401 +barnes, mi, 02.86, 66820, 2.84, 0.02, 0, 2464 +espresso, mi, 05.19, 8196, 5.17, 0.01, 0, 169 +z3, mi, 01.20, 71152, 1.18, 0.02, 0, 457 +gs, mi, 01.17, 57540, 1.15, 0.02, 0, 1657 +redis, mi, 4.489, 35192, 1.81, 0.44, 0, 7969 +cfrac, mi, 06.34, 4596, 6.34, 0.00, 0, 185 +leanN, mi, 26.55, 593452, 101.72, 1.13, 0, 230978 +sed, mi, 01.75, 330536, 1.64, 0.11, 0, 404 +barnes, mi, 02.86, 66908, 2.83, 0.01, 0, 2465 +espresso, mi, 05.19, 8288, 5.15, 0.04, 0, 177 +z3, mi, 01.18, 71348, 1.15, 0.03, 0, 459 +gs, mi, 01.17, 57476, 1.14, 0.02, 0, 1655 +redis, mi, 4.386, 35048, 1.86, 0.34, 0, 7998 +cfrac, mi, 06.34, 4460, 6.34, 0.00, 0, 182 +leanN, mi, 27.12, 578536, 105.64, 1.15, 2, 163365 +sed, mi, 01.74, 326500, 1.67, 0.07, 0, 402 diff --git a/docs/security/data/res_scudo.csv b/docs/security/data/res_scudo.csv new file mode 100644 index 000000000..d2efb91fc --- /dev/null +++ b/docs/security/data/res_scudo.csv @@ -0,0 +1,160 @@ +barnes, scudo, 02.99, 61892, 2.94, 0.04, 0, 4270 +espresso, scudo, 06.07, 4940, 6.05, 0.02, 0, 645 +z3, scudo, 01.27, 56116, 1.24, 0.03, 0, 8681 +gs, scudo, 01.21, 41248, 1.16, 0.05, 0, 17117 +redis, scudo, 5.252, 37900, 2.22, 0.42, 0, 9863 +cfrac, scudo, 08.45, 4684, 8.45, 0.00, 0, 616 +leanN, scudo, 33.01, 593072, 120.15, 1.81, 0, 598360 +sed, scudo, 01.81, 245512, 1.69, 0.11, 0, 60801 +barnes, scudo, 02.92, 61480, 2.90, 0.02, 0, 4252 +espresso, scudo, 06.04, 4900, 6.01, 0.02, 0, 642 +z3, scudo, 01.26, 56052, 1.23, 0.02, 0, 8668 +gs, scudo, 01.21, 41532, 1.17, 0.03, 0, 17160 +redis, scudo, 5.209, 37820, 2.27, 0.34, 0, 9874 +cfrac, scudo, 08.53, 4616, 8.53, 0.00, 0, 615 +leanN, scudo, 32.58, 602392, 117.53, 1.81, 0, 532564 +sed, scudo, 01.82, 245464, 1.71, 0.10, 0, 60799 +barnes, scudo, 02.94, 61888, 2.94, 0.00, 0, 4270 +espresso, scudo, 06.04, 5000, 6.02, 0.01, 0, 648 +z3, scudo, 01.26, 56160, 1.23, 0.02, 0, 8672 +gs, scudo, 01.20, 41456, 1.16, 0.04, 0, 17153 +redis, scudo, 5.238, 37884, 2.18, 0.45, 0, 9872 +cfrac, scudo, 08.44, 4696, 8.44, 0.00, 0, 610 +leanN, scudo, 33.36, 589768, 121.28, 2.00, 0, 601688 +sed, scudo, 01.81, 245412, 1.70, 0.10, 0, 60796 +barnes, scudo, 02.92, 61692, 2.91, 0.01, 0, 4279 +espresso, scudo, 06.08, 4876, 6.03, 0.05, 0, 635 +z3, scudo, 01.26, 56312, 1.24, 0.01, 0, 8163 +gs, scudo, 01.20, 41428, 1.15, 0.05, 0, 17159 +redis, scudo, 5.156, 38016, 2.25, 0.34, 0, 9884 +cfrac, scudo, 08.40, 4592, 8.39, 0.00, 0, 611 +leanN, scudo, 32.10, 628432, 114.89, 1.64, 0, 568793 +sed, scudo, 01.81, 245284, 1.67, 0.13, 0, 60797 +barnes, scudo, 02.89, 64112, 2.86, 0.02, 0, 3516 +espresso, scudo, 06.10, 5000, 6.07, 0.03, 0, 648 
+z3, scudo, 01.27, 55960, 1.24, 0.02, 0, 8678 +gs, scudo, 01.20, 41180, 1.17, 0.03, 0, 17107 +redis, scudo, 5.188, 38000, 2.16, 0.45, 0, 9878 +cfrac, scudo, 08.39, 4564, 8.39, 0.00, 0, 608 +leanN, scudo, 32.49, 589876, 116.58, 1.64, 0, 607090 +sed, scudo, 01.82, 245648, 1.69, 0.11, 0, 60799 +barnes, scudo, 02.90, 64096, 2.88, 0.01, 0, 3531 +espresso, scudo, 06.03, 4884, 6.00, 0.02, 0, 642 +z3, scudo, 01.25, 56064, 1.23, 0.02, 0, 8664 +gs, scudo, 01.20, 41440, 1.14, 0.06, 0, 17166 +redis, scudo, 5.361, 37964, 2.31, 0.38, 0, 9853 +cfrac, scudo, 08.64, 4756, 8.63, 0.00, 0, 613 +leanN, scudo, 33.24, 625584, 121.33, 1.88, 0, 656737 +sed, scudo, 01.81, 245432, 1.69, 0.12, 0, 60798 +barnes, scudo, 02.92, 64456, 2.89, 0.03, 0, 3607 +espresso, scudo, 06.05, 4992, 6.03, 0.02, 0, 642 +z3, scudo, 01.24, 56172, 1.22, 0.01, 0, 8145 +gs, scudo, 01.20, 41540, 1.16, 0.03, 0, 17162 +redis, scudo, 5.182, 37880, 2.22, 0.39, 0, 9882 +cfrac, scudo, 08.40, 4564, 8.39, 0.00, 0, 610 +leanN, scudo, 32.24, 586964, 116.42, 1.71, 4, 656609 +sed, scudo, 01.83, 245376, 1.73, 0.09, 0, 60796 +barnes, scudo, 02.91, 61900, 2.89, 0.01, 0, 3644 +espresso, scudo, 06.05, 4884, 6.02, 0.02, 0, 632 +z3, scudo, 01.26, 56180, 1.25, 0.01, 0, 8670 +gs, scudo, 01.21, 41180, 1.15, 0.06, 0, 17153 +redis, scudo, 5.234, 37924, 2.22, 0.41, 0, 9880 +cfrac, scudo, 08.43, 4656, 8.42, 0.00, 0, 611 +leanN, scudo, 32.32, 617108, 115.04, 1.95, 3, 560579 +sed, scudo, 01.81, 245416, 1.69, 0.11, 0, 60794 +barnes, scudo, 02.90, 63144, 2.88, 0.02, 0, 3502 +espresso, scudo, 06.02, 4804, 6.02, 0.00, 0, 640 +z3, scudo, 01.27, 56036, 1.26, 0.01, 0, 8678 +gs, scudo, 01.22, 41524, 1.15, 0.06, 0, 17151 +redis, scudo, 5.222, 37976, 2.24, 0.39, 0, 9876 +cfrac, scudo, 08.56, 4644, 8.56, 0.00, 0, 610 +leanN, scudo, 32.94, 612364, 117.66, 2.10, 5, 805633 +sed, scudo, 01.82, 245408, 1.70, 0.12, 0, 60794 +barnes, scudo, 02.92, 63716, 2.91, 0.00, 0, 3472 +espresso, scudo, 06.13, 4804, 6.12, 0.01, 0, 633 +z3, scudo, 01.26, 56280, 1.24, 0.01, 0, 8661 +gs, scudo, 01.21, 41432, 1.18, 0.02, 0, 17155 +redis, scudo, 5.231, 37984, 2.27, 0.36, 0, 9869 +cfrac, scudo, 08.43, 4624, 8.43, 0.00, 0, 611 +leanN, scudo, 33.81, 608676, 124.38, 1.69, 0, 497025 +sed, scudo, 01.81, 245452, 1.70, 0.10, 0, 60797 +barnes, scudo, 02.94, 61456, 2.92, 0.02, 0, 4264 +espresso, scudo, 06.03, 4980, 6.00, 0.03, 0, 639 +z3, scudo, 01.25, 56216, 1.23, 0.02, 0, 8662 +gs, scudo, 01.20, 41576, 1.13, 0.06, 0, 17165 +redis, scudo, 5.297, 37972, 2.29, 0.37, 0, 9852 +cfrac, scudo, 08.45, 4648, 8.45, 0.00, 0, 611 +leanN, scudo, 32.13, 589304, 114.72, 1.66, 0, 541686 +sed, scudo, 01.83, 245484, 1.70, 0.12, 0, 60795 +barnes, scudo, 02.95, 61908, 2.93, 0.01, 0, 4305 +espresso, scudo, 06.04, 4980, 5.99, 0.04, 0, 644 +z3, scudo, 01.25, 56092, 1.23, 0.02, 0, 8666 +gs, scudo, 01.21, 41008, 1.17, 0.04, 0, 17108 +redis, scudo, 5.271, 37924, 2.22, 0.42, 0, 9862 +cfrac, scudo, 08.45, 4568, 8.44, 0.00, 0, 610 +leanN, scudo, 32.97, 591284, 118.01, 1.87, 0, 721248 +sed, scudo, 01.80, 245376, 1.68, 0.11, 0, 60799 +barnes, scudo, 02.95, 61764, 2.93, 0.01, 0, 4295 +espresso, scudo, 06.04, 4780, 6.01, 0.03, 0, 641 +z3, scudo, 01.25, 56248, 1.23, 0.01, 0, 8657 +gs, scudo, 01.20, 41332, 1.13, 0.07, 0, 17114 +redis, scudo, 5.268, 38072, 2.26, 0.38, 0, 9861 +cfrac, scudo, 08.42, 4664, 8.41, 0.00, 0, 613 +leanN, scudo, 33.37, 615476, 121.70, 1.84, 0, 548507 +sed, scudo, 01.81, 245396, 1.70, 0.11, 0, 60801 +barnes, scudo, 02.93, 62684, 2.90, 0.02, 0, 3959 +espresso, scudo, 06.02, 4792, 5.99, 0.02, 0, 640 +z3, scudo, 01.24, 56236, 
1.21, 0.02, 0, 8669 +gs, scudo, 01.21, 41568, 1.16, 0.05, 0, 17154 +redis, scudo, 5.196, 37996, 2.24, 0.37, 0, 9886 +cfrac, scudo, 08.43, 4656, 8.43, 0.00, 0, 614 +leanN, scudo, 32.45, 594880, 117.90, 1.73, 0, 569976 +sed, scudo, 01.81, 245436, 1.71, 0.09, 0, 60800 +barnes, scudo, 02.92, 65468, 2.89, 0.02, 0, 3574 +espresso, scudo, 06.08, 4932, 6.07, 0.01, 0, 640 +z3, scudo, 01.27, 56472, 1.25, 0.01, 0, 8671 +gs, scudo, 01.23, 41300, 1.18, 0.04, 0, 17115 +redis, scudo, 5.432, 37996, 2.35, 0.38, 0, 9824 +cfrac, scudo, 10.05, 4680, 10.05, 0.00, 0, 612 +leanN, scudo, 34.30, 593504, 123.18, 2.01, 0, 558728 +sed, scudo, 01.82, 245464, 1.68, 0.13, 0, 60798 +barnes, scudo, 02.90, 62304, 2.87, 0.02, 0, 3184 +espresso, scudo, 06.07, 4884, 6.05, 0.02, 0, 642 +z3, scudo, 01.26, 56144, 1.23, 0.02, 0, 8153 +gs, scudo, 01.21, 41428, 1.18, 0.03, 0, 17120 +redis, scudo, 5.236, 37972, 2.31, 0.32, 0, 9876 +cfrac, scudo, 08.43, 4672, 8.43, 0.00, 0, 615 +leanN, scudo, 32.33, 597856, 116.09, 1.80, 0, 515053 +sed, scudo, 01.83, 245420, 1.71, 0.12, 0, 60795 +barnes, scudo, 02.93, 61688, 2.90, 0.02, 0, 4275 +espresso, scudo, 06.02, 4996, 5.99, 0.03, 0, 642 +z3, scudo, 01.26, 56208, 1.21, 0.04, 0, 8662 +gs, scudo, 01.21, 41520, 1.17, 0.03, 0, 17159 +redis, scudo, 5.209, 38000, 2.16, 0.45, 0, 9888 +cfrac, scudo, 08.44, 4644, 8.43, 0.00, 0, 610 +leanN, scudo, 32.48, 621548, 116.91, 1.89, 2, 604914 +sed, scudo, 01.81, 245472, 1.69, 0.12, 0, 60797 +barnes, scudo, 02.93, 63700, 2.92, 0.00, 0, 3472 +espresso, scudo, 06.13, 4980, 6.10, 0.02, 0, 604 +z3, scudo, 01.26, 56160, 1.23, 0.03, 0, 8667 +gs, scudo, 01.23, 41448, 1.15, 0.07, 0, 17161 +redis, scudo, 5.234, 37960, 2.21, 0.42, 0, 9891 +cfrac, scudo, 08.43, 4708, 8.43, 0.00, 0, 610 +leanN, scudo, 32.46, 581268, 116.25, 1.70, 0, 531142 +sed, scudo, 01.81, 245652, 1.68, 0.13, 0, 60802 +barnes, scudo, 02.95, 61940, 2.94, 0.01, 0, 4267 +espresso, scudo, 06.03, 4896, 6.01, 0.02, 0, 647 +z3, scudo, 01.26, 56284, 1.25, 0.01, 0, 8161 +gs, scudo, 01.21, 41320, 1.15, 0.05, 0, 17111 +redis, scudo, 5.244, 38000, 2.19, 0.44, 0, 9869 +cfrac, scudo, 08.45, 4568, 8.44, 0.00, 0, 615 +leanN, scudo, 31.78, 599964, 114.04, 1.60, 4, 654494 +sed, scudo, 01.80, 245660, 1.70, 0.10, 0, 60801 +barnes, scudo, 02.86, 63084, 2.84, 0.01, 0, 3492 +espresso, scudo, 06.03, 4796, 6.01, 0.02, 0, 642 +z3, scudo, 01.25, 56096, 1.22, 0.02, 0, 8667 +gs, scudo, 01.20, 41096, 1.16, 0.04, 0, 17116 +redis, scudo, 5.271, 38052, 2.23, 0.41, 0, 9871 +cfrac, scudo, 08.43, 4660, 8.42, 0.00, 0, 613 +leanN, scudo, 34.44, 587232, 125.79, 1.89, 1, 484336 +sed, scudo, 01.82, 245416, 1.72, 0.10, 0, 60795 diff --git a/docs/security/data/res_smi.csv b/docs/security/data/res_smi.csv new file mode 100644 index 000000000..2edc83f75 --- /dev/null +++ b/docs/security/data/res_smi.csv @@ -0,0 +1,160 @@ +barnes, smi, 03.02, 66728, 3.00, 0.01, 0, 2657 +espresso, smi, 05.50, 6620, 5.48, 0.02, 0, 288 +z3, smi, 01.26, 66104, 1.23, 0.02, 0, 3836 +gs, smi, 01.20, 56372, 1.16, 0.04, 0, 4550 +redis, smi, 4.622, 35372, 1.97, 0.35, 0, 8035 +cfrac, smi, 07.09, 4464, 7.09, 0.00, 0, 183 +leanN, smi, 27.04, 624072, 96.26, 1.92, 0, 354731 +sed, smi, 01.82, 317312, 1.71, 0.11, 0, 34437 +barnes, smi, 02.91, 66940, 2.90, 0.01, 0, 2670 +espresso, smi, 05.48, 6740, 5.47, 0.01, 0, 288 +z3, smi, 01.22, 65448, 1.20, 0.01, 0, 3699 +gs, smi, 01.19, 57232, 1.14, 0.05, 0, 4648 +redis, smi, 4.631, 35436, 1.95, 0.38, 0, 8036 +cfrac, smi, 07.14, 4580, 7.14, 0.00, 0, 180 +leanN, smi, 27.65, 631068, 99.97, 1.98, 0, 422694 +sed, smi, 01.81, 315944, 1.67, 0.13, 0, 
34063 +barnes, smi, 02.94, 66856, 2.92, 0.02, 0, 2657 +espresso, smi, 05.48, 6640, 5.46, 0.02, 0, 288 +z3, smi, 01.23, 65868, 1.21, 0.02, 0, 3826 +gs, smi, 01.20, 57060, 1.16, 0.03, 0, 4647 +redis, smi, 4.599, 35428, 1.93, 0.39, 0, 8050 +cfrac, smi, 07.03, 4572, 7.02, 0.00, 0, 183 +leanN, smi, 26.96, 620428, 96.80, 1.63, 0, 326027 +sed, smi, 01.82, 316752, 1.69, 0.12, 0, 34265 +barnes, smi, 02.95, 66820, 2.93, 0.02, 0, 2655 +espresso, smi, 05.49, 6684, 5.45, 0.03, 0, 285 +z3, smi, 01.23, 66204, 1.21, 0.01, 0, 3834 +gs, smi, 01.20, 56748, 1.18, 0.01, 0, 4635 +redis, smi, 4.564, 35472, 1.90, 0.40, 0, 8051 +cfrac, smi, 07.04, 4596, 7.03, 0.00, 0, 182 +leanN, smi, 27.59, 635276, 100.36, 1.84, 2, 343670 +sed, smi, 01.80, 316116, 1.69, 0.11, 0, 34107 +barnes, smi, 02.95, 66948, 2.93, 0.01, 0, 2662 +espresso, smi, 05.48, 6748, 5.46, 0.02, 0, 289 +z3, smi, 01.23, 65724, 1.21, 0.02, 0, 3728 +gs, smi, 01.18, 56612, 1.15, 0.03, 0, 4610 +redis, smi, 4.662, 35452, 2.03, 0.31, 0, 8040 +cfrac, smi, 07.07, 4604, 7.07, 0.00, 0, 182 +leanN, smi, 27.65, 627320, 100.04, 2.02, 0, 363186 +sed, smi, 01.82, 316576, 1.71, 0.11, 0, 34234 +barnes, smi, 02.92, 66776, 2.90, 0.01, 0, 2661 +espresso, smi, 05.48, 6680, 5.44, 0.03, 0, 287 +z3, smi, 01.23, 66032, 1.21, 0.01, 0, 3809 +gs, smi, 01.18, 57068, 1.15, 0.03, 0, 4675 +redis, smi, 4.665, 35436, 1.91, 0.43, 0, 8032 +cfrac, smi, 07.18, 4572, 7.18, 0.00, 0, 184 +leanN, smi, 27.25, 617944, 97.77, 2.02, 0, 449513 +sed, smi, 01.80, 316564, 1.69, 0.11, 0, 34217 +barnes, smi, 02.91, 66984, 2.89, 0.02, 0, 2659 +espresso, smi, 05.45, 6684, 5.43, 0.02, 0, 289 +z3, smi, 01.23, 65560, 1.20, 0.03, 0, 3753 +gs, smi, 01.19, 57140, 1.16, 0.02, 0, 4643 +redis, smi, 4.593, 35444, 1.88, 0.43, 0, 8057 +cfrac, smi, 07.00, 4604, 7.00, 0.00, 0, 183 +leanN, smi, 27.36, 612228, 98.39, 1.86, 3, 411598 +sed, smi, 01.81, 315884, 1.69, 0.11, 0, 34061 +barnes, smi, 02.95, 66896, 2.94, 0.00, 0, 2657 +espresso, smi, 05.50, 6684, 5.46, 0.03, 0, 284 +z3, smi, 01.25, 65808, 1.23, 0.01, 0, 3798 +gs, smi, 01.20, 57036, 1.16, 0.04, 0, 4612 +redis, smi, 4.690, 35696, 1.92, 0.44, 0, 8085 +cfrac, smi, 07.06, 4636, 7.06, 0.00, 0, 183 +leanN, smi, 27.69, 626448, 100.56, 2.05, 0, 453730 +sed, smi, 01.82, 312476, 1.68, 0.13, 0, 33703 +barnes, smi, 02.94, 66808, 2.90, 0.03, 0, 2660 +espresso, smi, 05.48, 6756, 5.45, 0.03, 0, 290 +z3, smi, 01.23, 65884, 1.20, 0.02, 0, 3786 +gs, smi, 01.22, 56648, 1.18, 0.03, 0, 4597 +redis, smi, 4.618, 35384, 1.88, 0.44, 0, 8039 +cfrac, smi, 07.06, 4496, 7.06, 0.00, 0, 184 +leanN, smi, 27.56, 625856, 99.23, 1.87, 1, 381154 +sed, smi, 01.82, 312548, 1.70, 0.11, 0, 33757 +barnes, smi, 02.93, 66852, 2.89, 0.03, 0, 2658 +espresso, smi, 05.50, 6692, 5.46, 0.04, 0, 288 +z3, smi, 01.24, 65928, 1.22, 0.02, 0, 3817 +gs, smi, 01.19, 56816, 1.12, 0.06, 0, 4649 +redis, smi, 4.599, 35488, 1.92, 0.39, 0, 8062 +cfrac, smi, 07.06, 4468, 7.06, 0.00, 0, 181 +leanN, smi, 27.40, 612612, 100.31, 1.90, 0, 403445 +sed, smi, 01.81, 316720, 1.69, 0.11, 0, 34280 +barnes, smi, 02.94, 66808, 2.92, 0.02, 0, 2657 +espresso, smi, 05.49, 6728, 5.47, 0.02, 0, 292 +z3, smi, 01.23, 66256, 1.21, 0.02, 0, 3864 +gs, smi, 01.20, 57036, 1.17, 0.03, 0, 4615 +redis, smi, 4.717, 35384, 1.97, 0.40, 0, 8022 +cfrac, smi, 07.02, 4488, 7.02, 0.00, 0, 177 +leanN, smi, 27.63, 611432, 101.04, 1.79, 0, 402887 +sed, smi, 01.81, 316640, 1.71, 0.09, 0, 34248 +barnes, smi, 02.94, 66940, 2.91, 0.02, 0, 2660 +espresso, smi, 05.49, 6640, 5.47, 0.02, 0, 286 +z3, smi, 01.24, 65416, 1.21, 0.02, 0, 3713 +gs, smi, 01.19, 57232, 1.16, 0.03, 
0, 4622 +redis, smi, 4.612, 35620, 1.94, 0.38, 0, 8048 +cfrac, smi, 07.07, 4580, 7.07, 0.00, 0, 180 +leanN, smi, 28.42, 620424, 105.04, 1.68, 0, 232111 +sed, smi, 01.81, 316680, 1.66, 0.14, 0, 34248 +barnes, smi, 02.98, 66920, 2.95, 0.02, 0, 2657 +espresso, smi, 05.53, 6620, 5.52, 0.00, 0, 290 +z3, smi, 01.23, 65568, 1.19, 0.03, 0, 3746 +gs, smi, 01.19, 56812, 1.14, 0.05, 0, 4576 +redis, smi, 4.587, 35500, 1.88, 0.43, 0, 8049 +cfrac, smi, 07.06, 4640, 7.05, 0.00, 0, 182 +leanN, smi, 27.35, 620144, 98.41, 1.95, 1, 396107 +sed, smi, 01.80, 316024, 1.65, 0.14, 0, 34081 +barnes, smi, 03.04, 66940, 3.02, 0.02, 0, 2661 +espresso, smi, 05.50, 6648, 5.46, 0.03, 0, 287 +z3, smi, 01.23, 66260, 1.20, 0.02, 0, 3863 +gs, smi, 01.18, 57304, 1.14, 0.04, 0, 4728 +redis, smi, 4.613, 35436, 1.88, 0.44, 0, 8042 +cfrac, smi, 07.04, 4640, 7.03, 0.00, 0, 182 +leanN, smi, 27.77, 626188, 101.15, 1.75, 0, 326530 +sed, smi, 01.81, 316808, 1.72, 0.08, 0, 34267 +barnes, smi, 02.98, 66852, 2.97, 0.01, 0, 2657 +espresso, smi, 05.55, 6648, 5.51, 0.04, 0, 287 +z3, smi, 01.26, 65784, 1.24, 0.02, 0, 3787 +gs, smi, 01.19, 57044, 1.14, 0.05, 0, 4617 +redis, smi, 4.675, 35420, 1.96, 0.39, 0, 8044 +cfrac, smi, 07.09, 4600, 7.09, 0.00, 0, 182 +leanN, smi, 27.56, 625864, 101.01, 1.84, 2, 356616 +sed, smi, 01.81, 315672, 1.69, 0.11, 0, 34008 +barnes, smi, 02.94, 66860, 2.92, 0.02, 0, 2656 +espresso, smi, 05.48, 6700, 5.45, 0.02, 0, 287 +z3, smi, 01.23, 65912, 1.21, 0.01, 0, 3802 +gs, smi, 01.19, 57004, 1.14, 0.04, 0, 4609 +redis, smi, 4.599, 35436, 1.85, 0.46, 0, 8052 +cfrac, smi, 07.02, 4504, 7.01, 0.00, 0, 182 +leanN, smi, 27.63, 626168, 101.20, 1.82, 0, 328983 +sed, smi, 01.83, 316888, 1.69, 0.14, 0, 34282 +barnes, smi, 02.94, 66828, 2.92, 0.01, 0, 2659 +espresso, smi, 05.48, 6644, 5.45, 0.02, 0, 287 +z3, smi, 01.24, 65296, 1.22, 0.02, 0, 3679 +gs, smi, 01.19, 56596, 1.17, 0.02, 0, 4579 +redis, smi, 4.618, 35860, 2.01, 0.31, 0, 8127 +cfrac, smi, 07.05, 4572, 7.05, 0.00, 0, 178 +leanN, smi, 26.99, 617668, 97.22, 1.82, 0, 395341 +sed, smi, 01.81, 317340, 1.70, 0.10, 0, 34402 +barnes, smi, 02.95, 66776, 2.93, 0.01, 0, 2660 +espresso, smi, 05.57, 6644, 5.55, 0.02, 0, 285 +z3, smi, 01.25, 65920, 1.22, 0.02, 0, 3847 +gs, smi, 01.21, 57184, 1.18, 0.02, 0, 4672 +redis, smi, 4.669, 35336, 1.94, 0.41, 0, 8028 +cfrac, smi, 07.14, 4596, 7.13, 0.00, 0, 184 +leanN, smi, 27.36, 621904, 99.41, 2.03, 4, 392238 +sed, smi, 01.81, 317072, 1.70, 0.11, 0, 34381 +barnes, smi, 02.98, 66836, 2.96, 0.01, 0, 2659 +espresso, smi, 05.48, 6700, 5.46, 0.02, 0, 286 +z3, smi, 01.23, 65772, 1.21, 0.01, 0, 3727 +gs, smi, 01.22, 56964, 1.17, 0.04, 0, 4612 +redis, smi, 4.599, 35432, 1.87, 0.44, 0, 8057 +cfrac, smi, 07.03, 4584, 7.03, 0.00, 0, 181 +leanN, smi, 27.40, 619364, 99.87, 1.82, 0, 362050 +sed, smi, 01.82, 316496, 1.70, 0.12, 0, 34208 +barnes, smi, 02.94, 66784, 2.92, 0.02, 0, 2661 +espresso, smi, 05.48, 6708, 5.46, 0.02, 0, 291 +z3, smi, 01.24, 65752, 1.21, 0.02, 0, 3822 +gs, smi, 01.19, 57064, 1.15, 0.04, 0, 4646 +redis, smi, 4.596, 35492, 1.89, 0.42, 0, 8051 +cfrac, smi, 07.14, 4500, 7.13, 0.00, 0, 181 +leanN, smi, 29.18, 626364, 109.19, 1.87, 0, 255797 +sed, smi, 01.81, 317056, 1.69, 0.11, 0, 34329 diff --git a/docs/security/data/res_sn-0.5.3.csv b/docs/security/data/res_sn-0.5.3.csv new file mode 100644 index 000000000..52fd95392 --- /dev/null +++ b/docs/security/data/res_sn-0.5.3.csv @@ -0,0 +1,160 @@ +barnes, sn-0.5.3, 02.87, 70068, 2.84, 0.02, 0, 2518 +espresso, sn-0.5.3, 05.21, 10244, 5.19, 0.01, 0, 410 +z3, sn-0.5.3, 01.18, 73508, 1.16, 0.01, 0, 
563 +gs, sn-0.5.3, 01.17, 56084, 1.16, 0.01, 0, 1873 +redis, sn-0.5.3, 4.348, 37168, 1.90, 0.28, 0, 7518 +cfrac, sn-0.5.3, 06.33, 8244, 6.32, 0.00, 0, 446 +leanN, sn-0.5.3, 25.46, 546652, 96.19, 0.86, 0, 3333 +sed, sn-0.5.3, 01.73, 310148, 1.65, 0.08, 0, 682 +barnes, sn-0.5.3, 02.84, 70132, 2.82, 0.01, 0, 2530 +espresso, sn-0.5.3, 05.13, 10240, 5.11, 0.02, 0, 411 +z3, sn-0.5.3, 01.17, 73488, 1.15, 0.02, 0, 565 +gs, sn-0.5.3, 01.19, 56076, 1.16, 0.02, 0, 1875 +redis, sn-0.5.3, 4.314, 37128, 1.78, 0.38, 0, 7346 +cfrac, sn-0.5.3, 06.31, 8424, 6.30, 0.01, 0, 444 +leanN, sn-0.5.3, 26.53, 537680, 101.52, 0.99, 0, 4183 +sed, sn-0.5.3, 01.73, 310248, 1.64, 0.08, 0, 684 +barnes, sn-0.5.3, 02.85, 70108, 2.83, 0.02, 0, 2526 +espresso, sn-0.5.3, 05.14, 10212, 5.11, 0.02, 0, 411 +z3, sn-0.5.3, 01.17, 73712, 1.16, 0.01, 0, 566 +gs, sn-0.5.3, 01.15, 56044, 1.14, 0.01, 0, 1871 +redis, sn-0.5.3, 4.244, 37064, 1.74, 0.39, 0, 7619 +cfrac, sn-0.5.3, 06.31, 8408, 6.31, 0.00, 0, 447 +leanN, sn-0.5.3, 25.44, 547264, 96.18, 0.87, 0, 3596 +sed, sn-0.5.3, 01.72, 310392, 1.65, 0.07, 0, 683 +barnes, sn-0.5.3, 02.86, 69948, 2.84, 0.01, 0, 2523 +espresso, sn-0.5.3, 05.11, 10280, 5.09, 0.02, 0, 406 +z3, sn-0.5.3, 01.19, 73804, 1.17, 0.02, 0, 564 +gs, sn-0.5.3, 01.16, 56260, 1.11, 0.04, 0, 1875 +redis, sn-0.5.3, 4.254, 37024, 1.80, 0.33, 0, 7116 +cfrac, sn-0.5.3, 06.38, 8424, 6.37, 0.00, 0, 446 +leanN, sn-0.5.3, 25.91, 536216, 98.51, 0.89, 0, 3475 +sed, sn-0.5.3, 01.71, 310356, 1.63, 0.08, 0, 684 +barnes, sn-0.5.3, 02.84, 70108, 2.83, 0.00, 0, 2520 +espresso, sn-0.5.3, 05.14, 10392, 5.10, 0.03, 0, 414 +z3, sn-0.5.3, 01.16, 73748, 1.15, 0.01, 0, 566 +gs, sn-0.5.3, 01.16, 56052, 1.15, 0.01, 0, 1871 +redis, sn-0.5.3, 4.271, 37064, 1.74, 0.41, 0, 6995 +cfrac, sn-0.5.3, 06.46, 8348, 6.45, 0.00, 0, 445 +leanN, sn-0.5.3, 25.31, 555048, 95.87, 0.90, 0, 3509 +sed, sn-0.5.3, 01.72, 310300, 1.65, 0.07, 0, 685 +barnes, sn-0.5.3, 02.94, 70008, 2.93, 0.00, 0, 2522 +espresso, sn-0.5.3, 05.15, 10252, 5.13, 0.01, 0, 413 +z3, sn-0.5.3, 01.17, 73808, 1.15, 0.01, 0, 571 +gs, sn-0.5.3, 01.16, 56216, 1.14, 0.02, 0, 1872 +redis, sn-0.5.3, 4.224, 37128, 1.74, 0.38, 0, 7520 +cfrac, sn-0.5.3, 06.30, 8292, 6.30, 0.00, 0, 445 +leanN, sn-0.5.3, 27.30, 526460, 106.32, 0.96, 0, 2879 +sed, sn-0.5.3, 01.72, 310416, 1.64, 0.07, 0, 686 +barnes, sn-0.5.3, 02.87, 70156, 2.85, 0.02, 0, 2519 +espresso, sn-0.5.3, 05.13, 10420, 5.09, 0.03, 0, 414 +z3, sn-0.5.3, 01.16, 73580, 1.14, 0.02, 0, 565 +gs, sn-0.5.3, 01.15, 56056, 1.12, 0.02, 0, 1876 +redis, sn-0.5.3, 4.389, 37100, 1.73, 0.47, 0, 7100 +cfrac, sn-0.5.3, 06.36, 8348, 6.36, 0.00, 0, 445 +leanN, sn-0.5.3, 26.22, 540668, 102.18, 0.90, 0, 3315 +sed, sn-0.5.3, 01.78, 310088, 1.68, 0.09, 0, 679 +barnes, sn-0.5.3, 02.88, 69948, 2.85, 0.02, 0, 2519 +espresso, sn-0.5.3, 05.16, 10328, 5.14, 0.01, 0, 413 +z3, sn-0.5.3, 01.16, 73572, 1.14, 0.01, 0, 562 +gs, sn-0.5.3, 01.16, 56172, 1.13, 0.03, 0, 1872 +redis, sn-0.5.3, 4.348, 36980, 1.82, 0.36, 0, 7483 +cfrac, sn-0.5.3, 06.32, 8408, 6.32, 0.00, 0, 448 +leanN, sn-0.5.3, 25.75, 549084, 98.06, 0.79, 0, 3423 +sed, sn-0.5.3, 01.76, 310376, 1.70, 0.05, 0, 681 +barnes, sn-0.5.3, 02.90, 70064, 2.87, 0.02, 0, 2525 +espresso, sn-0.5.3, 05.14, 10240, 5.10, 0.04, 0, 411 +z3, sn-0.5.3, 01.17, 73556, 1.15, 0.01, 0, 562 +gs, sn-0.5.3, 01.16, 56032, 1.14, 0.02, 0, 1876 +redis, sn-0.5.3, 4.314, 36988, 1.81, 0.36, 0, 7349 +cfrac, sn-0.5.3, 06.34, 8424, 6.33, 0.00, 0, 446 +leanN, sn-0.5.3, 26.49, 539092, 101.33, 0.99, 3, 3592 +sed, sn-0.5.3, 01.72, 310300, 1.65, 0.07, 0, 679 +barnes, 
sn-0.5.3, 02.86, 70068, 2.85, 0.01, 0, 2523 +espresso, sn-0.5.3, 05.15, 10352, 5.14, 0.01, 0, 415 +z3, sn-0.5.3, 01.17, 73528, 1.15, 0.01, 0, 564 +gs, sn-0.5.3, 01.17, 56208, 1.15, 0.02, 0, 1872 +redis, sn-0.5.3, 4.277, 36992, 1.79, 0.36, 0, 7345 +cfrac, sn-0.5.3, 06.32, 8368, 6.32, 0.00, 0, 447 +leanN, sn-0.5.3, 25.42, 542936, 96.17, 0.85, 0, 3418 +sed, sn-0.5.3, 01.73, 310336, 1.66, 0.07, 0, 680 +barnes, sn-0.5.3, 02.88, 70236, 2.87, 0.01, 0, 2521 +espresso, sn-0.5.3, 05.27, 10284, 5.25, 0.01, 0, 413 +z3, sn-0.5.3, 01.16, 73644, 1.14, 0.02, 0, 564 +gs, sn-0.5.3, 01.17, 55884, 1.13, 0.03, 0, 1869 +redis, sn-0.5.3, 4.289, 37040, 1.77, 0.38, 0, 6881 +cfrac, sn-0.5.3, 06.36, 8408, 6.36, 0.00, 0, 447 +leanN, sn-0.5.3, 25.63, 538804, 97.85, 0.82, 0, 3326 +sed, sn-0.5.3, 01.74, 310300, 1.67, 0.06, 0, 679 +barnes, sn-0.5.3, 02.85, 70236, 2.82, 0.02, 0, 2516 +espresso, sn-0.5.3, 05.26, 10352, 5.21, 0.04, 0, 414 +z3, sn-0.5.3, 01.17, 73692, 1.16, 0.01, 0, 563 +gs, sn-0.5.3, 01.17, 56216, 1.14, 0.02, 0, 1869 +redis, sn-0.5.3, 4.263, 36996, 1.75, 0.39, 0, 7261 +cfrac, sn-0.5.3, 06.38, 8252, 6.37, 0.00, 0, 442 +leanN, sn-0.5.3, 25.65, 530420, 97.33, 0.95, 0, 3349 +sed, sn-0.5.3, 01.73, 310348, 1.65, 0.07, 0, 685 +barnes, sn-0.5.3, 02.92, 70236, 2.89, 0.02, 0, 2525 +espresso, sn-0.5.3, 05.26, 10276, 5.23, 0.03, 0, 408 +z3, sn-0.5.3, 01.17, 73648, 1.16, 0.01, 0, 569 +gs, sn-0.5.3, 01.18, 56084, 1.14, 0.03, 0, 1874 +redis, sn-0.5.3, 4.324, 36948, 1.82, 0.36, 0, 7594 +cfrac, sn-0.5.3, 06.33, 8424, 6.33, 0.00, 0, 445 +leanN, sn-0.5.3, 27.13, 532548, 104.80, 0.79, 0, 3409 +sed, sn-0.5.3, 01.73, 310464, 1.66, 0.07, 0, 683 +barnes, sn-0.5.3, 02.86, 70068, 2.84, 0.01, 0, 2525 +espresso, sn-0.5.3, 05.14, 10388, 5.12, 0.02, 0, 411 +z3, sn-0.5.3, 01.18, 73764, 1.16, 0.01, 0, 568 +gs, sn-0.5.3, 01.16, 56212, 1.14, 0.01, 0, 1874 +redis, sn-0.5.3, 4.310, 37092, 1.80, 0.37, 0, 7027 +cfrac, sn-0.5.3, 06.32, 8468, 6.31, 0.00, 0, 444 +leanN, sn-0.5.3, 26.48, 553936, 101.92, 0.96, 1, 3961 +sed, sn-0.5.3, 01.74, 310380, 1.66, 0.07, 0, 682 +barnes, sn-0.5.3, 02.91, 69964, 2.89, 0.02, 0, 2519 +espresso, sn-0.5.3, 05.22, 10328, 5.20, 0.02, 0, 413 +z3, sn-0.5.3, 01.18, 73564, 1.17, 0.01, 0, 565 +gs, sn-0.5.3, 01.18, 56072, 1.15, 0.02, 0, 1875 +redis, sn-0.5.3, 4.342, 36944, 1.80, 0.38, 0, 7593 +cfrac, sn-0.5.3, 06.35, 8424, 6.34, 0.00, 0, 444 +leanN, sn-0.5.3, 26.71, 540632, 103.48, 0.85, 5, 3013 +sed, sn-0.5.3, 01.73, 310388, 1.67, 0.06, 0, 683 +barnes, sn-0.5.3, 02.90, 69944, 2.87, 0.02, 0, 2518 +espresso, sn-0.5.3, 05.16, 10312, 5.14, 0.02, 0, 411 +z3, sn-0.5.3, 01.17, 73680, 1.16, 0.01, 0, 565 +gs, sn-0.5.3, 01.17, 56240, 1.14, 0.02, 0, 1875 +redis, sn-0.5.3, 4.314, 37140, 1.73, 0.43, 0, 7378 +cfrac, sn-0.5.3, 06.32, 8420, 6.32, 0.00, 0, 447 +leanN, sn-0.5.3, 26.72, 553304, 103.27, 0.74, 0, 3848 +sed, sn-0.5.3, 01.77, 310252, 1.70, 0.07, 0, 685 +barnes, sn-0.5.3, 02.86, 70168, 2.84, 0.02, 0, 2518 +espresso, sn-0.5.3, 05.12, 10276, 5.09, 0.02, 0, 408 +z3, sn-0.5.3, 01.18, 73504, 1.16, 0.01, 0, 562 +gs, sn-0.5.3, 01.16, 56036, 1.12, 0.03, 0, 1875 +redis, sn-0.5.3, 4.326, 37000, 1.80, 0.37, 0, 7342 +cfrac, sn-0.5.3, 06.33, 8404, 6.32, 0.00, 0, 446 +leanN, sn-0.5.3, 25.97, 547068, 99.19, 0.88, 0, 3336 +sed, sn-0.5.3, 01.77, 310252, 1.68, 0.08, 0, 683 +barnes, sn-0.5.3, 02.87, 70164, 2.84, 0.02, 0, 2523 +espresso, sn-0.5.3, 05.17, 10280, 5.15, 0.02, 0, 410 +z3, sn-0.5.3, 01.18, 73772, 1.16, 0.02, 0, 568 +gs, sn-0.5.3, 01.19, 56172, 1.16, 0.02, 0, 1872 +redis, sn-0.5.3, 4.310, 37016, 1.79, 0.37, 0, 7484 +cfrac, sn-0.5.3, 
06.33, 8260, 6.33, 0.00, 0, 448 +leanN, sn-0.5.3, 25.73, 533320, 97.81, 0.66, 0, 3598 +sed, sn-0.5.3, 01.73, 310284, 1.66, 0.07, 0, 682 +barnes, sn-0.5.3, 02.85, 70132, 2.83, 0.01, 0, 2524 +espresso, sn-0.5.3, 05.13, 10292, 5.11, 0.02, 0, 411 +z3, sn-0.5.3, 01.16, 73628, 1.14, 0.02, 0, 564 +gs, sn-0.5.3, 01.18, 55808, 1.13, 0.03, 0, 1871 +redis, sn-0.5.3, 4.289, 37084, 1.80, 0.35, 0, 7347 +cfrac, sn-0.5.3, 06.31, 8308, 6.30, 0.00, 0, 448 +leanN, sn-0.5.3, 25.65, 551156, 97.81, 0.78, 4, 4057 +sed, sn-0.5.3, 01.72, 310240, 1.64, 0.08, 0, 682 +barnes, sn-0.5.3, 02.86, 69948, 2.84, 0.01, 0, 2525 +espresso, sn-0.5.3, 05.14, 10328, 5.11, 0.02, 0, 412 +z3, sn-0.5.3, 01.17, 73444, 1.16, 0.01, 0, 564 +gs, sn-0.5.3, 01.17, 55792, 1.13, 0.03, 0, 1869 +redis, sn-0.5.3, 4.262, 37176, 1.76, 0.38, 0, 6883 +cfrac, sn-0.5.3, 06.33, 8404, 6.32, 0.00, 0, 442 +leanN, sn-0.5.3, 25.67, 549420, 97.34, 0.76, 0, 3992 +sed, sn-0.5.3, 01.73, 310336, 1.67, 0.06, 0, 677 diff --git a/docs/security/data/res_sn-0.6.0-checks.csv b/docs/security/data/res_sn-0.6.0-checks.csv new file mode 100644 index 000000000..5e05e1002 --- /dev/null +++ b/docs/security/data/res_sn-0.6.0-checks.csv @@ -0,0 +1,160 @@ +barnes, sn-0.6.0-full-checks, 02.91, 65716, 2.88, 0.02, 0, 2863 +espresso, sn-0.6.0-full-checks, 05.35, 12640, 5.30, 0.04, 0, 744 +z3, sn-0.6.0-full-checks, 01.18, 72428, 1.17, 0.01, 0, 773 +gs, sn-0.6.0-full-checks, 01.21, 54304, 1.18, 0.02, 0, 2018 +redis, sn-0.6.0-full-checks, 4.435, 33628, 1.91, 0.32, 0, 8964 +cfrac, sn-0.6.0-full-checks, 06.63, 3584, 6.63, 0.00, 0, 496 +leanN, sn-0.6.0-full-checks, 26.78, 681948, 102.76, 1.04, 0, 16260 +sed, sn-0.6.0-full-checks, 01.74, 349752, 1.65, 0.08, 0, 1670 +barnes, sn-0.6.0-full-checks, 02.91, 65620, 2.90, 0.01, 0, 2854 +espresso, sn-0.6.0-full-checks, 05.27, 12760, 5.23, 0.04, 0, 739 +z3, sn-0.6.0-full-checks, 01.17, 70204, 1.16, 0.01, 0, 758 +gs, sn-0.6.0-full-checks, 01.19, 56340, 1.15, 0.03, 0, 1955 +redis, sn-0.6.0-full-checks, 4.504, 33404, 1.72, 0.54, 0, 8904 +cfrac, sn-0.6.0-full-checks, 06.62, 3560, 6.62, 0.00, 0, 499 +leanN, sn-0.6.0-full-checks, 26.59, 650568, 102.61, 1.04, 0, 12623 +sed, sn-0.6.0-full-checks, 01.74, 347584, 1.63, 0.10, 0, 1694 +barnes, sn-0.6.0-full-checks, 02.86, 65748, 2.82, 0.03, 0, 2853 +espresso, sn-0.6.0-full-checks, 05.28, 12604, 5.25, 0.03, 0, 730 +z3, sn-0.6.0-full-checks, 01.19, 72360, 1.18, 0.01, 0, 783 +gs, sn-0.6.0-full-checks, 01.18, 56336, 1.15, 0.02, 0, 2378 +redis, sn-0.6.0-full-checks, 4.406, 33360, 1.83, 0.39, 0, 8930 +cfrac, sn-0.6.0-full-checks, 06.60, 3628, 6.60, 0.00, 0, 496 +leanN, sn-0.6.0-full-checks, 26.99, 655148, 105.01, 1.00, 0, 13196 +sed, sn-0.6.0-full-checks, 01.73, 349380, 1.65, 0.08, 0, 1545 +barnes, sn-0.6.0-full-checks, 02.87, 65740, 2.85, 0.02, 0, 2853 +espresso, sn-0.6.0-full-checks, 05.27, 12648, 5.20, 0.06, 0, 740 +z3, sn-0.6.0-full-checks, 01.17, 70140, 1.14, 0.02, 0, 761 +gs, sn-0.6.0-full-checks, 01.18, 54004, 1.16, 0.02, 0, 1961 +redis, sn-0.6.0-full-checks, 4.357, 33356, 1.85, 0.34, 0, 8977 +cfrac, sn-0.6.0-full-checks, 06.61, 3584, 6.61, 0.00, 0, 495 +leanN, sn-0.6.0-full-checks, 26.66, 675748, 101.94, 1.01, 0, 13980 +sed, sn-0.6.0-full-checks, 01.73, 347264, 1.64, 0.09, 0, 1571 +barnes, sn-0.6.0-full-checks, 02.87, 65632, 2.86, 0.01, 0, 2853 +espresso, sn-0.6.0-full-checks, 05.24, 12608, 5.22, 0.02, 0, 748 +z3, sn-0.6.0-full-checks, 01.18, 72220, 1.17, 0.01, 0, 772 +gs, sn-0.6.0-full-checks, 01.19, 56208, 1.15, 0.03, 0, 1961 +redis, sn-0.6.0-full-checks, 4.369, 33168, 1.79, 0.40, 0, 8840 +cfrac, 
sn-0.6.0-full-checks, 06.76, 3580, 6.76, 0.00, 0, 497 +leanN, sn-0.6.0-full-checks, 26.67, 671232, 102.70, 1.00, 0, 13239 +sed, sn-0.6.0-full-checks, 01.75, 351632, 1.63, 0.11, 0, 1713 +barnes, sn-0.6.0-full-checks, 02.88, 65700, 2.86, 0.02, 0, 2854 +espresso, sn-0.6.0-full-checks, 05.33, 12760, 5.29, 0.04, 0, 726 +z3, sn-0.6.0-full-checks, 01.17, 70276, 1.16, 0.00, 0, 767 +gs, sn-0.6.0-full-checks, 01.19, 54364, 1.15, 0.03, 0, 2014 +redis, sn-0.6.0-full-checks, 4.343, 33512, 1.90, 0.29, 0, 9002 +cfrac, sn-0.6.0-full-checks, 06.60, 3596, 6.60, 0.00, 0, 507 +leanN, sn-0.6.0-full-checks, 26.23, 673484, 100.36, 0.96, 0, 12775 +sed, sn-0.6.0-full-checks, 01.74, 349496, 1.65, 0.09, 0, 1693 +barnes, sn-0.6.0-full-checks, 02.94, 65752, 2.90, 0.03, 0, 2856 +espresso, sn-0.6.0-full-checks, 05.25, 12740, 5.22, 0.02, 0, 734 +z3, sn-0.6.0-full-checks, 01.18, 70424, 1.16, 0.01, 0, 759 +gs, sn-0.6.0-full-checks, 01.19, 53812, 1.17, 0.01, 0, 1939 +redis, sn-0.6.0-full-checks, 4.392, 33984, 1.86, 0.35, 0, 9164 +cfrac, sn-0.6.0-full-checks, 06.61, 3664, 6.61, 0.00, 0, 512 +leanN, sn-0.6.0-full-checks, 27.06, 681168, 105.03, 1.04, 0, 11898 +sed, sn-0.6.0-full-checks, 01.79, 347472, 1.72, 0.06, 0, 1697 +barnes, sn-0.6.0-full-checks, 02.91, 65776, 2.88, 0.02, 0, 2856 +espresso, sn-0.6.0-full-checks, 05.28, 12608, 5.27, 0.01, 0, 736 +z3, sn-0.6.0-full-checks, 01.18, 70388, 1.15, 0.02, 0, 768 +gs, sn-0.6.0-full-checks, 01.18, 54236, 1.16, 0.02, 0, 1958 +redis, sn-0.6.0-full-checks, 4.403, 33604, 1.88, 0.33, 0, 9014 +cfrac, sn-0.6.0-full-checks, 06.62, 3576, 6.62, 0.00, 0, 495 +leanN, sn-0.6.0-full-checks, 26.55, 655872, 101.70, 1.10, 0, 13236 +sed, sn-0.6.0-full-checks, 01.79, 347480, 1.70, 0.09, 0, 1697 +barnes, sn-0.6.0-full-checks, 02.87, 65632, 2.85, 0.02, 0, 2855 +espresso, sn-0.6.0-full-checks, 05.28, 12608, 5.25, 0.02, 0, 758 +z3, sn-0.6.0-full-checks, 01.19, 70156, 1.17, 0.01, 0, 765 +gs, sn-0.6.0-full-checks, 01.19, 54220, 1.15, 0.04, 0, 1945 +redis, sn-0.6.0-full-checks, 4.406, 33720, 1.87, 0.35, 0, 9048 +cfrac, sn-0.6.0-full-checks, 06.62, 3672, 6.62, 0.00, 0, 512 +leanN, sn-0.6.0-full-checks, 27.77, 662424, 108.83, 0.93, 0, 13224 +sed, sn-0.6.0-full-checks, 01.74, 349428, 1.65, 0.08, 0, 1596 +barnes, sn-0.6.0-full-checks, 02.86, 65732, 2.84, 0.02, 0, 2854 +espresso, sn-0.6.0-full-checks, 05.27, 12808, 5.26, 0.00, 0, 733 +z3, sn-0.6.0-full-checks, 01.17, 70276, 1.15, 0.02, 0, 767 +gs, sn-0.6.0-full-checks, 01.19, 54336, 1.15, 0.03, 0, 1947 +redis, sn-0.6.0-full-checks, 4.415, 33776, 1.86, 0.36, 0, 9147 +cfrac, sn-0.6.0-full-checks, 06.73, 3572, 6.73, 0.00, 0, 494 +leanN, sn-0.6.0-full-checks, 27.00, 657728, 104.05, 0.79, 0, 14663 +sed, sn-0.6.0-full-checks, 01.76, 347356, 1.66, 0.09, 0, 1590 +barnes, sn-0.6.0-full-checks, 02.86, 65688, 2.84, 0.02, 0, 2855 +espresso, sn-0.6.0-full-checks, 05.28, 12940, 5.26, 0.02, 0, 736 +z3, sn-0.6.0-full-checks, 01.18, 70316, 1.17, 0.01, 0, 764 +gs, sn-0.6.0-full-checks, 01.20, 54284, 1.17, 0.02, 0, 1957 +redis, sn-0.6.0-full-checks, 4.645, 33244, 1.93, 0.41, 0, 8835 +cfrac, sn-0.6.0-full-checks, 06.62, 3600, 6.61, 0.00, 0, 494 +leanN, sn-0.6.0-full-checks, 26.70, 667524, 101.43, 0.87, 0, 12013 +sed, sn-0.6.0-full-checks, 01.74, 347576, 1.67, 0.07, 0, 1696 +barnes, sn-0.6.0-full-checks, 02.87, 65668, 2.84, 0.03, 0, 2853 +espresso, sn-0.6.0-full-checks, 05.28, 12644, 5.25, 0.02, 0, 736 +z3, sn-0.6.0-full-checks, 01.18, 72472, 1.16, 0.02, 0, 780 +gs, sn-0.6.0-full-checks, 01.18, 54196, 1.17, 0.01, 0, 1955 +redis, sn-0.6.0-full-checks, 4.429, 33292, 1.83, 0.40, 0, 8884 
+cfrac, sn-0.6.0-full-checks, 06.71, 3668, 6.70, 0.00, 0, 511 +leanN, sn-0.6.0-full-checks, 26.04, 680596, 99.47, 0.93, 0, 14036 +sed, sn-0.6.0-full-checks, 01.74, 347564, 1.63, 0.10, 0, 1649 +barnes, sn-0.6.0-full-checks, 02.87, 65620, 2.83, 0.03, 0, 2851 +espresso, sn-0.6.0-full-checks, 05.29, 12660, 5.27, 0.01, 0, 729 +z3, sn-0.6.0-full-checks, 01.19, 70084, 1.17, 0.01, 0, 759 +gs, sn-0.6.0-full-checks, 01.18, 54320, 1.17, 0.01, 0, 1964 +redis, sn-0.6.0-full-checks, 4.443, 33348, 1.77, 0.46, 0, 8872 +cfrac, sn-0.6.0-full-checks, 06.62, 3640, 6.62, 0.00, 0, 512 +leanN, sn-0.6.0-full-checks, 27.41, 674896, 106.67, 0.85, 0, 14026 +sed, sn-0.6.0-full-checks, 01.79, 349524, 1.69, 0.10, 0, 1683 +barnes, sn-0.6.0-full-checks, 02.90, 65716, 2.89, 0.01, 0, 2870 +espresso, sn-0.6.0-full-checks, 05.28, 10580, 5.26, 0.02, 0, 1111 +z3, sn-0.6.0-full-checks, 01.18, 70424, 1.17, 0.01, 0, 766 +gs, sn-0.6.0-full-checks, 01.18, 54272, 1.16, 0.02, 0, 1944 +redis, sn-0.6.0-full-checks, 4.441, 33556, 1.88, 0.35, 0, 8974 +cfrac, sn-0.6.0-full-checks, 06.61, 3632, 6.61, 0.00, 0, 504 +leanN, sn-0.6.0-full-checks, 26.09, 670788, 99.44, 0.85, 0, 13183 +sed, sn-0.6.0-full-checks, 01.74, 347336, 1.64, 0.09, 0, 1539 +barnes, sn-0.6.0-full-checks, 02.85, 65668, 2.84, 0.01, 0, 2850 +espresso, sn-0.6.0-full-checks, 05.33, 12672, 5.29, 0.04, 0, 744 +z3, sn-0.6.0-full-checks, 01.20, 70352, 1.19, 0.01, 0, 763 +gs, sn-0.6.0-full-checks, 01.19, 54336, 1.15, 0.03, 0, 2006 +redis, sn-0.6.0-full-checks, 4.438, 33844, 1.91, 0.31, 0, 9062 +cfrac, sn-0.6.0-full-checks, 06.64, 3604, 6.64, 0.00, 0, 498 +leanN, sn-0.6.0-full-checks, 27.52, 671932, 105.43, 0.99, 0, 15234 +sed, sn-0.6.0-full-checks, 01.73, 345648, 1.65, 0.07, 0, 1696 +barnes, sn-0.6.0-full-checks, 02.93, 65632, 2.92, 0.01, 0, 2854 +espresso, sn-0.6.0-full-checks, 05.26, 12632, 5.22, 0.03, 0, 727 +z3, sn-0.6.0-full-checks, 01.19, 70152, 1.15, 0.03, 0, 756 +gs, sn-0.6.0-full-checks, 01.19, 54004, 1.17, 0.01, 0, 1957 +redis, sn-0.6.0-full-checks, 4.395, 33452, 1.84, 0.37, 0, 8967 +cfrac, sn-0.6.0-full-checks, 06.62, 3584, 6.62, 0.00, 0, 495 +leanN, sn-0.6.0-full-checks, 25.72, 685952, 96.81, 0.91, 0, 12094 +sed, sn-0.6.0-full-checks, 01.79, 347652, 1.70, 0.09, 0, 1639 +barnes, sn-0.6.0-full-checks, 02.87, 65580, 2.85, 0.01, 0, 2854 +espresso, sn-0.6.0-full-checks, 05.27, 12940, 5.25, 0.02, 0, 757 +z3, sn-0.6.0-full-checks, 01.18, 70108, 1.16, 0.01, 0, 766 +gs, sn-0.6.0-full-checks, 01.18, 54372, 1.15, 0.02, 0, 1953 +redis, sn-0.6.0-full-checks, 4.486, 33236, 1.84, 0.42, 0, 8862 +cfrac, sn-0.6.0-full-checks, 06.63, 3632, 6.63, 0.00, 0, 510 +leanN, sn-0.6.0-full-checks, 26.27, 673524, 100.89, 0.85, 0, 13522 +sed, sn-0.6.0-full-checks, 01.76, 349592, 1.69, 0.07, 0, 1642 +barnes, sn-0.6.0-full-checks, 02.88, 65744, 2.87, 0.01, 0, 2863 +espresso, sn-0.6.0-full-checks, 05.27, 12588, 5.26, 0.01, 0, 725 +z3, sn-0.6.0-full-checks, 01.19, 70400, 1.19, 0.00, 0, 772 +gs, sn-0.6.0-full-checks, 01.22, 56300, 1.18, 0.03, 0, 1960 +redis, sn-0.6.0-full-checks, 4.432, 33844, 1.81, 0.42, 0, 9037 +cfrac, sn-0.6.0-full-checks, 06.61, 3708, 6.61, 0.00, 0, 516 +leanN, sn-0.6.0-full-checks, 26.06, 653060, 99.23, 1.01, 0, 12978 +sed, sn-0.6.0-full-checks, 01.75, 347260, 1.66, 0.08, 0, 1585 +barnes, sn-0.6.0-full-checks, 02.87, 65616, 2.85, 0.01, 0, 2855 +espresso, sn-0.6.0-full-checks, 05.28, 12652, 5.26, 0.02, 0, 733 +z3, sn-0.6.0-full-checks, 01.19, 70324, 1.16, 0.02, 0, 763 +gs, sn-0.6.0-full-checks, 01.19, 54320, 1.15, 0.03, 0, 1961 +redis, sn-0.6.0-full-checks, 4.408, 33576, 1.89, 0.32, 0, 
9014 +cfrac, sn-0.6.0-full-checks, 06.64, 3660, 6.64, 0.00, 0, 508 +leanN, sn-0.6.0-full-checks, 26.62, 676740, 102.76, 0.92, 0, 13834 +sed, sn-0.6.0-full-checks, 01.74, 347536, 1.62, 0.11, 0, 1703 +barnes, sn-0.6.0-full-checks, 02.85, 65632, 2.83, 0.01, 0, 2853 +espresso, sn-0.6.0-full-checks, 05.27, 12648, 5.24, 0.03, 0, 737 +z3, sn-0.6.0-full-checks, 01.17, 70352, 1.15, 0.01, 0, 769 +gs, sn-0.6.0-full-checks, 01.19, 53988, 1.16, 0.02, 0, 2022 +redis, sn-0.6.0-full-checks, 4.478, 33468, 1.91, 0.34, 0, 8968 +cfrac, sn-0.6.0-full-checks, 06.61, 3604, 6.60, 0.00, 0, 494 +leanN, sn-0.6.0-full-checks, 26.12, 671828, 98.39, 0.91, 0, 13146 +sed, sn-0.6.0-full-checks, 01.74, 349584, 1.65, 0.09, 0, 1713 diff --git a/docs/security/data/res_sn-0.6.0-memcpy.csv b/docs/security/data/res_sn-0.6.0-memcpy.csv new file mode 100644 index 000000000..7ef0b8792 --- /dev/null +++ b/docs/security/data/res_sn-0.6.0-memcpy.csv @@ -0,0 +1,160 @@ +barnes, sn-0.6.0-memcpy-checks, 02.89, 65608, 2.88, 0.01, 0, 2837 +espresso, sn-0.6.0-memcpy-checks, 05.20, 6256, 5.19, 0.00, 0, 657 +z3, sn-0.6.0-memcpy-checks, 01.19, 66128, 1.17, 0.02, 0, 738 +gs, sn-0.6.0-memcpy-checks, 01.17, 48512, 1.14, 0.03, 0, 2000 +redis, sn-0.6.0-memcpy-checks, 4.274, 30408, 1.75, 0.40, 0, 8544 +cfrac, sn-0.6.0-memcpy-checks, 06.27, 3320, 6.27, 0.00, 0, 431 +leanN, sn-0.6.0-memcpy-checks, 25.01, 545020, 96.33, 0.92, 0, 13126 +sed, sn-0.6.0-memcpy-checks, 01.72, 342792, 1.62, 0.10, 0, 1473 +barnes, sn-0.6.0-memcpy-checks, 02.91, 65628, 2.89, 0.02, 0, 2840 +espresso, sn-0.6.0-memcpy-checks, 05.12, 6440, 5.09, 0.02, 0, 659 +z3, sn-0.6.0-memcpy-checks, 01.17, 65872, 1.16, 0.00, 0, 735 +gs, sn-0.6.0-memcpy-checks, 01.17, 48300, 1.14, 0.02, 0, 1997 +redis, sn-0.6.0-memcpy-checks, 4.317, 30544, 1.78, 0.39, 0, 8538 +cfrac, sn-0.6.0-memcpy-checks, 06.27, 3320, 6.27, 0.00, 0, 432 +leanN, sn-0.6.0-memcpy-checks, 27.40, 529564, 110.40, 0.89, 0, 12793 +sed, sn-0.6.0-memcpy-checks, 01.72, 342716, 1.61, 0.10, 0, 1452 +barnes, sn-0.6.0-memcpy-checks, 02.86, 65628, 2.84, 0.01, 0, 2840 +espresso, sn-0.6.0-memcpy-checks, 05.11, 6256, 5.08, 0.03, 0, 655 +z3, sn-0.6.0-memcpy-checks, 01.17, 66060, 1.15, 0.01, 0, 738 +gs, sn-0.6.0-memcpy-checks, 01.19, 48188, 1.16, 0.03, 0, 2000 +redis, sn-0.6.0-memcpy-checks, 4.257, 30400, 1.75, 0.39, 0, 8555 +cfrac, sn-0.6.0-memcpy-checks, 06.31, 3320, 6.31, 0.00, 0, 432 +leanN, sn-0.6.0-memcpy-checks, 26.73, 542128, 106.45, 0.90, 0, 11923 +sed, sn-0.6.0-memcpy-checks, 01.72, 342820, 1.65, 0.07, 0, 1451 +barnes, sn-0.6.0-memcpy-checks, 02.86, 65628, 2.84, 0.01, 0, 2837 +espresso, sn-0.6.0-memcpy-checks, 05.15, 6216, 5.10, 0.04, 0, 656 +z3, sn-0.6.0-memcpy-checks, 01.17, 66232, 1.15, 0.01, 0, 741 +gs, sn-0.6.0-memcpy-checks, 01.18, 48336, 1.15, 0.02, 0, 2000 +redis, sn-0.6.0-memcpy-checks, 4.236, 30356, 1.68, 0.45, 0, 8560 +cfrac, sn-0.6.0-memcpy-checks, 06.24, 3336, 6.24, 0.00, 0, 434 +leanN, sn-0.6.0-memcpy-checks, 25.17, 533080, 97.46, 0.95, 0, 13412 +sed, sn-0.6.0-memcpy-checks, 01.72, 342824, 1.62, 0.09, 0, 1453 +barnes, sn-0.6.0-memcpy-checks, 02.86, 65608, 2.82, 0.03, 0, 2834 +espresso, sn-0.6.0-memcpy-checks, 05.14, 6412, 5.11, 0.02, 0, 659 +z3, sn-0.6.0-memcpy-checks, 01.17, 66104, 1.15, 0.01, 0, 738 +gs, sn-0.6.0-memcpy-checks, 01.17, 48108, 1.13, 0.04, 0, 1997 +redis, sn-0.6.0-memcpy-checks, 4.265, 30460, 1.72, 0.42, 0, 8543 +cfrac, sn-0.6.0-memcpy-checks, 06.28, 3336, 6.27, 0.00, 0, 432 +leanN, sn-0.6.0-memcpy-checks, 25.95, 534752, 101.70, 0.95, 0, 11941 +sed, sn-0.6.0-memcpy-checks, 01.73, 342804, 1.64, 0.08, 0, 1476 
+barnes, sn-0.6.0-memcpy-checks, 02.86, 65604, 2.84, 0.01, 0, 2841 +espresso, sn-0.6.0-memcpy-checks, 05.14, 6440, 5.12, 0.02, 0, 661 +z3, sn-0.6.0-memcpy-checks, 01.16, 66264, 1.14, 0.02, 0, 740 +gs, sn-0.6.0-memcpy-checks, 01.18, 48464, 1.14, 0.04, 0, 1999 +redis, sn-0.6.0-memcpy-checks, 4.229, 30404, 1.71, 0.42, 0, 8564 +cfrac, sn-0.6.0-memcpy-checks, 06.24, 3336, 6.24, 0.00, 0, 430 +leanN, sn-0.6.0-memcpy-checks, 26.90, 518652, 106.80, 0.83, 0, 11945 +sed, sn-0.6.0-memcpy-checks, 01.74, 342748, 1.66, 0.07, 0, 1482 +barnes, sn-0.6.0-memcpy-checks, 02.85, 65648, 2.82, 0.02, 0, 2844 +espresso, sn-0.6.0-memcpy-checks, 05.12, 6548, 5.08, 0.04, 0, 663 +z3, sn-0.6.0-memcpy-checks, 01.17, 66016, 1.15, 0.01, 0, 742 +gs, sn-0.6.0-memcpy-checks, 01.17, 48452, 1.13, 0.03, 0, 1998 +redis, sn-0.6.0-memcpy-checks, 4.328, 30368, 1.83, 0.34, 0, 8531 +cfrac, sn-0.6.0-memcpy-checks, 06.25, 3340, 6.25, 0.00, 0, 430 +leanN, sn-0.6.0-memcpy-checks, 25.73, 544584, 100.10, 0.90, 0, 13151 +sed, sn-0.6.0-memcpy-checks, 01.74, 342872, 1.63, 0.10, 0, 1479 +barnes, sn-0.6.0-memcpy-checks, 02.86, 65648, 2.84, 0.02, 0, 2848 +espresso, sn-0.6.0-memcpy-checks, 05.12, 6428, 5.10, 0.02, 0, 657 +z3, sn-0.6.0-memcpy-checks, 01.17, 66184, 1.15, 0.01, 0, 741 +gs, sn-0.6.0-memcpy-checks, 01.19, 48188, 1.15, 0.04, 0, 2000 +redis, sn-0.6.0-memcpy-checks, 4.306, 30440, 1.81, 0.35, 0, 8542 +cfrac, sn-0.6.0-memcpy-checks, 06.27, 3376, 6.27, 0.00, 0, 434 +leanN, sn-0.6.0-memcpy-checks, 26.59, 534420, 106.42, 0.76, 0, 12479 +sed, sn-0.6.0-memcpy-checks, 01.79, 342792, 1.69, 0.10, 0, 1477 +barnes, sn-0.6.0-memcpy-checks, 02.88, 65616, 2.87, 0.01, 0, 2841 +espresso, sn-0.6.0-memcpy-checks, 05.12, 6240, 5.10, 0.02, 0, 655 +z3, sn-0.6.0-memcpy-checks, 01.18, 66120, 1.16, 0.02, 0, 731 +gs, sn-0.6.0-memcpy-checks, 01.19, 47984, 1.15, 0.03, 0, 1996 +redis, sn-0.6.0-memcpy-checks, 4.492, 30384, 1.88, 0.38, 0, 8479 +cfrac, sn-0.6.0-memcpy-checks, 06.28, 3376, 6.28, 0.00, 0, 434 +leanN, sn-0.6.0-memcpy-checks, 26.56, 530728, 106.76, 0.88, 0, 12442 +sed, sn-0.6.0-memcpy-checks, 01.73, 342796, 1.64, 0.09, 0, 1474 +barnes, sn-0.6.0-memcpy-checks, 02.92, 65608, 2.90, 0.02, 0, 2837 +espresso, sn-0.6.0-memcpy-checks, 05.13, 6548, 5.11, 0.01, 0, 660 +z3, sn-0.6.0-memcpy-checks, 01.16, 65980, 1.16, 0.00, 0, 736 +gs, sn-0.6.0-memcpy-checks, 01.18, 48384, 1.16, 0.02, 0, 1997 +redis, sn-0.6.0-memcpy-checks, 4.320, 30388, 1.80, 0.37, 0, 8544 +cfrac, sn-0.6.0-memcpy-checks, 06.27, 3308, 6.27, 0.00, 0, 433 +leanN, sn-0.6.0-memcpy-checks, 25.50, 536352, 99.48, 0.93, 0, 13333 +sed, sn-0.6.0-memcpy-checks, 01.73, 342988, 1.64, 0.09, 0, 1481 +barnes, sn-0.6.0-memcpy-checks, 02.87, 65556, 2.85, 0.02, 0, 2842 +espresso, sn-0.6.0-memcpy-checks, 05.26, 6440, 5.23, 0.02, 0, 659 +z3, sn-0.6.0-memcpy-checks, 01.18, 66176, 1.17, 0.01, 0, 738 +gs, sn-0.6.0-memcpy-checks, 01.18, 48476, 1.15, 0.02, 0, 2006 +redis, sn-0.6.0-memcpy-checks, 4.328, 30440, 1.79, 0.38, 0, 8532 +cfrac, sn-0.6.0-memcpy-checks, 06.27, 3336, 6.27, 0.00, 0, 429 +leanN, sn-0.6.0-memcpy-checks, 25.23, 533980, 96.62, 0.83, 0, 12236 +sed, sn-0.6.0-memcpy-checks, 01.73, 342752, 1.64, 0.09, 0, 1452 +barnes, sn-0.6.0-memcpy-checks, 02.87, 65572, 2.84, 0.02, 0, 2839 +espresso, sn-0.6.0-memcpy-checks, 05.20, 6260, 5.17, 0.02, 0, 656 +z3, sn-0.6.0-memcpy-checks, 01.17, 66212, 1.15, 0.01, 0, 737 +gs, sn-0.6.0-memcpy-checks, 01.17, 48144, 1.15, 0.02, 0, 1997 +redis, sn-0.6.0-memcpy-checks, 4.289, 30472, 1.78, 0.37, 0, 8542 +cfrac, sn-0.6.0-memcpy-checks, 06.27, 3420, 6.26, 0.00, 0, 435 +leanN, 
sn-0.6.0-memcpy-checks, 24.51, 522388, 93.95, 0.87, 0, 11928 +sed, sn-0.6.0-memcpy-checks, 01.72, 342740, 1.64, 0.08, 0, 1472 +barnes, sn-0.6.0-memcpy-checks, 02.86, 65616, 2.84, 0.02, 0, 2829 +espresso, sn-0.6.0-memcpy-checks, 05.16, 6336, 5.14, 0.01, 0, 658 +z3, sn-0.6.0-memcpy-checks, 01.18, 66124, 1.17, 0.01, 0, 734 +gs, sn-0.6.0-memcpy-checks, 01.17, 48504, 1.16, 0.01, 0, 1998 +redis, sn-0.6.0-memcpy-checks, 4.270, 30452, 1.82, 0.32, 0, 8553 +cfrac, sn-0.6.0-memcpy-checks, 06.26, 3308, 6.26, 0.00, 0, 434 +leanN, sn-0.6.0-memcpy-checks, 24.86, 533536, 96.02, 0.79, 0, 13301 +sed, sn-0.6.0-memcpy-checks, 01.73, 342864, 1.64, 0.08, 0, 1477 +barnes, sn-0.6.0-memcpy-checks, 02.90, 65628, 2.88, 0.01, 0, 2840 +espresso, sn-0.6.0-memcpy-checks, 05.11, 6336, 5.10, 0.01, 0, 658 +z3, sn-0.6.0-memcpy-checks, 01.18, 66104, 1.16, 0.01, 0, 736 +gs, sn-0.6.0-memcpy-checks, 01.18, 47980, 1.14, 0.03, 0, 1995 +redis, sn-0.6.0-memcpy-checks, 4.441, 30420, 1.90, 0.33, 0, 8490 +cfrac, sn-0.6.0-memcpy-checks, 06.26, 3312, 6.26, 0.00, 0, 432 +leanN, sn-0.6.0-memcpy-checks, 24.50, 526484, 93.75, 0.78, 0, 11139 +sed, sn-0.6.0-memcpy-checks, 01.74, 342876, 1.65, 0.09, 0, 1479 +barnes, sn-0.6.0-memcpy-checks, 02.93, 65692, 2.91, 0.01, 0, 2840 +espresso, sn-0.6.0-memcpy-checks, 05.16, 6204, 5.13, 0.03, 0, 655 +z3, sn-0.6.0-memcpy-checks, 01.22, 66008, 1.20, 0.02, 0, 732 +gs, sn-0.6.0-memcpy-checks, 01.19, 48452, 1.15, 0.04, 0, 1998 +redis, sn-0.6.0-memcpy-checks, 4.367, 30356, 1.80, 0.39, 0, 8516 +cfrac, sn-0.6.0-memcpy-checks, 06.29, 3336, 6.29, 0.00, 0, 430 +leanN, sn-0.6.0-memcpy-checks, 26.06, 528052, 101.95, 0.89, 0, 11474 +sed, sn-0.6.0-memcpy-checks, 01.73, 342804, 1.64, 0.08, 0, 1455 +barnes, sn-0.6.0-memcpy-checks, 02.85, 65772, 2.83, 0.02, 0, 2825 +espresso, sn-0.6.0-memcpy-checks, 05.11, 6300, 5.07, 0.04, 0, 655 +z3, sn-0.6.0-memcpy-checks, 01.17, 66100, 1.16, 0.00, 0, 735 +gs, sn-0.6.0-memcpy-checks, 01.18, 48300, 1.16, 0.01, 0, 1997 +redis, sn-0.6.0-memcpy-checks, 4.367, 30380, 1.81, 0.38, 0, 8527 +cfrac, sn-0.6.0-memcpy-checks, 06.26, 3336, 6.26, 0.00, 0, 431 +leanN, sn-0.6.0-memcpy-checks, 25.88, 525276, 101.33, 0.80, 0, 12426 +sed, sn-0.6.0-memcpy-checks, 01.78, 342860, 1.67, 0.10, 0, 1480 +barnes, sn-0.6.0-memcpy-checks, 02.90, 65808, 2.87, 0.02, 0, 2847 +espresso, sn-0.6.0-memcpy-checks, 05.14, 6252, 5.09, 0.04, 0, 657 +z3, sn-0.6.0-memcpy-checks, 01.18, 66176, 1.15, 0.02, 0, 737 +gs, sn-0.6.0-memcpy-checks, 01.17, 48444, 1.14, 0.02, 0, 1997 +redis, sn-0.6.0-memcpy-checks, 4.263, 30432, 1.80, 0.34, 0, 8561 +cfrac, sn-0.6.0-memcpy-checks, 06.27, 3336, 6.27, 0.00, 0, 432 +leanN, sn-0.6.0-memcpy-checks, 24.65, 529484, 93.79, 0.87, 0, 12943 +sed, sn-0.6.0-memcpy-checks, 01.76, 342828, 1.65, 0.10, 0, 1454 +barnes, sn-0.6.0-memcpy-checks, 02.87, 65672, 2.86, 0.01, 0, 2840 +espresso, sn-0.6.0-memcpy-checks, 05.13, 6268, 5.10, 0.03, 0, 658 +z3, sn-0.6.0-memcpy-checks, 01.20, 65928, 1.18, 0.01, 0, 731 +gs, sn-0.6.0-memcpy-checks, 01.19, 47956, 1.17, 0.02, 0, 1990 +redis, sn-0.6.0-memcpy-checks, 4.422, 30536, 1.94, 0.28, 0, 8505 +cfrac, sn-0.6.0-memcpy-checks, 06.27, 3336, 6.27, 0.00, 0, 432 +leanN, sn-0.6.0-memcpy-checks, 25.97, 551676, 101.63, 0.87, 0, 14181 +sed, sn-0.6.0-memcpy-checks, 01.73, 342800, 1.65, 0.08, 0, 1478 +barnes, sn-0.6.0-memcpy-checks, 02.87, 65672, 2.85, 0.02, 0, 2839 +espresso, sn-0.6.0-memcpy-checks, 05.12, 6280, 5.10, 0.02, 0, 659 +z3, sn-0.6.0-memcpy-checks, 01.18, 66240, 1.15, 0.02, 0, 736 +gs, sn-0.6.0-memcpy-checks, 01.18, 48492, 1.17, 0.01, 0, 2005 +redis, 
sn-0.6.0-memcpy-checks, 4.294, 30464, 1.79, 0.37, 0, 8546 +cfrac, sn-0.6.0-memcpy-checks, 06.28, 3336, 6.28, 0.00, 0, 432 +leanN, sn-0.6.0-memcpy-checks, 25.64, 536612, 100.11, 0.84, 0, 13271 +sed, sn-0.6.0-memcpy-checks, 01.72, 342916, 1.64, 0.08, 0, 1477 +barnes, sn-0.6.0-memcpy-checks, 02.86, 65692, 2.85, 0.00, 0, 2843 +espresso, sn-0.6.0-memcpy-checks, 05.12, 6304, 5.09, 0.02, 0, 655 +z3, sn-0.6.0-memcpy-checks, 01.18, 66208, 1.17, 0.01, 0, 736 +gs, sn-0.6.0-memcpy-checks, 01.19, 48212, 1.16, 0.02, 0, 2001 +redis, sn-0.6.0-memcpy-checks, 4.283, 30304, 1.82, 0.34, 0, 8541 +cfrac, sn-0.6.0-memcpy-checks, 06.26, 3320, 6.26, 0.00, 0, 428 +leanN, sn-0.6.0-memcpy-checks, 24.79, 534008, 95.51, 0.88, 0, 12152 +sed, sn-0.6.0-memcpy-checks, 01.73, 342800, 1.65, 0.08, 0, 1477 diff --git a/docs/security/data/res_sn-0.6.0.csv b/docs/security/data/res_sn-0.6.0.csv new file mode 100644 index 000000000..44375689e --- /dev/null +++ b/docs/security/data/res_sn-0.6.0.csv @@ -0,0 +1,160 @@ +barnes, sn-0.6.0, 02.87, 65508, 2.85, 0.01, 0, 2838 +espresso, sn-0.6.0, 05.19, 6216, 5.16, 0.03, 0, 654 +z3, sn-0.6.0, 01.17, 66184, 1.16, 0.01, 0, 734 +gs, sn-0.6.0, 01.18, 48280, 1.17, 0.01, 0, 1999 +redis, sn-0.6.0, 4.216, 30440, 1.74, 0.38, 0, 8556 +cfrac, sn-0.6.0, 06.27, 3332, 6.27, 0.00, 0, 432 +leanN, sn-0.6.0, 24.85, 545020, 95.77, 0.89, 0, 12529 +sed, sn-0.6.0, 01.73, 342816, 1.64, 0.08, 0, 1449 +barnes, sn-0.6.0, 02.86, 65508, 2.83, 0.02, 0, 2839 +espresso, sn-0.6.0, 05.12, 6536, 5.09, 0.02, 0, 658 +z3, sn-0.6.0, 01.16, 66108, 1.14, 0.01, 0, 735 +gs, sn-0.6.0, 01.17, 48452, 1.15, 0.02, 0, 1998 +redis, sn-0.6.0, 4.241, 30464, 1.80, 0.34, 0, 8558 +cfrac, sn-0.6.0, 06.27, 3244, 6.27, 0.00, 0, 434 +leanN, sn-0.6.0, 25.01, 517504, 97.71, 0.77, 0, 11406 +sed, sn-0.6.0, 01.73, 342784, 1.65, 0.07, 0, 1477 +barnes, sn-0.6.0, 02.90, 65604, 2.88, 0.02, 0, 2837 +espresso, sn-0.6.0, 05.10, 6248, 5.06, 0.04, 0, 659 +z3, sn-0.6.0, 01.18, 66188, 1.16, 0.01, 0, 740 +gs, sn-0.6.0, 01.18, 48188, 1.15, 0.02, 0, 1998 +redis, sn-0.6.0, 4.254, 30504, 1.80, 0.34, 0, 8552 +cfrac, sn-0.6.0, 06.26, 3332, 6.26, 0.00, 0, 434 +leanN, sn-0.6.0, 27.54, 544660, 111.08, 1.01, 0, 13274 +sed, sn-0.6.0, 01.72, 342796, 1.60, 0.11, 0, 1474 +barnes, sn-0.6.0, 02.86, 65672, 2.84, 0.02, 0, 2839 +espresso, sn-0.6.0, 05.11, 6232, 5.09, 0.02, 0, 654 +z3, sn-0.6.0, 01.17, 66068, 1.15, 0.01, 0, 741 +gs, sn-0.6.0, 01.18, 48352, 1.14, 0.03, 0, 2001 +redis, sn-0.6.0, 4.199, 30520, 1.74, 0.36, 0, 8579 +cfrac, sn-0.6.0, 06.28, 3300, 6.28, 0.00, 0, 433 +leanN, sn-0.6.0, 25.55, 535144, 99.90, 0.83, 0, 12792 +sed, sn-0.6.0, 01.73, 342908, 1.63, 0.09, 0, 1480 +barnes, sn-0.6.0, 02.86, 65552, 2.83, 0.02, 0, 2838 +espresso, sn-0.6.0, 05.11, 6328, 5.07, 0.03, 0, 661 +z3, sn-0.6.0, 01.17, 66052, 1.17, 0.00, 0, 734 +gs, sn-0.6.0, 01.17, 48216, 1.14, 0.01, 0, 1999 +redis, sn-0.6.0, 4.248, 30392, 1.74, 0.40, 0, 8556 +cfrac, sn-0.6.0, 06.39, 3272, 6.39, 0.00, 0, 429 +leanN, sn-0.6.0, 26.24, 529888, 103.74, 0.90, 0, 10606 +sed, sn-0.6.0, 01.73, 342732, 1.66, 0.06, 0, 1477 +barnes, sn-0.6.0, 02.86, 65660, 2.85, 0.01, 0, 2839 +espresso, sn-0.6.0, 05.21, 6272, 5.18, 0.03, 0, 660 +z3, sn-0.6.0, 01.17, 65988, 1.14, 0.02, 0, 733 +gs, sn-0.6.0, 01.16, 48528, 1.13, 0.03, 0, 1997 +redis, sn-0.6.0, 4.203, 30428, 1.78, 0.34, 0, 8564 +cfrac, sn-0.6.0, 06.23, 3276, 6.23, 0.00, 0, 431 +leanN, sn-0.6.0, 27.12, 530604, 108.73, 1.00, 0, 12559 +sed, sn-0.6.0, 01.73, 342848, 1.64, 0.09, 0, 1474 +barnes, sn-0.6.0, 02.93, 65796, 2.92, 0.01, 0, 2844 +espresso, sn-0.6.0, 05.11, 6256, 5.11, 
0.00, 0, 658 +z3, sn-0.6.0, 01.16, 66164, 1.15, 0.01, 0, 739 +gs, sn-0.6.0, 01.17, 48532, 1.15, 0.01, 0, 1993 +redis, sn-0.6.0, 4.196, 30348, 1.74, 0.37, 0, 8562 +cfrac, sn-0.6.0, 06.25, 3328, 6.25, 0.00, 0, 431 +leanN, sn-0.6.0, 26.39, 518364, 104.78, 0.84, 0, 11801 +sed, sn-0.6.0, 01.76, 342848, 1.68, 0.07, 0, 1478 +barnes, sn-0.6.0, 02.86, 65636, 2.85, 0.01, 0, 2847 +espresso, sn-0.6.0, 05.12, 6268, 5.09, 0.03, 0, 659 +z3, sn-0.6.0, 01.18, 66116, 1.17, 0.00, 0, 738 +gs, sn-0.6.0, 01.18, 48488, 1.16, 0.02, 0, 2000 +redis, sn-0.6.0, 4.320, 30360, 1.80, 0.37, 0, 8539 +cfrac, sn-0.6.0, 06.27, 3336, 6.27, 0.00, 0, 435 +leanN, sn-0.6.0, 25.36, 534340, 98.64, 0.81, 0, 12637 +sed, sn-0.6.0, 01.76, 342736, 1.66, 0.09, 0, 1478 +barnes, sn-0.6.0, 02.87, 65668, 2.84, 0.03, 0, 2837 +espresso, sn-0.6.0, 05.13, 6236, 5.10, 0.02, 0, 656 +z3, sn-0.6.0, 01.17, 66192, 1.16, 0.00, 0, 737 +gs, sn-0.6.0, 01.17, 48364, 1.16, 0.01, 0, 1998 +redis, sn-0.6.0, 4.182, 30384, 1.72, 0.39, 0, 8574 +cfrac, sn-0.6.0, 06.27, 3332, 6.27, 0.00, 0, 436 +leanN, sn-0.6.0, 24.73, 534340, 94.63, 0.75, 0, 13335 +sed, sn-0.6.0, 01.73, 342912, 1.64, 0.08, 0, 1475 +barnes, sn-0.6.0, 02.86, 65640, 2.84, 0.02, 0, 2848 +espresso, sn-0.6.0, 05.14, 6296, 5.10, 0.03, 0, 658 +z3, sn-0.6.0, 01.17, 66000, 1.14, 0.02, 0, 732 +gs, sn-0.6.0, 01.18, 48316, 1.14, 0.03, 0, 1996 +redis, sn-0.6.0, 4.239, 30492, 1.74, 0.39, 0, 8547 +cfrac, sn-0.6.0, 06.27, 3264, 6.26, 0.00, 0, 430 +leanN, sn-0.6.0, 25.04, 538884, 96.63, 0.76, 0, 13205 +sed, sn-0.6.0, 01.73, 342812, 1.65, 0.08, 0, 1454 +barnes, sn-0.6.0, 02.90, 65508, 2.89, 0.00, 0, 2839 +espresso, sn-0.6.0, 05.13, 6432, 5.12, 0.01, 0, 659 +z3, sn-0.6.0, 01.17, 66128, 1.16, 0.01, 0, 734 +gs, sn-0.6.0, 01.18, 48284, 1.15, 0.02, 0, 1997 +redis, sn-0.6.0, 4.231, 30396, 1.68, 0.44, 0, 8561 +cfrac, sn-0.6.0, 06.26, 3312, 6.26, 0.00, 0, 432 +leanN, sn-0.6.0, 24.98, 530168, 95.53, 0.83, 0, 12350 +sed, sn-0.6.0, 01.73, 342840, 1.63, 0.10, 0, 1475 +barnes, sn-0.6.0, 02.86, 65600, 2.86, 0.00, 0, 2837 +espresso, sn-0.6.0, 05.22, 6232, 5.19, 0.02, 0, 655 +z3, sn-0.6.0, 01.16, 66120, 1.14, 0.02, 0, 733 +gs, sn-0.6.0, 01.17, 48504, 1.15, 0.02, 0, 1996 +redis, sn-0.6.0, 4.189, 30384, 1.69, 0.41, 0, 8580 +cfrac, sn-0.6.0, 06.43, 3328, 6.43, 0.00, 0, 434 +leanN, sn-0.6.0, 25.93, 546076, 101.21, 0.89, 0, 12098 +sed, sn-0.6.0, 01.73, 342820, 1.59, 0.13, 0, 1454 +barnes, sn-0.6.0, 02.84, 65664, 2.83, 0.01, 0, 2834 +espresso, sn-0.6.0, 05.24, 6204, 5.21, 0.03, 0, 658 +z3, sn-0.6.0, 01.16, 66120, 1.14, 0.01, 0, 732 +gs, sn-0.6.0, 01.18, 48436, 1.16, 0.02, 0, 1994 +redis, sn-0.6.0, 4.205, 30532, 1.72, 0.39, 0, 8568 +cfrac, sn-0.6.0, 06.26, 3300, 6.26, 0.00, 0, 429 +leanN, sn-0.6.0, 24.78, 528436, 95.18, 0.90, 0, 12185 +sed, sn-0.6.0, 01.73, 342816, 1.63, 0.09, 0, 1453 +barnes, sn-0.6.0, 02.90, 65672, 2.88, 0.01, 0, 2838 +espresso, sn-0.6.0, 05.11, 6332, 5.08, 0.03, 0, 659 +z3, sn-0.6.0, 01.17, 66156, 1.14, 0.02, 0, 737 +gs, sn-0.6.0, 01.17, 48076, 1.14, 0.03, 0, 1993 +redis, sn-0.6.0, 4.241, 30492, 1.70, 0.43, 0, 8554 +cfrac, sn-0.6.0, 06.26, 3272, 6.25, 0.00, 0, 431 +leanN, sn-0.6.0, 25.24, 540816, 98.88, 0.86, 0, 13196 +sed, sn-0.6.0, 01.72, 342792, 1.62, 0.10, 0, 1473 +barnes, sn-0.6.0, 02.88, 65672, 2.86, 0.01, 0, 2846 +espresso, sn-0.6.0, 05.16, 6204, 5.14, 0.02, 0, 657 +z3, sn-0.6.0, 01.19, 66132, 1.16, 0.02, 0, 740 +gs, sn-0.6.0, 01.19, 48452, 1.14, 0.05, 0, 1998 +redis, sn-0.6.0, 4.162, 30416, 1.72, 0.37, 0, 8595 +cfrac, sn-0.6.0, 06.27, 3276, 6.27, 0.00, 0, 429 +leanN, sn-0.6.0, 25.06, 516652, 97.97, 0.97, 0, 
11333 +sed, sn-0.6.0, 01.73, 342792, 1.64, 0.08, 0, 1478 +barnes, sn-0.6.0, 02.92, 65664, 2.91, 0.01, 0, 2840 +espresso, sn-0.6.0, 05.11, 6296, 5.09, 0.02, 0, 656 +z3, sn-0.6.0, 01.17, 66088, 1.15, 0.01, 0, 741 +gs, sn-0.6.0, 01.17, 48348, 1.15, 0.02, 0, 1996 +redis, sn-0.6.0, 4.202, 30528, 1.74, 0.37, 0, 8568 +cfrac, sn-0.6.0, 06.28, 3312, 6.28, 0.00, 0, 435 +leanN, sn-0.6.0, 24.59, 547572, 94.33, 0.82, 0, 11927 +sed, sn-0.6.0, 01.76, 342800, 1.69, 0.07, 0, 1456 +barnes, sn-0.6.0, 02.89, 65644, 2.87, 0.01, 0, 2849 +espresso, sn-0.6.0, 05.13, 6248, 5.10, 0.02, 0, 657 +z3, sn-0.6.0, 01.17, 66224, 1.15, 0.01, 0, 738 +gs, sn-0.6.0, 01.17, 48396, 1.14, 0.02, 0, 1999 +redis, sn-0.6.0, 4.217, 30432, 1.76, 0.36, 0, 8574 +cfrac, sn-0.6.0, 06.26, 3336, 6.26, 0.00, 0, 435 +leanN, sn-0.6.0, 25.81, 534344, 100.53, 0.99, 0, 12584 +sed, sn-0.6.0, 01.76, 342784, 1.64, 0.12, 0, 1476 +barnes, sn-0.6.0, 02.88, 65636, 2.86, 0.02, 0, 2848 +espresso, sn-0.6.0, 05.13, 6432, 5.11, 0.01, 0, 656 +z3, sn-0.6.0, 01.18, 65980, 1.17, 0.01, 0, 736 +gs, sn-0.6.0, 01.19, 48464, 1.15, 0.03, 0, 1999 +redis, sn-0.6.0, 4.178, 30424, 1.69, 0.40, 0, 8592 +cfrac, sn-0.6.0, 06.29, 3304, 6.29, 0.00, 0, 436 +leanN, sn-0.6.0, 25.81, 539980, 100.77, 0.90, 0, 13684 +sed, sn-0.6.0, 01.73, 342852, 1.63, 0.09, 0, 1479 +barnes, sn-0.6.0, 02.88, 65684, 2.85, 0.02, 0, 2840 +espresso, sn-0.6.0, 05.16, 6364, 5.15, 0.01, 0, 660 +z3, sn-0.6.0, 01.17, 65920, 1.15, 0.02, 0, 731 +gs, sn-0.6.0, 01.17, 48424, 1.14, 0.02, 0, 1998 +redis, sn-0.6.0, 4.193, 30440, 1.71, 0.40, 0, 8578 +cfrac, sn-0.6.0, 06.26, 3328, 6.26, 0.00, 0, 428 +leanN, sn-0.6.0, 24.98, 545136, 96.54, 0.80, 0, 12708 +sed, sn-0.6.0, 01.74, 342788, 1.64, 0.10, 0, 1477 +barnes, sn-0.6.0, 02.85, 65604, 2.84, 0.01, 0, 2829 +espresso, sn-0.6.0, 05.09, 6296, 5.07, 0.02, 0, 656 +z3, sn-0.6.0, 01.17, 66064, 1.15, 0.01, 0, 732 +gs, sn-0.6.0, 01.19, 48128, 1.17, 0.01, 0, 1997 +redis, sn-0.6.0, 4.199, 30328, 1.78, 0.33, 0, 8578 +cfrac, sn-0.6.0, 06.27, 3312, 6.27, 0.00, 0, 430 +leanN, sn-0.6.0, 24.71, 546284, 94.03, 0.79, 0, 12227 +sed, sn-0.6.0, 01.73, 342852, 1.65, 0.08, 0, 1476 diff --git a/src/aal/aal_concept.h b/src/aal/aal_concept.h deleted file mode 100644 index 6b85772ee..000000000 --- a/src/aal/aal_concept.h +++ /dev/null @@ -1,69 +0,0 @@ -#pragma once - -#ifdef __cpp_concepts -# include "../ds/concept.h" -# include "../ds/ptrwrap.h" -# include "aal_consts.h" - -# include -# include - -namespace snmalloc -{ - /** - * AALs must advertise the bit vector of supported features, their name, - * - */ - template - concept ConceptAAL_static_members = requires() - { - typename std::integral_constant; - typename std::integral_constant; - }; - - /** - * AALs provide a prefetch operation. - */ - template - concept ConceptAAL_prefetch = requires(void *ptr) - { - { AAL::prefetch(ptr) } noexcept -> ConceptSame; - }; - - /** - * AALs provide a notion of high-precision timing. - */ - template - concept ConceptAAL_tick = requires() - { - { AAL::tick() } noexcept -> ConceptSame; - }; - - template - concept ConceptAAL_capptr_methods = - requires(CapPtr auth, CapPtr ret, size_t sz) - { - /** - * Produce a pointer with reduced authority from a more privilged pointer. - * The resulting pointer will have base at auth's address and length of - * exactly sz. auth+sz must not exceed auth's limit. - */ - { AAL::template capptr_bound(auth, sz) } noexcept - -> ConceptSame>; - - /** - * Construct a copy of auth with its target set to that of ret. 
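// A standalone sketch (illustrative, not snmalloc code) of the concept style
// used by aal_concept.h above: a requires-expression checks that a static
// method exists, is noexcept, and returns exactly the stated type (the role
// ConceptSame plays there; std::same_as stands in for it here).
// MinimalAAL/BadAAL are made-up example types.
#include <concepts>
#include <cstdint>

template<typename AAL>
concept HasTick = requires()
{
  { AAL::tick() } noexcept -> std::same_as<uint64_t>;
};

struct MinimalAAL
{
  static uint64_t tick() noexcept
  {
    return 0;
  }
};

struct BadAAL
{
  // Wrong return type and not noexcept, so the concept is not satisfied.
  static int tick()
  {
    return 0;
  }
};

static_assert(HasTick<MinimalAAL>);
static_assert(!HasTick<BadAAL>);

int main() {}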
- */ - { AAL::capptr_rebound(auth, ret) } noexcept - -> ConceptSame>; - }; - - template - concept ConceptAAL = - ConceptAAL_static_members && - ConceptAAL_prefetch && - ConceptAAL_tick && - ConceptAAL_capptr_methods; - -} // namespace snmalloc -#endif diff --git a/src/ds/cdllist.h b/src/ds/cdllist.h deleted file mode 100644 index f32f66197..000000000 --- a/src/ds/cdllist.h +++ /dev/null @@ -1,171 +0,0 @@ -#pragma once - -#include "defines.h" -#include "ptrwrap.h" - -#include -#include - -namespace snmalloc -{ - template typename Ptr = Pointer> - class CDLLNodeBase - { - /** - * to_next is used to handle a zero initialised data structure. - * This means that `is_empty` works even when the constructor hasn't - * been run. - */ - ptrdiff_t to_next = 0; - - protected: - void set_next(Ptr c) - { - to_next = pointer_diff_signed(Ptr>(this), c); - } - - public: - SNMALLOC_FAST_PATH bool is_empty() - { - return to_next == 0; - } - - SNMALLOC_FAST_PATH Ptr get_next() - { - return static_cast>(pointer_offset_signed(this, to_next)); - } - }; - - template typename Ptr = Pointer> - class CDLLNodeBaseNext - { - /** - * Like to_next in the pointer-less case, this version still works with - * zero-initialized data structure. To make `is_empty` work in this case, - * next is set to `nullptr` rather than `this` when the list is empty. - * - */ - - Ptr next = nullptr; - - protected: - void set_next(Ptr c) - { - next = address_cast(c) == address_cast(this) ? nullptr : c; - } - - public: - SNMALLOC_FAST_PATH bool is_empty() - { - return next == nullptr; - } - - SNMALLOC_FAST_PATH Ptr get_next() - { - return next == nullptr ? Ptr(static_cast(this)) : next; - } - }; - - template typename Ptr = Pointer> - using CDLLNodeParent = std::conditional_t< - aal_supports, - CDLLNodeBaseNext, - CDLLNodeBase>; - - /** - * Special class for cyclic doubly linked non-empty linked list - * - * This code assumes there is always one element in the list. The client - * must ensure there is a sentinal element. - */ - template typename Ptr = Pointer> - class CDLLNode : public CDLLNodeParent, Ptr> - { - Ptr prev = nullptr; - - public: - /** - * Single element cyclic list. This is the empty case. - */ - CDLLNode() - { - this->set_next(Ptr(this)); - prev = Ptr(this); - } - - /** - * Removes this element from the cyclic list is it part of. - */ - SNMALLOC_FAST_PATH void remove() - { - SNMALLOC_ASSERT(!this->is_empty()); - debug_check(); - this->get_next()->prev = prev; - prev->set_next(this->get_next()); - // As this is no longer in the list, check invariant for - // neighbouring element. - this->get_next()->debug_check(); - -#ifndef NDEBUG - this->set_next(nullptr); - prev = nullptr; -#endif - } - - /** - * Nulls the previous pointer - * - * The Meta-slab uses nullptr in prev to mean that it is not part of a - * size class list. - **/ - void null_prev() - { - prev = nullptr; - } - - SNMALLOC_FAST_PATH Ptr get_prev() - { - return prev; - } - - SNMALLOC_FAST_PATH void insert_next(Ptr item) - { - debug_check(); - item->set_next(this->get_next()); - this->get_next()->prev = item; - item->prev = this; - set_next(item); - debug_check(); - } - - SNMALLOC_FAST_PATH void insert_prev(Ptr item) - { - debug_check(); - item->prev = prev; - prev->set_next(item); - item->set_next(Ptr(this)); - prev = item; - debug_check(); - } - - /** - * Checks the lists invariants - * x->next->prev = x - * for all x in the list. 
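// A minimal standalone sketch of the sentinel-based cyclic doubly linked list
// technique used by CDLLNode above: the list always contains at least the
// sentinel element, so insert and remove never need to branch on emptiness.
// Node/main are illustrative names, not the snmalloc types.
#include <cassert>

struct Node
{
  Node* prev;
  Node* next;

  Node() : prev(this), next(this) {} // a one-element cycle is the empty list

  bool is_empty() const
  {
    return next == this;
  }

  void insert_next(Node* item)
  {
    item->next = next;
    item->prev = this;
    next->prev = item;
    next = item;
  }

  void remove()
  {
    next->prev = prev;
    prev->next = next;
  }
};

int main()
{
  Node sentinel; // the client-provided sentinel element
  Node a, b;
  sentinel.insert_next(&a);
  sentinel.insert_next(&b); // list is now: sentinel -> b -> a -> sentinel

  // The invariant that debug_check() walks: x->next->prev == x for every x.
  for (Node* x = &sentinel;;)
  {
    assert(x->next->prev == x);
    x = x->next;
    if (x == &sentinel)
      break;
  }

  b.remove();
  assert(sentinel.next == &a && a.next == &sentinel);
  assert(!sentinel.is_empty());
}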
- */ - void debug_check() - { -#ifndef NDEBUG - Ptr item = this->get_next(); - auto p = Ptr(this); - - do - { - SNMALLOC_ASSERT(item->prev == p); - p = item; - item = item->get_next(); - } while (item != Ptr(this)); -#endif - } - }; -} // namespace snmalloc diff --git a/src/ds/concept.h b/src/ds/concept.h deleted file mode 100644 index 4e489edea..000000000 --- a/src/ds/concept.h +++ /dev/null @@ -1,42 +0,0 @@ -#pragma once - -#include - -/** - * C++20 concepts are referenced as if they were types in declarations within - * template parameters (e.g. "template ..."). That is, they - * take the place of the "typename"/"class" keyword on template parameters. - * If the compiler understands concepts, this macro expands as its argument; - * otherwise, it expands to the keyword "typename", so snmalloc templates that - * use concept-qualified parameters should use this to remain compatible across - * C++ versions: "template" - */ -#ifdef __cpp_concepts -# define SNMALLOC_CONCEPT(c) c -#else -# define SNMALLOC_CONCEPT(c) typename -#endif - -#ifdef __cpp_concepts -namespace snmalloc -{ - /** - * C++20 concepts are more than just new syntax; there's a new support - * library specified as well. As C++20 is quite new, however, there are some - * environments, notably Clang, that understand the syntax but do not yet - * offer the library. Fortunately, alternate pronouciations are possible. - */ -# ifdef _cpp_lib_concepts - /** - * ConceptSame is true if T and U are the same type and false otherwise. - * When specifying a concept, use ConceptSame to indicate that an - * expression must evaluate precisely to the type U. - */ - template - concept ConceptSame = std::same_as; -# else - template - concept ConceptSame = std::is_same::value; -# endif -} // namespace snmalloc -#endif diff --git a/src/ds/csv.h b/src/ds/csv.h deleted file mode 100644 index 63419efbd..000000000 --- a/src/ds/csv.h +++ /dev/null @@ -1,55 +0,0 @@ -#pragma once - -#include -#include - -namespace snmalloc -{ - class CSVStream - { - private: - std::ostream* out; - bool first = true; - - public: - class Endl - {}; - - Endl endl; - - CSVStream(std::ostream* o) : out(o) {} - - void preprint() - { - if (!first) - { - *out << ", "; - } - else - { - first = false; - } - } - - CSVStream& operator<<(const std::string& str) - { - preprint(); - *out << str; - return *this; - } - - CSVStream& operator<<(uint64_t u) - { - preprint(); - *out << u; - return *this; - } - - CSVStream& operator<<(Endl) - { - *out << std::endl; - first = true; - return *this; - } - }; -} // namespace snmalloc \ No newline at end of file diff --git a/src/ds/defines.h b/src/ds/defines.h deleted file mode 100644 index 53d362bfc..000000000 --- a/src/ds/defines.h +++ /dev/null @@ -1,110 +0,0 @@ -#pragma once - -#if defined(_MSC_VER) && !defined(__clang__) -# define ALWAYSINLINE __forceinline -# define NOINLINE __declspec(noinline) -# define likely(x) !!(x) -# define unlikely(x) !!(x) -# define SNMALLOC_SLOW_PATH NOINLINE -# define SNMALLOC_FAST_PATH ALWAYSINLINE -# define SNMALLOC_PURE -# define SNMALLOC_COLD -#else -# define likely(x) __builtin_expect(!!(x), 1) -# define unlikely(x) __builtin_expect(!!(x), 0) -# define ALWAYSINLINE __attribute__((always_inline)) -# define NOINLINE __attribute__((noinline)) -# define SNMALLOC_SLOW_PATH NOINLINE -# define SNMALLOC_FAST_PATH inline ALWAYSINLINE -# define SNMALLOC_PURE __attribute__((const)) -# define SNMALLOC_COLD __attribute__((cold)) -#endif - -#if defined(__cpp_constinit) && __cpp_constinit >= 201907 -# define 
SNMALLOC_CONSTINIT_FN constinit -# define SNMALLOC_CONSTINIT_STATIC constinit const -#else -# define SNMALLOC_CONSTINIT_FN constexpr -# define SNMALLOC_CONSTINIT_STATIC constexpr -#endif - -#if defined(__cpp_consteval) -# define SNMALLOC_CONSTEVAL consteval -#else -# define SNMALLOC_CONSTEVAL constexpr -#endif - -#if !defined(__clang__) && defined(__GNUC__) -# define GCC_NOT_CLANG -#endif - -#ifdef GCC_NOT_CLANG -# if __GNUC__ >= 8 -# define GCC_VERSION_EIGHT_PLUS -# endif -#endif - -#ifdef __APPLE__ -# define SNMALLOC_FORCE_BSS __attribute__((section("__DATA,__bss"))) -#elif defined(__ELF__) -# define SNMALLOC_FORCE_BSS __attribute__((section(".bss"))) -#else -# define SNMALLOC_FORCE_BSS -#endif - -#ifndef __has_builtin -# define __has_builtin(x) 0 -#endif - -#define UNUSED(x) ((void)(x)) - -namespace snmalloc -{ - // Forwards reference so that the platform can define how to handle errors. - [[noreturn]] SNMALLOC_COLD void error(const char* const str); -} // namespace snmalloc - -#define TOSTRING(expr) TOSTRING2(expr) -#define TOSTRING2(expr) #expr - -#ifdef NDEBUG -# define SNMALLOC_ASSERT(expr) \ - {} -#else -# define SNMALLOC_ASSERT(expr) \ - { \ - if (!(expr)) \ - { \ - snmalloc::error("assert fail: " #expr " in " __FILE__ \ - " on " TOSTRING(__LINE__)); \ - } \ - } -#endif - -#define SNMALLOC_CHECK(expr) \ - { \ - if (!(expr)) \ - { \ - snmalloc::error("Check fail: " #expr " in " __FILE__ \ - " on " TOSTRING(__LINE__)); \ - } \ - } - -#ifndef NDEBUG -# define SNMALLOC_ASSUME(x) SNMALLOC_ASSERT(x) -#else -# if __has_builtin(__builtin_assume) -# define SNMALLOC_ASSUME(x) __builtin_assume((x)) -# elif defined(_MSC_VER) -# define SNMALLOC_ASSUME(x) __assume((x)); -# elif defined(__GNUC__) -# define SNMALLOC_ASSUME(x) \ - if (!(x)) \ - __builtin_unreachable(); -# else -# define SNMALLOC_ASSUME(x) \ - do \ - { \ - } while (0) -# endif -#endif diff --git a/src/ds/dllist.h b/src/ds/dllist.h deleted file mode 100644 index 36c669c90..000000000 --- a/src/ds/dllist.h +++ /dev/null @@ -1,210 +0,0 @@ -#pragma once - -#include "helpers.h" -#include "invalidptr.h" -#include "ptrwrap.h" - -#include -#include - -namespace snmalloc -{ - template< - class T, - template typename Ptr = Pointer, - class Terminator = std::nullptr_t, - void on_clear(Ptr) = ignore> - class DLList final - { - private: - static_assert( - std::is_same>::value, - "T->prev must be a Ptr"); - static_assert( - std::is_same>::value, - "T->next must be a Ptr"); - - Ptr head = Terminator(); - Ptr tail = Terminator(); - - public: - ~DLList() - { - clear(); - } - - DLList() = default; - - DLList(DLList&& o) noexcept - { - head = o.head; - tail = o.tail; - - o.head = nullptr; - o.tail = nullptr; - } - - DLList& operator=(DLList&& o) noexcept - { - head = o.head; - tail = o.tail; - - o.head = nullptr; - o.tail = nullptr; - return *this; - } - - SNMALLOC_FAST_PATH bool is_empty() - { - return head == Terminator(); - } - - SNMALLOC_FAST_PATH Ptr get_head() - { - return head; - } - - Ptr get_tail() - { - return tail; - } - - SNMALLOC_FAST_PATH Ptr pop() - { - Ptr item = head; - - if (item != Terminator()) - remove(item); - - return item; - } - - Ptr pop_tail() - { - Ptr item = tail; - - if (item != Terminator()) - remove(item); - - return item; - } - - void insert(Ptr item) - { -#ifndef NDEBUG - debug_check_not_contains(item); -#endif - - item->next = head; - item->prev = Terminator(); - - if (head != Terminator()) - head->prev = item; - else - tail = item; - - head = item; -#ifndef NDEBUG - debug_check(); -#endif - } - - void insert_back(Ptr 
item) - { -#ifndef NDEBUG - debug_check_not_contains(item); -#endif - - item->prev = tail; - item->next = Terminator(); - - if (tail != Terminator()) - tail->next = item; - else - head = item; - - tail = item; -#ifndef NDEBUG - debug_check(); -#endif - } - - SNMALLOC_FAST_PATH void remove(Ptr item) - { -#ifndef NDEBUG - debug_check_contains(item); -#endif - - if (item->next != Terminator()) - item->next->prev = item->prev; - else - tail = item->prev; - - if (item->prev != Terminator()) - item->prev->next = item->next; - else - head = item->next; - -#ifndef NDEBUG - debug_check(); -#endif - } - - void clear() - { - while (head != nullptr) - { - auto c = head; - remove(c); - on_clear(c); - } - } - - void debug_check_contains(Ptr item) - { -#ifndef NDEBUG - debug_check(); - Ptr curr = head; - - while (curr != item) - { - SNMALLOC_ASSERT(curr != Terminator()); - curr = curr->next; - } -#else - UNUSED(item); -#endif - } - - void debug_check_not_contains(Ptr item) - { -#ifndef NDEBUG - debug_check(); - Ptr curr = head; - - while (curr != Terminator()) - { - SNMALLOC_ASSERT(curr != item); - curr = curr->next; - } -#else - UNUSED(item); -#endif - } - - void debug_check() - { -#ifndef NDEBUG - Ptr item = head; - Ptr prev = Terminator(); - - while (item != Terminator()) - { - SNMALLOC_ASSERT(item->prev == prev); - prev = item; - item = item->next; - } -#endif - } - }; -} // namespace snmalloc diff --git a/src/ds/flaglock.h b/src/ds/flaglock.h deleted file mode 100644 index 9b2458d0d..000000000 --- a/src/ds/flaglock.h +++ /dev/null @@ -1,24 +0,0 @@ -#pragma once - -#include "bits.h" - -namespace snmalloc -{ - class FlagLock - { - private: - std::atomic_flag& lock; - - public: - FlagLock(std::atomic_flag& lock) : lock(lock) - { - while (lock.test_and_set(std::memory_order_acquire)) - Aal::pause(); - } - - ~FlagLock() - { - lock.clear(std::memory_order_release); - } - }; -} // namespace snmalloc diff --git a/src/ds/helpers.h b/src/ds/helpers.h deleted file mode 100644 index 49211a3b8..000000000 --- a/src/ds/helpers.h +++ /dev/null @@ -1,241 +0,0 @@ -#pragma once - -#include "bits.h" -#include "flaglock.h" - -#include - -namespace snmalloc -{ - /* - * In some use cases we need to run before any of the C++ runtime has been - * initialised. This singleton class is designed to not depend on the - * runtime. - */ - template - class Singleton - { - inline static std::atomic_flag flag; - inline static std::atomic initialised{false}; - inline static Object obj; - - public: - /** - * If argument is non-null, then it is assigned the value - * true, if this is the first call to get. - * At most one call will be first. - */ - inline SNMALLOC_SLOW_PATH static Object& get(bool* first = nullptr) - { - // If defined should be initially false; - SNMALLOC_ASSERT(first == nullptr || *first == false); - - if (unlikely(!initialised.load(std::memory_order_acquire))) - { - FlagLock lock(flag); - if (!initialised) - { - obj = init(); - initialised.store(true, std::memory_order_release); - if (first != nullptr) - *first = true; - } - } - return obj; - } - }; - - /** - * Wrapper for wrapping values. - * - * Wraps on read. This allows code to trust the value is in range, even when - * there is a memory corruption. 
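// A simplified standalone sketch of the double-checked initialisation pattern
// that Singleton<Object, init>::get implements above: an acquire load on the
// fast path, and a spin lock plus a release store only for the first caller.
// This illustrates the pattern only; Object/make_object are made-up names and
// the spin loop stands in for FlagLock/Aal::pause.
#include <atomic>
#include <cassert>

struct Object
{
  int value = 0;
};

static Object make_object()
{
  return Object{42};
}

static std::atomic_flag flag = ATOMIC_FLAG_INIT;
static std::atomic<bool> initialised{false};
static Object obj;

static Object& get(bool* first = nullptr)
{
  if (!initialised.load(std::memory_order_acquire))
  {
    while (flag.test_and_set(std::memory_order_acquire))
    {
      // spin; the real code pauses the core here
    }
    if (!initialised.load(std::memory_order_relaxed))
    {
      obj = make_object();
      initialised.store(true, std::memory_order_release);
      if (first != nullptr)
        *first = true;
    }
    flag.clear(std::memory_order_release);
  }
  return obj;
}

int main()
{
  bool first = false;
  assert(get(&first).value == 42);
  assert(first); // only the very first call observes first == true
  first = false;
  get(&first);
  assert(!first);
}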
- */ - template - class Mod - { - static_assert(bits::is_pow2(length), "Must be a power of two."); - - private: - T value = 0; - - public: - operator T() - { - return static_cast(value & (length - 1)); - } - - Mod& operator=(const T v) - { - value = v; - return *this; - } - }; - - template - class ModArray - { - /** - * Align the elements, so that access is cheaper. - */ - struct alignas(bits::next_pow2_const(sizeof(T))) TWrap - { - T v; - }; - - static constexpr size_t rlength = bits::next_pow2_const(length); - TWrap array[rlength]; - - public: - constexpr const T& operator[](const size_t i) const - { - return array[i & (rlength - 1)].v; - } - - constexpr T& operator[](const size_t i) - { - return array[i & (rlength - 1)].v; - } - }; - - /** - * Helper class to execute a specified function on destruction. - */ - template - class OnDestruct - { - public: - ~OnDestruct() - { - f(); - } - }; - - /** - * Non-owning version of std::function. Wraps a reference to a callable object - * (eg. a lambda) and allows calling it through dynamic dispatch, with no - * allocation. This is useful in the allocator code paths, where we can't - * safely use std::function. - * - * Inspired by the C++ proposal: - * http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2018/p0792r2.html - */ - template - struct function_ref; - template - struct function_ref - { - // The enable_if is used to stop this constructor from shadowing the default - // copy / move constructors. - template< - typename Fn, - typename = - std::enable_if_t, function_ref>>> - function_ref(Fn&& fn) - { - data_ = static_cast(&fn); - fn_ = execute; - } - - R operator()(Args... args) const - { - return fn_(data_, args...); - } - - private: - void* data_; - R (*fn_)(void*, Args...); - - template - static R execute(void* p, Args... args) - { - return (*static_cast>(p))(args...); - }; - }; - - template typename Ptr> - void ignore(Ptr t) - { - UNUSED(t); - } - - /** - * Sometimes we need atomics with trivial initializer. Unfortunately, this - * became harder to accomplish in C++20. Fortunately, our rules for accessing - * these are at least as strong as those required by C++20's atomic_ref: - * - * * The objects outlive any references to them - * - * * We always access the objects through references (though we'd be allowed - * to access them without if we knew there weren't other references) - * - * * We don't access sub-objects at all, much less concurrently through - * other references. - */ - template - class TrivialInitAtomic - { - static_assert( - std::is_trivially_default_constructible_v, - "TrivialInitAtomic should not attempt to call nontrivial constructors"); - -#ifdef __cpp_lib_atomic_ref - using Val = T; - using Ref = std::atomic_ref; -#else - using Val = std::atomic; - using Ref = std::atomic&; -#endif - Val v; - - public: - /** - * Construct a reference to this value; use .load and .store to manipulate - * the value. 
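// A condensed standalone copy of the function_ref shown above, plus a usage
// example: the wrapper stores only a void* to the callable and a function
// pointer that knows how to invoke it, so there is no allocation and no
// ownership. apply_twice is a made-up helper for the example.
#include <cassert>
#include <type_traits>

template<typename F>
struct function_ref;

template<typename R, typename... Args>
struct function_ref<R(Args...)>
{
  template<
    typename Fn,
    typename =
      std::enable_if_t<!std::is_same_v<std::decay_t<Fn>, function_ref>>>
  function_ref(Fn&& fn)
  {
    data_ = static_cast<void*>(&fn);
    fn_ = execute<Fn>;
  }

  R operator()(Args... args) const
  {
    return fn_(data_, args...);
  }

private:
  void* data_;
  R (*fn_)(void*, Args...);

  template<typename Fn>
  static R execute(void* p, Args... args)
  {
    return (*static_cast<std::add_pointer_t<Fn>>(p))(args...);
  }
};

// The callee sees one indirect call; the caller's lambda is never copied.
static int apply_twice(function_ref<int(int)> f, int x)
{
  return f(f(x));
}

int main()
{
  int offset = 3;
  auto add_offset = [&](int v) { return v + offset; };
  assert(apply_twice(add_offset, 1) == 7);
}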
- */ - SNMALLOC_FAST_PATH Ref ref() - { -#ifdef __cpp_lib_atomic_ref - return std::atomic_ref(this->v); -#else - return this->v; -#endif - } - - SNMALLOC_FAST_PATH T - load(std::memory_order mo = std::memory_order_seq_cst) noexcept - { - return this->ref().load(mo); - } - - SNMALLOC_FAST_PATH void - store(T n, std::memory_order mo = std::memory_order_seq_cst) noexcept - { - return this->ref().store(n, mo); - } - - SNMALLOC_FAST_PATH bool compare_exchange_strong( - T& exp, T des, std::memory_order mo = std::memory_order_seq_cst) noexcept - { - return this->ref().compare_exchange_strong(exp, des, mo); - } - - SNMALLOC_FAST_PATH T - exchange(T des, std::memory_order mo = std::memory_order_seq_cst) noexcept - { - return this->ref().exchange(des, mo); - } - - template - SNMALLOC_FAST_PATH - typename std::enable_if::value, Q>::type - fetch_add( - Q arg, std::memory_order mo = std::memory_order_seq_cst) noexcept - { - return this->ref().fetch_add(arg, mo); - } - }; - - static_assert(sizeof(TrivialInitAtomic) == sizeof(char)); - static_assert(alignof(TrivialInitAtomic) == alignof(char)); -} // namespace snmalloc diff --git a/src/ds/invalidptr.h b/src/ds/invalidptr.h deleted file mode 100644 index b96be083c..000000000 --- a/src/ds/invalidptr.h +++ /dev/null @@ -1,51 +0,0 @@ -#pragma once - -namespace snmalloc -{ - /** - * Invalid pointer class. This is similar to `std::nullptr_t`, but allows - * other values. - */ - template - struct InvalidPointer - { - /** - * Equality comparison. Two invalid pointer values with the same sentinel - * are always the same, invalid pointer values with different sentinels are - * always different. - */ - template - constexpr bool operator==(const InvalidPointer&) - { - return Sentinel == OtherSentinel; - } - /** - * Equality comparison. Two invalid pointer values with the same sentinel - * are always the same, invalid pointer values with different sentinels are - * always different. - */ - template - constexpr bool operator!=(const InvalidPointer&) - { - return Sentinel != OtherSentinel; - } - /** - * Implicit conversion, creates a pointer with the value of the sentinel. - * On CHERI and other provenance-tracking systems, this is a - * provenance-free integer and so will trap if dereferenced, on other - * systems the sentinel should be a value in unmapped memory. - */ - template - operator T*() const - { - return reinterpret_cast(Sentinel); - } - /** - * Implicit conversion to an address, returns the sentinel value. - */ - operator address_t() const - { - return Sentinel; - } - }; -} // namespace snmalloc diff --git a/src/ds/mpmcstack.h b/src/ds/mpmcstack.h deleted file mode 100644 index bd16c0872..000000000 --- a/src/ds/mpmcstack.h +++ /dev/null @@ -1,79 +0,0 @@ -#pragma once - -#include "aba.h" - -namespace snmalloc -{ - template< - class T, - Construction c = RequiresInit, - template typename Ptr = Pointer, - template typename AtomicPtr = AtomicPointer> - class MPMCStack - { - using ABAT = ABA; - - private: - static_assert( - std::is_same>::value, - "T->next must be an AtomicPtr"); - - ABAT stack; - - public: - void push(Ptr item) - { - return push(item, item); - } - - void push(Ptr first, Ptr last) - { - // Pushes an item on the stack. - auto cmp = stack.read(); - - do - { - Ptr top = cmp.ptr(); - last->next.store(top, std::memory_order_release); - } while (!cmp.store_conditional(first)); - } - - Ptr pop() - { - // Returns the next item. If the returned value is decommitted, it is - // possible for the read of top->next to segfault. 
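// A standalone re-implementation sketch of the InvalidPointer idea above: a
// compile-time sentinel that compares equal only to sentinels with the same
// value and converts to a pointer whose address is the sentinel (assumed to
// be unmapped). Simplified for illustration; address_t is taken to be
// uintptr_t and the operators are marked const so static_assert can use them.
#include <cassert>
#include <cstdint>

using address_t = uintptr_t;

template<address_t Sentinel>
struct InvalidPointer
{
  template<address_t OtherSentinel>
  constexpr bool operator==(const InvalidPointer<OtherSentinel>&) const
  {
    return Sentinel == OtherSentinel;
  }

  template<address_t OtherSentinel>
  constexpr bool operator!=(const InvalidPointer<OtherSentinel>&) const
  {
    return Sentinel != OtherSentinel;
  }

  template<typename T>
  operator T*() const
  {
    return reinterpret_cast<T*>(Sentinel);
  }

  operator address_t() const
  {
    return Sentinel;
  }
};

int main()
{
  constexpr InvalidPointer<0x100> a;
  constexpr InvalidPointer<0x100> b;
  constexpr InvalidPointer<0x200> c;
  static_assert(a == b, "same sentinel values compare equal");
  static_assert(a != c, "different sentinels compare unequal");

  // Usable as a non-null terminator value (e.g. DLList's Terminator
  // parameter), so a corrupted nullptr cannot masquerade as end-of-list.
  int* p = a;
  assert(reinterpret_cast<address_t>(p) == 0x100);
}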
- auto cmp = stack.read(); - Ptr top; - Ptr next; - - do - { - top = cmp.ptr(); - - if (top == nullptr) - break; - - next = top->next.load(std::memory_order_acquire); - } while (!cmp.store_conditional(next)); - - return top; - } - - Ptr pop_all() - { - // Returns all items as a linked list, leaving an empty stack. - auto cmp = stack.read(); - Ptr top; - - do - { - top = cmp.ptr(); - - if (top == nullptr) - break; - } while (!cmp.store_conditional(nullptr)); - - return top; - } - }; -} // namespace snmalloc diff --git a/src/ds/mpscq.h b/src/ds/mpscq.h deleted file mode 100644 index 683d0db53..000000000 --- a/src/ds/mpscq.h +++ /dev/null @@ -1,84 +0,0 @@ -#pragma once - -#include "bits.h" -#include "helpers.h" - -#include -namespace snmalloc -{ - template< - class T, - template typename Ptr = Pointer, - template typename AtomicPtr = AtomicPointer> - class MPSCQ - { - private: - static_assert( - std::is_same>::value, - "T->next must be an AtomicPtr"); - - AtomicPtr back{nullptr}; - Ptr front = nullptr; - - public: - void invariant() - { - SNMALLOC_ASSERT(back != nullptr); - SNMALLOC_ASSERT(front != nullptr); - } - - void init(Ptr stub) - { - stub->next.store(nullptr, std::memory_order_relaxed); - front = stub; - back.store(stub, std::memory_order_relaxed); - invariant(); - } - - Ptr destroy() - { - Ptr fnt = front; - back.store(nullptr, std::memory_order_relaxed); - front = nullptr; - return fnt; - } - - inline bool is_empty() - { - Ptr bk = back.load(std::memory_order_relaxed); - - return bk == front; - } - - void enqueue(Ptr first, Ptr last) - { - // Pushes a list of messages to the queue. Each message from first to - // last should be linked together through their next pointers. - invariant(); - last->next.store(nullptr, std::memory_order_relaxed); - std::atomic_thread_fence(std::memory_order_release); - Ptr prev = back.exchange(last, std::memory_order_relaxed); - prev->next.store(first, std::memory_order_relaxed); - } - - std::pair, bool> dequeue() - { - // Returns the front message, or null if not possible to return a message. - invariant(); - Ptr first = front; - Ptr next = first->next.load(std::memory_order_relaxed); - - if (next != nullptr) - { - front = next; - Aal::prefetch(&(next->next)); - SNMALLOC_ASSERT(front != nullptr); - std::atomic_thread_fence(std::memory_order_acquire); - invariant(); - return {first, true}; - } - - return {nullptr, false}; - } - }; -} // namespace snmalloc diff --git a/src/ds/ptrwrap.h b/src/ds/ptrwrap.h deleted file mode 100644 index 8cf1f8109..000000000 --- a/src/ds/ptrwrap.h +++ /dev/null @@ -1,300 +0,0 @@ -#pragma once - -#include - -namespace snmalloc -{ - /** - * To assist in providing a uniform interface regardless of pointer wrapper, - * we also export intrinsic pointer and atomic pointer aliases, as the postfix - * type constructor '*' does not work well as a template parameter and we - * don't have inline type-level functions. - */ - template - using Pointer = T*; - - template - using AtomicPointer = std::atomic; - - /** - * Summaries of StrictProvenance metadata. We abstract away the particular - * size and any offset into the bounds. - * - * CBArena is as powerful as our pointers get: they're results from mmap(), - * and so confer as much authority as the kernel has given us. - * - * CBChunk is restricted to either a single chunk (SUPERSLAB_SIZE) or perhaps - * to several if we've requesed a large allocation (see capptr_chunk_is_alloc - * and its uses). 
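// A standalone, single-threaded sketch of the intrusive MPSC queue protocol
// that MPSCQ above implements: the queue always owns one resident "stub"
// node, enqueue swings the shared back pointer, and dequeue hands back the
// old front once its next pointer has been published. Node/MPSCQueue are
// illustrative names and the memory orders are simplified for this demo.
#include <atomic>
#include <cassert>
#include <utility>

struct Node
{
  std::atomic<Node*> next{nullptr};
  int payload = 0;
};

struct MPSCQueue
{
  std::atomic<Node*> back{nullptr};
  Node* front = nullptr;

  void init(Node* stub)
  {
    stub->next.store(nullptr, std::memory_order_relaxed);
    front = stub;
    back.store(stub, std::memory_order_relaxed);
  }

  void enqueue(Node* first, Node* last)
  {
    last->next.store(nullptr, std::memory_order_relaxed);
    std::atomic_thread_fence(std::memory_order_release);
    Node* prev = back.exchange(last, std::memory_order_relaxed);
    prev->next.store(first, std::memory_order_relaxed);
  }

  std::pair<Node*, bool> dequeue()
  {
    Node* first = front;
    Node* next = first->next.load(std::memory_order_relaxed);
    if (next != nullptr)
    {
      front = next;
      std::atomic_thread_fence(std::memory_order_acquire);
      return {first, true};
    }
    return {nullptr, false};
  }
};

int main()
{
  Node stub, a, b;
  a.payload = 1;
  b.payload = 2;

  MPSCQueue q;
  q.init(&stub);
  assert(!q.dequeue().second); // empty: only the stub is resident

  q.enqueue(&a, &a);
  q.enqueue(&b, &b);

  // dequeue returns the previous front: the stub first, then the messages in
  // FIFO order. The most recently enqueued node stays behind as the new stub
  // until something else is enqueued.
  assert(q.dequeue().first == &stub);
  assert(q.dequeue().first == &a);
  assert(!q.dequeue().second); // b is now the resident stub
}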
- * - * CBChunkD is curious: we often use CBArena-bounded pointers to derive - * pointers to Allocslab metadata, and on most fast paths these pointers end - * up being ephemeral. As such, on NDEBUG builds, we elide the capptr_bounds - * that would bound these to chunks and instead just unsafely inherit the - * CBArena bounds. The use of CBChunkD thus helps to ensure that we - * eventually do invoke capptr_bounds when these pointers end up being longer - * lived! - * - * *E forms are "exported" and have had platform constraints applied. That - * means, for example, on CheriBSD, that they have had their VMMAP permission - * stripped. - * - * Yes, I wish the start-of-comment characters were aligned below as well. - * I blame clang format. - */ - enum capptr_bounds - { - /* Spatial Notes */ - CBArena, /* Arena */ - CBChunkD, /* Arena Chunk-bounded in debug; internal use only! */ - CBChunk, /* Chunk */ - CBChunkE, /* Chunk (+ platform constraints) */ - CBAlloc, /* Alloc */ - CBAllocE /* Alloc (+ platform constraints) */ - }; - - /** - * Compute the "exported" variant of a capptr_bounds annotation. This is - * used by the PAL's capptr_export function to compute its return value's - * annotation. - */ - template - SNMALLOC_CONSTEVAL capptr_bounds capptr_export_type() - { - static_assert( - (B == CBChunk) || (B == CBAlloc), "capptr_export_type of bad type"); - - switch (B) - { - case CBChunk: - return CBChunkE; - case CBAlloc: - return CBAllocE; - } - } - - template - SNMALLOC_CONSTEVAL bool capptr_is_bounds_refinement() - { - switch (BI) - { - case CBAllocE: - return BO == CBAllocE; - case CBAlloc: - return BO == CBAlloc; - case CBChunkE: - return BO == CBAllocE || BO == CBChunkE; - case CBChunk: - return BO == CBAlloc || BO == CBChunk || BO == CBChunkD; - case CBChunkD: - return BO == CBAlloc || BO == CBChunk || BO == CBChunkD; - case CBArena: - return BO == CBAlloc || BO == CBChunk || BO == CBChunkD || - BO == CBArena; - } - } - - /** - * A pointer annotated with a "phantom type parameter" carrying a static - * summary of its StrictProvenance metadata. - */ - template - struct CapPtr - { - T* unsafe_capptr; - - /** - * nullptr is implicitly constructable at any bounds type - */ - CapPtr(const std::nullptr_t n) : unsafe_capptr(n) {} - - CapPtr() : CapPtr(nullptr) {} - - /** - * all other constructions must be explicit - * - * Unfortunately, MSVC gets confused if an Allocator is instantiated in a - * way that never needs initialization (as our sandbox test does, for - * example) and, in that case, declares this constructor unreachable, - * presumably after some heroic feat of inlining that has also lost any - * semblance of context. See the blocks tagged "CapPtr-vs-MSVC" for where - * this has been observed. - */ -#ifdef _MSC_VER -# pragma warning(push) -# pragma warning(disable : 4702) -#endif - explicit CapPtr(T* p) : unsafe_capptr(p) {} -#ifdef _MSC_VER -# pragma warning(pop) -#endif - - /** - * Allow static_cast<>-s that preserve bounds but vary the target type. 
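// A miniature standalone illustration of the phantom-type pattern CapPtr uses
// above: the bound travels in the type, and a narrowing helper statically
// rejects transitions that capptr_is_bounds_refinement would not allow. The
// enum and predicate here are deliberately cut-down stand-ins, not the real
// capptr_bounds lattice.
#include <cassert>

enum bound_kind
{
  Arena,
  Chunk,
  Alloc
};

// Identity plus Arena->Chunk, Chunk->Alloc and Arena->Alloc are refinements.
constexpr bool is_refinement(bound_kind from, bound_kind to)
{
  return (from == to) || (from == Arena && to == Chunk) ||
    (from == Chunk && to == Alloc) || (from == Arena && to == Alloc);
}

template<typename T, bound_kind B>
struct BoundedPtr
{
  T* unsafe_ptr;
  explicit BoundedPtr(T* p) : unsafe_ptr(p) {}
};

template<bound_kind To, typename T, bound_kind From>
BoundedPtr<T, To> narrow(BoundedPtr<T, From> p)
{
  static_assert(is_refinement(From, To), "not a bounds refinement");
  return BoundedPtr<T, To>(p.unsafe_ptr);
}

int main()
{
  int x = 7;
  BoundedPtr<int, Arena> arena_ptr(&x);
  auto chunk_ptr = narrow<Chunk>(arena_ptr);
  auto alloc_ptr = narrow<Alloc>(chunk_ptr);
  assert(*alloc_ptr.unsafe_ptr == 7);

  // narrow<Arena>(alloc_ptr); // would not compile: widening is rejected
}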
- */ - template - SNMALLOC_FAST_PATH CapPtr as_static() - { - return CapPtr(static_cast(this->unsafe_capptr)); - } - - SNMALLOC_FAST_PATH CapPtr as_void() - { - return this->as_static(); - } - - /** - * A more aggressive bounds-preserving cast, using reinterpret_cast - */ - template - SNMALLOC_FAST_PATH CapPtr as_reinterpret() - { - return CapPtr(reinterpret_cast(this->unsafe_capptr)); - } - - SNMALLOC_FAST_PATH bool operator==(const CapPtr& rhs) const - { - return this->unsafe_capptr == rhs.unsafe_capptr; - } - - SNMALLOC_FAST_PATH bool operator!=(const CapPtr& rhs) const - { - return this->unsafe_capptr != rhs.unsafe_capptr; - } - - SNMALLOC_FAST_PATH bool operator<(const CapPtr& rhs) const - { - return this->unsafe_capptr < rhs.unsafe_capptr; - } - - SNMALLOC_FAST_PATH T* operator->() const - { - /* - * CBAllocE bounds are associated with objects coming from or going to the - * client; we should be doing nothing with them. - */ - static_assert(bounds != CBAllocE); - return this->unsafe_capptr; - } - }; - - static_assert(sizeof(CapPtr) == sizeof(void*)); - static_assert(alignof(CapPtr) == alignof(void*)); - - template - using CapPtrCBArena = CapPtr; - - template - using CapPtrCBChunk = CapPtr; - - template - using CapPtrCBChunkE = CapPtr; - - template - using CapPtrCBAlloc = CapPtr; - - /** - * Sometimes (with large allocations) we really mean the entire chunk (or even - * several chunks) to be the allocation. - */ - template - SNMALLOC_FAST_PATH CapPtr - capptr_chunk_is_alloc(CapPtr p) - { - return CapPtr(p.unsafe_capptr); - } - - /** - * With all the bounds and constraints in place, it's safe to extract a void - * pointer (to reveal to the client). - */ - SNMALLOC_FAST_PATH void* capptr_reveal(CapPtr p) - { - return p.unsafe_capptr; - } - - /** - * - * Wrap a std::atomic with bounds annotation and speak in terms of - * bounds-annotated pointers at the interface. - * - * Note the membranous sleight of hand being pulled here: this class puts - * annotations around an un-annotated std::atomic, to appease C++, yet - * will expose or consume only CapPtr with the same bounds annotation. - */ - template - struct AtomicCapPtr - { - std::atomic unsafe_capptr; - - /** - * nullptr is constructable at any bounds type - */ - AtomicCapPtr(const std::nullptr_t n) : unsafe_capptr(n) {} - - /** - * Interconversion with CapPtr - */ - AtomicCapPtr(CapPtr p) : unsafe_capptr(p.unsafe_capptr) {} - - operator CapPtr() const noexcept - { - return CapPtr(this->unsafe_capptr); - } - - // Our copy-assignment operator follows std::atomic and returns a copy of - // the RHS. Clang finds this surprising; we suppress the warning. 
- // NOLINTNEXTLINE(misc-unconventional-assign-operator) - CapPtr operator=(CapPtr p) noexcept - { - this->store(p); - return p; - } - - SNMALLOC_FAST_PATH CapPtr - load(std::memory_order order = std::memory_order_seq_cst) noexcept - { - return CapPtr(this->unsafe_capptr.load(order)); - } - - SNMALLOC_FAST_PATH void store( - CapPtr desired, - std::memory_order order = std::memory_order_seq_cst) noexcept - { - this->unsafe_capptr.store(desired.unsafe_capptr, order); - } - - SNMALLOC_FAST_PATH CapPtr exchange( - CapPtr desired, - std::memory_order order = std::memory_order_seq_cst) noexcept - { - return CapPtr( - this->unsafe_capptr.exchange(desired.unsafe_capptr, order)); - } - - SNMALLOC_FAST_PATH bool operator==(const AtomicCapPtr& rhs) const - { - return this->unsafe_capptr == rhs.unsafe_capptr; - } - - SNMALLOC_FAST_PATH bool operator!=(const AtomicCapPtr& rhs) const - { - return this->unsafe_capptr != rhs.unsafe_capptr; - } - - SNMALLOC_FAST_PATH bool operator<(const AtomicCapPtr& rhs) const - { - return this->unsafe_capptr < rhs.unsafe_capptr; - } - }; - - template - using AtomicCapPtrCBArena = AtomicCapPtr; - - template - using AtomicCapPtrCBChunk = AtomicCapPtr; - - template - using AtomicCapPtrCBAlloc = AtomicCapPtr; - -} // namespace snmalloc diff --git a/src/mem/address_space.h b/src/mem/address_space.h deleted file mode 100644 index 6430ce708..000000000 --- a/src/mem/address_space.h +++ /dev/null @@ -1,357 +0,0 @@ -#include "../ds/address.h" -#include "../ds/flaglock.h" -#include "../pal/pal.h" -#include "arenamap.h" - -#include -namespace snmalloc -{ - /** - * Implements a power of two allocator, where all blocks are aligned to the - * same power of two as their size. This is what snmalloc uses to get - * alignment of very large sizeclasses. - * - * It cannot unreserve memory, so this does not require the - * usual complexity of a buddy allocator. - */ - template - class AddressSpaceManager - { - /** - * Stores the blocks of address space - * - * The first level of array indexes based on power of two size. - * - * The first entry ranges[n][0] is just a pointer to an address range - * of size 2^n. - * - * The second entry ranges[n][1] is a pointer to a linked list of blocks - * of this size. The final block in the list is not committed, so we commit - * on pop for this corner case. - * - * Invariants - * ranges[n][1] != nullptr => ranges[n][0] != nullptr - * - * bits::BITS is used for simplicity, we do not use below the pointer size, - * and large entries will be unlikely to be supported by the platform. - */ - std::array, 2>, bits::BITS> ranges = {}; - - /** - * This is infrequently used code, a spin lock simplifies the code - * considerably, and should never be on the fast path. - */ - std::atomic_flag spin_lock = ATOMIC_FLAG_INIT; - - /** - * Checks a block satisfies its invariant. - */ - inline void check_block(CapPtr base, size_t align_bits) - { - SNMALLOC_ASSERT( - base == pointer_align_up(base, bits::one_at_bit(align_bits))); - // All blocks need to be bigger than a pointer. - SNMALLOC_ASSERT(bits::one_at_bit(align_bits) >= sizeof(void*)); - UNUSED(base); - UNUSED(align_bits); - } - - /** - * Adds a block to `ranges`. - */ - void add_block(size_t align_bits, CapPtr base) - { - check_block(base, align_bits); - SNMALLOC_ASSERT(align_bits < 64); - if (ranges[align_bits][0] == nullptr) - { - // Prefer first slot if available. - ranges[align_bits][0] = base; - return; - } - - if (ranges[align_bits][1] != nullptr) - { - // Add to linked list. 
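// A standalone sketch of the membrane trick AtomicCapPtr uses above: a plain
// std::atomic<T*> lives inside, but load/store/exchange only accept and
// return the annotated wrapper, so annotations are never silently dropped at
// the interface. Tagged/AtomicTagged are illustrative stand-ins, not the real
// CapPtr/AtomicCapPtr types.
#include <atomic>
#include <cassert>

template<typename T>
struct Tagged
{
  T* unsafe_ptr;
  explicit Tagged(T* p) : unsafe_ptr(p) {}
};

template<typename T>
struct AtomicTagged
{
  std::atomic<T*> unsafe_ptr{nullptr};

  Tagged<T> load(std::memory_order mo = std::memory_order_seq_cst)
  {
    return Tagged<T>(unsafe_ptr.load(mo));
  }

  void store(Tagged<T> p, std::memory_order mo = std::memory_order_seq_cst)
  {
    unsafe_ptr.store(p.unsafe_ptr, mo);
  }

  Tagged<T>
  exchange(Tagged<T> p, std::memory_order mo = std::memory_order_seq_cst)
  {
    return Tagged<T>(unsafe_ptr.exchange(p.unsafe_ptr, mo));
  }
};

int main()
{
  int a = 1, b = 2;
  AtomicTagged<int> slot;
  slot.store(Tagged<int>(&a));
  Tagged<int> prev = slot.exchange(Tagged<int>(&b));
  assert(prev.unsafe_ptr == &a);
  assert(*slot.load().unsafe_ptr == 2);
}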
- commit_block(base, sizeof(void*)); - *(base.template as_static>().unsafe_capptr) = - ranges[align_bits][1]; - check_block(ranges[align_bits][1], align_bits); - } - - // Update head of list - ranges[align_bits][1] = base; - check_block(ranges[align_bits][1], align_bits); - } - - /** - * Find a block of the correct size. May split larger blocks - * to satisfy this request. - */ - CapPtr remove_block(size_t align_bits) - { - CapPtr first = ranges[align_bits][0]; - if (first == nullptr) - { - if (align_bits == (bits::BITS - 1)) - { - // Out of memory - return nullptr; - } - - // Look for larger block and split up recursively - CapPtr bigger = remove_block(align_bits + 1); - if (bigger != nullptr) - { - size_t left_over_size = bits::one_at_bit(align_bits); - auto left_over = pointer_offset(bigger, left_over_size); - ranges[align_bits][0] = - Aal::capptr_bound(left_over, left_over_size); - check_block(left_over, align_bits); - } - check_block(bigger, align_bits + 1); - return bigger; - } - - CapPtr second = ranges[align_bits][1]; - if (second != nullptr) - { - commit_block(second, sizeof(void*)); - auto psecond = - second.template as_static>().unsafe_capptr; - auto next = *psecond; - ranges[align_bits][1] = next; - // Zero memory. Client assumes memory contains only zeros. - *psecond = nullptr; - check_block(second, align_bits); - check_block(next, align_bits); - return second; - } - - check_block(first, align_bits); - ranges[align_bits][0] = nullptr; - return first; - } - - /** - * Add a range of memory to the address space. - * Divides blocks into power of two sizes with natural alignment - */ - void add_range(CapPtr base, size_t length) - { - // Find the minimum set of maximally aligned blocks in this range. - // Each block's alignment and size are equal. - while (length >= sizeof(void*)) - { - size_t base_align_bits = bits::ctz(address_cast(base)); - size_t length_align_bits = (bits::BITS - 1) - bits::clz(length); - size_t align_bits = bits::min(base_align_bits, length_align_bits); - size_t align = bits::one_at_bit(align_bits); - - check_block(base, align_bits); - add_block(align_bits, base); - - base = pointer_offset(base, align); - length -= align; - } - } - - /** - * Commit a block of memory - */ - void commit_block(CapPtr base, size_t size) - { - // Rounding required for sub-page allocations. - auto page_start = pointer_align_down(base); - auto page_end = - pointer_align_up(pointer_offset(base, size)); - size_t using_size = pointer_diff(page_start, page_end); - PAL::template notify_using(page_start.unsafe_capptr, using_size); - } - - public: - /** - * Returns a pointer to a block of memory of the supplied size. - * The block will be committed, if specified by the template parameter. - * The returned block is guaranteed to be aligened to the size. - * - * Only request 2^n sizes, and not less than a pointer. - * - * On StrictProvenance architectures, any underlying allocations made as - * part of satisfying the request will be registered with the provided - * arena_map for use in subsequent amplification. - */ - template - CapPtr reserve(size_t size, ArenaMap& arena_map) - { - SNMALLOC_ASSERT(bits::is_pow2(size)); - SNMALLOC_ASSERT(size >= sizeof(void*)); - - /* - * For sufficiently large allocations with platforms that support aligned - * allocations and architectures that don't require StrictProvenance, - * try asking the platform first. 
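The add_range loop above decomposes an arbitrary region into naturally aligned power-of-two blocks (each block's alignment equals its size), which is what keeps the two-slot ranges table simple. A standalone sketch of that decomposition on plain integer addresses; GCC/Clang builtins stand in for bits::ctz and bits::clz, and base is assumed non-zero:

#include <cstdint>
#include <cstdio>

static int ctz64(uint64_t x) { return __builtin_ctzll(x); }       // x != 0
static int log2_floor(uint64_t x) { return 63 - __builtin_clzll(x); }

void decompose(uint64_t base, uint64_t length)
{
  const uint64_t min_block = sizeof(void*);
  while (length >= min_block)
  {
    // Alignment of the current base, capped by the remaining length.
    int base_align_bits = ctz64(base);
    int length_align_bits = log2_floor(length);
    int align_bits =
      base_align_bits < length_align_bits ? base_align_bits : length_align_bits;
    uint64_t align = uint64_t(1) << align_bits;
    printf("block at 0x%llx, size 2^%d\n", (unsigned long long)base, align_bits);
    base += align;
    length -= align;
  }
}

int main()
{
  decompose(0x7000, 0x9000); // emits a 4 KiB block at 0x7000, then 32 KiB at 0x8000
  return 0;
}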
- */ - if constexpr ( - pal_supports && !aal_supports) - { - if (size >= PAL::minimum_alloc_size) - return CapPtr( - PAL::template reserve_aligned(size)); - } - - CapPtr res; - { - FlagLock lock(spin_lock); - res = remove_block(bits::next_pow2_bits(size)); - if (res == nullptr) - { - // Allocation failed ask OS for more memory - CapPtr block = nullptr; - size_t block_size = 0; - if constexpr (pal_supports) - { - /* - * aal_supports ends up here, too, and we ensure - * that we always allocate whole ArenaMap granules. - */ - if constexpr (aal_supports) - { - static_assert( - !aal_supports || - (ArenaMap::alloc_size >= PAL::minimum_alloc_size), - "Provenance root granule must be at least PAL's " - "minimum_alloc_size"); - block_size = bits::align_up(size, ArenaMap::alloc_size); - } - else - { - /* - * We will have handled the case where size >= minimum_alloc_size - * above, so we are left to handle only small things here. - */ - block_size = PAL::minimum_alloc_size; - } - - void* block_raw = PAL::template reserve_aligned(block_size); - - // It's a bit of a lie to convert without applying bounds, but the - // platform will have bounded block for us and it's better that the - // rest of our internals expect CBChunk bounds. - block = CapPtr(block_raw); - - if constexpr (aal_supports) - { - auto root_block = CapPtr(block_raw); - auto root_size = block_size; - do - { - arena_map.register_root(root_block); - root_block = pointer_offset(root_block, ArenaMap::alloc_size); - root_size -= ArenaMap::alloc_size; - } while (root_size > 0); - } - } - else if constexpr (!pal_supports) - { - // Need at least 2 times the space to guarantee alignment. - // Hold lock here as a race could cause additional requests to - // the PAL, and this could lead to suprious OOM. This is - // particularly bad if the PAL gives all the memory on first call. - auto block_and_size = PAL::reserve_at_least(size * 2); - block = CapPtr(block_and_size.first); - block_size = block_and_size.second; - - // Ensure block is pointer aligned. - if ( - pointer_align_up(block, sizeof(void*)) != block || - bits::align_up(block_size, sizeof(void*)) > block_size) - { - auto diff = - pointer_diff(block, pointer_align_up(block, sizeof(void*))); - block_size = block_size - diff; - block_size = bits::align_down(block_size, sizeof(void*)); - } - } - if (block == nullptr) - { - return nullptr; - } - add_range(block, block_size); - - // still holding lock so guaranteed to succeed. - res = remove_block(bits::next_pow2_bits(size)); - } - } - - // Don't need lock while committing pages. - if constexpr (committed) - commit_block(res, size); - - return res; - } - - /** - * Aligns block to next power of 2 above size, and unused space at the end - * of the block is retained by the address space manager. - * - * This is useful for allowing the space required for alignment to be - * used, by smaller objects. - */ - template - CapPtr - reserve_with_left_over(size_t size, ArenaMap& arena_map) - { - SNMALLOC_ASSERT(size >= sizeof(void*)); - - size = bits::align_up(size, sizeof(void*)); - - size_t rsize = bits::next_pow2(size); - - auto res = reserve(rsize, arena_map); - - if (res != nullptr) - { - if (rsize > size) - { - FlagLock lock(spin_lock); - add_range(pointer_offset(res, size), rsize - size); - } - - if constexpr (committed) - commit_block(res, size); - } - return res; - } - - /** - * Default constructor. An address-space manager constructed in this way - * does not own any memory at the start and will request any that it needs - * from the PAL. 
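When the platform cannot hand back naturally aligned memory, the code above reserves at least twice the requested power-of-two size and carves an aligned block out of the middle. A sketch of why doubling is sufficient, with malloc standing in for the PAL; the trimmings, which the real code returns to the address-space manager via add_range, are simply leaked here:

#include <cassert>
#include <cstdint>
#include <cstdlib>

void* reserve_aligned_fallback(size_t size) // size must be a power of two
{
  assert((size & (size - 1)) == 0);
  void* raw = std::malloc(size * 2);
  if (raw == nullptr)
    return nullptr;
  uintptr_t p = reinterpret_cast<uintptr_t>(raw);
  // Round up to the next multiple of size; the result and the following
  // `size` bytes are guaranteed to lie inside the doubled reservation.
  uintptr_t aligned = (p + size - 1) & ~(uintptr_t)(size - 1);
  return reinterpret_cast<void*>(aligned);
}

int main()
{
  void* p = reserve_aligned_fallback(1 << 16);
  assert((reinterpret_cast<uintptr_t>(p) & ((1 << 16) - 1)) == 0);
  return 0;
}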
- */ - AddressSpaceManager() = default; - - /** - * Constructor that pre-initialises the address-space manager with a region - * of memory. - */ - AddressSpaceManager(CapPtr base, size_t length) - { - add_range(base, length); - } - - /** - * Move assignment operator. This should only be used during initialisation - * of the system. There should be no concurrency. - */ - AddressSpaceManager& operator=(AddressSpaceManager&& other) noexcept - { - // Lock address space manager. This will prevent it being used by - // mistake. Fails with deadlock with any subsequent caller. - if (other.spin_lock.test_and_set()) - abort(); - ranges = other.ranges; - return *this; - } - }; -} // namespace snmalloc diff --git a/src/mem/alloc.h b/src/mem/alloc.h deleted file mode 100644 index 1ca74a9c3..000000000 --- a/src/mem/alloc.h +++ /dev/null @@ -1,1563 +0,0 @@ -#pragma once - -#ifdef _MSC_VER -# define ALLOCATOR __declspec(allocator) -#else -# define ALLOCATOR -#endif - -#include "../pal/pal_consts.h" -#include "allocstats.h" -#include "chunkmap.h" -#include "external_alloc.h" -#include "largealloc.h" -#include "mediumslab.h" -#include "pooled.h" -#include "remoteallocator.h" -#include "sizeclasstable.h" -#include "slab.h" - -#include -#include - -namespace snmalloc -{ - enum Boundary - { - /** - * The location of the first byte of this allocation. - */ - Start, - /** - * The location of the last byte of the allocation. - */ - End, - /** - * The location one past the end of the allocation. This is mostly useful - * for bounds checking, where anything less than this value is safe. - */ - OnePastEnd - }; - - // This class is just used so that the free lists are the first entry - // in the allocator and hence has better code gen. - // It contains a free list per small size class. These are used for - // allocation on the fast path. This part of the code is inspired by mimalloc. - class FastFreeLists - { - protected: - FreeListIter small_fast_free_lists[NUM_SMALL_CLASSES]; - - public: - FastFreeLists() : small_fast_free_lists() {} - }; - - /** - * Allocator. This class is parameterised on five template parameters. - * - * The first two template parameter provides a hook to allow the allocator in - * use to be dynamically modified. This is used to implement a trick from - * mimalloc that avoids a conditional branch on the fast path. We - * initialise the thread-local allocator pointer with the address of a global - * allocator, which never owns any memory. The first returns true, if is - * passed the global allocator. The second initialises the thread-local - * allocator if it is has been been initialised already. Splitting into two - * functions allows for the code to be structured into tail calls to improve - * codegen. The second template takes a function that takes the allocator - * that is initialised, and the value returned, is returned by - * `InitThreadAllocator`. This is used incase we are running during teardown - * and the thread local allocator cannot be kept alive. - * - * The `MemoryProvider` defines the source of memory for this allocator. - * Allocators try to reuse address space by allocating from existing slabs or - * reusing freed large allocations. When they need to allocate a new chunk - * of memory they request space from the `MemoryProvider`. - * - * The `ChunkMap` parameter provides the adaptor to the pagemap. This is used - * to associate metadata with large (16MiB, by default) regions, allowing an - * allocator to find the allocator responsible for that region. 
- * - * The final template parameter, `IsQueueInline`, defines whether the - * message queue for this allocator should be stored as a field of the - * allocator (`true`) or provided externally, allowing it to be anywhere else - * in the address space (`false`). - */ - template< - bool (*NeedsInitialisation)(void*), - void* (*InitThreadAllocator)(function_ref), - class MemoryProvider = GlobalVirtual, - class ChunkMap = SNMALLOC_DEFAULT_CHUNKMAP, - bool IsQueueInline = true> - class Allocator : public FastFreeLists, - public Pooled> - { - friend RemoteCache; - - LargeAlloc large_allocator; - ChunkMap chunk_map; - LocalEntropy entropy; - - /** - * Per size class bumpptr for building new free lists - * If aligned to a SLAB start, then it is empty, and a new - * slab is required. - */ - CapPtr bump_ptrs[NUM_SMALL_CLASSES] = {nullptr}; - - public: - Stats& stats() - { - return large_allocator.stats; - } - - template - friend class AllocPool; - - /** - * Allocate memory of a statically known size. - */ - template - SNMALLOC_FAST_PATH ALLOCATOR void* alloc() - { - static_assert(size != 0, "Size must not be zero."); -#ifdef SNMALLOC_PASS_THROUGH - // snmalloc guarantees a lot of alignment, so we can depend on this - // make pass through call aligned_alloc with the alignment snmalloc - // would guarantee. - void* result = external_alloc::aligned_alloc( - natural_alignment(size), round_size(size)); - if constexpr (zero_mem == YesZero) - memset(result, 0, size); - return result; -#else - constexpr sizeclass_t sizeclass = size_to_sizeclass_const(size); - - stats().alloc_request(size); - - if constexpr (sizeclass < NUM_SMALL_CLASSES) - { - return capptr_reveal(small_alloc(size)); - } - else if constexpr (sizeclass < NUM_SIZECLASSES) - { - handle_message_queue(); - constexpr size_t rsize = sizeclass_to_size(sizeclass); - return capptr_reveal(medium_alloc(sizeclass, rsize, size)); - } - else - { - handle_message_queue(); - return capptr_reveal(large_alloc(size)); - } -#endif - } - - /** - * Allocate memory of a dynamically known size. - */ - template - SNMALLOC_FAST_PATH ALLOCATOR void* alloc(size_t size) - { -#ifdef SNMALLOC_PASS_THROUGH - // snmalloc guarantees a lot of alignment, so we can depend on this - // make pass through call aligned_alloc with the alignment snmalloc - // would guarantee. - void* result = external_alloc::aligned_alloc( - natural_alignment(size), round_size(size)); - if constexpr (zero_mem == YesZero) - memset(result, 0, size); - return result; -#else - // Perform the - 1 on size, so that zero wraps around and ends up on - // slow path. - if (likely((size - 1) <= (sizeclass_to_size(NUM_SMALL_CLASSES - 1) - 1))) - { - // Allocations smaller than the slab size are more likely. Improve - // branch prediction by placing this case first. - return capptr_reveal(small_alloc(size)); - } - - return capptr_reveal(alloc_not_small(size)); - } - - template - SNMALLOC_SLOW_PATH CapPtr alloc_not_small(size_t size) - { - handle_message_queue(); - - if (size == 0) - { - return small_alloc(1); - } - - sizeclass_t sizeclass = size_to_sizeclass(size); - if (sizeclass < NUM_SIZECLASSES) - { - size_t rsize = sizeclass_to_size(sizeclass); - return medium_alloc(sizeclass, rsize, size); - } - - return large_alloc(size); -#endif - } - - /* - * Free memory of a statically known size. Must be called with an - * external pointer. 
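The class comment above describes the mimalloc-style trick of pointing the thread-local allocator pointer at a global allocator that owns no memory, so the fast path never branches on "is this thread initialised yet?". A standalone sketch of that trick; all names here (ToyAlloc, current, needs_initialisation, init_thread_allocator) are invented for illustration:

#include <cstddef>
#include <cstdio>

struct ToyAlloc
{
  bool is_fake;
  void* alloc(size_t size);
};

static ToyAlloc global_fake{true};
static thread_local ToyAlloc* current = &global_fake;

static bool needs_initialisation(ToyAlloc* a) { return a->is_fake; }

static ToyAlloc* init_thread_allocator()
{
  static thread_local ToyAlloc real{false};
  current = &real;
  return &real;
}

void* ToyAlloc::alloc(size_t size)
{
  if (needs_initialisation(this)) // only ever true for the fake allocator
    return init_thread_allocator()->alloc(size);
  printf("allocating %zu bytes from the real allocator\n", size);
  return nullptr; // stand-in; a real allocator would return memory here
}

int main()
{
  void* p = current->alloc(32); // first call routes through initialisation
  (void)p;
  return 0;
}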
- */ - template - void dealloc(void* p_raw) - { -#ifdef SNMALLOC_PASS_THROUGH - UNUSED(size); - return external_alloc::free(p_raw); -#else - constexpr sizeclass_t sizeclass = size_to_sizeclass_const(size); - - auto p_ret = CapPtr(p_raw); - auto p_auth = large_allocator.capptr_amplify(p_ret); - - if (sizeclass < NUM_SMALL_CLASSES) - { - auto super = Superslab::get(p_auth); - - small_dealloc_unchecked(super, p_auth, p_ret, sizeclass); - } - else if (sizeclass < NUM_SIZECLASSES) - { - auto slab = Mediumslab::get(p_auth); - - medium_dealloc_unchecked(slab, p_auth, p_ret, sizeclass); - } - else - { - large_dealloc_unchecked(p_auth, p_ret, size); - } -#endif - } - - /* - * Free memory of a dynamically known size. Must be called with an - * external pointer. - */ - SNMALLOC_FAST_PATH void dealloc(void* p_raw, size_t size) - { -#ifdef SNMALLOC_PASS_THROUGH - UNUSED(size); - return external_alloc::free(p_raw); -#else - SNMALLOC_ASSERT(p_raw != nullptr); - - auto p_ret = CapPtr(p_raw); - auto p_auth = large_allocator.capptr_amplify(p_ret); - - if (likely((size - 1) <= (sizeclass_to_size(NUM_SMALL_CLASSES - 1) - 1))) - { - auto super = Superslab::get(p_auth); - sizeclass_t sizeclass = size_to_sizeclass(size); - - small_dealloc_unchecked(super, p_auth, p_ret, sizeclass); - return; - } - dealloc_sized_slow(p_auth, p_ret, size); -#endif - } - - SNMALLOC_SLOW_PATH void dealloc_sized_slow( - CapPtr p_auth, CapPtr p_ret, size_t size) - { - if (size == 0) - return dealloc(p_ret.unsafe_capptr, 1); - - if (likely(size <= sizeclass_to_size(NUM_SIZECLASSES - 1))) - { - auto slab = Mediumslab::get(p_auth); - sizeclass_t sizeclass = size_to_sizeclass(size); - medium_dealloc_unchecked(slab, p_auth, p_ret, sizeclass); - return; - } - large_dealloc_unchecked(p_auth, p_ret, size); - } - - /* - * Free memory of an unknown size. Must be called with an external - * pointer. - */ - SNMALLOC_FAST_PATH void dealloc(void* p_raw) - { -#ifdef SNMALLOC_PASS_THROUGH - return external_alloc::free(p_raw); -#else - - uint8_t chunkmap_slab_kind = chunkmap().get(address_cast(p_raw)); - - auto p_ret = CapPtr(p_raw); - auto p_auth = large_allocator.capptr_amplify(p_ret); - - if (likely(chunkmap_slab_kind == CMSuperslab)) - { - /* - * If this is a live allocation (and not a double- or wild-free), it's - * safe to construct these Slab and Metaslab pointers and reading the - * sizeclass won't fail, since either we or the other allocator can't - * reuse the slab, as we have not yet deallocated this pointer. - * - * On the other hand, in the case of a double- or wild-free, this might - * fault or data race against reused memory. Eventually, we will come - * to rely on revocation to guard against these cases: changing the - * superslab kind will require revoking the whole superslab, as will - * changing a slab's size class. However, even then, until we get - * through the guard in small_dealloc_start(), we must treat this as - * possibly stale and suspect. 
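Both the sized alloc and dealloc fast paths above use the same branch-free routing for zero-sized requests: performing the "- 1" first makes size == 0 wrap to SIZE_MAX, so it fails the "is this small?" comparison and falls through to the slow path without a separate zero check. A tiny self-contained check of that behaviour; the threshold value is made up:

#include <cassert>
#include <cstddef>

int main()
{
  const size_t threshold = 1 << 14; // stand-in for the largest small size
  auto is_small = [&](size_t size) { return (size - 1) <= (threshold - 1); };
  assert(is_small(1));
  assert(is_small(threshold));
  assert(!is_small(threshold + 1));
  assert(!is_small(0)); // 0 - 1 wraps to SIZE_MAX, so zero takes the slow path
  return 0;
}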
- */ - auto super = Superslab::get(p_auth); - auto slab = Metaslab::get_slab(p_auth); - auto meta = super->get_meta(slab); - sizeclass_t sizeclass = meta->sizeclass(); - - small_dealloc_checked_sizeclass(super, slab, p_auth, p_ret, sizeclass); - return; - } - dealloc_not_small(p_auth, p_ret, chunkmap_slab_kind); - } - - SNMALLOC_SLOW_PATH void dealloc_not_small( - CapPtr p_auth, - CapPtr p_ret, - uint8_t chunkmap_slab_kind) - { - handle_message_queue(); - - if (p_ret == nullptr) - return; - - if (chunkmap_slab_kind == CMMediumslab) - { - /* - * The same reasoning from the fast path continues to hold here. These - * values are suspect until we complete the double-free check in - * medium_dealloc_smart(). - */ - auto slab = Mediumslab::get(p_auth); - sizeclass_t sizeclass = slab->get_sizeclass(); - - medium_dealloc_checked_sizeclass(slab, p_auth, p_ret, sizeclass); - return; - } - - if (chunkmap_slab_kind == CMNotOurs) - { - error("Not allocated by this allocator"); - } - - large_dealloc_checked_sizeclass( - p_auth, - p_ret, - bits::one_at_bit(chunkmap_slab_kind), - chunkmap_slab_kind); -#endif - } - - template - void* external_pointer(void* p_raw) - { -#ifdef SNMALLOC_PASS_THROUGH - error("Unsupported"); - UNUSED(p_raw); -#else - uint8_t chunkmap_slab_kind = chunkmap().get(address_cast(p_raw)); - auto p_ret = CapPtr(p_raw); - auto p_auth = large_allocator.capptr_amplify(p_ret); - - auto super = Superslab::get(p_auth); - if (chunkmap_slab_kind == CMSuperslab) - { - auto slab = Metaslab::get_slab(p_auth); - auto meta = super->get_meta(slab); - - sizeclass_t sc = meta->sizeclass(); - auto slab_end = - Aal::capptr_rebound(p_ret, pointer_offset(slab, SLAB_SIZE)); - - return capptr_reveal(external_pointer(p_ret, sc, slab_end)); - } - if (chunkmap_slab_kind == CMMediumslab) - { - auto slab = Mediumslab::get(p_auth); - - sizeclass_t sc = slab->get_sizeclass(); - auto slab_end = - Aal::capptr_rebound(p_ret, pointer_offset(slab, SUPERSLAB_SIZE)); - - return capptr_reveal(external_pointer(p_ret, sc, slab_end)); - } - - auto ss = super.as_void(); - - while (chunkmap_slab_kind >= CMLargeRangeMin) - { - // This is a large alloc redirect. - ss = pointer_offset_signed( - ss, - -(static_cast(1) - << (chunkmap_slab_kind - CMLargeRangeMin + SUPERSLAB_BITS))); - chunkmap_slab_kind = chunkmap().get(address_cast(ss)); - } - - if (chunkmap_slab_kind == CMNotOurs) - { - if constexpr ((location == End) || (location == OnePastEnd)) - // We don't know the End, so return MAX_PTR - return pointer_offset(nullptr, UINTPTR_MAX); - else - // We don't know the Start, so return MIN_PTR - return nullptr; - } - - SNMALLOC_ASSERT( - (chunkmap_slab_kind >= CMLargeMin) && - (chunkmap_slab_kind <= CMLargeMax)); - - CapPtr retss = Aal::capptr_rebound(p_ret, ss); - CapPtr ret; - - // This is a large alloc, mask off to the slab size. - if constexpr (location == Start) - ret = retss; - else if constexpr (location == End) - ret = pointer_offset(retss, (bits::one_at_bit(chunkmap_slab_kind)) - 1); - else - ret = pointer_offset(retss, bits::one_at_bit(chunkmap_slab_kind)); - - return capptr_reveal(ret); -#endif - } - - private: - SNMALLOC_SLOW_PATH static size_t alloc_size_error() - { - error("Not allocated by this allocator"); - } - - public: - SNMALLOC_FAST_PATH size_t alloc_size(const void* p_raw) - { -#ifdef SNMALLOC_PASS_THROUGH - return external_alloc::malloc_usable_size(const_cast(p_raw)); -#else - // This must be called on an external pointer. 
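Stripped of the chunkmap lookup and CapPtr rebounding, external_pointer above reduces to rounding an interior pointer's offset down to a multiple of the object size and then picking the requested boundary. An illustrative sketch on plain addresses (constants are invented, and the real code works backwards from the slab end via round_by_sizeclass rather than dividing from the slab start):

#include <cassert>
#include <cstddef>
#include <cstdint>

enum ToyBoundary { ToyStart, ToyEnd, ToyOnePastEnd };

uintptr_t external_pointer_sketch(
  uintptr_t p, uintptr_t slab_base, size_t object_size, ToyBoundary where)
{
  // Round the offset down to the start of the enclosing object.
  uintptr_t start = slab_base + ((p - slab_base) / object_size) * object_size;
  switch (where)
  {
    case ToyStart:      return start;
    case ToyEnd:        return start + object_size - 1;
    case ToyOnePastEnd: return start + object_size;
  }
  return start;
}

int main()
{
  // Object size 48, slab at 0x1000; 0x1075 lies in the third object.
  assert(external_pointer_sketch(0x1075, 0x1000, 48, ToyStart) == 0x1000 + 96);
  assert(external_pointer_sketch(0x1075, 0x1000, 48, ToyOnePastEnd) == 0x1000 + 144);
  return 0;
}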
- size_t chunkmap_slab_kind = chunkmap().get(address_cast(p_raw)); - auto p_ret = CapPtr(const_cast(p_raw)); - auto p_auth = large_allocator.capptr_amplify(p_ret); - - if (likely(chunkmap_slab_kind == CMSuperslab)) - { - auto super = Superslab::get(p_auth); - - // Reading a remote sizeclass won't fail, since the other allocator - // can't reuse the slab, as we have no yet deallocated this pointer. - auto slab = Metaslab::get_slab(p_auth); - auto meta = super->get_meta(slab); - - return sizeclass_to_size(meta->sizeclass()); - } - - if (likely(chunkmap_slab_kind == CMMediumslab)) - { - auto slab = Mediumslab::get(p_auth); - // Reading a remote sizeclass won't fail, since the other allocator - // can't reuse the slab, as we have no yet deallocated this pointer. - return sizeclass_to_size(slab->get_sizeclass()); - } - - if (likely(chunkmap_slab_kind != CMNotOurs)) - { - SNMALLOC_ASSERT( - (chunkmap_slab_kind >= CMLargeMin) && - (chunkmap_slab_kind <= CMLargeMax)); - - return bits::one_at_bit(chunkmap_slab_kind); - } - - return alloc_size_error(); -#endif - } - - /** - * Return this allocator's "truncated" ID, an integer useful as a hash - * value of this allocator. - * - * Specifically, this is the address of this allocator's message queue - * with the least significant bits missing, masked by SIZECLASS_MASK. - * This will be unique for Allocs with inline queues; Allocs with - * out-of-line queues must ensure that no two queues' addresses collide - * under this masking. - */ - size_t get_trunc_id() - { - return public_state()->trunc_id(); - } - - private: - using alloc_id_t = typename Remote::alloc_id_t; - - SlabList small_classes[NUM_SMALL_CLASSES]; - DLList medium_classes[NUM_MEDIUM_CLASSES]; - - DLList super_available; - DLList super_only_short_available; - - RemoteCache remote_cache; - - std::conditional_t - remote_alloc; - - auto* public_state() - { - if constexpr (IsQueueInline) - { - return &remote_alloc; - } - else - { - return remote_alloc; - } - } - - auto& message_queue() - { - return public_state()->message_queue; - } - - template - friend class Pool; - - public: - Allocator( - MemoryProvider& m, - ChunkMap&& c = ChunkMap(), - RemoteAllocator* r = nullptr, - bool isFake = false) - : large_allocator(m), chunk_map(c) - { - if constexpr (IsQueueInline) - { - SNMALLOC_ASSERT(r == nullptr); - (void)r; - } - else - { - remote_alloc = r; - } - - // If this is fake, don't do any of the bits of initialisation that may - // allocate memory. - if (isFake) - return; - - // Entropy must be first, so that all data-structures can use the key - // it generates. - // This must occur before any freelists are constructed. - entropy.init(); - - init_message_queue(); - message_queue().invariant(); - -#ifndef NDEBUG - for (sizeclass_t i = 0; i < NUM_SIZECLASSES; i++) - { - size_t size = sizeclass_to_size(i); - sizeclass_t sc1 = size_to_sizeclass(size); - sizeclass_t sc2 = size_to_sizeclass_const(size); - size_t size1 = sizeclass_to_size(sc1); - size_t size2 = sizeclass_to_size(sc2); - - SNMALLOC_ASSERT(sc1 == i); - SNMALLOC_ASSERT(sc1 == sc2); - SNMALLOC_ASSERT(size1 == size); - SNMALLOC_ASSERT(size1 == size2); - } -#endif - } - - /** - * If result parameter is non-null, then false is assigned into the - * the location pointed to by result if this allocator is non-empty. - * - * If result pointer is null, then this code raises a Pal::error on the - * particular check that fails, if any do fail. 
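For large allocations, alloc_size above consults no per-object metadata at all: the chunkmap entry for the region records the log2 of the (power-of-two) allocation size, so the size is recovered with a single shift. A minimal illustration; the value 24 is just an example:

#include <cstddef>
#include <cstdint>
#include <cstdio>

int main()
{
  uint8_t chunkmap_slab_kind = 24;               // recorded when the chunk was allocated
  size_t size = size_t(1) << chunkmap_slab_kind; // cf. bits::one_at_bit above
  printf("%zu bytes\n", size);                   // prints 16777216, i.e. 16 MiB
  return 0;
}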
- */ - void debug_is_empty(bool* result) - { - auto test = [&result](auto& queue) { - if (!queue.is_empty()) - { - if (result != nullptr) - *result = false; - else - error("debug_is_empty: found non-empty allocator"); - } - }; - - // Destroy the message queue so that it has no stub message. - { - CapPtr p = message_queue().destroy(); - - while (p != nullptr) - { - auto n = p->non_atomic_next; - handle_dealloc_remote(p); - p = n; - } - } - - // Dump bump allocators back into memory - for (size_t i = 0; i < NUM_SMALL_CLASSES; i++) - { - auto& bp = bump_ptrs[i]; - auto rsize = sizeclass_to_size(i); - FreeListIter ffl; - - CapPtr super = Superslab::get(bp); - auto super_slabd = capptr_debug_chunkd_from_chunk(super); - - CapPtr slab = Metaslab::get_slab(bp); - auto slab_slabd = capptr_debug_chunkd_from_chunk(slab); - - while (pointer_align_up(bp, SLAB_SIZE) != bp) - { - Slab::alloc_new_list(bp, ffl, rsize, entropy); - while (!ffl.empty()) - { - small_dealloc_offseted_inner( - super_slabd, slab_slabd, ffl.take(entropy), i); - } - } - } - - for (size_t i = 0; i < NUM_SMALL_CLASSES; i++) - { - if (!small_fast_free_lists[i].empty()) - { - auto head = small_fast_free_lists[i].peek(); - auto head_auth = large_allocator.capptr_amplify(head); - auto super = Superslab::get(head_auth); - auto slab = Metaslab::get_slab(head_auth); - do - { - auto curr = small_fast_free_lists[i].take(entropy); - small_dealloc_offseted_inner(super, slab, curr, i); - } while (!small_fast_free_lists[i].empty()); - - test(small_classes[i]); - } - } - - for (auto& medium_class : medium_classes) - { - test(medium_class); - } - - test(super_available); - test(super_only_short_available); - - // Place the static stub message on the queue. - init_message_queue(); - } - - template - static CapPtr external_pointer( - CapPtr p_ret, - sizeclass_t sizeclass, - CapPtr end_point) - { - size_t rsize = sizeclass_to_size(sizeclass); - - auto end_point_correction = location == End ? - pointer_offset_signed(end_point, -1) : - (location == OnePastEnd ? - end_point : - pointer_offset_signed(end_point, -static_cast(rsize))); - - size_t offset_from_end = - pointer_diff(p_ret, pointer_offset_signed(end_point, -1)); - - size_t end_to_end = round_by_sizeclass(sizeclass, offset_from_end); - - return pointer_offset_signed( - end_point_correction, -static_cast(end_to_end)); - } - - void init_message_queue() - { - // Manufacture an allocation to prime the queue - // Using an actual allocation removes a conditional from a critical path. - auto dummy = CapPtr(alloc(MIN_ALLOC_SIZE)) - .template as_static(); - if (dummy == nullptr) - { - error("Critical error: Out-of-memory during initialisation."); - } - dummy->set_info(get_trunc_id(), size_to_sizeclass_const(MIN_ALLOC_SIZE)); - message_queue().init(dummy); - } - - SNMALLOC_FAST_PATH void handle_dealloc_remote(CapPtr p) - { - auto target_id = Remote::trunc_target_id(p, &large_allocator); - if (likely(target_id == get_trunc_id())) - { - // Destined for my slabs - auto p_auth = large_allocator.template capptr_amplify(p); - auto super = Superslab::get(p_auth); - auto sizeclass = p->sizeclass(); - dealloc_not_large_local(super, Remote::clear(p), sizeclass); - } - else - { - // Merely routing; despite the cast here, p is going to be cast right - // back to a Remote. 
- remote_cache.dealloc( - target_id, p.template as_reinterpret(), p->sizeclass()); - } - } - - SNMALLOC_SLOW_PATH void dealloc_not_large( - RemoteAllocator* target, CapPtr p, sizeclass_t sizeclass) - { - if (likely(target->trunc_id() == get_trunc_id())) - { - auto p_auth = large_allocator.capptr_amplify(p); - auto super = Superslab::get(p_auth); - dealloc_not_large_local(super, p, sizeclass); - } - else - { - remote_dealloc_and_post(target, p, sizeclass); - } - } - - // TODO: Adjust when medium slab same as super slab. - // Second parameter should be a FreeObject. - SNMALLOC_FAST_PATH void dealloc_not_large_local( - CapPtr super, - CapPtr p, - sizeclass_t sizeclass) - { - // Guard against remote queues that have colliding IDs - SNMALLOC_ASSERT(super->get_allocator() == public_state()); - - if (likely(sizeclass < NUM_SMALL_CLASSES)) - { - SNMALLOC_ASSERT(super->get_kind() == Super); - check_client( - super->get_kind() == Super, - "Heap Corruption: Sizeclass of remote dealloc corrupt."); - auto slab = Metaslab::get_slab(Aal::capptr_rebound(super.as_void(), p)); - check_client( - super->get_meta(slab)->sizeclass() == sizeclass, - "Heap Corruption: Sizeclass of remote dealloc corrupt."); - small_dealloc_offseted(super, slab, p, sizeclass); - } - else - { - auto medium = super.template as_reinterpret(); - SNMALLOC_ASSERT(medium->get_kind() == Medium); - check_client( - medium->get_kind() == Medium, - "Heap Corruption: Sizeclass of remote dealloc corrupt."); - check_client( - medium->get_sizeclass() == sizeclass, - "Heap Corruption: Sizeclass of remote dealloc corrupt."); - medium_dealloc_local(medium, p, sizeclass); - } - } - - SNMALLOC_SLOW_PATH void handle_message_queue_inner() - { - for (size_t i = 0; i < REMOTE_BATCH; i++) - { - auto r = message_queue().dequeue(); - - if (unlikely(!r.second)) - break; - - handle_dealloc_remote(r.first); - } - - // Our remote queues may be larger due to forwarding remote frees. - if (likely(remote_cache.capacity > 0)) - return; - - stats().remote_post(); - remote_cache.post(this, get_trunc_id()); - } - - /** - * Check if this allocator has messages to deallocate blocks from another - * thread - */ - SNMALLOC_FAST_PATH bool has_messages() - { - return !(message_queue().is_empty()); - } - - SNMALLOC_FAST_PATH void handle_message_queue() - { - // Inline the empty check, but not necessarily the full queue handling. - if (likely(!has_messages())) - return; - - handle_message_queue_inner(); - } - - CapPtr get_superslab() - { - auto super = super_available.get_head(); - - if (super != nullptr) - return super; - - super = large_allocator - .template alloc(0, SUPERSLAB_SIZE, SUPERSLAB_SIZE) - .template as_reinterpret(); - - if (super == nullptr) - return super; - - super->init(public_state()); - chunkmap().set_slab(super); - super_available.insert(super); - return super; - } - - void reposition_superslab(CapPtr super) - { - switch (super->get_status()) - { - case Superslab::Full: - { - // Remove from the list of superslabs that have available slabs. - super_available.remove(super); - break; - } - - case Superslab::Available: - { - // Do nothing. - break; - } - - case Superslab::OnlyShortSlabAvailable: - { - // Move from the general list to the short slab only list. - super_available.remove(super); - super_only_short_available.insert(super); - break; - } - - case Superslab::Empty: - { - // Can't be empty since we just allocated. 
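Frees aimed at another thread's allocator are not posted one at a time: they are buffered in remote_cache and flushed only once a byte budget is exhausted, which is what the capacity check in handle_message_queue_inner above is guarding. A simplified sketch of that batching idea; ToyRemoteCache and its std::map are inventions for illustration, whereas the real cache uses a fixed array of slots keyed by truncated allocator IDs and lock-free message queues:

#include <cstddef>
#include <cstdio>
#include <map>
#include <vector>

struct ToyRemoteCache
{
  std::map<size_t, std::vector<void*>> pending; // keyed by target allocator id
  ptrdiff_t capacity = 1 << 20;                 // cf. REMOTE_CACHE below

  void dealloc(size_t target_id, void* p, size_t size)
  {
    pending[target_id].push_back(p);
    capacity -= static_cast<ptrdiff_t>(size);
    if (capacity <= 0) // budget exhausted: flush everything to its owners
      post();
  }

  void post()
  {
    for (auto& [id, ptrs] : pending)
      printf("posting %zu frees to allocator %zu\n", ptrs.size(), id);
    pending.clear();
    capacity = 1 << 20;
  }
};

int main()
{
  ToyRemoteCache cache;
  for (int i = 0; i < 5; i++)
    cache.dealloc(7, nullptr, 300000); // crosses the budget on the 4th free
  return 0;
}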
- error("Unreachable"); - break; - } - } - } - - SNMALLOC_SLOW_PATH CapPtr alloc_slab(sizeclass_t sizeclass) - { - stats().sizeclass_alloc_slab(sizeclass); - if (Superslab::is_short_sizeclass(sizeclass)) - { - // Pull a short slab from the list of superslabs that have only the - // short slab available. - CapPtr super = super_only_short_available.pop(); - - if (super != nullptr) - { - auto slab = Superslab::alloc_short_slab(super, sizeclass); - SNMALLOC_ASSERT(super->is_full()); - return slab; - } - - super = get_superslab(); - - if (super == nullptr) - return nullptr; - - auto slab = Superslab::alloc_short_slab(super, sizeclass); - reposition_superslab(super); - return slab; - } - - auto super = get_superslab(); - - if (super == nullptr) - return nullptr; - - auto slab = Superslab::alloc_slab(super, sizeclass); - reposition_superslab(super); - return slab; - } - - template - SNMALLOC_FAST_PATH CapPtr small_alloc(size_t size) - { - SNMALLOC_ASSUME(size <= SLAB_SIZE); - sizeclass_t sizeclass = size_to_sizeclass(size); - return small_alloc_inner(sizeclass, size); - } - - template - SNMALLOC_FAST_PATH CapPtr - small_alloc_inner(sizeclass_t sizeclass, size_t size) - { - SNMALLOC_ASSUME(sizeclass < NUM_SMALL_CLASSES); - auto& fl = small_fast_free_lists[sizeclass]; - if (likely(!fl.empty())) - { - stats().alloc_request(size); - stats().sizeclass_alloc(sizeclass); - auto p = fl.take(entropy); - if constexpr (zero_mem == YesZero) - { - pal_zero( - p, sizeclass_to_size(sizeclass)); - } - - // TODO: Should this be zeroing the next pointer? - return capptr_export(p.as_void()); - } - - if (likely(!has_messages())) - return small_alloc_next_free_list(sizeclass, size); - - return small_alloc_mq_slow(sizeclass, size); - } - - /** - * Slow path for handling message queue, before dealing with small - * allocation request. - */ - template - SNMALLOC_SLOW_PATH CapPtr - small_alloc_mq_slow(sizeclass_t sizeclass, size_t size) - { - handle_message_queue_inner(); - - return small_alloc_next_free_list(sizeclass, size); - } - - /** - * Attempt to find a new free list to allocate from - */ - template - SNMALLOC_SLOW_PATH CapPtr - small_alloc_next_free_list(sizeclass_t sizeclass, size_t size) - { - size_t rsize = sizeclass_to_size(sizeclass); - auto& sl = small_classes[sizeclass]; - - if (likely(!sl.is_empty())) - { - stats().alloc_request(size); - stats().sizeclass_alloc(sizeclass); - - auto meta = sl.get_next().template as_static(); - auto& ffl = small_fast_free_lists[sizeclass]; - return Metaslab::alloc( - meta, ffl, rsize, entropy); - } - return small_alloc_rare(sizeclass, size); - } - - /** - * Called when there are no available free list to service this request - * Could be due to using the dummy allocator, or needing to bump allocate a - * new free list. - */ - template - SNMALLOC_SLOW_PATH CapPtr - small_alloc_rare(sizeclass_t sizeclass, size_t size) - { - if (likely(!NeedsInitialisation(this))) - { - stats().alloc_request(size); - stats().sizeclass_alloc(sizeclass); - return small_alloc_new_free_list(sizeclass); - } - return small_alloc_first_alloc(sizeclass, size); - } - - /** - * Called on first allocation to set up the thread local allocator, - * then directs the allocation request to the newly created allocator. - */ - template - SNMALLOC_SLOW_PATH CapPtr - small_alloc_first_alloc(sizeclass_t sizeclass, size_t size) - { - /* - * We have to convert through void* as part of the thread allocator - * initializer API. 
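The small-object paths that follow (small_alloc_new_free_list and small_alloc_build_free_list) refill the per-sizeclass fast free list by carving the thread-local bump region into objects and threading them onto a list. A simplified sketch of that construction, without the entropy-based shuffling and pointer protection the real code applies:

#include <cstddef>
#include <cstdio>
#include <vector>

struct FreeObject { FreeObject* next; };

FreeObject* build_free_list(void* region, size_t region_size, size_t object_size)
{
  char* p = static_cast<char*>(region);
  FreeObject* head = nullptr;
  // Thread every object in the region onto a singly linked list.
  for (size_t off = 0; off + object_size <= region_size; off += object_size)
  {
    auto obj = reinterpret_cast<FreeObject*>(p + off);
    obj->next = head;
    head = obj;
  }
  return head;
}

int main()
{
  std::vector<char> slab(1024);
  FreeObject* fl = build_free_list(slab.data(), slab.size(), 64);
  size_t n = 0;
  for (FreeObject* o = fl; o != nullptr; o = o->next)
    n++;
  printf("free list has %zu objects\n", n); // 1024 / 64 = 16
  return 0;
}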
Be a little more verbose than strictly necessary to - * demonstrate that small_alloc_inner is giving us a CBAllocE-annotated - * pointer before we just go slapping that label on a void* later. - */ - void* ret = InitThreadAllocator([sizeclass, size](void* alloc) { - CapPtr ret = - reinterpret_cast(alloc) - ->template small_alloc_inner(sizeclass, size); - return ret.unsafe_capptr; - }); - return CapPtr(ret); - } - - /** - * Called to create a new free list, and service the request from that new - * list. - */ - template - SNMALLOC_FAST_PATH CapPtr - small_alloc_new_free_list(sizeclass_t sizeclass) - { - auto& bp = bump_ptrs[sizeclass]; - if (likely(pointer_align_up(bp, SLAB_SIZE) != bp)) - { - return small_alloc_build_free_list(sizeclass); - } - // Fetch new slab - return small_alloc_new_slab(sizeclass); - } - - /** - * Creates a new free list from the thread local bump allocator and service - * the request from that new list. - */ - template - SNMALLOC_FAST_PATH CapPtr - small_alloc_build_free_list(sizeclass_t sizeclass) - { - auto& bp = bump_ptrs[sizeclass]; - auto rsize = sizeclass_to_size(sizeclass); - auto& ffl = small_fast_free_lists[sizeclass]; - SNMALLOC_ASSERT(ffl.empty()); - Slab::alloc_new_list(bp, ffl, rsize, entropy); - - auto p = ffl.take(entropy); - - if constexpr (zero_mem == YesZero) - { - pal_zero(p, sizeclass_to_size(sizeclass)); - } - - // TODO: Should this be zeroing the next pointer? - return capptr_export(p.as_void()); - } - - /** - * Allocates a new slab to allocate from, set it to be the bump allocator - * for this size class, and then builds a new free list from the thread - * local bump allocator and service the request from that new list. - */ - template - SNMALLOC_SLOW_PATH CapPtr - small_alloc_new_slab(sizeclass_t sizeclass) - { - auto& bp = bump_ptrs[sizeclass]; - // Fetch new slab - auto slab = alloc_slab(sizeclass); - if (slab == nullptr) - return nullptr; - bp = pointer_offset( - slab, get_initial_offset(sizeclass, Metaslab::is_short(slab))); - - return small_alloc_build_free_list(sizeclass); - } - - SNMALLOC_FAST_PATH void small_dealloc_unchecked( - CapPtr super, - CapPtr p_auth, - CapPtr p_ret, - sizeclass_t sizeclass) - { - check_client( - chunkmap().get(address_cast(p_ret)) == CMSuperslab, - "Claimed small deallocation is not in a Superslab"); - - small_dealloc_checked_chunkmap(super, p_auth, p_ret, sizeclass); - } - - SNMALLOC_FAST_PATH void small_dealloc_checked_chunkmap( - CapPtr super, - CapPtr p_auth, - CapPtr p_ret, - sizeclass_t sizeclass) - { - auto slab = Metaslab::get_slab(p_auth); - check_client( - sizeclass == super->get_meta(slab)->sizeclass(), - "Claimed small deallocation with mismatching size class"); - - small_dealloc_checked_sizeclass(super, slab, p_auth, p_ret, sizeclass); - } - - SNMALLOC_FAST_PATH void small_dealloc_checked_sizeclass( - CapPtr super, - CapPtr slab, - CapPtr p_auth, - CapPtr p_ret, - sizeclass_t sizeclass) - { - check_client( - Slab::get_meta(slab)->is_start_of_object(address_cast(p_ret)), - "Not deallocating start of an object"); - - small_dealloc_start(super, slab, p_auth, p_ret, sizeclass); - } - - SNMALLOC_FAST_PATH void small_dealloc_start( - CapPtr super, - CapPtr slab, - CapPtr p_auth, - CapPtr p_ret, - sizeclass_t sizeclass) - { - // TODO: with SSM/MTE, guard against double-frees - UNUSED(p_ret); - - RemoteAllocator* target = super->get_allocator(); - - auto p = - Aal::capptr_bound(p_auth, sizeclass_to_size(sizeclass)); - - if (likely(target == public_state())) - { - small_dealloc_offseted(super, slab, p, 
sizeclass); - } - else - remote_dealloc(target, p, sizeclass); - } - - SNMALLOC_FAST_PATH void small_dealloc_offseted( - CapPtr super, - CapPtr slab, - CapPtr p, - sizeclass_t sizeclass) - { - stats().sizeclass_dealloc(sizeclass); - - small_dealloc_offseted_inner(super, slab, FreeObject::make(p), sizeclass); - } - - SNMALLOC_FAST_PATH void small_dealloc_offseted_inner( - CapPtr super, - CapPtr slab, - CapPtr p, - sizeclass_t sizeclass) - { - if (likely(Slab::dealloc_fast(slab, super, p, entropy))) - return; - - small_dealloc_offseted_slow(super, slab, p, sizeclass); - } - - SNMALLOC_SLOW_PATH void small_dealloc_offseted_slow( - CapPtr super, - CapPtr slab, - CapPtr p, - sizeclass_t sizeclass) - { - bool was_full = super->is_full(); - SlabList* sl = &small_classes[sizeclass]; - Superslab::Action a = Slab::dealloc_slow(slab, sl, super, p, entropy); - if (likely(a == Superslab::NoSlabReturn)) - return; - stats().sizeclass_dealloc_slab(sizeclass); - - if (a == Superslab::NoStatusChange) - return; - - auto super_slab = capptr_chunk_from_chunkd(super, SUPERSLAB_SIZE); - - switch (super->get_status()) - { - case Superslab::Full: - { - error("Unreachable"); - break; - } - - case Superslab::Available: - { - if (was_full) - { - super_available.insert(super_slab); - } - else - { - super_only_short_available.remove(super_slab); - super_available.insert(super_slab); - } - break; - } - - case Superslab::OnlyShortSlabAvailable: - { - super_only_short_available.insert(super_slab); - break; - } - - case Superslab::Empty: - { - super_available.remove(super_slab); - - chunkmap().clear_slab(super_slab); - large_allocator.dealloc( - super_slab.template as_reinterpret(), 0); - stats().superslab_push(); - break; - } - } - } - - template - CapPtr - medium_alloc(sizeclass_t sizeclass, size_t rsize, size_t size) - { - sizeclass_t medium_class = sizeclass - NUM_SMALL_CLASSES; - - auto sc = &medium_classes[medium_class]; - CapPtr slab = sc->get_head(); - CapPtr p; - - if (slab != nullptr) - { - p = Mediumslab::alloc( - slab, rsize); - - if (Mediumslab::full(slab)) - sc->pop(); - } - else - { - if (NeedsInitialisation(this)) - { - /* - * We have to convert through void* as part of the thread allocator - * initializer API. Be a little more verbose than strictly necessary - * to demonstrate that small_alloc_inner is giving us an annotated - * pointer before we just go slapping that label on a void* later. 
- */ - void* ret = - InitThreadAllocator([size, rsize, sizeclass](void* alloc) { - CapPtr ret = - reinterpret_cast(alloc)->medium_alloc( - sizeclass, rsize, size); - return ret.unsafe_capptr; - }); - return CapPtr(ret); - } - - auto newslab = - large_allocator - .template alloc(0, SUPERSLAB_SIZE, SUPERSLAB_SIZE) - .template as_reinterpret(); - - if (newslab == nullptr) - return nullptr; - - Mediumslab::init(newslab, public_state(), sizeclass, rsize); - chunkmap().set_slab(newslab); - - auto newslab_export = capptr_export(newslab); - - p = Mediumslab::alloc( - newslab_export, rsize); - - if (!Mediumslab::full(newslab)) - sc->insert(newslab_export); - } - - stats().alloc_request(size); - stats().sizeclass_alloc(sizeclass); - - return p; - } - - SNMALLOC_FAST_PATH - void medium_dealloc_unchecked( - CapPtr slab, - CapPtr p_auth, - CapPtr p_ret, - sizeclass_t sizeclass) - { - check_client( - chunkmap().get(address_cast(p_ret)) == CMMediumslab, - "Claimed medium deallocation is not in a Mediumslab"); - - medium_dealloc_checked_chunkmap(slab, p_auth, p_ret, sizeclass); - } - - SNMALLOC_FAST_PATH - void medium_dealloc_checked_chunkmap( - CapPtr slab, - CapPtr p_auth, - CapPtr p_ret, - sizeclass_t sizeclass) - { - check_client( - slab->get_sizeclass() == sizeclass, - "Claimed medium deallocation of the wrong sizeclass"); - - medium_dealloc_checked_sizeclass(slab, p_auth, p_ret, sizeclass); - } - - SNMALLOC_FAST_PATH - void medium_dealloc_checked_sizeclass( - CapPtr slab, - CapPtr p_auth, - CapPtr p_ret, - sizeclass_t sizeclass) - { - check_client( - is_multiple_of_sizeclass( - sizeclass, address_cast(slab) + SUPERSLAB_SIZE - address_cast(p_ret)), - "Not deallocating start of an object"); - - medium_dealloc_start(slab, p_auth, p_ret, sizeclass); - } - - SNMALLOC_FAST_PATH - void medium_dealloc_start( - CapPtr slab, - CapPtr p_auth, - CapPtr p_ret, - sizeclass_t sizeclass) - { - // TODO: with SSM/MTE, guard against double-frees - UNUSED(p_ret); - - RemoteAllocator* target = slab->get_allocator(); - - // TODO: This bound is perhaps superfluous in the local case, as - // mediumslabs store free objects by offset rather than pointer. - auto p = - Aal::capptr_bound(p_auth, sizeclass_to_size(sizeclass)); - - if (likely(target == public_state())) - medium_dealloc_local(slab, p, sizeclass); - else - { - remote_dealloc(target, p, sizeclass); - } - } - - SNMALLOC_FAST_PATH - void medium_dealloc_local( - CapPtr slab, - CapPtr p, - sizeclass_t sizeclass) - { - stats().sizeclass_dealloc(sizeclass); - bool was_full = Mediumslab::dealloc(slab, p); - - auto slab_bounded = capptr_chunk_from_chunkd(slab, SUPERSLAB_SIZE); - - if (Mediumslab::empty(slab)) - { - if (!was_full) - { - sizeclass_t medium_class = sizeclass - NUM_SMALL_CLASSES; - auto sc = &medium_classes[medium_class]; - /* - * This unsafety lets us avoid applying platform constraints to a - * pointer we are just about to drop on the floor; remove() uses its - * argument but does not persist it. 
- */ - sc->remove(CapPtr(slab_bounded.unsafe_capptr)); - } - - chunkmap().clear_slab(slab_bounded); - large_allocator.dealloc( - slab_bounded.template as_reinterpret(), 0); - stats().superslab_push(); - } - else if (was_full) - { - sizeclass_t medium_class = sizeclass - NUM_SMALL_CLASSES; - auto sc = &medium_classes[medium_class]; - sc->insert(capptr_export(slab_bounded)); - } - } - - template - CapPtr large_alloc(size_t size) - { - if (NeedsInitialisation(this)) - { - // MSVC-vs-CapPtr triggering; xref CapPtr's constructor - void* ret = InitThreadAllocator([size](void* alloc) { - CapPtr ret = - reinterpret_cast(alloc)->large_alloc(size); - return ret.unsafe_capptr; - }); - return CapPtr(ret); - } - - size_t size_bits = bits::next_pow2_bits(size); - size_t large_class = size_bits - SUPERSLAB_BITS; - SNMALLOC_ASSERT(large_class < NUM_LARGE_CLASSES); - - size_t rsize = bits::one_at_bit(SUPERSLAB_BITS) << large_class; - // For superslab size, we always commit the whole range. - if (large_class == 0) - size = rsize; - - CapPtr p = - large_allocator.template alloc(large_class, rsize, size); - if (likely(p != nullptr)) - { - chunkmap().set_large_size(p, size); - - stats().alloc_request(size); - stats().large_alloc(large_class); - } - return capptr_export(Aal::capptr_bound(p, rsize)); - } - - void large_dealloc_unchecked( - CapPtr p_auth, CapPtr p_ret, size_t size) - { - uint8_t claimed_chunkmap_slab_kind = - static_cast(bits::next_pow2_bits(size)); - - // This also catches some "not deallocating start of an object" cases: if - // we're so far from the start that our actual chunkmap slab kind is not a - // legitimate large class - check_client( - chunkmap().get(address_cast(p_ret)) == claimed_chunkmap_slab_kind, - "Claimed large deallocation with wrong size class"); - - // round up as we would if we had had to look up the chunkmap_slab_kind - size_t rsize = bits::one_at_bit(claimed_chunkmap_slab_kind); - - large_dealloc_checked_sizeclass( - p_auth, p_ret, rsize, claimed_chunkmap_slab_kind); - } - - void large_dealloc_checked_sizeclass( - CapPtr p_auth, - CapPtr p_ret, - size_t size, - uint8_t chunkmap_slab_kind) - { - check_client( - address_cast(Superslab::get(p_auth)) == address_cast(p_ret), - "Not deallocating start of an object"); - SNMALLOC_ASSERT(bits::one_at_bit(chunkmap_slab_kind) >= SUPERSLAB_SIZE); - - large_dealloc_start(p_auth, p_ret, size, chunkmap_slab_kind); - } - - void large_dealloc_start( - CapPtr p_auth, - CapPtr p_ret, - size_t size, - uint8_t chunkmap_slab_kind) - { - // TODO: with SSM/MTE, guard against double-frees - - if (NeedsInitialisation(this)) - { - InitThreadAllocator( - [p_auth, p_ret, size, chunkmap_slab_kind](void* alloc) { - reinterpret_cast(alloc)->large_dealloc_start( - p_auth, p_ret, size, chunkmap_slab_kind); - return nullptr; - }); - return; - } - - size_t large_class = chunkmap_slab_kind - SUPERSLAB_BITS; - auto slab = Aal::capptr_bound(p_auth, size); - - chunkmap().clear_large_size(slab, size); - - stats().large_dealloc(large_class); - - // Initialise in order to set the correct SlabKind. - slab->init(); - large_allocator.dealloc(slab, large_class); - } - - // This is still considered the fast path as all the complex code is tail - // called in its slow path. This leads to one fewer unconditional jump in - // Clang. - SNMALLOC_FAST_PATH - void remote_dealloc( - RemoteAllocator* target, CapPtr p, sizeclass_t sizeclass) - { - SNMALLOC_ASSERT(target->trunc_id() != get_trunc_id()); - - // Check whether this will overflow the cache first. 
If we are a fake - // allocator, then our cache will always be full and so we will never hit - // this path. - if (remote_cache.capacity > 0) - { - stats().remote_free(sizeclass); - remote_cache.dealloc(target->trunc_id(), p, sizeclass); - return; - } - - remote_dealloc_slow(target, p, sizeclass); - } - - SNMALLOC_SLOW_PATH void remote_dealloc_slow( - RemoteAllocator* target, - CapPtr p_auth, - sizeclass_t sizeclass) - { - SNMALLOC_ASSERT(target->trunc_id() != get_trunc_id()); - - // Now that we've established that we're in the slow path (if we're a - // real allocator, we will have to empty our cache now), check if we are - // a real allocator and construct one if we aren't. - if (NeedsInitialisation(this)) - { - InitThreadAllocator([target, p_auth, sizeclass](void* alloc) { - reinterpret_cast(alloc)->dealloc_not_large( - target, p_auth, sizeclass); - return nullptr; - }); - return; - } - - remote_dealloc_and_post(target, p_auth, sizeclass); - } - - SNMALLOC_SLOW_PATH void remote_dealloc_and_post( - RemoteAllocator* target, - CapPtr p_auth, - sizeclass_t sizeclass) - { - handle_message_queue(); - - stats().remote_free(sizeclass); - remote_cache.dealloc(target->trunc_id(), p_auth, sizeclass); - - stats().remote_post(); - remote_cache.post(this, get_trunc_id()); - } - - ChunkMap& chunkmap() - { - return chunk_map; - } - }; -} // namespace snmalloc diff --git a/src/mem/allocconfig.h b/src/mem/allocconfig.h deleted file mode 100644 index 2b25512fa..000000000 --- a/src/mem/allocconfig.h +++ /dev/null @@ -1,156 +0,0 @@ -#pragma once - -#include "../ds/bits.h" - -namespace snmalloc -{ -// The CHECK_CLIENT macro is used to turn on minimal checking of the client -// calling the API correctly. -#if !defined(NDEBUG) && !defined(CHECK_CLIENT) -# define CHECK_CLIENT -#endif - - SNMALLOC_FAST_PATH void check_client_impl(bool test, const char* const str) - { -#ifdef CHECK_CLIENT - if (unlikely(!test)) - error(str); -#else - UNUSED(test); - UNUSED(str); -#endif - } -#ifdef CHECK_CLIENT -# define check_client(test, str) check_client_impl(test, str) -#else -# define check_client(test, str) -#endif - - // 0 intermediate bits results in power of 2 small allocs. 1 intermediate - // bit gives additional sizeclasses at the midpoint between each power of 2. - // 2 intermediate bits gives 3 intermediate sizeclasses, etc. - static constexpr size_t INTERMEDIATE_BITS = -#ifdef USE_INTERMEDIATE_BITS - USE_INTERMEDIATE_BITS -#else - 2 -#endif - ; - - // Return remote small allocs when the local cache reaches this size. - static constexpr int64_t REMOTE_CACHE = -#ifdef USE_REMOTE_CACHE - USE_REMOTE_CACHE -#else - 1 << 20 -#endif - ; - - // Handle at most this many object from the remote dealloc queue at a time. - static constexpr size_t REMOTE_BATCH = -#ifdef USE_REMOTE_BATCH - REMOTE_BATCH -#else - 4096 -#endif - ; - - // Specifies smaller slab and super slab sizes for address space - // constrained scenarios. - static constexpr size_t USE_LARGE_CHUNKS = -#ifdef SNMALLOC_USE_LARGE_CHUNKS - // In 32 bit uses smaller superslab. - (bits::is64()) -#else - false -#endif - ; - - // Specifies even smaller slab and super slab sizes for open enclave. - static constexpr size_t USE_SMALL_CHUNKS = -#ifdef SNMALLOC_USE_SMALL_CHUNKS - true -#else - false -#endif - ; - - enum DecommitStrategy - { - /** - * Never decommit memory. - */ - DecommitNone, - /** - * Decommit superslabs when they are entirely empty. 
- */ - DecommitSuper, - /** - * Decommit superslabs only when we are informed of memory pressure by the - * OS, do not decommit anything in normal operation. - */ - DecommitSuperLazy - }; - - static constexpr DecommitStrategy decommit_strategy = -#ifdef USE_DECOMMIT_STRATEGY - USE_DECOMMIT_STRATEGY -#elif defined(_WIN32) && !defined(OPEN_ENCLAVE) - DecommitSuperLazy -#else - DecommitSuper -#endif - ; - - // The remaining values are derived, not configurable. - static constexpr size_t POINTER_BITS = - bits::next_pow2_bits_const(sizeof(uintptr_t)); - - // Used to isolate values on cache lines to prevent false sharing. - static constexpr size_t CACHELINE_SIZE = 64; - - static constexpr size_t PAGE_ALIGNED_SIZE = OS_PAGE_SIZE << INTERMEDIATE_BITS; - - // Minimum allocation size is space for two pointers. - static_assert(bits::next_pow2_const(sizeof(void*)) == sizeof(void*)); - static constexpr size_t MIN_ALLOC_SIZE = 2 * sizeof(void*); - static constexpr size_t MIN_ALLOC_BITS = bits::ctz_const(MIN_ALLOC_SIZE); - - // Slabs are 64 KiB unless constrained to 16 or even 8 KiB - static constexpr size_t SLAB_BITS = - USE_SMALL_CHUNKS ? 13 : (USE_LARGE_CHUNKS ? 16 : 14); - static constexpr size_t SLAB_SIZE = 1 << SLAB_BITS; - static constexpr size_t SLAB_MASK = ~(SLAB_SIZE - 1); - - // Superslabs are composed of this many slabs. Slab offsets are encoded as - // a byte, so the maximum count is 256. This must be a power of two to - // allow fast masking to find a superslab start address. - static constexpr size_t SLAB_COUNT_BITS = - USE_SMALL_CHUNKS ? 5 : (USE_LARGE_CHUNKS ? 8 : 6); - static constexpr size_t SLAB_COUNT = 1 << SLAB_COUNT_BITS; - static constexpr size_t SUPERSLAB_SIZE = SLAB_SIZE * SLAB_COUNT; - static constexpr size_t SUPERSLAB_MASK = ~(SUPERSLAB_SIZE - 1); - static constexpr size_t SUPERSLAB_BITS = SLAB_BITS + SLAB_COUNT_BITS; - - static_assert((1ULL << SUPERSLAB_BITS) == SUPERSLAB_SIZE, "Sanity check"); - - // Number of slots for remote deallocation. 
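A worked example of the INTERMEDIATE_BITS setting defined above: with two intermediate bits there are three extra sizeclasses between consecutive powers of two, i.e. candidate sizes of the form 2^e + m * 2^(e-2) for m in 0..3. This is illustrative only; the real sizeclass table also folds in MIN_ALLOC_SIZE and slab constraints:

#include <cstdio>

int main()
{
  const unsigned intermediate_bits = 2;
  for (unsigned e = 5; e <= 7; e++)              // a few exponents: 32..128
    for (unsigned m = 0; m < (1u << intermediate_bits); m++)
      printf("%u ", (1u << e) + m * (1u << (e - intermediate_bits)));
  printf("\n"); // prints: 32 40 48 56 64 80 96 112 128 160 192 224
  return 0;
}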
- static constexpr size_t REMOTE_SLOT_BITS = 6; - static constexpr size_t REMOTE_SLOTS = 1 << REMOTE_SLOT_BITS; - static constexpr size_t REMOTE_MASK = REMOTE_SLOTS - 1; - - static_assert( - INTERMEDIATE_BITS < MIN_ALLOC_BITS, - "INTERMEDIATE_BITS must be less than MIN_ALLOC_BITS"); - static_assert( - MIN_ALLOC_SIZE >= (sizeof(void*) * 2), - "MIN_ALLOC_SIZE must be sufficient for two pointers"); - static_assert( - SLAB_BITS <= (sizeof(uint16_t) * 8), - "SLAB_BITS must not be more than the bits in a uint16_t"); - static_assert( - SLAB_COUNT == bits::next_pow2_const(SLAB_COUNT), - "SLAB_COUNT must be a power of 2"); - static_assert( - SLAB_COUNT <= (UINT8_MAX + 1), "SLAB_COUNT must fit in a uint8_t"); -} // namespace snmalloc diff --git a/src/mem/allocslab.h b/src/mem/allocslab.h deleted file mode 100644 index 579da85d0..000000000 --- a/src/mem/allocslab.h +++ /dev/null @@ -1,20 +0,0 @@ -#pragma once - -#include "baseslab.h" - -namespace snmalloc -{ - struct RemoteAllocator; - - class Allocslab : public Baseslab - { - protected: - RemoteAllocator* allocator; - - public: - RemoteAllocator* get_allocator() - { - return allocator; - } - }; -} // namespace snmalloc diff --git a/src/mem/allocstats.h b/src/mem/allocstats.h deleted file mode 100644 index 488493f9b..000000000 --- a/src/mem/allocstats.h +++ /dev/null @@ -1,417 +0,0 @@ -#pragma once - -#include "../ds/bits.h" -#include "../mem/sizeclass.h" - -#include - -#ifdef USE_SNMALLOC_STATS -# include "../ds/csv.h" -# include "sizeclass.h" - -# include -# include -#endif - -namespace snmalloc -{ - template - struct AllocStats - { - struct CurrentMaxPair - { - size_t current = 0; - size_t max = 0; - size_t used = 0; - - void inc() - { - current++; - used++; - if (current > max) - max++; - } - - void dec() - { - SNMALLOC_ASSERT(current > 0); - current--; - } - - bool is_empty() - { - return current == 0; - } - - bool is_unused() - { - return max == 0; - } - - void add(CurrentMaxPair& that) - { - current += that.current; - max += that.max; - used += that.used; - } -#ifdef USE_SNMALLOC_STATS - void print(CSVStream& csv, size_t multiplier = 1) - { - csv << current * multiplier << max * multiplier << used * multiplier; - } -#endif - }; - - struct Stats - { - CurrentMaxPair count; - CurrentMaxPair slab_count; - uint64_t time = Aal::tick(); - uint64_t ticks = 0; - double online_average = 0; - - bool is_empty() - { - return count.is_empty(); - } - - void add(Stats& that) - { - count.add(that.count); - slab_count.add(that.slab_count); - } - - void addToRunningAverage() - { - uint64_t now = Aal::tick(); - - if (slab_count.current != 0) - { - double occupancy = static_cast(count.current) / - static_cast(slab_count.current); - uint64_t duration = now - time; - - if (ticks == 0) - online_average = occupancy; - else - online_average += - ((occupancy - online_average) * static_cast(duration)) / - static_cast(ticks); - - ticks += duration; - } - - time = now; - } - -#ifdef USE_SNMALLOC_STATS - void - print(CSVStream& csv, size_t multiplier = 1, size_t slab_multiplier = 1) - { - // Keep in sync with header lower down - count.print(csv, multiplier); - slab_count.print(csv, slab_multiplier); - size_t average = - static_cast(online_average * static_cast(multiplier)); - - csv << average << (slab_multiplier - average) * slab_count.max - << csv.endl; - } -#endif - }; - -#ifdef USE_SNMALLOC_STATS - static constexpr size_t BUCKETS_BITS = 4; - static constexpr size_t BUCKETS = 1 << BUCKETS_BITS; - static constexpr size_t TOTAL_BUCKETS = - bits::to_exp_mant_const( - 
bits::one_at_bit(bits::ADDRESS_BITS - 1)); - - Stats sizeclass[N]; - - size_t large_pop_count[LARGE_N] = {0}; - size_t large_push_count[LARGE_N] = {0}; - - size_t remote_freed = 0; - size_t remote_posted = 0; - size_t remote_received = 0; - size_t superslab_push_count = 0; - size_t superslab_pop_count = 0; - size_t superslab_fresh_count = 0; - size_t segment_count = 0; - size_t bucketed_requests[TOTAL_BUCKETS] = {}; -#endif - - void alloc_request(size_t size) - { - UNUSED(size); - -#ifdef USE_SNMALLOC_STATS - auto index = (size == 0) ? 0 : bits::to_exp_mant(size); - SNMALLOC_ASSERT(index < TOTAL_BUCKETS); - bucketed_requests[index]++; -#endif - } - - bool is_empty() - { -#ifdef USE_SNMALLOC_STATS - for (size_t i = 0; i < N; i++) - { - if (!sizeclass[i].is_empty()) - return false; - } - - for (size_t i = 0; i < LARGE_N; i++) - { - if (large_push_count[i] != large_pop_count[i]) - return false; - } - - return (remote_freed == remote_posted); -#else - return true; -#endif - } - - void sizeclass_alloc(sizeclass_t sc) - { - UNUSED(sc); - -#ifdef USE_SNMALLOC_STATS - sizeclass[sc].addToRunningAverage(); - sizeclass[sc].count.inc(); -#endif - } - - void sizeclass_dealloc(sizeclass_t sc) - { - UNUSED(sc); - -#ifdef USE_SNMALLOC_STATS - sizeclass[sc].addToRunningAverage(); - sizeclass[sc].count.dec(); -#endif - } - - void large_alloc(size_t sc) - { - UNUSED(sc); - -#ifdef USE_SNMALLOC_STATS - SNMALLOC_ASSUME(sc < LARGE_N); - large_pop_count[sc]++; -#endif - } - - void sizeclass_alloc_slab(sizeclass_t sc) - { - UNUSED(sc); - -#ifdef USE_SNMALLOC_STATS - sizeclass[sc].addToRunningAverage(); - sizeclass[sc].slab_count.inc(); -#endif - } - - void sizeclass_dealloc_slab(sizeclass_t sc) - { - UNUSED(sc); - -#ifdef USE_SNMALLOC_STATS - sizeclass[sc].addToRunningAverage(); - sizeclass[sc].slab_count.dec(); -#endif - } - - void large_dealloc(size_t sc) - { - UNUSED(sc); - -#ifdef USE_SNMALLOC_STATS - large_push_count[sc]++; -#endif - } - - void segment_create() - { -#ifdef USE_SNMALLOC_STATS - segment_count++; -#endif - } - - void superslab_pop() - { -#ifdef USE_SNMALLOC_STATS - superslab_pop_count++; -#endif - } - - void superslab_push() - { -#ifdef USE_SNMALLOC_STATS - superslab_push_count++; -#endif - } - - void superslab_fresh() - { -#ifdef USE_SNMALLOC_STATS - superslab_fresh_count++; -#endif - } - - void remote_free(sizeclass_t sc) - { - UNUSED(sc); - -#ifdef USE_SNMALLOC_STATS - remote_freed += sizeclass_to_size(sc); -#endif - } - - void remote_post() - { -#ifdef USE_SNMALLOC_STATS - remote_posted = remote_freed; -#endif - } - - void remote_receive(sizeclass_t sc) - { - UNUSED(sc); - -#ifdef USE_SNMALLOC_STATS - remote_received += sizeclass_to_size(sc); -#endif - } - - void add(AllocStats& that) - { - UNUSED(that); - -#ifdef USE_SNMALLOC_STATS - for (size_t i = 0; i < N; i++) - sizeclass[i].add(that.sizeclass[i]); - - for (size_t i = 0; i < LARGE_N; i++) - { - large_push_count[i] += that.large_push_count[i]; - large_pop_count[i] += that.large_pop_count[i]; - } - - for (size_t i = 0; i < TOTAL_BUCKETS; i++) - bucketed_requests[i] += that.bucketed_requests[i]; - - remote_freed += that.remote_freed; - remote_posted += that.remote_posted; - remote_received += that.remote_received; - superslab_pop_count += that.superslab_pop_count; - superslab_push_count += that.superslab_push_count; - superslab_fresh_count += that.superslab_fresh_count; - segment_count += that.segment_count; -#endif - } - -#ifdef USE_SNMALLOC_STATS - template - void print(std::ostream& o, uint64_t dumpid = 0, uint64_t allocatorid = 0) - { 
- UNUSED(o); - UNUSED(dumpid); - UNUSED(allocatorid); - - CSVStream csv(&o); - - if (dumpid == 0) - { - // Output headers for initial dump - // Keep in sync with data dump - csv << "GlobalStats" - << "DumpID" - << "AllocatorID" - << "Remote freed" - << "Remote posted" - << "Remote received" - << "Superslab pop" - << "Superslab push" - << "Superslab fresh" - << "Segments" << csv.endl; - - csv << "BucketedStats" - << "DumpID" - << "AllocatorID" - << "Size group" - << "Size" - << "Current count" - << "Max count" - << "Total Allocs" - << "Current Slab bytes" - << "Max Slab bytes" - << "Total slab allocs" - << "Average Slab Usage" - << "Average wasted space" << csv.endl; - - csv << "LargeBucketedStats" - << "DumpID" - << "AllocatorID" - << "Size group" - << "Size" - << "Push count" - << "Pop count" << csv.endl; - - csv << "AllocSizes" - << "DumpID" - << "AllocatorID" - << "ClassID" - << "Low size" - << "High size" - << "Count" << csv.endl; - } - - for (sizeclass_t i = 0; i < N; i++) - { - if (sizeclass[i].count.is_unused()) - continue; - - sizeclass[i].addToRunningAverage(); - - csv << "BucketedStats" << dumpid << allocatorid << i - << sizeclass_to_size(i); - - sizeclass[i].print(csv, sizeclass_to_size(i)); - } - - for (uint8_t i = 0; i < LARGE_N; i++) - { - if ((large_push_count[i] == 0) && (large_pop_count[i] == 0)) - continue; - - csv << "LargeBucketedStats" << dumpid << allocatorid << (i + N) - << large_sizeclass_to_size(i) << large_push_count[i] - << large_pop_count[i] << csv.endl; - } - - size_t low = 0; - size_t high = 0; - - for (size_t i = 0; i < TOTAL_BUCKETS; i++) - { - low = high + 1; - high = bits::from_exp_mant(i); - - if (bucketed_requests[i] == 0) - continue; - - csv << "AllocSizes" << dumpid << allocatorid << i << low << high - << bucketed_requests[i] << csv.endl; - } - - csv << "GlobalStats" << dumpid << allocatorid << remote_freed - << remote_posted << remote_received << superslab_pop_count - << superslab_push_count << superslab_fresh_count << segment_count - << csv.endl; - } -#endif - }; -} // namespace snmalloc diff --git a/src/mem/arenamap.h b/src/mem/arenamap.h deleted file mode 100644 index bf12816ad..000000000 --- a/src/mem/arenamap.h +++ /dev/null @@ -1,130 +0,0 @@ -#include "../ds/ptrwrap.h" -#include "pagemap.h" - -namespace snmalloc -{ - struct default_alloc_size_t - { - /* - * Just make something up for non-StrictProvenance architectures. - * Ultimately, this is going to flow only to FlatPagemap's template argument - * for the number of bits it's covering but the whole thing will be - * discarded by the time we resolve all the conditionals behind the - * AuthPagemap type. To avoid pathologies where COVERED_BITS ends up being - * bit-width of the machine (meaning 1ULL << COVERED_BITS becomes undefined) - * and where sizeof(std::atomic[ENTRIES]) is either undefined or - * enormous, we choose a value that dodges both endpoints and still results - * in a small table. - */ - static constexpr size_t capptr_root_alloc_size = - bits::one_at_bit(bits::ADDRESS_BITS - 8); - }; - - /* - * Compute the block allocation size to use for AlignedAllocations. This - * is either PAL::capptr_root_alloc_size, on architectures that require - * StrictProvenance, or the placeholder from above. 
- */ - template - static constexpr size_t AUTHMAP_ALLOC_SIZE = std::conditional_t< - aal_supports, - PAL, - default_alloc_size_t>::capptr_root_alloc_size; - - template - static constexpr size_t - AUTHMAP_BITS = bits::next_pow2_bits_const(AUTHMAP_ALLOC_SIZE); - - template - static constexpr bool - AUTHMAP_USE_FLATPAGEMAP = pal_supports || - (PAGEMAP_NODE_SIZE >= sizeof(FlatPagemap, void*>)); - - struct default_auth_pagemap - { - static SNMALLOC_FAST_PATH void* get(address_t a) - { - UNUSED(a); - return nullptr; - } - }; - - template - using AuthPagemap = std::conditional_t< - aal_supports, - std::conditional_t< - AUTHMAP_USE_FLATPAGEMAP, - FlatPagemap, void*>, - Pagemap, void*, nullptr, PrimAlloc>>, - default_auth_pagemap>; - - struct ForAuthmap - {}; - template - using GlobalAuthmap = - GlobalPagemapTemplate, ForAuthmap>; - - template - struct DefaultArenaMapTemplate - { - /* - * Without AlignedAllocation, we (below) adopt a fallback mechanism that - * over-allocates and then finds an aligned region within the too-large - * region. The "trimmings" from either side are also registered in hopes - * that they can be used for later allocations. - * - * Unfortunately, that strategy does not work for this ArenaMap: trimmings - * may be smaller than the granularity of our backing PageMap, and so we - * would be unable to amplify authority. Eventually we may arrive at a need - * for an ArenaMap that is compatible with this approach, but for the moment - * it's far simpler to assume that we can always ask for memory sufficiently - * aligned to cover an entire PageMap granule. - */ - static_assert( - !aal_supports || pal_supports, - "StrictProvenance requires platform support for aligned allocation"); - - static constexpr size_t alloc_size = AUTHMAP_ALLOC_SIZE; - - /* - * Because we assume that we can `capptr_amplify` and then - * `Superslab::get()` on the result to get to the Superslab metadata - * headers, it must be the case that provenance roots cover entire - * Superslabs. - */ - static_assert( - !aal_supports || - ((alloc_size > 0) && (alloc_size % SUPERSLAB_SIZE == 0)), - "Provenance root granule must encompass whole superslabs"); - - static void register_root(CapPtr root) - { - if constexpr (aal_supports) - { - PagemapProvider::pagemap().set(address_cast(root), root.unsafe_capptr); - } - else - { - UNUSED(root); - } - } - - template - static SNMALLOC_FAST_PATH CapPtr capptr_amplify(CapPtr r) - { - static_assert( - B == CBAllocE || B == CBAlloc, - "Attempting to amplify an unexpectedly high pointer"); - return Aal::capptr_rebound( - CapPtr( - PagemapProvider::pagemap().get(address_cast(r))), - r) - .template as_static(); - } - }; - - template - using DefaultArenaMap = - DefaultArenaMapTemplate>; - -} // namespace snmalloc diff --git a/src/mem/baseslab.h b/src/mem/baseslab.h deleted file mode 100644 index 9ca661c0b..000000000 --- a/src/mem/baseslab.h +++ /dev/null @@ -1,32 +0,0 @@ -#pragma once - -#include "../ds/mpmcstack.h" -#include "allocconfig.h" - -namespace snmalloc -{ - enum SlabKind - { - Fresh = 0, - Large, - Medium, - Super, - /** - * If the decommit policy is lazy, slabs are moved to this state when all - * pages other than the first one have been decommitted. 
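/*
 * Editorial sketch, not part of the original file: the compile-time selection
 * idiom used by AUTHMAP_USE_FLATPAGEMAP / AuthPagemap above. A flat table is
 * chosen when it would fit within one pagemap node, otherwise a multi-level
 * tree is used. The 64 KiB node size and the type names are assumptions.
 */
#include <cstddef>
#include <type_traits>

template<size_t CoveredBits>
struct FlatMapSketch
{}; // one entry per granule, directly indexed

template<size_t CoveredBits>
struct TreeMapSketch
{}; // radix tree, allocated lazily

constexpr size_t NODE_SIZE_SKETCH = size_t{1} << 16;

template<size_t CoveredBits>
using MapForSketch = std::conditional_t<
  (size_t{1} << CoveredBits) <= NODE_SIZE_SKETCH, // flat table fits in a node?
  FlatMapSketch<CoveredBits>,
  TreeMapSketch<CoveredBits>>;

static_assert(std::is_same_v<MapForSketch<12>, FlatMapSketch<12>>);
static_assert(std::is_same_v<MapForSketch<40>, TreeMapSketch<40>>);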
- */ - Decommitted - }; - - class Baseslab - { - protected: - SlabKind kind; - - public: - SlabKind get_kind() - { - return kind; - } - }; -} // namespace snmalloc diff --git a/src/mem/chunkmap.h b/src/mem/chunkmap.h deleted file mode 100644 index 90175583c..000000000 --- a/src/mem/chunkmap.h +++ /dev/null @@ -1,195 +0,0 @@ -#pragma once - -using namespace std; - -#include "../ds/address.h" -#include "largealloc.h" -#include "mediumslab.h" -#include "pagemap.h" -#include "slab.h" - -namespace snmalloc -{ - enum ChunkMapSuperslabKind : uint8_t - { - CMNotOurs = 0, - CMSuperslab = 1, - CMMediumslab = 2, - - /* - * Values 3 (inclusive) through SUPERSLAB_BITS (exclusive) are as yet - * unused. - * - * Values SUPERSLAB_BITS (inclusive) through 64 (exclusive, as it would - * represent the entire address space) are used for log2(size) at the - * heads of large allocations. See SuperslabMap::set_large_size. - */ - CMLargeMin = SUPERSLAB_BITS, - CMLargeMax = 63, - - /* - * Values 64 (inclusive) through 64 + SUPERSLAB_BITS (exclusive) are unused - */ - - /* - * Values 64 + SUPERSLAB_BITS (inclusive) through 128 (exclusive) are used - * for entries within a large allocation. A value of x at pagemap entry p - * indicates that there are at least 2^(x-64) (inclusive) and at most - * 2^(x+1-64) (exclusive) page map entries between p and the start of the - * allocation. See ChunkMap::set_large_size and external_address's - * handling of large reallocation redirections. - */ - CMLargeRangeMin = 64 + SUPERSLAB_BITS, - CMLargeRangeMax = 127, - - /* - * Values 128 (inclusive) through 255 (inclusive) are as yet unused. - */ - - }; - - /* - * Ensure that ChunkMapSuperslabKind values are actually disjoint, i.e., - * that large allocations don't land on CMMediumslab. - */ - static_assert( - SUPERSLAB_BITS > CMMediumslab, "Large allocations may be too small"); - -#ifndef SNMALLOC_MAX_FLATPAGEMAP_SIZE -/* - * Unless otherwise specified, use a flat pagemap for the chunkmap (1 byte per - * Superslab-sized and -aligned region of the address space) if either of the - * following hold: - * - * - the platform supports LazyCommit and the flat structure would occupy 256 - * MiB or less. 256 MiB is more than adequate for 32-bit architectures and - * is the size of the flat pagemap for a 48-bit AS with the default chunk - * size or the USE_LARGE_CHUNKS chunksize (that is, configurations other - * than USE_SMALL_CHUNKS). - * - * - the platform does not support LazyCommit but the flat structure would - * occupy less than PAGEMAP_NODE_SIZE (i.e., the backing store for an - * internal tree node in the non-flat pagemap). - */ -# define SNMALLOC_MAX_FLATPAGEMAP_SIZE \ - (pal_supports ? 256ULL * 1024 * 1024 : PAGEMAP_NODE_SIZE) -#endif - static constexpr bool CHUNKMAP_USE_FLATPAGEMAP = - SNMALLOC_MAX_FLATPAGEMAP_SIZE >= - sizeof(FlatPagemap); - - using ChunkmapPagemap = std::conditional_t< - CHUNKMAP_USE_FLATPAGEMAP, - FlatPagemap, - Pagemap>; - - struct ForChunkmap - {}; - using GlobalChunkmap = GlobalPagemapTemplate; - - /** - * Optionally exported function that accesses the global chunkmap pagemap - * provided by a shared library. - */ - extern "C" void* - snmalloc_chunkmap_global_get(snmalloc::PagemapConfig const**); - - /** - * Class that defines an interface to the pagemap. This is provided to - * `Allocator` as a template argument and so can be replaced by a compatible - * implementation (for example, to move pagemap updates to a different - * protection domain). 
- */ - template - struct DefaultChunkMap - { - /** - * Get the pagemap entry corresponding to a specific address. - * - * Despite the type, the return value is an enum ChunkMapSuperslabKind - * or one of the reserved values described therewith. - */ - static uint8_t get(address_t p) - { - return PagemapProvider::pagemap().get(p); - } - - /** - * Set a pagemap entry indicating that there is a superslab at the - * specified index. - */ - static void set_slab(CapPtr slab) - { - set(address_cast(slab), static_cast(CMSuperslab)); - } - /** - * Add a pagemap entry indicating that a medium slab has been allocated. - */ - static void set_slab(CapPtr slab) - { - set(address_cast(slab), static_cast(CMMediumslab)); - } - /** - * Remove an entry from the pagemap corresponding to a superslab. - */ - static void clear_slab(CapPtr slab) - { - SNMALLOC_ASSERT(get(address_cast(slab)) == CMSuperslab); - set(address_cast(slab), static_cast(CMNotOurs)); - } - /** - * Remove an entry corresponding to a medium slab. - */ - static void clear_slab(CapPtr slab) - { - SNMALLOC_ASSERT(get(address_cast(slab)) == CMMediumslab); - set(address_cast(slab), static_cast(CMNotOurs)); - } - /** - * Update the pagemap to reflect a large allocation, of `size` bytes from - * address `p`. - */ - static void set_large_size(CapPtr p, size_t size) - { - size_t size_bits = bits::next_pow2_bits(size); - set(address_cast(p), static_cast(size_bits)); - // Set redirect slide - auto ss = address_cast(p) + SUPERSLAB_SIZE; - for (size_t i = 0; i < size_bits - SUPERSLAB_BITS; i++) - { - size_t run = bits::one_at_bit(i); - PagemapProvider::pagemap().set_range( - ss, static_cast(CMLargeRangeMin + i), run); - ss = ss + SUPERSLAB_SIZE * run; - } - } - /** - * Update the pagemap to remove a large allocation, of `size` bytes from - * address `p`. - */ - static void clear_large_size(CapPtr vp, size_t size) - { - auto p = address_cast(vp); - size_t rounded_size = bits::next_pow2(size); - SNMALLOC_ASSERT(get(p) == bits::next_pow2_bits(size)); - auto count = rounded_size >> SUPERSLAB_BITS; - PagemapProvider::pagemap().set_range(p, CMNotOurs, count); - } - - private: - /** - * Helper function to set a pagemap entry. This is not part of the public - * interface and exists to make it easy to reuse the code in the public - * methods in other pagemap adaptors. - */ - static void set(address_t p, uint8_t x) - { - PagemapProvider::pagemap().set(p, x); - } - }; - -#ifndef SNMALLOC_DEFAULT_CHUNKMAP -# define SNMALLOC_DEFAULT_CHUNKMAP snmalloc::DefaultChunkMap<> -#endif - -} // namespace snmalloc diff --git a/src/mem/freelist.h b/src/mem/freelist.h deleted file mode 100644 index c8766c8cd..000000000 --- a/src/mem/freelist.h +++ /dev/null @@ -1,504 +0,0 @@ -#pragma once -/** - * This file encapsulates the in disused object free lists - * that are used per slab of small objects. - */ - -#include "../ds/address.h" -#include "../ds/cdllist.h" -#include "../ds/dllist.h" -#include "../ds/helpers.h" -#include "allocconfig.h" -#include "entropy.h" - -#include - -namespace snmalloc -{ -#ifdef CHECK_CLIENT - static constexpr std::size_t PRESERVE_BOTTOM_BITS = 16; -#endif - - /** - * Used to turn a location into a key. This is currently - * just the slab address truncated to 16bits and offset by 1. - */ - template - inline static address_t initial_key(CapPtr slab) - { -#ifdef CHECK_CLIENT - /** - * This file assumes that SLAB_BITS is smaller than 16. In multiple - * places it uses uint16_t to represent the offset into a slab. 
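/*
 * Editorial sketch, not part of the original file: a model of the "redirect
 * slide" written by set_large_size above. Interior pagemap entry k (k >= 1
 * chunks past the head of a large allocation) stores BASE + floor(log2(k)),
 * so a query can hop back to the head in O(log n) steps. BASE stands in for
 * CMLargeRangeMin; the 32-chunk allocation is an assumed example.
 */
#include <cassert>
#include <cstddef>
#include <vector>

int main()
{
  constexpr size_t BASE = 64;
  constexpr size_t chunks = 32;

  auto floor_log2 = [](size_t x) {
    size_t b = 0;
    while (x >>= 1)
      b++;
    return b;
  };

  // What set_large_size writes for each interior chunk.
  std::vector<size_t> map(chunks, 0);
  for (size_t k = 1; k < chunks; k++)
    map[k] = BASE + floor_log2(k);

  // Recover the head from an arbitrary interior chunk by repeatedly jumping
  // back 2^(entry - BASE) chunks; the remaining distance at least halves on
  // every hop, so this terminates in O(log n) steps.
  size_t k = 29;
  while (k != 0)
    k -= size_t{1} << (map[k] - BASE);
  assert(k == 0);
}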
- */ - static_assert( - SLAB_BITS <= 16, - "Encoding requires slab offset representable in 16bits."); - - return (address_cast(slab) & SLAB_MASK) + 1; -#else - UNUSED(slab); - return 0; -#endif - } - - static inline bool different_slab(address_t p1, address_t p2) - { - return ((p1 ^ p2) >= SLAB_SIZE); - } - - template - static inline bool different_slab(address_t p1, CapPtr p2) - { - return different_slab(p1, address_cast(p2)); - } - - template - static inline bool - different_slab(CapPtr p1, CapPtr p2) - { - return different_slab(address_cast(p1), address_cast(p2)); - } - - class FreeObject; - - class EncodeFreeObjectReference - { - CapPtr reference; - - /** - * On architectures which use IntegerPointers, we can obfuscate our free - * lists and use this to drive some probabilistic checks for integrity. - * - * There are two definitions of encode() below, which use std::enable_if_t - * to gate on do_encode. - */ -#ifndef CHECK_CLIENT - static constexpr bool do_encode = false; -#else - static constexpr bool do_encode = aal_supports; -#endif - - public: -#ifdef CHECK_CLIENT - template - static std::enable_if_t> encode( - uint16_t local_key, CapPtr next_object, LocalEntropy& entropy) - { - // Simple involutional encoding. The bottom half of each word is - // multiplied by a function of both global and local keys (the latter, - // in practice, being the address of the previous list entry) and the - // resulting word's top part is XORed into the pointer value before it - // is stored. - auto next = address_cast(next_object); - constexpr address_t MASK = bits::one_at_bit(PRESERVE_BOTTOM_BITS) - 1; - // Mix in local_key - address_t key = (local_key + 1) * entropy.get_constant_key(); - next ^= (((next & MASK) + 1) * key) & - ~(bits::one_at_bit(PRESERVE_BOTTOM_BITS) - 1); - return CapPtr(reinterpret_cast(next)); - } -#endif - - template - static std::enable_if_t> encode( - uint16_t local_key, CapPtr next_object, LocalEntropy& entropy) - { - UNUSED(local_key); - UNUSED(entropy); - return next_object; - } - - void store( - CapPtr value, - uint16_t local_key, - LocalEntropy& entropy) - { - reference = encode(local_key, value, entropy); - } - - CapPtr read(uint16_t local_key, LocalEntropy& entropy) - { - return encode(local_key, reference, entropy); - } - }; - - struct Remote; - /** - * Free objects within each slab point directly to the next. - * The next_object pointer can be encoded to detect - * corruption caused by writes in a UAF or a double free. - */ - class FreeObject - { - public: - EncodeFreeObjectReference next_object; - - static CapPtr make(CapPtr p) - { - return p.template as_static(); - } - - /** - * Construct a FreeObject for local slabs from a Remote message. - */ - static CapPtr make(CapPtr p) - { - // TODO: Zero the difference between a FreeObject and a Remote - return p.template as_reinterpret(); - } - - /** - * Read the next pointer handling any required decoding of the pointer - */ - CapPtr read_next(uint16_t key, LocalEntropy& entropy) - { - return next_object.read(key, entropy); - } - }; - - /** - * Used to iterate a free list in object space. - * - * Checks signing of pointers - */ - class FreeListIter - { - CapPtr curr = nullptr; -#ifdef CHECK_CLIENT - address_t prev = 0; -#endif - - uint16_t get_prev() - { -#ifdef CHECK_CLIENT - return prev & 0xffff; -#else - return 0; -#endif - } - - /** - * Updates the cursor to the new value, - * importantly this updates the key being used. - * Currently this is just the value of current before this call. - * Other schemes could be used. 
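/*
 * Editorial sketch, not part of the original file: the obfuscation in
 * encode() above is an involution. Only bits above the preserved bottom bits
 * are XORed with a value derived from the (unchanged) bottom bits, so
 * applying the transform twice yields the original pointer value. The
 * pointer value and key below are made up for the demonstration.
 */
#include <cassert>
#include <cstdint>

int main()
{
  constexpr uintptr_t MASK = (uintptr_t{1} << 16) - 1; // bottom 16 bits kept

  auto encode = [&](uintptr_t p, uintptr_t key) {
    return p ^ ((((p & MASK) + 1) * key) & ~MASK);
  };

  uintptr_t p = static_cast<uintptr_t>(0x7fffdeadbe40ULL);
  uintptr_t key = static_cast<uintptr_t>(0x9e3779b97f4a7c15ULL);

  assert((encode(p, key) & MASK) == (p & MASK)); // low bits untouched
  assert(encode(encode(p, key), key) == p);      // involution: decode == encode
}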
- */ - void update_cursor(CapPtr next) - { -#ifdef CHECK_CLIENT -# ifndef NDEBUG - if (next != nullptr) - { - check_client( - !different_slab(prev, next), - "Heap corruption - free list corrupted!"); - } -# endif - prev = address_cast(curr); -#endif - curr = next; - } - - public: - FreeListIter(CapPtr head) - : curr(head) -#ifdef CHECK_CLIENT - , - prev(initial_key(head)) -#endif - { - SNMALLOC_ASSERT(head != nullptr); - } - - FreeListIter() = default; - - /** - * Checks if there are any more values to iterate. - */ - bool empty() - { - return curr == nullptr; - } - - /** - * Returns current head without affecting the iterator. - */ - CapPtr peek() - { - return curr; - } - - /** - * Moves the iterator on, and returns the current value. - */ - CapPtr take(LocalEntropy& entropy) - { -#ifdef CHECK_CLIENT - check_client( - !different_slab(prev, curr), "Heap corruption - free list corrupted!"); -#endif - auto c = curr; - update_cursor(curr->read_next(get_prev(), entropy)); - return c; - } - }; - - /** - * Used to build a free list in object space. - * - * Adds signing of pointers - * - * On 64bit ptr architectures this data structure has - * 44 bytes of data - * and has an alignment of - * 8 bytes - * This unfortunately means its sizeof is 48bytes. We - * use the template parameter, so that an enclosing - * class can make use of the remaining four bytes. - * - * The builder uses two queues, and "randomly" decides to - * add to one of the two queues. This means that we will - * maintain a randomisation of the order between - * allocations. - * - * The fields are paired up to give better codegen as then they are offset - * by a power of 2, and the bit extract from the interleaving seed can - * be shifted to calculate the relevant offset to index the fields. - * - * If RANDOM is set to false, then the code does not perform any - * randomisation. - */ - template - class FreeListBuilder - { - static constexpr size_t LENGTH = RANDOM ? 2 : 1; - - // Pointer to the first element. - EncodeFreeObjectReference head[LENGTH]; - // Pointer to the reference to the last element. - // In the empty case end[i] == &head[i] - // This enables branch free enqueuing. - EncodeFreeObjectReference* end[LENGTH]; -#ifdef CHECK_CLIENT - // The bottom 16 bits of the previous pointer - uint16_t prev[LENGTH]; - // The bottom 16 bits of the current pointer - // This needs to be stored for the empty case - // where it is `initial_key()` for the slab. - uint16_t curr[LENGTH]; -#endif - public: - S s; - - uint16_t get_prev(uint32_t index) - { -#ifdef CHECK_CLIENT - return prev[index]; -#else - UNUSED(index); - return 0; -#endif - } - - uint16_t get_curr(uint32_t index) - { -#ifdef CHECK_CLIENT - return curr[index]; -#else - UNUSED(index); - return 0; -#endif - } - - static constexpr uint16_t HEAD_KEY = 1; - - public: - FreeListBuilder() - { - init(); - } - - /** - * Start building a new free list. - * Provide pointer to the slab to initialise the system. - */ - void open(CapPtr p) - { - SNMALLOC_ASSERT(empty()); - for (size_t i = 0; i < LENGTH; i++) - { -#ifdef CHECK_CLIENT - prev[i] = HEAD_KEY; - curr[i] = initial_key(p) & 0xffff; -#else - UNUSED(p); -#endif - end[i] = &head[i]; - } - } - - /** - * Checks if the builder contains any elements. 
- */ - bool empty() - { - for (size_t i = 0; i < LENGTH; i++) - { - if (address_cast(end[i]) != address_cast(&head[i])) - return false; - } - return true; - } - - bool debug_different_slab(CapPtr n) - { - for (size_t i = 0; i < LENGTH; i++) - { - if (!different_slab(address_cast(end[i]), n)) - return false; - } - return true; - } - - /** - * Adds an element to the builder - */ - void add(CapPtr n, LocalEntropy& entropy) - { - SNMALLOC_ASSERT(!debug_different_slab(n) || empty()); - - auto index = RANDOM ? entropy.next_bit() : 0; - - end[index]->store(n, get_prev(index), entropy); - end[index] = &(n->next_object); -#ifdef CHECK_CLIENT - prev[index] = curr[index]; - curr[index] = address_cast(n) & 0xffff; -#endif - } - - /** - * Calculates the length of the queue. - * This is O(n) as it walks the queue. - * If this is needed in a non-debug setting then - * we should look at redesigning the queue. - */ - size_t debug_length(LocalEntropy& entropy) - { - size_t count = 0; - for (size_t i = 0; i < LENGTH; i++) - { - uint16_t local_prev = HEAD_KEY; - EncodeFreeObjectReference* iter = &head[i]; - CapPtr prev_obj = iter->read(local_prev, entropy); - uint16_t local_curr = initial_key(prev_obj) & 0xffff; - while (end[i] != iter) - { - CapPtr next = iter->read(local_prev, entropy); - check_client(!different_slab(next, prev_obj), "Heap corruption"); - local_prev = local_curr; - local_curr = address_cast(next) & 0xffff; - count++; - iter = &next->next_object; - } - } - return count; - } - - /** - * Adds a terminator at the end of a free list, - * but does not close the builder. Thus new elements - * can still be added. It returns a new iterator to the - * list. - * - * This also collapses the two queues into one, so that it can - * be iterated easily. - * - * This is used to iterate an list that is being constructed. - * - * It is used with preserve_queue enabled to check - * invariants in Debug builds. - * - * It is used with preserve_queue disabled by close. - */ - FreeListIter terminate(LocalEntropy& entropy, bool preserve_queue = true) - { - if constexpr (RANDOM) - { - SNMALLOC_ASSERT(end[1] != &head[0]); - SNMALLOC_ASSERT(end[0] != &head[1]); - - // If second list is non-empty, perform append. - if (end[1] != &head[1]) - { - end[1]->store(nullptr, get_prev(1), entropy); - - // Append 1 to 0 - auto mid = head[1].read(HEAD_KEY, entropy); - end[0]->store(mid, get_prev(0), entropy); - // Re-code first link in second list (if there is one). - // The first link in the second list will be encoded with initial_key, - // But that needs to be changed to the curr of the first list. - if (mid != nullptr) - { - auto mid_next = mid->read_next(initial_key(mid) & 0xffff, entropy); - mid->next_object.store(mid_next, get_curr(0), entropy); - } - - auto h = head[0].read(HEAD_KEY, entropy); - - // If we need to continue adding to the builder - // Set up the second list as empty, - // and extend the first list to cover all of the second. - if (preserve_queue && h != nullptr) - { -#ifdef CHECK_CLIENT - prev[0] = prev[1]; - curr[0] = curr[1]; -#endif - end[0] = end[1]; -#ifdef CHECK_CLIENT - prev[1] = HEAD_KEY; - curr[1] = initial_key(h) & 0xffff; -#endif - end[1] = &(head[1]); - } - - SNMALLOC_ASSERT(end[1] != &head[0]); - SNMALLOC_ASSERT(end[0] != &head[1]); - - return {h}; - } - } - else - { - UNUSED(preserve_queue); - } - - end[0]->store(nullptr, get_prev(0), entropy); - return {head[0].read(HEAD_KEY, entropy)}; - } - - /** - * Close a free list, and set the iterator parameter - * to iterate it. 
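/*
 * Editorial sketch, not part of the original file: the randomisation scheme
 * described for FreeListBuilder above. Each freed element is appended to one
 * of two queues chosen by a random bit, and terminate()/close() splice the
 * second queue onto the first, so the order in which objects are handed back
 * out differs from the order in which they were freed.
 */
#include <cstdlib>
#include <deque>
#include <iostream>

int main()
{
  std::deque<int> q[2];

  // add(): a random bit (entropy.next_bit() in the real code) picks a queue.
  for (int i = 0; i < 16; i++)
    q[std::rand() & 1].push_back(i);

  // terminate(): append the second queue to the first and hand the result out.
  q[0].insert(q[0].end(), q[1].begin(), q[1].end());

  for (int v : q[0])
    std::cout << v << ' ';
  std::cout << '\n';
}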
- */ - void close(FreeListIter& dst, LocalEntropy& entropy) - { - dst = terminate(entropy, false); - init(); - } - - /** - * Set the builder to a not building state. - */ - void init() - { - for (size_t i = 0; i < LENGTH; i++) - { - end[i] = &head[i]; - } - } - }; -} // namespace snmalloc diff --git a/src/mem/globalalloc.h b/src/mem/globalalloc.h deleted file mode 100644 index 7ce318990..000000000 --- a/src/mem/globalalloc.h +++ /dev/null @@ -1,200 +0,0 @@ -#pragma once - -#include "../ds/helpers.h" -#include "alloc.h" -#include "pool.h" - -namespace snmalloc -{ - inline bool needs_initialisation(void*); - void* init_thread_allocator(function_ref); - - template - class AllocPool : Pool - { - using Parent = Pool; - - public: - static AllocPool* make(MemoryProvider& mp) - { - static_assert( - sizeof(AllocPool) == sizeof(Parent), - "You cannot add fields to this class."); - // This cast is safe due to the static assert. - return static_cast(Parent::make(mp)); - } - - static AllocPool* make() noexcept - { - return make(default_memory_provider()); - } - - Alloc* acquire() - { - return Parent::acquire(Parent::memory_provider); - } - - void release(Alloc* a) - { - Parent::release(a); - } - - public: - void aggregate_stats(Stats& stats) - { - auto* alloc = Parent::iterate(); - - while (alloc != nullptr) - { - stats.add(alloc->stats()); - alloc = Parent::iterate(alloc); - } - } - -#ifdef USE_SNMALLOC_STATS - void print_all_stats(std::ostream& o, uint64_t dumpid = 0) - { - auto alloc = Parent::iterate(); - - while (alloc != nullptr) - { - alloc->stats().template print(o, dumpid, alloc->id()); - alloc = Parent::iterate(alloc); - } - } -#else - void print_all_stats(void*& o, uint64_t dumpid = 0) - { - UNUSED(o); - UNUSED(dumpid); - } -#endif - - void cleanup_unused() - { -#ifndef SNMALLOC_PASS_THROUGH - // Call this periodically to free and coalesce memory allocated by - // allocators that are not currently in use by any thread. - // One atomic operation to extract the stack, another to restore it. - // Handling the message queue for each stack is non-atomic. - auto* first = Parent::extract(); - auto* alloc = first; - decltype(alloc) last; - - if (alloc != nullptr) - { - while (alloc != nullptr) - { - alloc->handle_message_queue(); - last = alloc; - alloc = Parent::extract(alloc); - } - - restore(first, last); - } -#endif - } - - /** - If you pass a pointer to a bool, then it returns whether all the - allocators are empty. If you don't pass a pointer to a bool, then will - raise an error all the allocators are not empty. - */ - void debug_check_empty(bool* result = nullptr) - { -#ifndef SNMALLOC_PASS_THROUGH - // This is a debugging function. It checks that all memory from all - // allocators has been freed. - auto* alloc = Parent::iterate(); - - bool done = false; - bool okay = true; - - while (!done) - { - done = true; - alloc = Parent::iterate(); - okay = true; - - while (alloc != nullptr) - { - // Check that the allocator has freed all memory. - alloc->debug_is_empty(&okay); - - // Post all remotes, including forwarded ones. If any allocator posts, - // repeat the loop. 
- if (alloc->remote_cache.capacity < REMOTE_CACHE) - { - alloc->stats().remote_post(); - alloc->remote_cache.post(alloc, alloc->get_trunc_id()); - done = false; - } - - alloc = Parent::iterate(alloc); - } - } - - if (result != nullptr) - { - *result = okay; - return; - } - - if (!okay) - { - alloc = Parent::iterate(); - while (alloc != nullptr) - { - alloc->debug_is_empty(nullptr); - alloc = Parent::iterate(alloc); - } - } -#else - UNUSED(result); -#endif - } - - void debug_in_use(size_t count) - { - auto alloc = Parent::iterate(); - while (alloc != nullptr) - { - if (alloc->debug_is_in_use()) - { - if (count == 0) - { - error("ERROR: allocator in use."); - } - count--; - } - alloc = Parent::iterate(alloc); - - if (count != 0) - { - error("Error: two few allocators in use."); - } - } - } - }; - - using Alloc = Allocator< - needs_initialisation, - init_thread_allocator, - GlobalVirtual, - SNMALLOC_DEFAULT_CHUNKMAP, - true>; - - inline AllocPool*& current_alloc_pool() - { - return Singleton< - AllocPool*, - AllocPool::make>::get(); - } - - template - inline AllocPool* make_alloc_pool(MemoryProvider& mp) - { - return AllocPool::make(mp); - } - -} // namespace snmalloc diff --git a/src/mem/largealloc.h b/src/mem/largealloc.h deleted file mode 100644 index 0d870fcdd..000000000 --- a/src/mem/largealloc.h +++ /dev/null @@ -1,448 +0,0 @@ -#pragma once - -#include "../ds/flaglock.h" -#include "../ds/helpers.h" -#include "../ds/mpmcstack.h" -#include "../pal/pal.h" -#include "address_space.h" -#include "allocstats.h" -#include "baseslab.h" -#include "sizeclass.h" - -#include -#include - -namespace snmalloc -{ - template - class MemoryProviderStateMixin; - - class Largeslab : public Baseslab - { - // This is the view of a contiguous memory area when it is being kept - // in the global size-classed caches of available contiguous memory areas. - private: - template< - class a, - Construction c, - template - typename P, - template - typename AP> - friend class MPMCStack; - template - friend class MemoryProviderStateMixin; - AtomicCapPtr next = nullptr; - - public: - void init() - { - kind = Large; - } - }; - - /** - * A slab that has been decommitted. The first page remains committed and - * the only fields that are guaranteed to exist are the kind and next - * pointer from the superclass. - */ - struct Decommittedslab : public Largeslab - { - /** - * Constructor. Expected to be called via placement new into some memory - * that was formerly a superslab or large allocation and is now just some - * spare address space. - */ - Decommittedslab() - { - kind = Decommitted; - } - }; - - // This represents the state that the large allcoator needs to add to the - // global state of the allocator. This is currently stored in the memory - // provider, so we add this in. - template - class MemoryProviderStateMixin - { - /** - * Simple flag for checking if another instance of lazy-decommit is - * running - */ - std::atomic_flag lazy_decommit_guard = {}; - - /** - * Instantiate the ArenaMap here. - * - * In most cases, this will be a purely static object (a DefaultArenaMap - * using a GlobalPagemapTemplate or ExternalGlobalPagemapTemplate). For - * sandboxes, this may have per-instance state (e.g., the sandbox root); - * presently, that's handled by the MemoryProviderStateMixin constructor - * that takes a pointer to address space it owns. There is some - * non-orthogonality of concerns here. 
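/*
 * Editorial sketch, not part of the original file: the shape of the
 * debug_check_empty() loop above. Flushing one allocator's remote queue can
 * hand work to another allocator, so the sweep is repeated until a full pass
 * completes with nothing left to post.
 */
#include <vector>

struct AllocSketch
{
  bool has_pending = false;

  // Returns true if anything was posted (stand-in for remote_cache.post()).
  bool flush()
  {
    bool posted = has_pending;
    has_pending = false;
    return posted;
  }
};

inline void flush_all(std::vector<AllocSketch>& allocs)
{
  bool done = false;
  while (!done)
  {
    done = true;
    for (auto& a : allocs)
      if (a.flush())
        done = false; // progress was made; another sweep is required
  }
}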
- */ - ArenaMap arena_map = {}; - - using ASM = AddressSpaceManager; - /** - * Manages address space for this memory provider. - */ - ASM address_space = {}; - - /** - * High-water mark of used memory. - */ - std::atomic peak_memory_used_bytes{0}; - - /** - * Memory current available in large_stacks - */ - std::atomic available_large_chunks_in_bytes{0}; - - /** - * Stack of large allocations that have been returned for reuse. - */ - ModArray< - NUM_LARGE_CLASSES, - MPMCStack> - large_stack; - - public: - using Pal = PAL; - - /** - * Pop an allocation from a large-allocation stack. This is safe to call - * concurrently with other acceses. If there is no large allocation on a - * particular stack then this will return `nullptr`. - */ - SNMALLOC_FAST_PATH CapPtr - pop_large_stack(size_t large_class) - { - auto p = large_stack[large_class].pop(); - if (p != nullptr) - { - const size_t rsize = bits::one_at_bit(SUPERSLAB_BITS) << large_class; - available_large_chunks_in_bytes -= rsize; - } - return p; - } - - /** - * Push `slab` onto the large-allocation stack associated with the size - * class specified by `large_class`. Always succeeds. - */ - SNMALLOC_FAST_PATH void - push_large_stack(CapPtr slab, size_t large_class) - { - const size_t rsize = bits::one_at_bit(SUPERSLAB_BITS) << large_class; - available_large_chunks_in_bytes += rsize; - large_stack[large_class].push(slab); - } - - /** - * Default constructor. This constructs a memory provider that doesn't yet - * own any memory, but which can claim memory from the PAL. - */ - MemoryProviderStateMixin() = default; - - /** - * Construct a memory provider owning some memory. The PAL provided with - * memory providers constructed in this way does not have to be able to - * allocate memory, if the initial reservation is sufficient. - */ - MemoryProviderStateMixin(CapPtr start, size_t len) - : address_space(start, len) - {} - /** - * Make a new memory provide for this PAL. - */ - static MemoryProviderStateMixin* make() noexcept - { - // Temporary stack-based storage to start the allocator in. - ASM local_asm{}; - ArenaMap local_am{}; - - // Allocate permanent storage for the allocator usung temporary allocator - MemoryProviderStateMixin* allocated = - local_asm - .template reserve_with_left_over( - sizeof(MemoryProviderStateMixin), local_am) - .template as_static() - .unsafe_capptr; - - if (allocated == nullptr) - error("Failed to initialise system!"); - - // Move address range inside itself - allocated->address_space = std::move(local_asm); - allocated->arena_map = std::move(local_am); - - // Register this allocator for low-memory call-backs - if constexpr (pal_supports) - { - auto callback = - allocated->template alloc_chunk( - allocated); - PAL::register_for_low_memory_callback(callback); - } - - return allocated; - } - - private: - SNMALLOC_SLOW_PATH void lazy_decommit() - { - // If another thread is try to do lazy decommit, let it continue. If - // we try to parallelise this, we'll most likely end up waiting on the - // same page table locks. - if (!lazy_decommit_guard.test_and_set()) - { - return; - } - // When we hit low memory, iterate over size classes and decommit all of - // the memory that we can. Start with the small size classes so that we - // hit cached superslabs first. - // FIXME: We probably shouldn't do this all at once. - // FIXME: We currently Decommit all the sizeclasses larger than 0. 
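/*
 * Editorial sketch, not part of the original file: the bootstrap trick in
 * make() above. The state is first built on the stack, used to reserve its
 * own permanent storage, and then moved into that storage. The reserve()
 * stand-in below uses ::operator new purely for illustration; the real code
 * reserves address space from the PAL.
 */
#include <cstddef>
#include <new>
#include <utility>

struct BootstrapState
{
  // ... address-space bookkeeping would live here ...
  void* reserve(std::size_t bytes)
  {
    return ::operator new(bytes);
  }
};

inline BootstrapState* make_sketch()
{
  BootstrapState local{};                             // temporary, stack-based
  void* home = local.reserve(sizeof(BootstrapState)); // allocate permanent home
  return new (home) BootstrapState(std::move(local)); // move the state into it
}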
- for (size_t large_class = 0; large_class < NUM_LARGE_CLASSES; - large_class++) - { - if (!PAL::expensive_low_memory_check()) - { - break; - } - size_t rsize = bits::one_at_bit(SUPERSLAB_BITS) << large_class; - size_t decommit_size = rsize - OS_PAGE_SIZE; - // Grab all of the chunks of this size class. - CapPtr slab = large_stack[large_class].pop_all(); - while (slab != nullptr) - { - // Decommit all except for the first page and then put it back on - // the stack. - if (slab->get_kind() != Decommitted) - { - PAL::notify_not_using( - pointer_offset(slab.unsafe_capptr, OS_PAGE_SIZE), decommit_size); - } - // Once we've removed these from the stack, there will be no - // concurrent accesses and removal should have established a - // happens-before relationship, so it's safe to use relaxed loads - // here. - auto next = slab->next.load(std::memory_order_relaxed); - large_stack[large_class].push(CapPtr( - new (slab.unsafe_capptr) Decommittedslab())); - slab = next; - } - } - lazy_decommit_guard.clear(); - } - - class LowMemoryNotificationObject : public PalNotificationObject - { - MemoryProviderStateMixin* memory_provider; - - /*** - * Method for callback object to perform lazy decommit. - */ - static void process(PalNotificationObject* p) - { - // Unsafe downcast here. Don't want vtable and RTTI. - auto self = reinterpret_cast(p); - self->memory_provider->lazy_decommit(); - } - - public: - LowMemoryNotificationObject(MemoryProviderStateMixin* memory_provider) - : PalNotificationObject(&process), memory_provider(memory_provider) - {} - }; - - public: - /** - * Primitive allocator for structure that are required before - * the allocator can be running. - */ - template - T* alloc_chunk(Args&&... args) - { - // Cache line align - size_t size = bits::align_up(sizeof(T), 64); - size = bits::max(size, alignment); - auto p = - address_space.template reserve_with_left_over(size, arena_map); - if (p == nullptr) - return nullptr; - - peak_memory_used_bytes += size; - - return new (p.unsafe_capptr) T(std::forward(args)...); - } - - template - CapPtr reserve(size_t large_class) noexcept - { - size_t size = bits::one_at_bit(SUPERSLAB_BITS) << large_class; - peak_memory_used_bytes += size; - return address_space.template reserve(size, arena_map) - .template as_static(); - } - - /** - * Returns a pair of current memory usage and peak memory usage. - * Both statistics are very coarse-grained. - */ - std::pair memory_usage() - { - size_t avail = available_large_chunks_in_bytes; - size_t peak = peak_memory_used_bytes; - return {peak - avail, peak}; - } - - template - SNMALLOC_FAST_PATH CapPtr capptr_amplify(CapPtr r) - { - return arena_map.template capptr_amplify(r); - } - - ArenaMap& arenamap() - { - return arena_map; - } - }; - - using Stats = AllocStats; - - template - class LargeAlloc - { - public: - // This will be a zero-size structure if stats are not enabled. - Stats stats; - - MemoryProvider& memory_provider; - - LargeAlloc(MemoryProvider& mp) : memory_provider(mp) {} - - template - CapPtr - alloc(size_t large_class, size_t rsize, size_t size) - { - SNMALLOC_ASSERT( - (bits::one_at_bit(SUPERSLAB_BITS) << large_class) == rsize); - - CapPtr p = - memory_provider.pop_large_stack(large_class); - - if (p == nullptr) - { - p = memory_provider.template reserve(large_class); - if (p == nullptr) - return nullptr; - MemoryProvider::Pal::template notify_using( - p.unsafe_capptr, rsize); - } - else - { - stats.superslab_pop(); - - // Cross-reference alloc.h's large_dealloc decommitment condition. 
- bool decommitted = - ((decommit_strategy == DecommitSuperLazy) && - (p.template as_static().unsafe_capptr->get_kind() == - Decommitted)) || - (large_class > 0) || (decommit_strategy == DecommitSuper); - - if (decommitted) - { - // The first page is already in "use" for the stack element, - // this will need zeroing for a YesZero call. - if constexpr (zero_mem == YesZero) - pal_zero(p, OS_PAGE_SIZE); - - // Notify we are using the rest of the allocation. - // Passing zero_mem ensures the PAL provides zeroed pages if - // required. - MemoryProvider::Pal::template notify_using( - pointer_offset(p.unsafe_capptr, OS_PAGE_SIZE), - rsize - OS_PAGE_SIZE); - } - else - { - // This is a superslab that has not been decommitted. - if constexpr (zero_mem == YesZero) - pal_zero( - p, bits::align_up(size, OS_PAGE_SIZE)); - else - UNUSED(size); - } - } - - SNMALLOC_ASSERT(p.as_void() == pointer_align_up(p.as_void(), rsize)); - return p; - } - - void dealloc(CapPtr p, size_t large_class) - { - if constexpr (decommit_strategy == DecommitSuperLazy) - { - static_assert( - pal_supports, - "A lazy decommit strategy cannot be implemented on platforms " - "without low memory notifications"); - } - - size_t rsize = bits::one_at_bit(SUPERSLAB_BITS) << large_class; - - // Cross-reference largealloc's alloc() decommitted condition. - if ( - (decommit_strategy != DecommitNone) && - (large_class != 0 || decommit_strategy == DecommitSuper)) - { - MemoryProvider::Pal::notify_not_using( - pointer_offset(p, OS_PAGE_SIZE).unsafe_capptr, rsize - OS_PAGE_SIZE); - } - - stats.superslab_push(); - memory_provider.push_large_stack(p, large_class); - } - - template - SNMALLOC_FAST_PATH CapPtr capptr_amplify(CapPtr r) - { - return memory_provider.template capptr_amplify(r); - } - }; - - struct DefaultPrimAlloc; - -#ifndef SNMALLOC_DEFAULT_MEMORY_PROVIDER -# define SNMALLOC_DEFAULT_MEMORY_PROVIDER \ - MemoryProviderStateMixin> -#endif - - /** - * The type of the default memory allocator. This can be changed by defining - * `SNMALLOC_DEFAULT_MEMORY_PROVIDER` before including this file. By default - * it is `MemoryProviderStateMixin` a class that allocates directly from - * the platform abstraction layer. - */ - using GlobalVirtual = SNMALLOC_DEFAULT_MEMORY_PROVIDER; - - /** - * The memory provider that will be used if no other provider is explicitly - * passed as an argument. - */ - inline GlobalVirtual& default_memory_provider() - { - return *(Singleton::get()); - } - - struct DefaultPrimAlloc - { - template - static T* alloc_chunk(Args&&... args) - { - return default_memory_provider().alloc_chunk(args...); - } - }; -} // namespace snmalloc diff --git a/src/mem/mediumslab.h b/src/mem/mediumslab.h deleted file mode 100644 index c1ea9be33..000000000 --- a/src/mem/mediumslab.h +++ /dev/null @@ -1,156 +0,0 @@ -#pragma once - -#include "../ds/dllist.h" -#include "allocconfig.h" -#include "allocslab.h" -#include "sizeclass.h" - -namespace snmalloc -{ - class Mediumslab : public Allocslab - { - // This is the view of a 16 mb area when it is being used to allocate - // medium sized classes: 64 kb to 16 mb, non-inclusive. - private: - friend DLList; - - // Keep the allocator pointer on a separate cache line. It is read by - // other threads, and does not change, so we avoid false sharing. 
- alignas(CACHELINE_SIZE) CapPtr next; - CapPtr prev; - - // Store a pointer to ourselves without platform constraints applied, - // as we need this to be able to zero memory by manipulating the VM map - CapPtr self_chunk; - - uint16_t free; - uint8_t head; - uint8_t sizeclass; - uint16_t stack[SLAB_COUNT - 1]; - - public: - static constexpr size_t header_size() - { - static_assert( - sizeof(Mediumslab) < OS_PAGE_SIZE, - "Mediumslab header size must be less than the page size"); - static_assert( - sizeof(Mediumslab) < SLAB_SIZE, - "Mediumslab header size must be less than the slab size"); - - /* - * Always use a full page or SLAB, whichever is smaller, in order - * to get good alignment of individual allocations. Some platforms - * have huge minimum pages (e.g., Linux on PowerPC uses 64KiB) and - * our SLABs are occasionally small by comparison (e.g., in OE, when - * we take them to be 8KiB). - */ - return bits::align_up(sizeof(Mediumslab), min(OS_PAGE_SIZE, SLAB_SIZE)); - } - - /** - * Given a highly-privileged pointer pointing to or within an object in - * this slab, return a pointer to the slab headers. - * - * In debug builds on StrictProvenance architectures, we will enforce the - * slab bounds on this returned pointer. In non-debug builds, we will - * return a highly-privileged pointer (i.e., CBArena) instead as these - * pointers are not exposed from the allocator. - */ - template - static SNMALLOC_FAST_PATH CapPtr - get(CapPtr p) - { - return capptr_bound_chunkd( - pointer_align_down(p.as_void()), - SUPERSLAB_SIZE); - } - - static void init( - CapPtr self, - RemoteAllocator* alloc, - sizeclass_t sc, - size_t rsize) - { - SNMALLOC_ASSERT(sc >= NUM_SMALL_CLASSES); - SNMALLOC_ASSERT((sc - NUM_SMALL_CLASSES) < NUM_MEDIUM_CLASSES); - - self->allocator = alloc; - self->head = 0; - - // If this was previously a Mediumslab of the same sizeclass, don't - // initialise the allocation stack. - if ((self->kind != Medium) || (self->sizeclass != sc)) - { - self->self_chunk = self.as_void(); - self->sizeclass = static_cast(sc); - uint16_t ssize = static_cast(rsize >> 8); - self->kind = Medium; - self->free = medium_slab_free(sc); - for (uint16_t i = self->free; i > 0; i--) - self->stack[self->free - i] = - static_cast((SUPERSLAB_SIZE >> 8) - (i * ssize)); - } - else - { - SNMALLOC_ASSERT(self->free == medium_slab_free(sc)); - SNMALLOC_ASSERT(self->self_chunk == self.as_void()); - } - } - - uint8_t get_sizeclass() - { - return sizeclass; - } - - template - static CapPtr - alloc(CapPtr self, size_t size) - { - SNMALLOC_ASSERT(!full(self)); - - uint16_t index = self->stack[self->head++]; - auto p = pointer_offset(self, (static_cast(index) << 8)); - self->free--; - - if constexpr (zero_mem == YesZero) - pal_zero(Aal::capptr_rebound(self->self_chunk, p), size); - else - UNUSED(size); - - return Aal::capptr_bound(p, size); - } - - static bool - dealloc(CapPtr self, CapPtr p) - { - SNMALLOC_ASSERT(self->head > 0); - - // Returns true if the Mediumslab was full before this deallocation. - bool was_full = full(self); - self->free++; - self->stack[--(self->head)] = self->address_to_index(address_cast(p)); - - return was_full; - } - - template - static bool full(CapPtr self) - { - return self->free == 0; - } - - template - static bool empty(CapPtr self) - { - return self->head == 0; - } - - private: - uint16_t address_to_index(address_t p) - { - // Get the offset from the slab for a memory location. 
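/*
 * Editorial sketch, not part of the original file: Mediumslab hands out
 * objects via a stack of 16-bit slot indices, each index being the object's
 * byte offset within the chunk divided by 256 (hence the `>> 8` / `<< 8`
 * above). The chunk size, header reservation and object size below are
 * assumed values for illustration.
 */
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <vector>

int main()
{
  constexpr std::size_t chunk_size = std::size_t{1} << 24; // 16 MiB chunk
  constexpr std::size_t header = 4096;                     // header reservation
  constexpr std::size_t rsize = std::size_t{1} << 16;      // 64 KiB objects

  // init(): fill the stack with every slot's index, lowest offset first.
  std::vector<uint16_t> stack;
  std::size_t count = (chunk_size - header) / rsize;
  for (std::size_t i = count; i > 0; i--)
    stack.push_back(static_cast<uint16_t>((chunk_size - i * rsize) >> 8));

  // alloc(): pop an index and turn it back into a byte offset.
  std::size_t head = 0;
  uint16_t index = stack[head++];
  std::size_t offset = std::size_t{index} << 8;
  assert(offset >= header && offset + rsize <= chunk_size);

  // dealloc(): push the index back for reuse.
  stack[--head] = index;
}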
- return static_cast((p - address_cast(this)) >> 8); - } - }; -} // namespace snmalloc diff --git a/src/mem/metaslab.h b/src/mem/metaslab.h deleted file mode 100644 index c1b51f544..000000000 --- a/src/mem/metaslab.h +++ /dev/null @@ -1,255 +0,0 @@ -#pragma once - -#include "../ds/cdllist.h" -#include "../ds/dllist.h" -#include "../ds/helpers.h" -#include "freelist.h" -#include "ptrhelpers.h" -#include "sizeclass.h" - -namespace snmalloc -{ - class Slab; - - using SlabList = CDLLNode; - using SlabLink = CDLLNode; - - static_assert( - sizeof(SlabLink) <= MIN_ALLOC_SIZE, - "Need to be able to pack a SlabLink into any free small alloc"); - - /** - * This struct is used inside FreeListBuilder to account for the - * alignment space that is wasted in sizeof. - * - * This is part of Metaslab abstraction. - */ - struct MetaslabEnd - { - /** - * How many entries are not in the free list of slab, i.e. - * how many entries are needed to fully free this slab. - * - * In the case of a fully allocated slab, where prev==0 needed - * will be 1. This enables 'return_object' to detect the slow path - * case with a single operation subtract and test. - */ - uint16_t needed = 0; - - uint8_t sizeclass; - // Initially zero to encode the superslabs relative list of slabs. - uint8_t next = 0; - }; - - // The Metaslab represent the status of a single slab. - // This can be either a short or a standard slab. - class Metaslab : public SlabLink - { - public: - /** - * Data-structure for building the free list for this slab. - * - * Spare 32bits are used for the fields in MetaslabEnd. - */ -#ifdef CHECK_CLIENT - FreeListBuilder free_queue; -#else - FreeListBuilder free_queue; -#endif - - uint16_t& needed() - { - return free_queue.s.needed; - } - - uint8_t sizeclass() - { - return free_queue.s.sizeclass; - } - - uint8_t& next() - { - return free_queue.s.next; - } - - void initialise(sizeclass_t sizeclass, CapPtr slab) - { - free_queue.s.sizeclass = static_cast(sizeclass); - free_queue.init(); - // Set up meta data as if the entire slab has been turned into a free - // list. This means we don't have to check for special cases where we have - // returned all the elements, but this is a slab that is still being bump - // allocated from. Hence, the bump allocator slab will never be returned - // for use in another size class. - set_full(slab); - } - - /** - * Updates statistics for adding an entry to the free list, if the - * slab is either - * - empty adding the entry to the free list, or - * - was full before the subtraction - * this returns true, otherwise returns false. - */ - bool return_object() - { - return (--needed()) == 0; - } - - bool is_unused() - { - return needed() == 0; - } - - bool is_full() - { - return get_prev() == nullptr; - } - - /** - * Only wake slab if we have this many free allocations - * - * This helps remove bouncing around empty to non-empty cases. - * - * It also increases entropy, when we have randomisation. - */ - uint16_t threshold_for_waking_slab(bool is_short_slab) - { - auto capacity = get_slab_capacity(sizeclass(), is_short_slab); - uint16_t threshold = (capacity / 8) | 1; - uint16_t max = 32; - return bits::min(threshold, max); - } - - template - SNMALLOC_FAST_PATH void set_full(CapPtr slab) - { - static_assert(B == CBChunkD || B == CBChunk); - SNMALLOC_ASSERT(free_queue.empty()); - - // Prepare for the next free queue to be built. 
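/*
 * Editorial sketch, not part of the original file: the wake threshold
 * computed above — roughly an eighth of the slab's capacity, forced to be at
 * least one (and odd) by `| 1`, and capped at 32.
 */
#include <algorithm>
#include <cstdint>
#include <iostream>

inline uint16_t waking_threshold_sketch(uint16_t capacity)
{
  uint16_t threshold = (capacity / 8) | 1;
  return std::min<uint16_t>(threshold, 32);
}

int main()
{
  for (uint16_t capacity : {1, 8, 64, 256, 1024})
    std::cout << capacity << " -> " << waking_threshold_sketch(capacity) << '\n';
  // Prints 1, 1, 9, 32, 32 respectively.
}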
- free_queue.open(slab.as_void()); - - // Set needed to at least one, possibly more so we only use - // a slab when it has a reasonable amount of free elements - needed() = threshold_for_waking_slab(Metaslab::is_short(slab)); - null_prev(); - } - - template - static SNMALLOC_FAST_PATH CapPtr()> - get_slab(CapPtr p) - { - static_assert(B == CBArena || B == CBChunkD || B == CBChunk); - - return capptr_bound_chunkd( - pointer_align_down(p.as_void()), SLAB_SIZE); - } - - template - static bool is_short(CapPtr p) - { - return pointer_align_down(p.as_void()) == p; - } - - SNMALLOC_FAST_PATH bool is_start_of_object(address_t p) - { - return is_multiple_of_sizeclass( - sizeclass(), SLAB_SIZE - (p - address_align_down(p))); - } - - /** - * Takes a free list out of a slabs meta data. - * Returns the link as the allocation, and places the free list into the - * `fast_free_list` for further allocations. - */ - template - static SNMALLOC_FAST_PATH CapPtr alloc( - CapPtr self, - FreeListIter& fast_free_list, - size_t rsize, - LocalEntropy& entropy) - { - SNMALLOC_ASSERT(rsize == sizeclass_to_size(self->sizeclass())); - SNMALLOC_ASSERT(!self->is_full()); - - self->free_queue.close(fast_free_list, entropy); - auto p = fast_free_list.take(entropy); - auto slab = Aal::capptr_rebound(self.as_void(), p); - auto meta = Metaslab::get_slab(slab); - - entropy.refresh_bits(); - - // Treat stealing the free list as allocating it all. - self->remove(); - self->set_full(meta); - - SNMALLOC_ASSERT(self->is_start_of_object(address_cast(p))); - - self->debug_slab_invariant(meta, entropy); - - if constexpr (zero_mem == YesZero) - { - if (rsize < PAGE_ALIGNED_SIZE) - pal_zero(p, rsize); - else - pal_zero(Aal::capptr_rebound(self.as_void(), p), rsize); - } - else - { - UNUSED(rsize); - } - - // TODO: Should this be zeroing the FreeObject state? 
- return capptr_export(p.as_void()); - } - - template - void debug_slab_invariant(CapPtr slab, LocalEntropy& entropy) - { - static_assert(B == CBChunkD || B == CBChunk); - -#if !defined(NDEBUG) && !defined(SNMALLOC_CHEAP_CHECKS) - bool is_short = Metaslab::is_short(slab); - - if (is_full()) - { - size_t count = free_queue.debug_length(entropy); - SNMALLOC_ASSERT(count < threshold_for_waking_slab(is_short)); - return; - } - - if (is_unused()) - return; - - size_t size = sizeclass_to_size(sizeclass()); - size_t offset = get_initial_offset(sizeclass(), is_short); - size_t accounted_for = needed() * size + offset; - - // Block is not full - SNMALLOC_ASSERT(SLAB_SIZE > accounted_for); - - // Account for list size - size_t count = free_queue.debug_length(entropy); - accounted_for += count * size; - - SNMALLOC_ASSERT(count <= get_slab_capacity(sizeclass(), is_short)); - - auto bumpptr = (get_slab_capacity(sizeclass(), is_short) * size) + offset; - // Check we haven't allocated more than fits in a slab - SNMALLOC_ASSERT(bumpptr <= SLAB_SIZE); - - // Account for to be bump allocated space - accounted_for += SLAB_SIZE - bumpptr; - - SNMALLOC_ASSERT(!is_full()); - - // All space accounted for - SNMALLOC_ASSERT(SLAB_SIZE == accounted_for); -#else - UNUSED(slab); - UNUSED(entropy); -#endif - } - }; -} // namespace snmalloc diff --git a/src/mem/pagemap.h b/src/mem/pagemap.h deleted file mode 100644 index 25050ec86..000000000 --- a/src/mem/pagemap.h +++ /dev/null @@ -1,522 +0,0 @@ -#pragma once - -#include "../ds/bits.h" -#include "../ds/helpers.h" -#include "../ds/invalidptr.h" - -#include -#include - -namespace snmalloc -{ - static constexpr size_t PAGEMAP_NODE_BITS = 16; - static constexpr size_t PAGEMAP_NODE_SIZE = 1ULL << PAGEMAP_NODE_BITS; - - /** - * Structure describing the configuration of a pagemap. When querying a - * pagemap from a different instantiation of snmalloc, the pagemap is exposed - * as a `void*`. This structure allows the caller to check whether the - * pagemap is of the format that they expect. - */ - struct PagemapConfig - { - /** - * The version of the pagemap structure. This is always 1 in existing - * versions of snmalloc. This will be incremented every time the format - * changes in an incompatible way. Changes to the format may add fields to - * the end of this structure. - */ - uint32_t version; - /** - * Is this a flat pagemap? If this field is false, the pagemap is the - * hierarchical structure. - */ - bool is_flat_pagemap; - /** - * Number of bytes in a pointer. - */ - uint8_t sizeof_pointer; - /** - * The number of bits of the address used to index into the pagemap. - */ - uint64_t pagemap_bits; - /** - * The size (in bytes) of a pagemap entry. - */ - size_t size_of_entry; - }; - - /** - * The Pagemap is the shared data structure ultimately used by multiple - * snmalloc threads / allocators to determine who owns memory and, - * therefore, to whom deallocated memory should be returned. The - * allocators do not interact with this directly but rather via the - * static ChunkMap object, which encapsulates knowledge about the - * pagemap's parametric type T. - * - * The other template paramters are... - * - * GRANULARITY_BITS: the log2 of the size in bytes of the address space - * granule associated with each entry. 
- * - * default_content: An initial value of T (typically "0" or something akin) - * - * PrimAlloc: A class used to source PageMap-internal memory; it must have a - * method callable as if it had the following type: - * - * template static T* alloc_chunk(void); - */ - template< - size_t GRANULARITY_BITS, - typename T, - T default_content, - typename PrimAlloc> - class Pagemap - { - private: - static constexpr size_t COVERED_BITS = - bits::ADDRESS_BITS - GRANULARITY_BITS; - static constexpr size_t CONTENT_BITS = - bits::next_pow2_bits_const(sizeof(T)); - - static_assert( - PAGEMAP_NODE_BITS - CONTENT_BITS < COVERED_BITS, - "Should use the FlatPageMap as it does not require a tree"); - - static constexpr size_t BITS_FOR_LEAF = PAGEMAP_NODE_BITS - CONTENT_BITS; - static constexpr size_t ENTRIES_PER_LEAF = 1 << BITS_FOR_LEAF; - static constexpr size_t LEAF_MASK = ENTRIES_PER_LEAF - 1; - - static constexpr size_t BITS_PER_INDEX_LEVEL = - PAGEMAP_NODE_BITS - POINTER_BITS; - static constexpr size_t ENTRIES_PER_INDEX_LEVEL = 1 << BITS_PER_INDEX_LEVEL; - static constexpr size_t ENTRIES_MASK = ENTRIES_PER_INDEX_LEVEL - 1; - - static constexpr size_t INDEX_BITS = - BITS_FOR_LEAF > COVERED_BITS ? 0 : COVERED_BITS - BITS_FOR_LEAF; - - static constexpr size_t INDEX_LEVELS = INDEX_BITS / BITS_PER_INDEX_LEVEL; - static constexpr size_t TOPLEVEL_BITS = - INDEX_BITS - (INDEX_LEVELS * BITS_PER_INDEX_LEVEL); - static constexpr size_t TOPLEVEL_ENTRIES = 1 << TOPLEVEL_BITS; - static constexpr size_t TOPLEVEL_SHIFT = - (INDEX_LEVELS * BITS_PER_INDEX_LEVEL) + BITS_FOR_LEAF + GRANULARITY_BITS; - - // Value used to represent when a node is being added too - static constexpr InvalidPointer<1> LOCKED_ENTRY{}; - - struct Leaf - { - TrivialInitAtomic values[ENTRIES_PER_LEAF]; - - static_assert(sizeof(TrivialInitAtomic) == sizeof(T)); - static_assert(alignof(TrivialInitAtomic) == alignof(T)); - }; - - struct PagemapEntry - { - TrivialInitAtomic entries[ENTRIES_PER_INDEX_LEVEL]; - - static_assert( - sizeof(TrivialInitAtomic) == sizeof(PagemapEntry*)); - static_assert( - alignof(TrivialInitAtomic) == alignof(PagemapEntry*)); - }; - - static_assert( - sizeof(PagemapEntry) == sizeof(Leaf), "Should be the same size"); - - static_assert( - sizeof(PagemapEntry) == PAGEMAP_NODE_SIZE, "Should be the same size"); - - // Init removed as not required as this is only ever a global - // cl is generating a memset of zero, which will be a problem - // in libc/ucrt bring up. On ucrt this will run after the first - // allocation. - // TODO: This is fragile that it is not being memset, and we should review - // to ensure we don't get bitten by this in the future. - TrivialInitAtomic top[TOPLEVEL_ENTRIES]; - - template - SNMALLOC_FAST_PATH PagemapEntry* - get_node(TrivialInitAtomic* e, bool& result) - { - // The page map nodes are all allocated directly from the OS zero - // initialised with a system call. We don't need any ordered to guarantee - // to see that correctly. The only transistions are monotone and handled - // by the slow path. - PagemapEntry* value = e->load(std::memory_order_relaxed); - - if (likely(value > LOCKED_ENTRY)) - { - result = true; - return value; - } - if constexpr (create_addr) - { - return get_node_slow(e, result); - } - else - { - result = false; - return nullptr; - } - } - - SNMALLOC_SLOW_PATH PagemapEntry* - get_node_slow(TrivialInitAtomic* e, bool& result) - { - // The page map nodes are all allocated directly from the OS zero - // initialised with a system call. 
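/*
 * Editorial sketch, not part of the original file: how the constants above
 * carve an address into pagemap indices, evaluated for one assumed
 * configuration (48-bit address space, 24-bit granules, one-byte entries,
 * 2^16-byte nodes, 8-byte pointers). With these numbers there are no middle
 * index levels: an address splits into an 8-bit top-level index and a 16-bit
 * leaf index.
 */
#include <cstddef>
#include <cstdint>
#include <iostream>

constexpr std::size_t ADDRESS_BITS_SK = 48, GRANULARITY_BITS_SK = 24;
constexpr std::size_t NODE_BITS_SK = 16, CONTENT_BITS_SK = 0, POINTER_BITS_SK = 3;

constexpr std::size_t COVERED_BITS_SK = ADDRESS_BITS_SK - GRANULARITY_BITS_SK; // 24
constexpr std::size_t BITS_FOR_LEAF_SK = NODE_BITS_SK - CONTENT_BITS_SK;       // 16
constexpr std::size_t BITS_PER_LEVEL_SK = NODE_BITS_SK - POINTER_BITS_SK;      // 13
constexpr std::size_t INDEX_BITS_SK = COVERED_BITS_SK - BITS_FOR_LEAF_SK;      // 8
constexpr std::size_t INDEX_LEVELS_SK = INDEX_BITS_SK / BITS_PER_LEVEL_SK;     // 0
constexpr std::size_t TOPLEVEL_SHIFT_SK =
  INDEX_LEVELS_SK * BITS_PER_LEVEL_SK + BITS_FOR_LEAF_SK + GRANULARITY_BITS_SK; // 40

int main()
{
  uintptr_t addr = static_cast<uintptr_t>(0x00007f1234567890ULL);
  std::size_t top = addr >> TOPLEVEL_SHIFT_SK;
  std::size_t leaf =
    (addr >> GRANULARITY_BITS_SK) & ((std::size_t{1} << BITS_FOR_LEAF_SK) - 1);
  std::cout << "top index " << top << ", leaf index " << leaf << '\n';
}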
We don't need any ordered to guarantee - // to see that correctly. - PagemapEntry* value = e->load(std::memory_order_relaxed); - - if ((value == nullptr) || (value == LOCKED_ENTRY)) - { - value = nullptr; - - if (e->compare_exchange_strong( - value, LOCKED_ENTRY, std::memory_order_relaxed)) - { - value = PrimAlloc::template alloc_chunk(); - e->store(value, std::memory_order_release); - } - else - { - while (address_cast(e->load(std::memory_order_relaxed)) == - LOCKED_ENTRY) - { - Aal::pause(); - } - value = e->load(std::memory_order_acquire); - } - } - result = true; - return value; - } - - template - SNMALLOC_FAST_PATH std::pair - get_leaf_index(uintptr_t addr, bool& result) - { -#ifdef FreeBSD_KERNEL - // Zero the top 16 bits - kernel addresses all have them set, but the - // data structure assumes that they're zero. - addr &= 0xffffffffffffULL; -#endif - size_t ix = addr >> TOPLEVEL_SHIFT; - size_t shift = TOPLEVEL_SHIFT; - TrivialInitAtomic* e = &top[ix]; - - // This is effectively a - // for (size_t i = 0; i < INDEX_LEVELS; i++) - // loop, but uses constexpr to guarantee optimised version - // where the INDEX_LEVELS in {0,1}. - if constexpr (INDEX_LEVELS != 0) - { - size_t i = 0; - while (true) - { - PagemapEntry* value = get_node(e, result); - if (unlikely(!result)) - return {nullptr, 0}; - - shift -= BITS_PER_INDEX_LEVEL; - ix = (static_cast(addr) >> shift) & ENTRIES_MASK; - e = &value->entries[ix]; - - if constexpr (INDEX_LEVELS == 1) - { - UNUSED(i); - break; - } - else - { - i++; - if (i == INDEX_LEVELS) - break; - } - } - } - - Leaf* leaf = reinterpret_cast(get_node(e, result)); - - if (unlikely(!result)) - return {nullptr, 0}; - - shift -= BITS_FOR_LEAF; - ix = (static_cast(addr) >> shift) & LEAF_MASK; - return {leaf, ix}; - } - - template - SNMALLOC_FAST_PATH TrivialInitAtomic* - get_addr(uintptr_t p, bool& success) - { - auto leaf_ix = get_leaf_index(p, success); - return &(leaf_ix.first->values[leaf_ix.second]); - } - - TrivialInitAtomic* get_ptr(uintptr_t p) - { - bool success; - return get_addr(p, success); - } - - public: - /** - * The pagemap configuration describing this instantiation of the template. - */ - static constexpr PagemapConfig config = { - 1, false, sizeof(uintptr_t), GRANULARITY_BITS, sizeof(T)}; - - /** - * Cast a `void*` to a pointer to this template instantiation, given a - * config describing the configuration. Return null if the configuration - * passed does not correspond to this template instantiation. - * - * This intended to allow code that depends on the pagemap having a - * specific representation to fail gracefully. - */ - static Pagemap* cast_to_pagemap(void* pm, const PagemapConfig* c) - { - if ( - (c->version != 1) || (c->is_flat_pagemap) || - (c->sizeof_pointer != sizeof(uintptr_t)) || - (c->pagemap_bits != GRANULARITY_BITS) || - (c->size_of_entry != sizeof(T)) || (!std::is_integral_v)) - { - return nullptr; - } - return static_cast(pm); - } - - /** - * Returns the index of a pagemap entry within a given page. This is used - * in code that propagates changes to the pagemap elsewhere. 
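/*
 * Editorial sketch, not part of the original file: the claim-then-publish
 * pattern used by get_node_slow above. One thread wins a relaxed CAS that
 * installs a sentinel, allocates the node, and publishes it with a release
 * store; losers spin on the sentinel and then acquire-load the published
 * node. The sentinel value, Node type and allocation call are stand-ins.
 */
#include <atomic>
#include <thread>

struct NodeSketch
{
  // ... payload ...
};

inline NodeSketch* const LOCKED_SKETCH = reinterpret_cast<NodeSketch*>(1);

inline NodeSketch* get_or_create(std::atomic<NodeSketch*>& slot)
{
  NodeSketch* value = slot.load(std::memory_order_relaxed);
  if (value > LOCKED_SKETCH)
    return value; // already published

  value = nullptr;
  if (slot.compare_exchange_strong(value, LOCKED_SKETCH, std::memory_order_relaxed))
  {
    value = new NodeSketch(); // stands in for PrimAlloc::alloc_chunk
    slot.store(value, std::memory_order_release);
    return value;
  }

  // Another thread holds the sentinel; wait for it to publish.
  while (slot.load(std::memory_order_relaxed) == LOCKED_SKETCH)
    std::this_thread::yield(); // stands in for Aal::pause()

  return slot.load(std::memory_order_acquire);
}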
- */ - size_t index_for_address(uintptr_t p) - { - bool success; - return (OS_PAGE_SIZE - 1) & - reinterpret_cast(get_addr(p, success)); - } - - /** - * Returns the address of the page containing - */ - void* page_for_address(uintptr_t p) - { - bool success; - return pointer_align_down(get_addr(p, success)); - } - - T get(uintptr_t p) - { - bool success; - auto addr = get_addr(p, success); - if (!success) - return default_content; - return addr->load(std::memory_order_relaxed); - } - - void set(uintptr_t p, T x) - { - bool success; - auto addr = get_addr(p, success); - addr->store(x, std::memory_order_relaxed); - } - - void set_range(uintptr_t p, T x, size_t length) - { - bool success; - do - { - auto leaf_ix = get_leaf_index(p, success); - size_t ix = leaf_ix.second; - - auto last = bits::min(LEAF_MASK + 1, ix + length); - - auto diff = last - ix; - - for (; ix < last; ix++) - { - SNMALLOC_ASSUME(leaf_ix.first != nullptr); - leaf_ix.first->values[ix].store(x); - } - - length = length - diff; - p = p + (diff << GRANULARITY_BITS); - } while (length > 0); - } - }; - - /** - * Simple pagemap that for each GRANULARITY_BITS of the address range - * stores a T. - */ - template - class alignas(OS_PAGE_SIZE) FlatPagemap - { - private: - static constexpr size_t COVERED_BITS = - bits::ADDRESS_BITS - GRANULARITY_BITS; - static constexpr size_t ENTRIES = 1ULL << COVERED_BITS; - static constexpr size_t SHIFT = GRANULARITY_BITS; - - TrivialInitAtomic top[ENTRIES]; - - static_assert(sizeof(TrivialInitAtomic) == sizeof(T)); - static_assert(alignof(TrivialInitAtomic) == alignof(T)); - - public: - /** - * The pagemap configuration describing this instantiation of the template. - */ - static constexpr PagemapConfig config = { - 1, true, sizeof(uintptr_t), GRANULARITY_BITS, sizeof(T)}; - - /** - * Cast a `void*` to a pointer to this template instantiation, given a - * config describing the configuration. Return null if the configuration - * passed does not correspond to this template instantiation. - * - * This intended to allow code that depends on the pagemap having a - * specific representation to fail gracefully. - */ - static FlatPagemap* cast_to_pagemap(void* pm, const PagemapConfig* c) - { - if ( - (c->version != 1) || (!c->is_flat_pagemap) || - (c->sizeof_pointer != sizeof(uintptr_t)) || - (c->pagemap_bits != GRANULARITY_BITS) || - (c->size_of_entry != sizeof(T)) || (!std::is_integral_v)) - { - return nullptr; - } - return static_cast(pm); - } - - T get(uintptr_t p) - { - return top[p >> SHIFT].load(std::memory_order_relaxed); - } - - void set(uintptr_t p, T x) - { - top[p >> SHIFT].store(x, std::memory_order_relaxed); - } - - void set_range(uintptr_t p, T x, size_t length) - { - size_t index = p >> SHIFT; - do - { - top[index].store(x, std::memory_order_relaxed); - index++; - length--; - } while (length > 0); - } - - /** - * Returns the index within a page for the specified address. - */ - size_t index_for_address(uintptr_t p) - { - return (static_cast(p) >> SHIFT) % OS_PAGE_SIZE; - } - - /** - * Returns the address of the page containing the pagemap address p. - */ - void* page_for_address(uintptr_t p) - { - SNMALLOC_ASSERT( - (reinterpret_cast(&top) & (OS_PAGE_SIZE - 1)) == 0); - return reinterpret_cast( - reinterpret_cast(&top[p >> SHIFT]) & ~(OS_PAGE_SIZE - 1)); - } - }; - - /** - * Mixin used by `ChunkMap` and other `PageMap` consumers to directly access - * the pagemap via a global variable. This should be used from within the - * library or program that owns the pagemap. 
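// For contrast with the tree-based pagemap, the flat variant that follows in
// this file is essentially an atomic array indexed by
// `address >> GRANULARITY_BITS`.  A toy standalone sketch, covering a
// pretend 32-bit address space with one byte per 64 KiB granule (numbers
// assumed purely to keep the array small):
#include <atomic>
#include <cstddef>
#include <cstdint>

namespace flat_pagemap_sketch
{
  constexpr std::size_t GRANULARITY_BITS = 16; // assumed 64 KiB granules
  constexpr std::size_t COVERED_BITS = 32 - GRANULARITY_BITS;
  constexpr std::size_t ENTRIES = std::size_t(1) << COVERED_BITS;

  struct FlatPagemap
  {
    std::atomic<std::uint8_t> top[ENTRIES] = {};

    std::uint8_t get(std::uintptr_t p)
    {
      return top[p >> GRANULARITY_BITS].load(std::memory_order_relaxed);
    }

    void set(std::uintptr_t p, std::uint8_t x)
    {
      top[p >> GRANULARITY_BITS].store(x, std::memory_order_relaxed);
    }

    void set_range(std::uintptr_t p, std::uint8_t x, std::size_t length)
    {
      std::size_t index = p >> GRANULARITY_BITS;
      while (length-- > 0)
        top[index++].store(x, std::memory_order_relaxed);
    }
  };
} // namespace flat_pagemap_sketch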
- * - * This class makes the global pagemap a static field so that its name - * includes the type mangling. If two compilation units try to instantiate - * two different types of pagemap then they will see two distinct pagemaps. - * This will prevent allocating with one and freeing with the other (because - * the memory will show up as not owned by any allocator in the other - * compilation unit) but will prevent the same memory being interpreted as - * having two different types. - * - * Simiarly, perhaps two modules wish to instantiate *different* pagemaps - * of the *same* type. Therefore, we add a `Purpose` parameter that can be - * used to pry symbols apart. By default, the `Purpose` is just the type of - * the pagemap; that is, pagemaps default to discrimination solely by their - * type. - */ - template - class GlobalPagemapTemplate - { - /** - * The global pagemap variable. The name of this symbol will include the - * type of `T` and `U`. - */ - SNMALLOC_FORCE_BSS - inline static T global_pagemap; - - public: - /** - * Returns the pagemap. - */ - static T& pagemap() - { - return global_pagemap; - } - }; - - /** - * Mixin used by `ChunkMap` and other `PageMap` consumers to access the global - * pagemap via a type-checked C interface. This should be used when another - * library (e.g. your C standard library) uses snmalloc and you wish to use a - * different configuration in your program or library, but wish to share a - * pagemap so that either version can deallocate memory. - * - * The `Purpose` parameter is as with `GlobalPgemapTemplate`. - */ - template< - typename T, - void* (*raw_get)(const PagemapConfig**), - typename Purpose = T> - class ExternalGlobalPagemapTemplate - { - /** - * A pointer to the pagemap. - */ - inline static T* external_pagemap; - - public: - /** - * Returns the exported pagemap. - * Accesses the pagemap via the C ABI accessor and casts it to - * the expected type, failing in cases of ABI mismatch. - */ - static T& pagemap() - { - if (external_pagemap == nullptr) - { - const snmalloc::PagemapConfig* c = nullptr; - void* raw_pagemap = raw_get(&c); - external_pagemap = T::cast_to_pagemap(raw_pagemap, c); - if (!external_pagemap) - { - Pal::error("Incorrect ABI of global pagemap."); - } - } - return *external_pagemap; - } - }; - -} // namespace snmalloc diff --git a/src/mem/pool.h b/src/mem/pool.h deleted file mode 100644 index e9e4c9993..000000000 --- a/src/mem/pool.h +++ /dev/null @@ -1,115 +0,0 @@ -#pragma once - -#include "../ds/flaglock.h" -#include "../ds/mpmcstack.h" -#include "../pal/pal_concept.h" -#include "pooled.h" - -namespace snmalloc -{ - /** - * Pool of a particular type of object. - * - * This pool will never return objects to the OS. It maintains a list of all - * objects ever allocated that can be iterated (not concurrency safe). Pooled - * types can be acquired from the pool, and released back to the pool. This is - * concurrency safe. - * - * This is used to bootstrap the allocation of allocators. 
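// The "static member of a class template" trick described above is worth
// seeing in isolation: because the variable's mangled name includes the
// template arguments, every distinct (T, Purpose) pair gets its own global,
// so two libraries that instantiate different pagemap types can never
// accidentally share one.  Minimal sketch (names are illustrative):
#include <cstdint>

namespace global_template_sketch
{
  template<typename T, typename Purpose = T>
  class Global
  {
    inline static T instance{};

  public:
    static T& get()
    {
      return instance;
    }
  };

  struct ForAllocatorA {};
  struct ForAllocatorB {};

  // Same payload type, different Purpose tags: two distinct globals, so the
  // two references below refer to different objects.
  inline std::uint64_t& a = Global<std::uint64_t, ForAllocatorA>::get();
  inline std::uint64_t& b = Global<std::uint64_t, ForAllocatorB>::get();
} // namespace global_template_sketch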
- */ - template - class Pool - { - private: - friend Pooled; - template - friend class MemoryProviderStateMixin; - friend SNMALLOC_DEFAULT_MEMORY_PROVIDER; - - std::atomic_flag lock = ATOMIC_FLAG_INIT; - MPMCStack stack; - T* list = nullptr; - - Pool(MemoryProvider& m) : memory_provider(m) {} - - public: - MemoryProvider& memory_provider; - - static Pool* make(MemoryProvider& memory_provider) noexcept - { - return memory_provider.template alloc_chunk( - memory_provider); - } - - static Pool* make() noexcept - { - return Pool::make(default_memory_provider()); - } - - template - T* acquire(Args&&... args) - { - T* p = stack.pop(); - - if (p != nullptr) - { - p->set_in_use(); - return p; - } - - p = memory_provider - .template alloc_chunk( - std::forward(args)...); - - FlagLock f(lock); - p->list_next = list; - list = p; - - p->set_in_use(); - return p; - } - - /** - * Return to the pool an object previously retrieved by `acquire` - * - * Do not return objects from `extract`. - */ - void release(T* p) - { - // The object's destructor is not run. If the object is "reallocated", it - // is returned without the constructor being run, so the object is reused - // without re-initialisation. - p->reset_in_use(); - stack.push(p); - } - - T* extract(T* p = nullptr) - { - // Returns a linked list of all objects in the stack, emptying the stack. - if (p == nullptr) - return stack.pop_all(); - - return p->next; - } - - /** - * Return to the pool a list of object previously retrieved by `extract` - * - * Do not return objects from `acquire`. - */ - void restore(T* first, T* last) - { - // Pushes a linked list of objects onto the stack. Use to put a linked - // list returned by extract back onto the stack. - stack.push(first, last); - } - - T* iterate(T* p = nullptr) - { - if (p == nullptr) - return list; - - return p->list_next; - } - }; -} // namespace snmalloc diff --git a/src/mem/ptrhelpers.h b/src/mem/ptrhelpers.h deleted file mode 100644 index e43119d13..000000000 --- a/src/mem/ptrhelpers.h +++ /dev/null @@ -1,98 +0,0 @@ -#pragma once - -#include "../aal/aal.h" -#include "../ds/ptrwrap.h" -#include "allocconfig.h" - -namespace snmalloc -{ - /* - * At various points, we do pointer math on high-authority pointers to find - * some metadata. `capptr_bound_chunkd` and `capptr_chunk_from_chunkd` - * encapsulate the notion that the result of these accesses is left unbounded - * in non-debug builds, because most codepaths do not reveal these pointers or - * any progeny to the application. However, in some cases we have already - * (partially) bounded these high-authority pointers (to CBChunk) and wish to - * preserve this annotation (rather than always returning a CBChunkD-annotated - * pointer); `capptr_bound_chunkd_bounds` does the computation for us and is - * used in the signatures of below and in those of wrappers around them. - */ - - template - constexpr capptr_bounds capptr_bound_chunkd_bounds() - { - switch (B) - { - case CBArena: - return CBChunkD; - case CBChunkD: - return CBChunkD; - case CBChunk: - return CBChunk; - } - } - - /** - * Construct an CapPtr from an CapPtr or - * CapPtr input. For an CapPtr input, simply pass - * it through (preserving the static notion of bounds). - * - * Applies bounds on debug builds, otherwise is just sleight of hand. - * - * Requires that `p` point at a multiple of `sz` (that is, at the base of a - * highly-aligned object) to avoid representability issues. 
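// A much-simplified analogue of the Pool above, using a mutex and
// std::vector instead of snmalloc's lock-free MPMC stack and intrusive
// lists.  It keeps the two properties the comment calls out: objects are
// never returned to the OS, and every object ever created stays reachable
// for iteration.  Illustrative only.
#include <memory>
#include <mutex>
#include <vector>

namespace pool_sketch
{
  template<typename T>
  class SimplePool
  {
    std::mutex lock;
    std::vector<std::unique_ptr<T>> all; // every object ever allocated
    std::vector<T*> free_list;           // objects currently released

  public:
    template<typename... Args>
    T* acquire(Args&&... args)
    {
      std::lock_guard<std::mutex> g(lock);
      if (!free_list.empty())
      {
        T* p = free_list.back();
        free_list.pop_back();
        return p; // reused without re-running the constructor
      }
      all.push_back(std::make_unique<T>(std::forward<Args>(args)...));
      return all.back().get();
    }

    void release(T* p)
    {
      std::lock_guard<std::mutex> g(lock);
      free_list.push_back(p); // destructor intentionally not run
    }

    // Visit every object ever created (not concurrency safe, as above).
    template<typename F>
    void iterate(F&& f)
    {
      for (auto& p : all)
        f(*p);
    }
  };
} // namespace pool_sketch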
- */ - template - SNMALLOC_FAST_PATH CapPtr()> - capptr_bound_chunkd(CapPtr p, size_t sz) - { - static_assert(B == CBArena || B == CBChunkD || B == CBChunk); - SNMALLOC_ASSERT((address_cast(p) % sz) == 0); - -#ifndef NDEBUG - // On Debug builds, apply bounds if not already there - if constexpr (B == CBArena) - return Aal::capptr_bound(p, sz); - else // quiesce MSVC's warnings about unreachable code below -#endif - { - UNUSED(sz); - return CapPtr()>(p.unsafe_capptr); - } - } - - /** - * Apply bounds that might not have been applied when constructing an - * CapPtr. That is, on non-debug builds, apply bounds; debug - * builds have already had them applied. - * - * Requires that `p` point at a multiple of `sz` (that is, at the base of a - * highly-aligned object) to avoid representability issues. - */ - template - SNMALLOC_FAST_PATH CapPtr - capptr_chunk_from_chunkd(CapPtr p, size_t sz) - { - SNMALLOC_ASSERT((address_cast(p) % sz) == 0); - -#ifndef NDEBUG - // On debug builds, CBChunkD are already bounded as if CBChunk. - UNUSED(sz); - return CapPtr(p.unsafe_capptr); -#else - // On non-debug builds, apply bounds now, as they haven't been already. - return Aal::capptr_bound(p, sz); -#endif - } - - /** - * Very rarely, while debugging, it's both useful and acceptable to forget - * that we have applied chunk bounds to something. - */ - template - SNMALLOC_FAST_PATH CapPtr - capptr_debug_chunkd_from_chunk(CapPtr p) - { - return CapPtr(p.unsafe_capptr); - } -} // namespace snmalloc diff --git a/src/mem/remoteallocator.h b/src/mem/remoteallocator.h deleted file mode 100644 index 2c04f1f59..000000000 --- a/src/mem/remoteallocator.h +++ /dev/null @@ -1,263 +0,0 @@ -#pragma once - -#include "../ds/mpscq.h" -#include "../mem/allocconfig.h" -#include "../mem/freelist.h" -#include "../mem/sizeclass.h" -#include "../mem/superslab.h" - -#include - -#ifdef CHECK_CLIENT -# define SNMALLOC_DONT_CACHE_ALLOCATOR_PTR -#endif - -namespace snmalloc -{ - /* - * A region of memory destined for a remote allocator's dealloc() via the - * message passing system. This structure is placed at the beginning of - * the allocation itself when it is queued for sending. - */ - struct Remote - { - using alloc_id_t = size_t; - union - { - CapPtr non_atomic_next; - AtomicCapPtr next{nullptr}; - }; - -#ifdef SNMALLOC_DONT_CACHE_ALLOCATOR_PTR - /** - * Cache the size class of the object to improve performance. - * - * This implementation does not cache the allocator id due to security - * concerns. Alternative implementations may store the allocator - * id, so that amplification costs can be mitigated on CHERI with MTE. - */ - sizeclass_t sizeclasscache; -#else - /* This implementation assumes that storing the allocator ID in a freed - * object is not a security concern. Either we trust the code running on - * top of the allocator, or additional security measure are in place such - * as MTE + CHERI. - * - * We embed the size class in the bottom 8 bits of an allocator ID (i.e., - * the address of an Alloc's remote_alloc's message_queue; in practice we - * only need 7 bits, but using 8 is conjectured to be faster). The hashing - * algorithm of the Alloc's RemoteCache already ignores the bottom - * "initial_shift" bits, which is, in practice, well above 8. There's a - * static_assert() over there that helps ensure this stays true. - * - * This does mean that we might have message_queues that always collide in - * the hash algorithm, if they're within "initial_shift" of each other. 
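// The packing described above relies only on the target allocator's ID being
// aligned well beyond 8 bits, so the size class can ride in the low byte.  A
// standalone sketch of the encode/decode (SIZECLASS_MASK of 0xff matches the
// constant used elsewhere in this diff; the example ID is made up):
#include <cstddef>
#include <cstdint>

namespace remote_packing_sketch
{
  constexpr std::uintptr_t SIZECLASS_MASK = 0xff;

  constexpr std::uintptr_t pack(std::uintptr_t alloc_id, std::size_t sizeclass)
  {
    // Low 8 bits of a sufficiently aligned allocator ID are known to be zero.
    return (alloc_id & ~SIZECLASS_MASK) | sizeclass;
  }

  constexpr std::uintptr_t alloc_id_of(std::uintptr_t packed)
  {
    return packed & ~SIZECLASS_MASK;
  }

  constexpr std::size_t sizeclass_of(std::uintptr_t packed)
  {
    return packed & SIZECLASS_MASK;
  }

  static_assert(alloc_id_of(pack(0x7f3200, 37)) == 0x7f3200);
  static_assert(sizeclass_of(pack(0x7f3200, 37)) == 37);
} // namespace remote_packing_sketch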
Such - * pairings will substantially decrease performance and so we prohibit them - * and use SNMALLOC_ASSERT to verify that they do not exist in debug builds. - */ - alloc_id_t alloc_id_and_sizeclass; -#endif - - /** - * Set up a remote object. Potentially cache sizeclass and allocator id. - */ - void set_info(alloc_id_t id, sizeclass_t sc) - { -#ifdef SNMALLOC_DONT_CACHE_ALLOCATOR_PTR - UNUSED(id); - sizeclasscache = sc; -#else - alloc_id_and_sizeclass = (id & ~SIZECLASS_MASK) | sc; -#endif - } - - /** - * Return allocator for this object. This may perform amplification. - */ - template - static alloc_id_t - trunc_target_id(CapPtr r, LargeAlloc* large_allocator) - { -#ifdef SNMALLOC_DONT_CACHE_ALLOCATOR_PTR - // Rederive allocator id. - auto r_auth = large_allocator->template capptr_amplify(r); - auto super = Superslab::get(r_auth); - return super->get_allocator()->trunc_id(); -#else - UNUSED(large_allocator); - return r->alloc_id_and_sizeclass & ~SIZECLASS_MASK; -#endif - } - - sizeclass_t sizeclass() - { -#ifdef SNMALLOC_DONT_CACHE_ALLOCATOR_PTR - return sizeclasscache; -#else - return alloc_id_and_sizeclass & SIZECLASS_MASK; -#endif - } - - /** Zero out a Remote tracking structure, return pointer to object base */ - template - SNMALLOC_FAST_PATH static CapPtr clear(CapPtr self) - { - pal_zero(self, sizeof(Remote)); - return self.as_void(); - } - }; - - static_assert( - sizeof(Remote) <= MIN_ALLOC_SIZE, - "Needs to be able to fit in smallest allocation."); - - struct RemoteAllocator - { - using alloc_id_t = Remote::alloc_id_t; - // Store the message queue on a separate cacheline. It is mutable data that - // is read by other threads. - alignas(CACHELINE_SIZE) - MPSCQ message_queue; - - alloc_id_t trunc_id() - { - return static_cast( - reinterpret_cast(&message_queue)) & - ~SIZECLASS_MASK; - } - }; - - /* - * A singly-linked list of Remote objects, supporting append and - * take-all operations. Intended only for the private use of this - * allocator; the Remote objects here will later be taken and pushed - * to the inter-thread message queues. - */ - struct RemoteList - { - /* - * A stub Remote object that will always be the head of this list; - * never taken for further processing. - */ - Remote head{}; - - CapPtr last{&head}; - - void clear() - { - last = CapPtr(&head); - } - - bool empty() - { - return address_cast(last) == address_cast(&head); - } - }; - - struct RemoteCache - { - /** - * The total amount of memory we are waiting for before we will dispatch - * to other allocators. Zero or negative mean we should dispatch on the - * next remote deallocation. This is initialised to the 0 so that we - * always hit a slow path to start with, when we hit the slow path and - * need to dispatch everything, we can check if we are a real allocator - * and lazily provide a real allocator. - */ - int64_t capacity{0}; - std::array list{}; - - /// Used to find the index into the array of queues for remote - /// deallocation - /// r is used for which round of sending this is. 
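// The slot selection documented above peels successive bit-fields out of the
// allocator ID, skipping the low bits that are constant due to alignment.  A
// standalone sketch with assumed widths (6 slot bits, 64 slots; the shift of
// 13 stands in for next_pow2_bits(sizeof(Alloc))):
#include <cstddef>

namespace remote_slot_sketch
{
  constexpr std::size_t REMOTE_SLOT_BITS = 6;
  constexpr std::size_t REMOTE_SLOTS = std::size_t(1) << REMOTE_SLOT_BITS;
  constexpr std::size_t REMOTE_MASK = REMOTE_SLOTS - 1;
  constexpr std::size_t INITIAL_SHIFT = 13; // assumed log2(sizeof(Alloc))

  // `r` is the sending round: each round looks at the next field of bits, so
  // queues that collide in one round spread out in the next.
  constexpr std::size_t get_slot(std::size_t id, std::size_t r)
  {
    return (id >> (INITIAL_SHIFT + r * REMOTE_SLOT_BITS)) & REMOTE_MASK;
  }

  static_assert(get_slot(0xABCDE000, 0) != get_slot(0xABCDE000, 1));
} // namespace remote_slot_sketch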
- template - inline size_t get_slot(size_t id, size_t r) - { - constexpr size_t allocator_size = sizeof(Alloc); - constexpr size_t initial_shift = - bits::next_pow2_bits_const(allocator_size); - static_assert( - initial_shift >= 8, - "Can't embed sizeclass_t into allocator ID low bits"); - SNMALLOC_ASSERT((initial_shift + (r * REMOTE_SLOT_BITS)) < 64); - return (id >> (initial_shift + (r * REMOTE_SLOT_BITS))) & REMOTE_MASK; - } - - template - SNMALLOC_FAST_PATH void dealloc( - Remote::alloc_id_t target_id, - CapPtr p, - sizeclass_t sizeclass) - { - this->capacity -= sizeclass_to_size(sizeclass); - auto r = p.template as_reinterpret(); - - r->set_info(target_id, sizeclass); - - RemoteList* l = &list[get_slot(target_id, 0)]; - l->last->non_atomic_next = r; - l->last = r; - } - - template - void post(Alloc* allocator, Remote::alloc_id_t id) - { - // When the cache gets big, post lists to their target allocators. - capacity = REMOTE_CACHE; - - size_t post_round = 0; - - while (true) - { - auto my_slot = get_slot(id, post_round); - - for (size_t i = 0; i < REMOTE_SLOTS; i++) - { - if (i == my_slot) - continue; - - RemoteList* l = &list[i]; - CapPtr first = l->head.non_atomic_next; - - if (!l->empty()) - { - // Send all slots to the target at the head of the list. - auto first_auth = - allocator->large_allocator.template capptr_amplify(first); - auto super = Superslab::get(first_auth); - super->get_allocator()->message_queue.enqueue(first, l->last); - l->clear(); - } - } - - RemoteList* resend = &list[my_slot]; - if (resend->empty()) - break; - - // Entries could map back onto the "resend" list, - // so take copy of the head, mark the last element, - // and clear the original list. - CapPtr r = resend->head.non_atomic_next; - resend->last->non_atomic_next = nullptr; - resend->clear(); - - post_round++; - - while (r != nullptr) - { - // Use the next N bits to spread out remote deallocs in our own - // slot. - size_t slot = get_slot( - Remote::trunc_target_id(r, &allocator->large_allocator), - post_round); - RemoteList* l = &list[slot]; - l->last->non_atomic_next = r; - l->last = r; - - r = r->non_atomic_next; - } - } - } - }; - -} // namespace snmalloc diff --git a/src/mem/sizeclass.h b/src/mem/sizeclass.h deleted file mode 100644 index 1a94cb573..000000000 --- a/src/mem/sizeclass.h +++ /dev/null @@ -1,89 +0,0 @@ -#pragma once - -#include "../pal/pal.h" -#include "allocconfig.h" - -namespace snmalloc -{ - // Both usings should compile - // We use size_t as it generates better code. - using sizeclass_t = size_t; - // using sizeclass_t = uint8_t; - using sizeclass_compress_t = uint8_t; - - constexpr static uintptr_t SIZECLASS_MASK = 0xFF; - - constexpr static uint16_t get_initial_offset(sizeclass_t sc, bool is_short); - constexpr static uint16_t get_slab_capacity(sizeclass_t sc, bool is_short); - - constexpr static size_t sizeclass_to_size(sizeclass_t sizeclass); - constexpr static uint16_t medium_slab_free(sizeclass_t sizeclass); - static sizeclass_t size_to_sizeclass(size_t size); - - constexpr static inline sizeclass_t size_to_sizeclass_const(size_t size) - { - // Don't use sizeclasses that are not a multiple of the alignment. - // For example, 24 byte allocations can be - // problematic for some data due to alignment issues. 
- auto sc = static_cast( - bits::to_exp_mant_const(size)); - - SNMALLOC_ASSERT(sc == static_cast(sc)); - - return sc; - } - - constexpr static inline size_t large_sizeclass_to_size(uint8_t large_class) - { - return bits::one_at_bit(large_class + SUPERSLAB_BITS); - } - - // Small classes range from [MIN, SLAB], i.e. inclusive. - static constexpr size_t NUM_SMALL_CLASSES = - size_to_sizeclass_const(bits::one_at_bit(SLAB_BITS)) + 1; - - static constexpr size_t NUM_SIZECLASSES = - size_to_sizeclass_const(SUPERSLAB_SIZE); - - // Medium classes range from (SLAB, SUPERSLAB), i.e. non-inclusive. - static constexpr size_t NUM_MEDIUM_CLASSES = - NUM_SIZECLASSES - NUM_SMALL_CLASSES; - - // Large classes range from [SUPERSLAB, ADDRESS_SPACE). - static constexpr size_t NUM_LARGE_CLASSES = - bits::ADDRESS_BITS - SUPERSLAB_BITS; - - SNMALLOC_FAST_PATH static size_t aligned_size(size_t alignment, size_t size) - { - // Client responsible for checking alignment is not zero - SNMALLOC_ASSERT(alignment != 0); - // Client responsible for checking alignment is a power of two - SNMALLOC_ASSERT(bits::is_pow2(alignment)); - - return ((alignment - 1) | (size - 1)) + 1; - } - - SNMALLOC_FAST_PATH static size_t round_size(size_t size) - { - if (size > sizeclass_to_size(NUM_SIZECLASSES - 1)) - { - return bits::next_pow2(size); - } - if (size == 0) - { - size = 1; - } - return sizeclass_to_size(size_to_sizeclass(size)); - } - - // Uses table for reciprocal division, so provide forward reference. - static bool is_multiple_of_sizeclass(sizeclass_t sc, size_t offset); - - /// Returns the alignment that this size naturally has, that is - /// all allocations of size `size` will be aligned to the returned value. - SNMALLOC_FAST_PATH static size_t natural_alignment(size_t size) - { - auto rsize = round_size(size); - return bits::one_at_bit(bits::ctz(rsize)); - } -} // namespace snmalloc diff --git a/src/mem/sizeclasstable.h b/src/mem/sizeclasstable.h deleted file mode 100644 index 322fa2f3c..000000000 --- a/src/mem/sizeclasstable.h +++ /dev/null @@ -1,211 +0,0 @@ -#pragma once - -#include "../ds/helpers.h" -#include "superslab.h" - -namespace snmalloc -{ - constexpr size_t PTR_BITS = bits::next_pow2_bits_const(sizeof(void*)); - - constexpr static SNMALLOC_PURE size_t sizeclass_lookup_index(const size_t s) - { - // We subtract and shirt to reduce the size of the table, i.e. we don't have - // to store a value for every size class. - // We could shift by MIN_ALLOC_BITS, as this would give us the most - // compressed table, but by shifting by PTR_BITS the code-gen is better - // as the most important path using this subsequently shifts left by - // PTR_BITS, hence they can be fused into a single mask. - return (s - 1) >> PTR_BITS; - } - - constexpr static size_t sizeclass_lookup_size = - sizeclass_lookup_index(SLAB_SIZE + 1); - - struct SizeClassTable - { - sizeclass_t sizeclass_lookup[sizeclass_lookup_size] = {{}}; - ModArray size; - ModArray initial_offset_ptr; - ModArray short_initial_offset_ptr; - ModArray capacity; - ModArray short_capacity; - ModArray medium_slab_slots; - // Table of constants for reciprocal division for each sizeclass. - ModArray div_mult; - // Table of constants for reciprocal modulus for each sizeclass. 
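// The expression `((alignment - 1) | (size - 1)) + 1` in aligned_size above
// is a branch-free way to round `size` up to a multiple of a power-of-two
// `alignment` that is also at least `alignment`.  A standalone check of that
// identity (values chosen arbitrarily):
#include <cstddef>

namespace aligned_size_sketch
{
  constexpr std::size_t aligned_size(std::size_t alignment, std::size_t size)
  {
    // Precondition (as in the original): alignment is a non-zero power of 2.
    return ((alignment - 1) | (size - 1)) + 1;
  }

  static_assert(aligned_size(16, 1) == 16);
  static_assert(aligned_size(16, 16) == 16);
  static_assert(aligned_size(16, 17) == 32);
  static_assert(aligned_size(64, 100) == 128);
  static_assert(aligned_size(8, 24) == 24);
} // namespace aligned_size_sketch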
- ModArray mod_mult; - - constexpr SizeClassTable() - : size(), - initial_offset_ptr(), - short_initial_offset_ptr(), - capacity(), - short_capacity(), - medium_slab_slots(), - div_mult(), - mod_mult() - { - size_t curr = 1; - for (sizeclass_t sizeclass = 0; sizeclass < NUM_SIZECLASSES; sizeclass++) - { - size[sizeclass] = - bits::from_exp_mant(sizeclass); - - div_mult[sizeclass] = - (bits::one_at_bit(bits::BITS - SUPERSLAB_BITS) / - (size[sizeclass] / MIN_ALLOC_SIZE)); - if (!bits::is_pow2(size[sizeclass])) - div_mult[sizeclass]++; - - mod_mult[sizeclass] = - (bits::one_at_bit(bits::BITS - 1) / size[sizeclass]); - if (!bits::is_pow2(size[sizeclass])) - mod_mult[sizeclass]++; - // Shift multiplier, so that the result of division completely - // overflows, and thus the top SUPERSLAB_BITS will be zero if the mod is - // zero. - mod_mult[sizeclass] *= 2; - - if (sizeclass < NUM_SMALL_CLASSES) - { - for (; curr <= size[sizeclass]; curr += 1 << PTR_BITS) - { - sizeclass_lookup[sizeclass_lookup_index(curr)] = sizeclass; - } - } - } - - size_t header_size = sizeof(Superslab); - size_t short_slab_size = SLAB_SIZE - header_size; - - for (sizeclass_t i = 0; i < NUM_SMALL_CLASSES; i++) - { - // We align to the end of the block to remove special cases for the - // short block. Calculate remainders - size_t short_correction = short_slab_size % size[i]; - size_t correction = SLAB_SIZE % size[i]; - - // First element in the block is the link - initial_offset_ptr[i] = static_cast(correction); - short_initial_offset_ptr[i] = - static_cast(header_size + short_correction); - - capacity[i] = static_cast( - (SLAB_SIZE - initial_offset_ptr[i]) / (size[i])); - short_capacity[i] = static_cast( - (SLAB_SIZE - short_initial_offset_ptr[i]) / (size[i])); - } - - for (sizeclass_t i = NUM_SMALL_CLASSES; i < NUM_SIZECLASSES; i++) - { - medium_slab_slots[i - NUM_SMALL_CLASSES] = static_cast( - (SUPERSLAB_SIZE - Mediumslab::header_size()) / size[i]); - } - } - }; - - static constexpr SizeClassTable sizeclass_metadata = SizeClassTable(); - - static inline constexpr uint16_t - get_initial_offset(sizeclass_t sc, bool is_short) - { - if (is_short) - return sizeclass_metadata.short_initial_offset_ptr[sc]; - - return sizeclass_metadata.initial_offset_ptr[sc]; - } - - static inline constexpr uint16_t - get_slab_capacity(sizeclass_t sc, bool is_short) - { - if (is_short) - return sizeclass_metadata.short_capacity[sc]; - - return sizeclass_metadata.capacity[sc]; - } - - constexpr static inline size_t sizeclass_to_size(sizeclass_t sizeclass) - { - return sizeclass_metadata.size[sizeclass]; - } - - static inline sizeclass_t size_to_sizeclass(size_t size) - { - if ((size - 1) <= (SLAB_SIZE - 1)) - { - auto index = sizeclass_lookup_index(size); - SNMALLOC_ASSUME(index <= sizeclass_lookup_index(SLAB_SIZE)); - return sizeclass_metadata.sizeclass_lookup[index]; - } - - // Don't use sizeclasses that are not a multiple of the alignment. - // For example, 24 byte allocations can be - // problematic for some data due to alignment issues. - return static_cast( - bits::to_exp_mant(size)); - } - - constexpr static inline uint16_t medium_slab_free(sizeclass_t sizeclass) - { - return sizeclass_metadata - .medium_slab_slots[(sizeclass - NUM_SMALL_CLASSES)]; - } - - inline static size_t round_by_sizeclass(sizeclass_t sc, size_t offset) - { - // Only works up to certain offsets, exhaustively tested upto - // SUPERSLAB_SIZE. 
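// The pattern above -- a constexpr constructor that fills in lookup tables so
// size-to-class queries become a single array read -- can be shown in
// miniature.  This toy table maps sizes 1..64 to four classes of 16/32/48/64
// bytes; the uniform spacing is an assumption made for brevity, whereas the
// real table is driven by the exponent/mantissa scheme.
#include <cstddef>
#include <cstdint>

namespace sizeclass_table_sketch
{
  constexpr std::size_t GRANULE = 16;
  constexpr std::size_t NUM_CLASSES = 4;
  constexpr std::size_t MAX_SIZE = GRANULE * NUM_CLASSES;

  struct Table
  {
    std::uint8_t lookup[MAX_SIZE] = {};
    std::size_t size[NUM_CLASSES] = {};

    constexpr Table()
    {
      for (std::size_t c = 0; c < NUM_CLASSES; c++)
      {
        size[c] = (c + 1) * GRANULE;
        for (std::size_t s = (c * GRANULE) + 1; s <= size[c]; s++)
          lookup[s - 1] = static_cast<std::uint8_t>(c);
      }
    }
  };

  inline constexpr Table table{};

  constexpr std::size_t size_to_class(std::size_t s)
  {
    return table.lookup[s - 1]; // valid for 1 <= s <= MAX_SIZE
  }

  static_assert(table.size[size_to_class(1)] == 16);
  static_assert(table.size[size_to_class(17)] == 32);
  static_assert(table.size[size_to_class(48)] == 48);
  static_assert(table.size[size_to_class(64)] == 64);
} // namespace sizeclass_table_sketch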
- SNMALLOC_ASSERT(offset <= SUPERSLAB_SIZE); - - auto rsize = sizeclass_to_size(sc); - - if constexpr (bits::is64()) - { - // Only works for 64 bit multiplication, as the following will overflow in - // 32bit. - // The code is using reciprocal division. If SUPERSLABS - // get larger then we should review this code. For 24 bits, there are in - // sufficient bits to do this completely efficiently as 24 * 3 is larger - // than 64 bits. But we can pre-round by MIN_ALLOC_SIZE which gets us an - // extra 4 * 3 bits, and thus achievable in 64bit multiplication. - static_assert( - SUPERSLAB_BITS <= 24, "The following code assumes max of 24 bits"); - - return (((offset >> MIN_ALLOC_BITS) * sizeclass_metadata.div_mult[sc]) >> - (bits::BITS - SUPERSLAB_BITS)) * - rsize; - } - else - // Use 32-bit division as considerably faster than 64-bit, and - // everything fits into 32bits here. - return static_cast(offset / rsize) * rsize; - } - - inline static bool is_multiple_of_sizeclass(sizeclass_t sc, size_t offset) - { - // Only works up to certain offsets, exhaustively tested upto - // SUPERSLAB_SIZE. - SNMALLOC_ASSERT(offset <= SUPERSLAB_SIZE); - - if constexpr (bits::is64()) - { - // Only works for 64 bit multiplication, as the following will overflow in - // 32bit. - // The code is using reciprocal division. If SUPERSLABS - // get larger then we should review this code. The modulus code - // has fewer restrictions than division, as it only requires the - // square of the offset to be representable. - static_assert( - SUPERSLAB_BITS <= 24, "The following code assumes max of 24 bits"); - static constexpr size_t MASK = - ~(bits::one_at_bit(bits::BITS - 1 - SUPERSLAB_BITS) - 1); - - return ((offset * sizeclass_metadata.mod_mult[sc]) & MASK) == 0; - } - else - // Use 32-bit division as considerably faster than 64-bit, and - // everything fits into 32bits here. - return static_cast(offset % sizeclass_to_size(sc)) == 0; - } - -} // namespace snmalloc diff --git a/src/mem/slab.h b/src/mem/slab.h deleted file mode 100644 index d8f8b1f81..000000000 --- a/src/mem/slab.h +++ /dev/null @@ -1,197 +0,0 @@ -#pragma once - -#include "freelist.h" -#include "ptrhelpers.h" -#include "superslab.h" - -#include - -namespace snmalloc -{ - class Slab - { - private: - uint16_t address_to_index(address_t p) - { - // Get the offset from the slab for a memory location. - return static_cast(p - address_cast(this)); - } - - public: - template - static CapPtr get_meta(CapPtr self) - { - static_assert(B == CBArena || B == CBChunkD || B == CBChunk); - - auto super = Superslab::get(self); - return super->get_meta(self); - } - - /** - * Given a bumpptr and a fast_free_list head reference, builds a new free - * list, and stores it in the fast_free_list. It will only create a page - * worth of allocations, or one if the allocation size is larger than a - * page. - */ - static SNMALLOC_FAST_PATH void alloc_new_list( - CapPtr& bumpptr, - FreeListIter& fast_free_list, - size_t rsize, - LocalEntropy& entropy) - { - auto slab_end = pointer_align_up(pointer_offset(bumpptr, 1)); - - FreeListBuilder b; - SNMALLOC_ASSERT(b.empty()); - - b.open(bumpptr); - -#ifdef CHECK_CLIENT - // Structure to represent the temporary list elements - struct PreAllocObject - { - CapPtr next; - }; - // The following code implements Sattolo's algorithm for generating - // random cyclic permutations. This implementation is in the opposite - // direction, so that the original space does not need initialising. 
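// Reciprocal division, as used above: replace `offset / size` with a
// multiply and a shift using a precomputed constant, then verify the
// replacement exhaustively over the bounded range it is used for (the real
// code notes it is exhaustively tested up to SUPERSLAB_SIZE).  The constants
// below -- a 24-bit offset range and one example size of 48 -- are
// assumptions for the sketch.
#include <cassert>
#include <cstdint>

namespace recip_div_sketch
{
  constexpr unsigned RANGE_BITS = 24;         // offsets < 2^24
  constexpr std::uint64_t SIZE = 48;          // example size class size
  constexpr unsigned SHIFT = 40;              // 64 - RANGE_BITS
  constexpr std::uint64_t MULT =
    ((std::uint64_t(1) << SHIFT) / SIZE) + 1; // rounded-up reciprocal

  inline std::uint64_t round_down(std::uint64_t offset)
  {
    // offset - (offset % SIZE), computed without a division instruction.
    return ((offset * MULT) >> SHIFT) * SIZE;
  }

  inline void exhaustive_check()
  {
    for (std::uint64_t offset = 0; offset < (std::uint64_t(1) << RANGE_BITS);
         offset++)
      assert(round_down(offset) == (offset / SIZE) * SIZE);
  }
} // namespace recip_div_sketch

int main()
{
  recip_div_sketch::exhaustive_check();
}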
This - // is described as outside-in without citation on Wikipedia, appears to be - // Folklore algorithm. - - // Note the wide bounds on curr relative to each of the ->next fields; - // curr is not persisted once the list is built. - CapPtr curr = - pointer_offset(bumpptr, 0).template as_static(); - curr->next = Aal::capptr_bound(curr, rsize); - - uint16_t count = 1; - for (curr = - pointer_offset(curr, rsize).template as_static(); - curr.as_void() < slab_end; - curr = - pointer_offset(curr, rsize).template as_static()) - { - size_t insert_index = entropy.sample(count); - curr->next = std::exchange( - pointer_offset(bumpptr, insert_index * rsize) - .template as_static() - ->next, - Aal::capptr_bound(curr, rsize)); - count++; - } - - // Pick entry into space, and then build linked list by traversing cycle - // to the start. Use ->next to jump from CBArena to CBAlloc. - auto start_index = entropy.sample(count); - auto start_ptr = pointer_offset(bumpptr, start_index * rsize) - .template as_static() - ->next; - auto curr_ptr = start_ptr; - do - { - b.add(FreeObject::make(curr_ptr.as_void()), entropy); - curr_ptr = curr_ptr->next; - } while (curr_ptr != start_ptr); -#else - for (auto p = bumpptr; p < slab_end; p = pointer_offset(p, rsize)) - { - b.add(Aal::capptr_bound(p, rsize), entropy); - } -#endif - // This code consumes everything up to slab_end. - bumpptr = slab_end; - - SNMALLOC_ASSERT(!b.empty()); - b.close(fast_free_list, entropy); - } - - // Returns true, if it deallocation can proceed without changing any status - // bits. Note that this does remove the use from the meta slab, so it - // doesn't need doing on the slow path. - static SNMALLOC_FAST_PATH bool dealloc_fast( - CapPtr self, - CapPtr super, - CapPtr p, - LocalEntropy& entropy) - { - auto meta = super->get_meta(self); - SNMALLOC_ASSERT(!meta->is_unused()); - - if (unlikely(meta->return_object())) - return false; - - // Update the head and the next pointer in the free list. - meta->free_queue.add(p, entropy); - - return true; - } - - // If dealloc fast returns false, then call this. - // This does not need to remove the "use" as done by the fast path. - // Returns a complex return code for managing the superslab meta data. - // i.e. This deallocation could make an entire superslab free. - static SNMALLOC_SLOW_PATH typename Superslab::Action dealloc_slow( - CapPtr self, - SlabList* sl, - CapPtr super, - CapPtr p, - LocalEntropy& entropy) - { - auto meta = super->get_meta(self); - meta->debug_slab_invariant(self, entropy); - - if (meta->is_full()) - { - auto allocated = get_slab_capacity( - meta->sizeclass(), - Metaslab::is_short( - Metaslab::get_slab(Aal::capptr_rebound(super.as_void(), p)))); - // We are not on the sizeclass list. - if (allocated == 1) - { - // Dealloc on the superslab. - if (Metaslab::is_short(self)) - return super->dealloc_short_slab(); - - return super->dealloc_slab(self); - } - - meta->free_queue.add(p, entropy); - // Remove trigger threshold from how many we need before we have fully - // freed the slab. - meta->needed() = - allocated - meta->threshold_for_waking_slab(Metaslab::is_short(self)); - - // Push on the list of slabs for this sizeclass. - // ChunkD-to-Chunk conversion might apply bounds, so we need to do so to - // the aligned object and then shift over to these bounds. 
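// The CHECK_CLIENT branch above builds a random *cyclic* permutation
// "outside-in" (an inside-out variant of Sattolo's algorithm), so freshly
// mapped, uninitialised memory never has to be pre-written.  The same idea
// on a plain index array, with std::mt19937 standing in for snmalloc's
// LocalEntropy (illustrative only):
#include <cassert>
#include <cstddef>
#include <random>
#include <vector>

namespace sattolo_sketch
{
  // next[i] gives the successor of i in one random cycle over 0..n-1.
  inline std::vector<std::size_t> random_cycle(std::size_t n, std::mt19937& rng)
  {
    std::vector<std::size_t> next(n);
    next[0] = 0; // a single element forms a self-cycle
    for (std::size_t i = 1; i < n; i++)
    {
      std::uniform_int_distribution<std::size_t> pick(0, i - 1);
      std::size_t j = pick(rng);
      next[i] = next[j]; // splice i into the cycle right after j
      next[j] = i;
    }
    return next;
  }
} // namespace sattolo_sketch

int main()
{
  std::mt19937 rng(1234);
  auto next = sattolo_sketch::random_cycle(64, rng);

  // Walking the cycle from any start visits every element exactly once.
  std::vector<bool> seen(64, false);
  std::size_t curr = 17;
  for (std::size_t steps = 0; steps < 64; steps++)
  {
    assert(!seen[curr]);
    seen[curr] = true;
    curr = next[curr];
  }
  assert(curr == 17);
}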
- auto super_chunk = capptr_chunk_from_chunkd(super, SUPERSLAB_SIZE); - auto metalink = Aal::capptr_rebound( - super_chunk.as_void(), meta.template as_static()); - sl->insert_prev(metalink); - meta->debug_slab_invariant(self, entropy); - return Superslab::NoSlabReturn; - } - -#ifdef CHECK_CLIENT - size_t count = 1; - // Check free list is well-formed on platforms with - // integers as pointers. - FreeListIter fl; - meta->free_queue.close(fl, entropy); - - while (!fl.empty()) - { - fl.take(entropy); - count++; - } -#endif - - meta->remove(); - - if (Metaslab::is_short(self)) - return super->dealloc_short_slab(); - return super->dealloc_slab(self); - } - }; -} // namespace snmalloc diff --git a/src/mem/superslab.h b/src/mem/superslab.h deleted file mode 100644 index 27775034c..000000000 --- a/src/mem/superslab.h +++ /dev/null @@ -1,272 +0,0 @@ -#pragma once - -#include "../ds/helpers.h" -#include "allocslab.h" -#include "metaslab.h" - -#include - -namespace snmalloc -{ - /** - * Superslabs are, to first approximation, a `CHUNK_SIZE`-sized and -aligned - * region of address space, internally composed of a header (a `Superslab` - * structure) followed by an array of `Slab`s, each `SLAB_SIZE`-sized and - * -aligned. Each active `Slab` holds an array of identically sized - * allocations strung on an invasive free list, which is lazily constructed - * from a bump-pointer allocator (see `Metaslab::alloc_new_list`). - * - * In order to minimize overheads, Slab metadata is held externally, in - * `Metaslab` structures; all `Metaslab`s for the Slabs within a Superslab are - * densely packed within the `Superslab` structure itself. Moreover, as the - * `Superslab` structure is typically much smaller than `SLAB_SIZE`, a "short - * Slab" is overlaid with the `Superslab`. This short Slab can hold only - * allocations that are smaller than the `SLAB_SIZE - sizeof(Superslab)` - * bytes; see `Superslab::is_short_sizeclass`. The Metaslab state for a short - * slabs is constructed in a way that avoids branches on fast paths; - * effectively, the object slots that overlay the `Superslab` at the start are - * omitted from consideration. - */ - class Superslab : public Allocslab - { - private: - friend DLList; - - // Keep the allocator pointer on a separate cache line. It is read by - // other threads, and does not change, so we avoid false sharing. - alignas(CACHELINE_SIZE) - // The superslab is kept on a doubly linked list of superslabs which - // have some space. - CapPtr next; - CapPtr prev; - - // This is a reference to the first unused slab in the free slab list - // It is does not contain the short slab, which is handled using a bit - // in the "used" field below. The list is terminated by pointing to - // the short slab. - // The head linked list has an absolute pointer for head, but the next - // pointers stores in the metaslabs are relative pointers, that is they - // are the relative offset to the next entry minus 1. This means that - // all zeros is a list that chains through all the blocks, so the zero - // initialised memory requires no more work. - Mod head; - - // Represents twice the number of full size slabs used - // plus 1 for the short slab. i.e. 
using 3 slabs and the - // short slab would be 6 + 1 = 7 - uint16_t used; - - ModArray meta; - - // Used size_t as results in better code in MSVC - template - size_t slab_to_index(CapPtr slab) - { - auto res = (pointer_diff(this, slab.unsafe_capptr) >> SLAB_BITS); - SNMALLOC_ASSERT(res == static_cast(res)); - return static_cast(res); - } - - public: - enum Status - { - Full, - Available, - OnlyShortSlabAvailable, - Empty - }; - - enum Action - { - NoSlabReturn = 0, - NoStatusChange = 1, - StatusChange = 2 - }; - - /** - * Given a highly-privileged pointer pointing to or within an object in - * this slab, return a pointer to the slab headers. - * - * In debug builds on StrictProvenance architectures, we will enforce the - * slab bounds on this returned pointer. In non-debug builds, we will - * return a highly-privileged pointer (i.e., CBArena) instead as these - * pointers are not exposed from the allocator. - */ - template - static SNMALLOC_FAST_PATH CapPtr()> - get(CapPtr p) - { - static_assert(B == CBArena || B == CBChunkD || B == CBChunk); - - return capptr_bound_chunkd( - pointer_align_down(p.as_void()), - SUPERSLAB_SIZE); - } - - static bool is_short_sizeclass(sizeclass_t sizeclass) - { - static_assert(SLAB_SIZE > sizeof(Superslab), "Meta data requires this."); - /* - * size_to_sizeclass_const rounds *up* and returns the smallest class that - * could contain (and so may be larger than) the free space available for - * the short slab. While we could detect the exact fit case and compare - * `<= h` therein, it's simpler to just treat this class as a strict upper - * bound and only permit strictly smaller classes in short slabs. - */ - constexpr sizeclass_t h = - size_to_sizeclass_const(SLAB_SIZE - sizeof(Superslab)); - return sizeclass < h; - } - - void init(RemoteAllocator* alloc) - { - allocator = alloc; - - // If Superslab is larger than a page, then we cannot guarantee it still - // has a valid layout as the subsequent pages could have been freed and - // zeroed, hence only skip initialisation if smaller. - if (kind != Super || (sizeof(Superslab) >= OS_PAGE_SIZE)) - { - if (kind != Fresh) - { - // If this wasn't previously Fresh, we need to zero some things. - used = 0; - for (size_t i = 0; i < SLAB_COUNT; i++) - { - new (&(meta[i])) Metaslab(); - } - } - - // If this wasn't previously a Superslab, we need to set up the - // header. - kind = Super; - // Point head at the first non-short slab. 
- head = 1; - } - -#ifndef NDEBUG - auto curr = head; - for (size_t i = 0; i < SLAB_COUNT - used - 1; i++) - { - curr = (curr + meta[curr].next() + 1) & (SLAB_COUNT - 1); - } - if (curr != 0) - abort(); - - for (size_t i = 0; i < SLAB_COUNT; i++) - { - SNMALLOC_ASSERT(meta[i].is_unused()); - } -#endif - } - - bool is_empty() - { - return used == 0; - } - - bool is_full() - { - return (used == (((SLAB_COUNT - 1) << 1) + 1)); - } - - bool is_almost_full() - { - return (used >= ((SLAB_COUNT - 1) << 1)); - } - - Status get_status() - { - if (!is_almost_full()) - { - if (!is_empty()) - { - return Available; - } - - return Empty; - } - - if (!is_full()) - { - return OnlyShortSlabAvailable; - } - - return Full; - } - - template - CapPtr get_meta(CapPtr slab) - { - return CapPtr(&meta[slab_to_index(slab)]); - } - - static CapPtr - alloc_short_slab(CapPtr self, sizeclass_t sizeclass) - { - if ((self->used & 1) == 1) - return alloc_slab(self, sizeclass); - - auto slab = self.template as_reinterpret(); - auto& metaz = self->meta[0]; - - metaz.initialise(sizeclass, slab); - - self->used++; - return slab; - } - - static CapPtr - alloc_slab(CapPtr self, sizeclass_t sizeclass) - { - uint8_t h = self->head; - auto slab = pointer_offset(self, (static_cast(h) << SLAB_BITS)) - .template as_static(); - - auto& metah = self->meta[h]; - uint8_t n = metah.next(); - - metah.initialise(sizeclass, slab); - - self->head = h + n + 1; - self->used += 2; - - return slab; - } - - // Returns true, if this alters the value of get_status - template - Action dealloc_slab(CapPtr slab) - { - static_assert(B == CBArena || B == CBChunkD || B == CBChunk); - - // This is not the short slab. - uint8_t index = static_cast(slab_to_index(slab)); - uint8_t n = head - index - 1; - - meta[index].next() = n; - head = index; - bool was_almost_full = is_almost_full(); - used -= 2; - - SNMALLOC_ASSERT(meta[index].is_unused()); - if (was_almost_full || is_empty()) - return StatusChange; - - return NoStatusChange; - } - - // Returns true, if this alters the value of get_status - Action dealloc_short_slab() - { - bool was_full = is_full(); - used--; - - SNMALLOC_ASSERT(meta[0].is_unused()); - if (was_full || is_empty()) - return StatusChange; - - return NoStatusChange; - } - }; -} // namespace snmalloc diff --git a/src/mem/threadalloc.h b/src/mem/threadalloc.h deleted file mode 100644 index 0fd0dc42b..000000000 --- a/src/mem/threadalloc.h +++ /dev/null @@ -1,310 +0,0 @@ -#pragma once - -#include "../ds/helpers.h" -#include "globalalloc.h" -#if defined(SNMALLOC_USE_THREAD_DESTRUCTOR) && \ - defined(SNMALLOC_USE_THREAD_CLEANUP) -#error At most one out of SNMALLOC_USE_THREAD_CLEANUP and SNMALLOC_USE_THREAD_DESTRUCTOR may be defined. -#endif - -extern "C" void _malloc_thread_cleanup(); - -namespace snmalloc -{ -#ifdef SNMALLOC_EXTERNAL_THREAD_ALLOC - /** - * Version of the `ThreadAlloc` interface that does no management of thread - * local state, and just assumes that "ThreadAllocUntyped::get" has been - * declared before including snmalloc.h. As it is included before, it cannot - * know the allocator type, hence the casting. - * - * This class is used only when snmalloc is compiled as part of a runtime, - * which has its own management of the thread local allocator pointer. 
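// The head/next encoding used by alloc_slab and dealloc_slab above is
// compact enough to model directly: each next value stores the distance to
// the following free slab minus one, so an all-zero array (which is what
// freshly mapped memory gives you) already encodes the list 1 -> 2 -> 3 ...
// Small standalone model with an assumed 8-slab superslab and the wrap-around
// masking omitted for brevity:
#include <cassert>
#include <cstdint>

namespace slab_freelist_sketch
{
  constexpr std::uint8_t SLAB_COUNT = 8;

  struct Model
  {
    std::uint8_t next[SLAB_COUNT] = {}; // zero-init == fully chained
    std::uint8_t head = 1;              // slab 0 is the short slab

    std::uint8_t alloc_slab()
    {
      std::uint8_t h = head;
      head = static_cast<std::uint8_t>(h + next[h] + 1);
      return h;
    }

    void dealloc_slab(std::uint8_t index)
    {
      next[index] = static_cast<std::uint8_t>(head - index - 1);
      head = index;
    }
  };
} // namespace slab_freelist_sketch

int main()
{
  slab_freelist_sketch::Model m;
  assert(m.alloc_slab() == 1); // zeroed next[] chains 1, 2, 3, ...
  assert(m.alloc_slab() == 2);
  assert(m.alloc_slab() == 3);
  m.dealloc_slab(2);           // push slab 2 back on the front of the list
  assert(m.alloc_slab() == 2);
  assert(m.alloc_slab() == 4);
}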
- */ - class ThreadAllocUntypedWrapper - { - protected: - static void register_cleanup() {} - - public: - static SNMALLOC_FAST_PATH Alloc* get_noncachable() - { - return (Alloc*)ThreadAllocUntyped::get(); - } - - static SNMALLOC_FAST_PATH Alloc* get() - { - return (Alloc*)ThreadAllocUntyped::get(); - } - }; - - /** - * Function passed as a template parameter to `Allocator` to allow lazy - * replacement. This function returns true, if the allocator passed in - * requires initialisation. As the TLS state is managed externally, - * this will always return false. - */ - SNMALLOC_FAST_PATH bool needs_initialisation(void* existing) - { - UNUSED(existing); - return false; - } - - /** - * Function passed as a template parameter to `Allocator` to allow lazy - * replacement. There is nothing to initialise in this case, so we expect - * this to never be called. - */ -# ifdef _MSC_VER -// 32Bit Windows release MSVC is determining this as having unreachable code for -// f(nullptr), which is true. But other platforms don't. Disabling the warning -// seems simplist. -# pragma warning(push) -# pragma warning(disable : 4702) -# endif - SNMALLOC_FAST_PATH void* init_thread_allocator(function_ref f) - { - error("Critical Error: This should never be called."); - return f(nullptr); - } -# ifdef _MSC_VER -# pragma warning(pop) -# endif - - using ThreadAlloc = ThreadAllocUntypedWrapper; -#else - /** - * A global fake allocator object. This never allocates memory and, as a - * result, never owns any slabs. On the slow paths, where it would fetch - * slabs to allocate from, it will discover that it is the placeholder and - * replace itself with the thread-local allocator, allocating one if - * required. This avoids a branch on the fast path. - * - * The fake allocator is a zero initialised area of memory of the correct - * size. All data structures used potentially before initialisation must be - * okay with zero init to move to the slow path, that is, zero must signify - * empty. - */ - inline const char GlobalPlaceHolder[sizeof(Alloc)] = {0}; - inline Alloc* get_GlobalPlaceHolder() - { - // This cast is not legal. Effectively, we want a minimal constructor - // for the global allocator as zero, and then a second constructor for - // the rest. This is UB. - auto a = reinterpret_cast(&GlobalPlaceHolder); - return const_cast(a); - } - - /** - * Common aspects of thread local allocator. Subclasses handle how releasing - * the allocator is triggered. - */ - class ThreadAllocCommon - { - friend void* init_thread_allocator(function_ref); - - protected: - /** - * Thread local variable that is set to true, once `inner_release` - * has been run. If we try to reinitialise the allocator once - * `inner_release` has run, then we can stay on the slow path so we don't - * leak allocators. - * - * This is required to allow for the allocator to be called during - * destructors of other thread_local state. - */ - inline static thread_local bool destructor_has_run = false; - - static inline void inner_release() - { - auto& per_thread = get_reference(); - if (per_thread != get_GlobalPlaceHolder()) - { - current_alloc_pool()->release(per_thread); - destructor_has_run = true; - per_thread = get_GlobalPlaceHolder(); - } - } - - /** - * Default clean up does nothing except print statistics if enabled. 
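// The placeholder trick above, reduced to its essentials: every thread's
// pointer starts out aimed at a shared dummy whose slow path performs the
// real initialisation, so the fast path never needs an "is this thread
// initialised yet?" branch of its own.  Simplified standalone model; the
// real code must also cope with TLS teardown ordering, which is omitted
// here.
#include <cassert>

namespace placeholder_sketch
{
  struct Allocator
  {
    int free_count = 0; // zero means "empty": always forces the slow path

    void* alloc_slow();

    void* alloc()
    {
      if (free_count == 0)
        return alloc_slow(); // the placeholder always lands here
      free_count--;
      return &free_count;    // stand-in for a real allocation
    }
  };

  inline Allocator global_placeholder; // shared dummy; never owns anything

  inline thread_local Allocator* current = &global_placeholder;

  inline void* Allocator::alloc_slow()
  {
    if (this == &global_placeholder)
    {
      // First allocation on this thread: install a real allocator and retry.
      current = new Allocator{16};
      return current->alloc();
    }
    // A real allocator would refill its free list here.
    free_count = 16;
    return alloc();
  }
} // namespace placeholder_sketch

int main()
{
  void* p = placeholder_sketch::current->alloc();
  assert(p != nullptr);
  assert(placeholder_sketch::current != &placeholder_sketch::global_placeholder);
}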
- */ - static bool register_cleanup() - { -# ifdef USE_SNMALLOC_STATS - Singleton::get(); -# endif - return false; - } - -# ifdef USE_SNMALLOC_STATS - static void print_stats() - { - Stats s; - current_alloc_pool()->aggregate_stats(s); - s.print(std::cout); - } - - static int atexit_print_stats() noexcept - { - return atexit(print_stats); - } -# endif - - public: - /** - * Returns a reference to the allocator for the current thread. This allows - * the caller to replace the current thread's allocator. - */ - static inline Alloc*& get_reference() - { - // Inline casting as codegen doesn't create a lazy init like this. - static thread_local Alloc* alloc = - const_cast(reinterpret_cast(&GlobalPlaceHolder)); - return alloc; - } - - /** - * Public interface, returns the allocator for this thread, constructing - * one if necessary. - * - * If no operations have been performed on an allocator returned by either - * `get()` nor `get_noncachable()`, then the value contained in the return - * will be an Alloc* that will always use the slow path. - * - * Only use this API if you intend to use the returned allocator just once - * per call, or if you know other calls have already been made to the - * allocator. - */ - static inline Alloc* get_noncachable() - { - return get_reference(); - } - - /** - * Public interface, returns the allocator for this thread, constructing - * one if necessary. - * This incurs a cost, so use `get_noncachable` if you can meet its - * criteria. - */ - static SNMALLOC_FAST_PATH Alloc* get() - { -# ifdef SNMALLOC_PASS_THROUGH - return get_reference(); -# else - auto*& alloc = get_reference(); - if (unlikely(needs_initialisation(alloc)) && !destructor_has_run) - { - // Call `init_thread_allocator` to perform down call in case - // register_clean_up does more. - // During teardown for the destructor based ThreadAlloc this will set - // alloc to GlobalPlaceHolder; - init_thread_allocator([](void*) { return nullptr; }); - } - return alloc; -# endif - } - }; - - /** - * Version of the `ThreadAlloc` interface that uses a hook provided by libc - * to destroy thread-local state. This is the ideal option, because it - * enforces ordering of destruction such that the malloc state is destroyed - * after anything that can allocate memory. - * - * This class is used only when snmalloc is compiled as part of a compatible - * libc (for example, FreeBSD libc). - */ - class ThreadAllocLibcCleanup : public ThreadAllocCommon - { - /** - * Libc will call `_malloc_thread_cleanup` just before a thread terminates. - * This function must be allowed to call back into this class to destroy - * the state. - */ - friend void ::_malloc_thread_cleanup(); - }; - - /** - * Version of the `ThreadAlloc` interface that uses C++ `thread_local` - * destructors for cleanup. If a per-thread allocator is used during the - * destruction of other per-thread data, this class will create a new - * instance and register its destructor, so should eventually result in - * cleanup, but may result in allocators being returned to the global pool - * and then reacquired multiple times. - * - * This implementation depends on nothing outside of a working C++ - * environment and so should be the simplest for initial bringup on an - * unsupported platform. It is currently used in the FreeBSD kernel version. 
- */ - class ThreadAllocThreadDestructor : public ThreadAllocCommon - { - template - friend class OnDestruct; - - public: - static bool register_cleanup() - { - static thread_local OnDestruct tidier; - - ThreadAllocCommon::register_cleanup(); - - return destructor_has_run; - } - }; - -# ifdef SNMALLOC_USE_THREAD_CLEANUP - using ThreadAlloc = ThreadAllocLibcCleanup; -# else - using ThreadAlloc = ThreadAllocThreadDestructor; -# endif - - /** - * Slow path for the placeholder replacement. - * Function passed as a tempalte parameter to `Allocator` to allow lazy - * replacement. This function initialises the thread local state if requried. - * The simple check that this is the global placeholder is inlined, the rest - * of it is only hit in a very unusual case and so should go off the fast - * path. - * The second component of the return indicates if this TLS is being torndown. - */ - SNMALLOC_FAST_PATH void* init_thread_allocator(function_ref f) - { - auto*& local_alloc = ThreadAlloc::get_reference(); - // If someone reuses a noncachable call, then we can end up here - // with an already initialised allocator. Could either error - // to say stop doing this, or just give them the initialised version. - if (local_alloc == get_GlobalPlaceHolder()) - { - local_alloc = current_alloc_pool()->acquire(); - } - auto result = f(local_alloc); - // Check if we have already run the destructor for the TLS. If so, - // we need to deallocate the allocator. - if (ThreadAlloc::register_cleanup()) - ThreadAlloc::inner_release(); - return result; - } - - /** - * Function passed as a template parameter to `Allocator` to allow lazy - * replacement. This function returns true, if the allocated passed in, - * is the placeholder allocator. If it returns true, then - * `init_thread_allocator` should be called. - */ - SNMALLOC_FAST_PATH bool needs_initialisation(void* existing) - { - return existing == get_GlobalPlaceHolder(); - } -#endif -} // namespace snmalloc -#ifdef SNMALLOC_USE_THREAD_CLEANUP -/** - * Entry point that allows libc to call into the allocator for per-thread - * cleanup. 
- */ -void _malloc_thread_cleanup() -{ - snmalloc::ThreadAllocLibcCleanup::inner_release(); -} -#endif diff --git a/src/override/malloc-extensions.cc b/src/override/malloc-extensions.cc deleted file mode 100644 index ff621a98e..000000000 --- a/src/override/malloc-extensions.cc +++ /dev/null @@ -1,12 +0,0 @@ -#include "malloc-extensions.h" - -#include "../snmalloc.h" - -using namespace snmalloc; - -void get_malloc_info_v1(malloc_info_v1* stats) -{ - auto next_memory_usage = default_memory_provider().memory_usage(); - stats->current_memory_usage = next_memory_usage.first; - stats->peak_memory_usage = next_memory_usage.second; -} \ No newline at end of file diff --git a/src/override/malloc.cc b/src/override/malloc.cc deleted file mode 100644 index adc272b2e..000000000 --- a/src/override/malloc.cc +++ /dev/null @@ -1,274 +0,0 @@ -#include "../mem/slowalloc.h" -#include "../snmalloc.h" - -#include -#include - -using namespace snmalloc; - -#ifndef SNMALLOC_EXPORT -# define SNMALLOC_EXPORT -#endif -#ifdef SNMALLOC_STATIC_LIBRARY_PREFIX -# define __SN_CONCAT(a, b) a##b -# define __SN_EVALUATE(a, b) __SN_CONCAT(a, b) -# define SNMALLOC_NAME_MANGLE(a) \ - __SN_EVALUATE(SNMALLOC_STATIC_LIBRARY_PREFIX, a) -#elif !defined(SNMALLOC_NAME_MANGLE) -# define SNMALLOC_NAME_MANGLE(a) a -#endif - -#ifndef MALLOC_USABLE_SIZE_QUALIFIER -# define MALLOC_USABLE_SIZE_QUALIFIER -#endif - -extern "C" -{ - void SNMALLOC_NAME_MANGLE(check_start)(void* ptr) - { -#if !defined(NDEBUG) && !defined(SNMALLOC_PASS_THROUGH) - if (ThreadAlloc::get_noncachable()->external_pointer(ptr) != ptr) - { - error("Using pointer that is not to the start of an allocation"); - } -#else - UNUSED(ptr); -#endif - } - - SNMALLOC_EXPORT void* SNMALLOC_NAME_MANGLE(__malloc_end_pointer)(void* ptr) - { - return ThreadAlloc::get_noncachable()->external_pointer(ptr); - } - - SNMALLOC_EXPORT void* SNMALLOC_NAME_MANGLE(malloc)(size_t size) - { - return ThreadAlloc::get_noncachable()->alloc(size); - } - - SNMALLOC_EXPORT void SNMALLOC_NAME_MANGLE(free)(void* ptr) - { - SNMALLOC_NAME_MANGLE(check_start)(ptr); - ThreadAlloc::get_noncachable()->dealloc(ptr); - } - - SNMALLOC_EXPORT void SNMALLOC_NAME_MANGLE(cfree)(void* ptr) - { - SNMALLOC_NAME_MANGLE(free)(ptr); - } - - SNMALLOC_EXPORT void* SNMALLOC_NAME_MANGLE(calloc)(size_t nmemb, size_t size) - { - bool overflow = false; - size_t sz = bits::umul(size, nmemb, overflow); - if (overflow) - { - errno = ENOMEM; - return nullptr; - } - return ThreadAlloc::get_noncachable()->alloc(sz); - } - - SNMALLOC_EXPORT - size_t SNMALLOC_NAME_MANGLE(malloc_usable_size)( - MALLOC_USABLE_SIZE_QUALIFIER void* ptr) - { - return ThreadAlloc::get_noncachable()->alloc_size(ptr); - } - - SNMALLOC_EXPORT void* SNMALLOC_NAME_MANGLE(realloc)(void* ptr, size_t size) - { - if (size == (size_t)-1) - { - errno = ENOMEM; - return nullptr; - } - if (ptr == nullptr) - { - return SNMALLOC_NAME_MANGLE(malloc)(size); - } - if (size == 0) - { - SNMALLOC_NAME_MANGLE(free)(ptr); - return nullptr; - } - - SNMALLOC_NAME_MANGLE(check_start)(ptr); - - size_t sz = ThreadAlloc::get_noncachable()->alloc_size(ptr); - // Keep the current allocation if the given size is in the same sizeclass. - if (sz == round_size(size)) - { -#ifdef SNMALLOC_PASS_THROUGH - // snmallocs alignment guarantees can be broken by realloc in pass-through - // this is not exercised, by existing clients, but is tested. 
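// The calloc wrapper above guards the nmemb * size multiplication against
// overflow before allocating.  A freestanding sketch of that guard, using
// the GCC/Clang __builtin_mul_overflow intrinsic in place of snmalloc's
// bits::umul and plain malloc/memset in place of the zeroing allocator:
#include <cerrno>
#include <cstddef>
#include <cstdlib>
#include <cstring>

namespace calloc_sketch
{
  inline void* checked_calloc(std::size_t nmemb, std::size_t size)
  {
    std::size_t total;
    if (__builtin_mul_overflow(nmemb, size, &total))
    {
      errno = ENOMEM;
      return nullptr;
    }
    void* p = std::malloc(total);
    if (p != nullptr)
      std::memset(p, 0, total);
    return p;
  }
} // namespace calloc_sketch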
- if (pointer_align_up(ptr, natural_alignment(size)) == ptr) - return ptr; -#else - return ptr; -#endif - } - void* p = SNMALLOC_NAME_MANGLE(malloc)(size); - if (p != nullptr) - { - SNMALLOC_NAME_MANGLE(check_start)(p); - sz = bits::min(size, sz); - memcpy(p, ptr, sz); - SNMALLOC_NAME_MANGLE(free)(ptr); - } - return p; - } - -#if !defined(__FreeBSD__) && !defined(__OpenBSD__) - SNMALLOC_EXPORT void* - SNMALLOC_NAME_MANGLE(reallocarray)(void* ptr, size_t nmemb, size_t size) - { - bool overflow = false; - size_t sz = bits::umul(size, nmemb, overflow); - if (overflow) - { - errno = ENOMEM; - return nullptr; - } - return SNMALLOC_NAME_MANGLE(realloc)(ptr, sz); - } -#endif - - SNMALLOC_EXPORT void* - SNMALLOC_NAME_MANGLE(memalign)(size_t alignment, size_t size) - { - if ((alignment == 0) || (alignment == size_t(-1))) - { - errno = EINVAL; - return nullptr; - } - - if ((size + alignment) < size) - { - errno = ENOMEM; - return nullptr; - } - - return SNMALLOC_NAME_MANGLE(malloc)( - size ? aligned_size(alignment, size) : alignment); - } - - SNMALLOC_EXPORT void* - SNMALLOC_NAME_MANGLE(aligned_alloc)(size_t alignment, size_t size) - { - SNMALLOC_ASSERT((size % alignment) == 0); - return SNMALLOC_NAME_MANGLE(memalign)(alignment, size); - } - - SNMALLOC_EXPORT int SNMALLOC_NAME_MANGLE(posix_memalign)( - void** memptr, size_t alignment, size_t size) - { - if ( - ((alignment % sizeof(uintptr_t)) != 0) || - ((alignment & (alignment - 1)) != 0) || (alignment == 0)) - { - return EINVAL; - } - - void* p = SNMALLOC_NAME_MANGLE(memalign)(alignment, size); - if (p == nullptr) - { - return ENOMEM; - } - *memptr = p; - return 0; - } - -#if !defined(__FreeBSD__) && !defined(__OpenBSD__) - SNMALLOC_EXPORT void* SNMALLOC_NAME_MANGLE(valloc)(size_t size) - { - return SNMALLOC_NAME_MANGLE(memalign)(OS_PAGE_SIZE, size); - } -#endif - - SNMALLOC_EXPORT void* SNMALLOC_NAME_MANGLE(pvalloc)(size_t size) - { - if (size == size_t(-1)) - { - errno = ENOMEM; - return nullptr; - } - return SNMALLOC_NAME_MANGLE(memalign)( - OS_PAGE_SIZE, (size + OS_PAGE_SIZE - 1) & ~(OS_PAGE_SIZE - 1)); - } - - // Stub implementations for jemalloc compatibility. - // These are called by FreeBSD's libthr (pthreads) to notify malloc of - // various events. They are currently unused, though we may wish to reset - // statistics on fork if built with statistics. - - SNMALLOC_EXPORT void SNMALLOC_NAME_MANGLE(_malloc_prefork)(void) {} - SNMALLOC_EXPORT void SNMALLOC_NAME_MANGLE(_malloc_postfork)(void) {} - SNMALLOC_EXPORT void SNMALLOC_NAME_MANGLE(_malloc_first_thread)(void) {} - - SNMALLOC_EXPORT int - SNMALLOC_NAME_MANGLE(mallctl)(const char*, void*, size_t*, void*, size_t) - { - return ENOENT; - } - -#ifdef SNMALLOC_EXPOSE_PAGEMAP - /** - * Export the pagemap. The return value is a pointer to the pagemap - * structure. The argument is used to return a pointer to a `PagemapConfig` - * structure describing the type of the pagemap. Static methods on the - * concrete pagemap templates can then be used to safely cast the return from - * this function to the correct type. This allows us to preserve some - * semblance of ABI safety via a pure C API. 
- */ - SNMALLOC_EXPORT void* SNMALLOC_NAME_MANGLE(snmalloc_chunkmap_global_get)( - PagemapConfig const** config) - { - auto& pm = GlobalChunkmap::pagemap(); - if (config) - { - *config = &ChunkmapPagemap::config; - SNMALLOC_ASSERT(ChunkmapPagemap::cast_to_pagemap(&pm, *config) == &pm); - } - return ± - } -#endif - -#ifdef SNMALLOC_EXPOSE_RESERVE - SNMALLOC_EXPORT void* - SNMALLOC_NAME_MANGLE(snmalloc_reserve_shared)(size_t* size, size_t align) - { - return snmalloc::default_memory_provider().reserve(size, align); - } -#endif - -#if !defined(__PIC__) && !defined(NO_BOOTSTRAP_ALLOCATOR) - // The following functions are required to work before TLS is set up, in - // statically-linked programs. These temporarily grab an allocator from the - // pool and return it. - - void* __je_bootstrap_malloc(size_t size) - { - return get_slow_allocator()->alloc(size); - } - - void* __je_bootstrap_calloc(size_t nmemb, size_t size) - { - bool overflow = false; - size_t sz = bits::umul(size, nmemb, overflow); - if (overflow) - { - errno = ENOMEM; - return nullptr; - } - // Include size 0 in the first sizeclass. - sz = ((sz - 1) >> (bits::BITS - 1)) + sz; - return get_slow_allocator()->alloc(sz); - } - - void __je_bootstrap_free(void* ptr) - { - get_slow_allocator()->dealloc(ptr); - } -#endif -} diff --git a/src/override/rust.cc b/src/override/rust.cc deleted file mode 100644 index 0ef25644d..000000000 --- a/src/override/rust.cc +++ /dev/null @@ -1,45 +0,0 @@ -#define SNMALLOC_NAME_MANGLE(a) sn_##a -#include "malloc.cc" - -#include - -#ifndef SNMALLOC_EXPORT -# define SNMALLOC_EXPORT -#endif - -using namespace snmalloc; - -extern "C" SNMALLOC_EXPORT void* rust_alloc(size_t alignment, size_t size) -{ - return ThreadAlloc::get_noncachable()->alloc(aligned_size(alignment, size)); -} - -extern "C" SNMALLOC_EXPORT void* -rust_alloc_zeroed(size_t alignment, size_t size) -{ - return ThreadAlloc::get_noncachable()->alloc( - aligned_size(alignment, size)); -} - -extern "C" SNMALLOC_EXPORT void -rust_dealloc(void* ptr, size_t alignment, size_t size) -{ - ThreadAlloc::get_noncachable()->dealloc(ptr, aligned_size(alignment, size)); -} - -extern "C" SNMALLOC_EXPORT void* -rust_realloc(void* ptr, size_t alignment, size_t old_size, size_t new_size) -{ - size_t aligned_old_size = aligned_size(alignment, old_size), - aligned_new_size = aligned_size(alignment, new_size); - if ( - size_to_sizeclass(aligned_old_size) == size_to_sizeclass(aligned_new_size)) - return ptr; - void* p = ThreadAlloc::get_noncachable()->alloc(aligned_new_size); - if (p) - { - std::memcpy(p, ptr, old_size < new_size ? 
old_size : new_size); - ThreadAlloc::get_noncachable()->dealloc(ptr, aligned_old_size); - } - return p; -} diff --git a/src/pal/pal.h b/src/pal/pal.h deleted file mode 100644 index e5343bd0b..000000000 --- a/src/pal/pal.h +++ /dev/null @@ -1,134 +0,0 @@ -#pragma once - -#include "../ds/concept.h" -#include "pal_concept.h" -#include "pal_consts.h" - -// If simultating OE, then we need the underlying platform -#if defined(OPEN_ENCLAVE) -# include "pal_open_enclave.h" -#endif -#if !defined(OPEN_ENCLAVE) || defined(OPEN_ENCLAVE_SIMULATION) -# include "pal_apple.h" -# include "pal_dragonfly.h" -# include "pal_freebsd.h" -# include "pal_freebsd_kernel.h" -# include "pal_haiku.h" -# include "pal_linux.h" -# include "pal_netbsd.h" -# include "pal_noalloc.h" -# include "pal_openbsd.h" -# include "pal_solaris.h" -# include "pal_windows.h" -#endif -#include "pal_plain.h" - -namespace snmalloc -{ -#if !defined(OPEN_ENCLAVE) || defined(OPEN_ENCLAVE_SIMULATION) - using DefaultPal = -# if defined(_WIN32) - PALWindows; -# elif defined(__APPLE__) - PALApple<>; -# elif defined(__linux__) - PALLinux; -# elif defined(FreeBSD_KERNEL) - PALFreeBSDKernel; -# elif defined(__FreeBSD__) - PALFreeBSD; -# elif defined(__HAIKU__) - PALHaiku; -# elif defined(__NetBSD__) - PALNetBSD; -# elif defined(__OpenBSD__) - PALOpenBSD; -# elif defined(__sun) - PALSolaris; -# elif defined(__DragonFly__) - PALDragonfly; -# else -# error Unsupported platform -# endif -#endif - - using Pal = -#if defined(SNMALLOC_MEMORY_PROVIDER) - PALPlainMixin; -#elif defined(OPEN_ENCLAVE) - PALPlainMixin; -#else - DefaultPal; -#endif - - [[noreturn]] SNMALLOC_SLOW_PATH inline SNMALLOC_COLD void - error(const char* const str) - { - Pal::error(str); - } - - // Used to keep Superslab metadata committed. - static constexpr size_t OS_PAGE_SIZE = Pal::page_size; - - /** - * Perform platform-specific adjustment of return pointers. - * - * This is here, rather than in every PAL proper, merely to minimize - * disruption to PALs for platforms that do not support StrictProvenance AALs. - */ - template - static SNMALLOC_FAST_PATH typename std::enable_if_t< - !aal_supports, - CapPtr()>> - capptr_export(CapPtr p) - { - return CapPtr()>(p.unsafe_capptr); - } - - template - static SNMALLOC_FAST_PATH typename std::enable_if_t< - aal_supports, - CapPtr()>> - capptr_export(CapPtr p) - { - return PAL::capptr_export(p); - } - - /** - * A convenience wrapper that avoids the need to litter unsafe accesses with - * every call to PAL::zero. - * - * We do this here rather than plumb CapPtr further just to minimize - * disruption and avoid code bloat. This wrapper ought to compile down to - * nothing if SROA is doing its job. - */ - template - static SNMALLOC_FAST_PATH void pal_zero(CapPtr p, size_t sz) - { - static_assert( - !page_aligned || B == CBArena || B == CBChunkD || B == CBChunk); - PAL::template zero(p.unsafe_capptr, sz); - } - - static_assert( - bits::is_pow2(OS_PAGE_SIZE), "OS_PAGE_SIZE must be a power of two"); - static_assert( - OS_PAGE_SIZE % Aal::smallest_page_size == 0, - "The smallest architectural page size must divide OS_PAGE_SIZE"); - - // Some system headers (e.g. Linux' sys/user.h, FreeBSD's machine/param.h) - // define `PAGE_SIZE` as a macro, while others (e.g. macOS 11's - // mach/machine/vm_param.h) define `PAGE_SIZE` as an extern. We don't use - // `PAGE_SIZE` as our variable name, to avoid conflicts, but if we do see a - // macro definition evaluates to a constant then check that our value matches - // the platform's expected value. 
-#ifdef PAGE_SIZE - static_assert( -# if __has_builtin(__builtin_constant_p) - !__builtin_constant_p(PAGE_SIZE) || (PAGE_SIZE == OS_PAGE_SIZE), -# else - true, -# endif - "Page size from system header does not match snmalloc config page size."); -#endif -} // namespace snmalloc diff --git a/src/pal/pal_concept.h b/src/pal/pal_concept.h deleted file mode 100644 index bebd130d5..000000000 --- a/src/pal/pal_concept.h +++ /dev/null @@ -1,111 +0,0 @@ -#pragma once - -#ifdef __cpp_concepts -# include "../ds/concept.h" -# include "pal_consts.h" - -# include - -namespace snmalloc -{ - /** - * PALs must advertize the bit vector of their supported features and the - * platform's page size. This concept enforces that these are indeed - * constants that fit in the desired types. (This is subtly different from - * saying that they are the required types; C++ may handle constants without - * much regard for their claimed type.) - */ - template - concept ConceptPAL_static_members = requires() - { - typename std::integral_constant; - typename std::integral_constant; - }; - - /** - * PALs expose an error reporting function which takes a const C string. - */ - template - concept ConceptPAL_error = requires(const char* const str) - { - { PAL::error(str) } -> ConceptSame; - }; - - /** - * PALs expose a basic library of memory operations. - */ - template - concept ConceptPAL_memops = requires(void* vp, std::size_t sz) - { - { PAL::notify_not_using(vp, sz) } noexcept -> ConceptSame; - - { PAL::template notify_using(vp, sz) } noexcept - -> ConceptSame; - { PAL::template notify_using(vp, sz) } noexcept - -> ConceptSame; - - { PAL::template zero(vp, sz) } noexcept -> ConceptSame; - { PAL::template zero(vp, sz) } noexcept -> ConceptSame; - }; - - /** - * Absent any feature flags, the PAL must support a crude primitive allocator - */ - template - concept ConceptPAL_reserve_at_least = - requires(PAL p, void* vp, std::size_t sz) - { - { PAL::reserve_at_least(sz) } noexcept - -> ConceptSame>; - }; - - /** - * Some PALs expose a richer allocator which understands aligned allocations - */ - template - concept ConceptPAL_reserve_aligned = requires(std::size_t sz) - { - { PAL::template reserve_aligned(sz) } noexcept -> ConceptSame; - { PAL::template reserve_aligned(sz) } noexcept - -> ConceptSame; - }; - - /** - * Some PALs can provide memory pressure callbacks. - */ - template - concept ConceptPAL_mem_low_notify = requires(PalNotificationObject* pno) - { - { PAL::expensive_low_memory_check() } -> ConceptSame; - { PAL::register_for_low_memory_callback(pno) } -> ConceptSame; - }; - - template - concept ConceptPAL_get_entropy64 = requires() - { - { PAL::get_entropy64() } -> ConceptSame; - }; - - /** - * PALs ascribe to the conjunction of several concepts. These are broken - * out by the shape of the requires() quantifiers required and by any - * requisite claimed pal_features. PALs not claiming particular features - * are, naturally, not bound by the corresponding concept. 
- */ - template - concept ConceptPAL = - ConceptPAL_static_members && - ConceptPAL_error && - ConceptPAL_memops && - (!pal_supports || - ConceptPAL_get_entropy64) && - (!pal_supports || - ConceptPAL_mem_low_notify) && - (pal_supports || - (pal_supports && - ConceptPAL_reserve_aligned) || - (!pal_supports && - ConceptPAL_reserve_at_least)); - -} // namespace snmalloc -#endif diff --git a/src/pal/pal_freebsd.h b/src/pal/pal_freebsd.h deleted file mode 100644 index 13d74aec2..000000000 --- a/src/pal/pal_freebsd.h +++ /dev/null @@ -1,29 +0,0 @@ -#pragma once - -#if defined(__FreeBSD__) && !defined(_KERNEL) -# include "pal_bsd_aligned.h" - -namespace snmalloc -{ - /** - * FreeBSD-specific platform abstraction layer. - * - * This adds FreeBSD-specific aligned allocation to the generic BSD - * implementation. - */ - class PALFreeBSD : public PALBSD_Aligned - { - public: - /** - * Bitmap of PalFeatures flags indicating the optional features that this - * PAL supports. - * - * The FreeBSD PAL does not currently add any features beyond those of a - * generic BSD with support for arbitrary alignment from `mmap`. This - * field is declared explicitly to remind anyone modifying this class to - * add new features that they should add any required feature flags. - */ - static constexpr uint64_t pal_features = PALBSD_Aligned::pal_features; - }; -} // namespace snmalloc -#endif diff --git a/src/pal/pal_linux.h b/src/pal/pal_linux.h deleted file mode 100644 index 8104b335c..000000000 --- a/src/pal/pal_linux.h +++ /dev/null @@ -1,70 +0,0 @@ -#pragma once - -#if defined(__linux__) -# include "../ds/bits.h" -# include "pal_posix.h" - -# include -# include - -extern "C" int puts(const char* str); - -namespace snmalloc -{ - class PALLinux : public PALPOSIX - { - public: - /** - * Bitmap of PalFeatures flags indicating the optional features that this - * PAL supports. - * - * Linux does not support any features other than those in a generic POSIX - * platform. This field is declared explicitly to remind anyone who - * extends this PAL that they may need to extend the set of advertised - * features. - */ - static constexpr uint64_t pal_features = PALPOSIX::pal_features; - - static constexpr size_t page_size = - Aal::aal_name == PowerPC ? 0x10000 : PALPOSIX::page_size; - - /** - * Linux requires an explicit no-reserve flag in `mmap` to guarantee lazy - * commit if /proc/sys/vm/overcommit_memory is set to `heuristic` (0). - * - * https://www.kernel.org/doc/html/latest/vm/overcommit-accounting.html - */ - static constexpr int default_mmap_flags = MAP_NORESERVE; - - /** - * OS specific function for zeroing memory. - * - * Linux implements an unusual interpretation of `MADV_DONTNEED`, which - * immediately resets the pages to the zero state (rather than marking them - * as sensible ones to swap out in high memory pressure). We use this to - * clear the underlying memory range. - */ - template - static void zero(void* p, size_t size) noexcept - { - // QEMU does not seem to be giving the desired behaviour for - // MADV_DONTNEED. switch back to memset only for QEMU. -# ifndef SNMALLOC_QEMU_WORKAROUND - if ( - (page_aligned || is_aligned_block(p, size)) && - (size > 16 * page_size)) - { - // Only use this on large allocations as memset faster, and doesn't - // introduce IPI so faster for small allocations. 
- SNMALLOC_ASSERT(is_aligned_block(p, size)); - madvise(p, size, MADV_DONTNEED); - } - else -# endif - { - ::memset(p, 0, size); - } - } - }; -} // namespace snmalloc -#endif diff --git a/src/pal/pal_open_enclave.h b/src/pal/pal_open_enclave.h deleted file mode 100644 index 3487d3f93..000000000 --- a/src/pal/pal_open_enclave.h +++ /dev/null @@ -1,83 +0,0 @@ -#pragma once - -#include "ds/address.h" -#include "ds/flaglock.h" -#include "pal_plain.h" - -#include -#ifdef OPEN_ENCLAVE -extern "C" void* oe_memset_s(void* p, size_t p_size, int c, size_t size); -extern "C" int oe_random(void* data, size_t size); -extern "C" [[noreturn]] void oe_abort(); - -namespace snmalloc -{ - class PALOpenEnclave - { - /// Base of OE heap - static inline void* heap_base = nullptr; - - /// Size of OE heap - static inline size_t heap_size; - - // This is infrequently used code, a spin lock simplifies the code - // considerably, and should never be on the fast path. - static inline std::atomic_flag spin_lock; - - public: - /** - * This will be called by oe_allocator_init to set up enclave heap bounds. - */ - static void setup_initial_range(void* base, void* end) - { - heap_size = pointer_diff(base, end); - heap_base = base; - } - - /** - * Bitmap of PalFeatures flags indicating the optional features that this - * PAL supports. - */ - static constexpr uint64_t pal_features = Entropy; - - static constexpr size_t page_size = Aal::smallest_page_size; - - [[noreturn]] static void error(const char* const str) - { - UNUSED(str); - oe_abort(); - } - - static std::pair - reserve_at_least(size_t request_size) noexcept - { - // First call returns the entire address space - // subsequent calls return {nullptr, 0} - FlagLock lock(spin_lock); - if (request_size > heap_size) - return {nullptr, 0}; - - auto result = std::make_pair(heap_base, heap_size); - heap_size = 0; - return result; - } - - template - static void zero(void* p, size_t size) noexcept - { - oe_memset_s(p, size, 0, size); - } - - /** - * Source of Entropy - */ - static uint64_t get_entropy64() - { - uint64_t result = 0; - if (oe_random(&result, sizeof(result)) != OE_OK) - error("Failed to get system randomness"); - return result; - } - }; -} -#endif diff --git a/src/snmalloc.h b/src/snmalloc.h deleted file mode 100644 index 8c2264726..000000000 --- a/src/snmalloc.h +++ /dev/null @@ -1,3 +0,0 @@ -#pragma once - -#include "mem/threadalloc.h" diff --git a/src/snmalloc/README.md b/src/snmalloc/README.md new file mode 100644 index 000000000..0366a5e77 --- /dev/null +++ b/src/snmalloc/README.md @@ -0,0 +1,40 @@ +Include hierarchy +----------------- + +The `snmalloc/` include path contains all of the snmalloc headers. +These are arranged in a hierarchy such that each of the directories may include ones below, in the following order, starting at the bottom: + + - `ds_core/` provides core data structures that depend on the C++ implementation and nothing else. + This directory includes a number of things that abstract over different language extensions (for example, different built-in function names in different compilers). + - `aal/` provides the architecture abstraction layer (AAL). + This layer provides abstractions over CPU-specific intrinsics and defines things such as the virtual address-space size. + There is a single AAL for an snmalloc instantiation. + - `pal/` provides the platform abstraction layer (PAL). + This exposes OS- or environment-specific abstractions into the rest of the code. 
+ An snmalloc instantiation may use more than one PAL, including ones provided by the user. + - `ds/` includes data structures that may depend on platform services or on features specific to the current CPU. + - `mem/` provides the core allocator abstractions. + The code here is templated over a back-end, which defines a particular embedding of snmalloc. + - `backend_helpers/` provides helper classes for use in defining a back end. + This includes data structures such as pagemap implementations (efficient maps from a chunk address to associated metadata) and buddy allocators for managing address-space ranges. + - `backend/` provides some example implementations for snmalloc embeddings that provide a global memory allocator for an address space. + Users may ignore this entirely and use the types in `mem/` with a custom back end to expose an snmalloc instance with specific behaviour. + Layers above this can be used with a custom configuration by defining `SNMALLOC_PROVIDE_OWN_CONFIG` and exporting a type as `snmalloc::Alloc` that defines the type of an `snmalloc::LocalAllocator` template specialisation. + - `global/` provides some front-end components that assume that snmalloc is available in a global configuration. + - `override/` builds on top of `global/` to provide implementations that are compatible with external specifications (for example C `malloc`, C++ `operator new`, jemalloc's `*allocx`, or Rust's `std::alloc`). + +Each layer until `backend_helpers/` provides a single header with the same name as the directory. +Files in higher layers should depend only on the single-file version. +This allows specific files to be moved to a lower layer if appropriate, without too much code churn. + +There is only one exception to this rule: `backend/globalconfig.h`. +This file defines either the default configuration *or* nothing, depending on whether the user has defined `SNMALLOC_PROVIDE_OWN_CONFIG`. +The layers above the back end should include only this file, so that there is a single interception point for externally defined back ends. + +External code should include only the following files: + + - `snmalloc/snmalloc_core.h` includes everything up to `backend_helpers`. + This provides the building blocks required to assemble an snmalloc instance, but does not assume any global configuration. + - `snmalloc/snmalloc_front.h` assumes a global configuration (either user-provided or the default from `snmalloc/backend/globalconfig.h`) and exposes all of the functionality that depends on both the core and the global configuration. + - `snmalloc/snmalloc.h` is a convenience wrapper that includes both of the above files. + - `snmalloc/override/*.cc` can be compiled as-is or included after `snmalloc/snmalloc_core.h` and a custom global allocator definition to provide specific languages' global memory allocator APIs with a custom snmalloc embedding. diff --git a/src/aal/aal.h b/src/snmalloc/aal/aal.h similarity index 56% rename from src/aal/aal.h rename to src/snmalloc/aal/aal.h index d1a71c830..c25ce5f12 100644 --- a/src/aal/aal.h +++ b/src/snmalloc/aal/aal.h @@ -1,7 +1,12 @@ +/** + * The snmalloc architecture abstraction layer. This defines + * CPU-architecture-specific functionality. + * + * Files in this directory may depend on `ds_core` and each other, but nothing + * else in snmalloc.
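The `SNMALLOC_PROVIDE_OWN_CONFIG` hook described in the README above can be illustrated with a minimal sketch of a custom embedding; `MyConfig` and `my_config.h` are hypothetical stand-ins for a user-written configuration (shaped like the `StandardConfig`/`FixedRangeConfig` classes introduced later in this change):

```cpp
// Sketch of a translation unit that embeds snmalloc with a custom back end.
#define SNMALLOC_PROVIDE_OWN_CONFIG
#include <snmalloc/snmalloc_core.h>

#include "my_config.h" // hypothetical: defines an snmalloc-compatible MyConfig

namespace snmalloc
{
  // Export the allocator type that the layers above the back end will use.
  using Alloc = LocalAllocator<MyConfig>;
}

// With the configuration in place, the stock overrides can be reused as-is.
#include <snmalloc/override/malloc.cc>
```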
+ */ #pragma once -#include "../ds/concept.h" -#include "../ds/defines.h" -#include "../ds/ptrwrap.h" +#include "../ds_core/ds_core.h" #include "aal_concept.h" #include "aal_consts.h" @@ -32,6 +37,14 @@ # define PLATFORM_IS_SPARC #endif +#if defined(__riscv) +# define PLATFORM_IS_RISCV +#endif + +#if defined(__loongarch__) +# define PLATFORM_IS_LOONGARCH +#endif + namespace snmalloc { /** @@ -67,6 +80,66 @@ namespace snmalloc default_address_t, Arch>::address_t; + private: + /** + * SFINAE template and default case. T will be Arch and the second template + * argument defaults at the call site (below). + */ + template + struct default_bits_t + { + static constexpr size_t value = sizeof(size_t) * 8; + }; + + /** + * SFINAE override case. T will be Arch, and the computation in the second + * position yields the type int iff T::bits exists and is a substituion + * failure otherwise. That is, if T::bits exists, this specialization + * shadows the default; otherwise, this specialization has no effect. + */ + template + struct default_bits_t + { + static constexpr size_t value = T::bits; + }; + + public: + /** + * Architectural word width as overridden by the underlying Arch-itecture or + * defaulted as per above. + */ + static constexpr size_t bits = default_bits_t::value; + + private: + /** + * Architectures have a default opinion of their address space size, but + * this is mediated by the platform (e.g., the kernel may cleave the address + * space in twain or my use only some of the radix points available to + * hardware paging mechanisms). + * + * This is more SFINAE-based type-level trickery; see default_bits_t, above, + * for more details. + */ + template + struct default_address_bits_t + { + static constexpr size_t value = (bits == 64) ? 48 : 32; + }; + + /** + * Yet more SFINAE; see default_bits_t for more discussion. Here, the + * computation in the second parameter yields the type int iff + * T::address_bits exists. + */ + template + struct default_address_bits_t + { + static constexpr size_t value = T::address_bits; + }; + + public: + static constexpr size_t address_bits = default_address_bits_t::value; + /** * Prefetch a specific address. * @@ -129,32 +202,22 @@ namespace snmalloc */ template< typename T, - enum capptr_bounds nbounds, - enum capptr_bounds obounds, + SNMALLOC_CONCEPT(capptr::IsBound) BOut, + SNMALLOC_CONCEPT(capptr::IsBound) BIn, typename U = T> - static SNMALLOC_FAST_PATH CapPtr - capptr_bound(CapPtr a, size_t size) noexcept + static SNMALLOC_FAST_PATH CapPtr + capptr_bound(CapPtr a, size_t size) noexcept { - // Impose constraints on bounds annotations. static_assert( - obounds == CBArena || obounds == CBChunkD || obounds == CBChunk || - obounds == CBChunkE); - static_assert(capptr_is_bounds_refinement()); + BIn::spatial > capptr::dimension::Spatial::Alloc, + "Refusing to re-bound Spatial::Alloc CapPtr"); + static_assert( + capptr::is_spatial_refinement(), + "capptr_bound must preserve non-spatial CapPtr dimensions"); UNUSED(size); - return CapPtr(a.template as_static().unsafe_capptr); - } - - /** - * For architectures which do not enforce StrictProvenance, there's nothing - * to be done, so just return the pointer unmodified with new annotation. 
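The `default_bits_t` comments above describe a detection idiom that is easy to check in isolation. A self-contained illustration of the same trick (illustrative only, not snmalloc code):

```cpp
#include <cstddef>
#include <iostream>

// Primary template: used when T::bits does not exist.
template<typename T, typename = int>
struct default_bits_t
{
  static constexpr std::size_t value = sizeof(std::size_t) * 8;
};

// Specialisation: the second argument computes to `int` iff T::bits exists;
// otherwise substitution fails and the primary template is used.
template<typename T>
struct default_bits_t<T, decltype(((void)T::bits, 0))>
{
  static constexpr std::size_t value = T::bits;
};

struct NoBits
{};

struct ThirtyTwo
{
  static constexpr std::size_t bits = 32;
};

int main()
{
  std::cout << default_bits_t<NoBits>::value << "\n"; // native word width
  std::cout << default_bits_t<ThirtyTwo>::value << "\n"; // 32
}
```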
- */ - template - static SNMALLOC_FAST_PATH CapPtr - capptr_rebound(CapPtr a, CapPtr r) noexcept - { - UNUSED(a); - return CapPtr(r.unsafe_capptr); + return CapPtr::unsafe_from( + a.template as_static().unsafe_ptr()); } }; } // namespace snmalloc @@ -169,19 +232,38 @@ namespace snmalloc # include "aal_powerpc.h" #elif defined(PLATFORM_IS_SPARC) # include "aal_sparc.h" +#elif defined(PLATFORM_IS_RISCV) +# include "aal_riscv.h" +#elif defined(PLATFORM_IS_LOONGARCH) +# include "aal_loongarch.h" +#endif + +#if defined(__CHERI_PURE_CAPABILITY__) +# include "aal_cheri.h" #endif namespace snmalloc { +#if defined(__CHERI_PURE_CAPABILITY__) + using Aal = AAL_Generic>; +#else using Aal = AAL_Generic>; +#endif - template + template constexpr static bool aal_supports = (AAL::aal_features & F) == F; -} // namespace snmalloc -#if defined(_MSC_VER) && defined(SNMALLOC_VA_BITS_32) -# include -#endif + /* + * The backend's leading-order response to StrictProvenance is entirely + * within its data structures and not actually anything to do with the + * architecture. Rather than test aal_supports or + * defined(__CHERI_PURE_CAPABILITY__) or such therein, using this + * backend_strict_provenance flag makes it easy to test a lot of machinery + * on non-StrictProvenance architectures. + */ + static constexpr bool backend_strict_provenance = + aal_supports; +} // namespace snmalloc #ifdef __POINTER_WIDTH__ # if ((__POINTER_WIDTH__ == 64) && !defined(SNMALLOC_VA_BITS_64)) || \ @@ -199,3 +281,7 @@ static_assert(sizeof(size_t) == 4); #elif defined(SNMALLOC_VA_BITS_64) static_assert(sizeof(size_t) == 8); #endif + +// Included after the AAL has been defined, depends on the AAL's notion of an +// address +#include "address.h" diff --git a/src/aal/aal_arm.h b/src/snmalloc/aal/aal_arm.h similarity index 89% rename from src/aal/aal_arm.h rename to src/snmalloc/aal/aal_arm.h index b238de5c4..39bcd95a4 100644 --- a/src/aal/aal_arm.h +++ b/src/snmalloc/aal/aal_arm.h @@ -48,12 +48,12 @@ namespace snmalloc { #ifdef _MSC_VER __prefetch(ptr); -#else -# ifdef SNMALLOC_VA_BITS_64 +#elif __has_builtin(__builtin_prefetch) && !defined(SNMALLOC_NO_AAL_BUILTINS) + __builtin_prefetch(ptr); +#elif defined(SNMALLOC_VA_BITS_64) __asm__ volatile("prfm pldl1keep, [%0]" : "=r"(ptr)); -# else +#else __asm__ volatile("pld\t[%0]" : "=r"(ptr)); -# endif #endif } }; diff --git a/src/snmalloc/aal/aal_cheri.h b/src/snmalloc/aal/aal_cheri.h new file mode 100644 index 000000000..25f35bb6b --- /dev/null +++ b/src/snmalloc/aal/aal_cheri.h @@ -0,0 +1,92 @@ +#pragma once + +#include "../ds_core/ds_core.h" + +#include + +namespace snmalloc +{ + /** + * A mixin AAL that applies CHERI to a `Base` architecture. Gives + * architectural teeth to the capptr_bound primitive. + */ + template + class AAL_CHERI : public Base + { + public: + /** + * CHERI pointers are not integers and come with strict provenance + * requirements. + */ + static constexpr uint64_t aal_features = + (Base::aal_features & ~IntegerPointers) | StrictProvenance; + + enum AalCheriFeatures : uint64_t + { + /** + * This CHERI flavor traps if the capability input to a bounds-setting + * instruction has its tag clear, rather than just leaving the output + * untagged. + * + * For example, CHERI-RISC-V's CSetBoundsExact traps in contrast to + * Morello's SCBNDSE. + */ + SetBoundsTrapsUntagged = (1 << 0), + + /** + * This CHERI flavor traps if the capability input to a + * permissions-masking instruction has its tag clear, rather than just + * leaving the output untagged. 
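The `Aal` alias selected in `aal.h` above composes the wrappers roughly as follows (a clarifying sketch, not a verbatim quote of the source): on CHERI purecap builds the `AAL_CHERI` mixin sits between the generic wrapper and the per-architecture `AAL_Arch` class.

```cpp
// How the architecture abstraction layer is assembled (sketch).
#if defined(__CHERI_PURE_CAPABILITY__)
using Aal = AAL_Generic<AAL_CHERI<AAL_Arch>>;
#else
using Aal = AAL_Generic<AAL_Arch>;
#endif
```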
+ * + * For example, CHERI-RISC-V's CAndPerms traps in contrast to Morello's + * CLRPERM. + */ + AndPermsTrapsUntagged = (1 << 0), + }; + + /** + * Specify "features" of the particular CHERI machine we're running on. + */ + static constexpr uint64_t aal_cheri_features = + /* CHERI-RISC-V prefers to trap on untagged inputs. Morello does not. */ + (Base::aal_name == RISCV ? + SetBoundsTrapsUntagged | AndPermsTrapsUntagged : + 0); + + /** + * On CHERI-aware compilers, ptraddr_t is an integral type that is wide + * enough to hold any address that may be contained within a memory + * capability. It does not carry provenance: it is not a capability, but + * merely an address. + */ + typedef ptraddr_t address_t; + + template< + typename T, + SNMALLOC_CONCEPT(capptr::IsBound) BOut, + SNMALLOC_CONCEPT(capptr::IsBound) BIn, + typename U = T> + static SNMALLOC_FAST_PATH CapPtr + capptr_bound(CapPtr a, size_t size) noexcept + { + static_assert( + BIn::spatial > capptr::dimension::Spatial::Alloc, + "Refusing to re-bound Spatial::Alloc CapPtr"); + static_assert( + capptr::is_spatial_refinement(), + "capptr_bound must preserve non-spatial CapPtr dimensions"); + SNMALLOC_ASSERT(__builtin_cheri_tag_get(a.unsafe_ptr())); + + if constexpr (aal_cheri_features & SetBoundsTrapsUntagged) + { + if (a == nullptr) + { + return nullptr; + } + } + + void* pb = __builtin_cheri_bounds_set_exact(a.unsafe_ptr(), size); + return CapPtr::unsafe_from(static_cast(pb)); + } + }; +} // namespace snmalloc diff --git a/src/snmalloc/aal/aal_concept.h b/src/snmalloc/aal/aal_concept.h new file mode 100644 index 000000000..4fa0896ff --- /dev/null +++ b/src/snmalloc/aal/aal_concept.h @@ -0,0 +1,69 @@ +#pragma once + +#ifdef __cpp_concepts +# include "../ds_core/ds_core.h" +# include "aal_consts.h" + +# include +# include + +namespace snmalloc +{ + /** + * AALs must advertise the bit vector of supported features, their name, + * machine word size, and an upper bound on the address space size + */ + template + concept IsAAL_static_members = requires() + { + typename std::integral_constant; + typename std::integral_constant; + typename std::integral_constant; + typename std::integral_constant; + }; + + /** + * AALs provide a prefetch operation. + */ + template + concept IsAAL_prefetch = requires(void* ptr) + { + { + AAL::prefetch(ptr) + } + noexcept->ConceptSame; + }; + + /** + * AALs provide a notion of high-precision timing. + */ + template + concept IsAAL_tick = requires() + { + { + AAL::tick() + } + noexcept->ConceptSame; + }; + + template + concept IsAAL_capptr_methods = + requires(capptr::Chunk auth, capptr::AllocFull ret, size_t sz) + { + /** + * Produce a pointer with reduced authority from a more privilged pointer. + * The resulting pointer will have base at auth's address and length of + * exactly sz. auth+sz must not exceed auth's limit. 
+ */ + { + AAL::template capptr_bound(auth, sz) + } + noexcept->ConceptSame>; + }; + + template + concept IsAAL = IsAAL_static_members&& IsAAL_prefetch&& + IsAAL_tick&& IsAAL_capptr_methods; + +} // namespace snmalloc +#endif diff --git a/src/aal/aal_consts.h b/src/snmalloc/aal/aal_consts.h similarity index 97% rename from src/aal/aal_consts.h rename to src/snmalloc/aal/aal_consts.h index 24b31ff73..afc4740ce 100644 --- a/src/aal/aal_consts.h +++ b/src/snmalloc/aal/aal_consts.h @@ -33,5 +33,7 @@ namespace snmalloc X86, X86_SGX, Sparc, + RISCV, + LoongArch }; } // namespace snmalloc diff --git a/src/snmalloc/aal/aal_loongarch.h b/src/snmalloc/aal/aal_loongarch.h new file mode 100644 index 000000000..6e25c60ae --- /dev/null +++ b/src/snmalloc/aal/aal_loongarch.h @@ -0,0 +1,57 @@ +#pragma once + +#if __SIZEOF_POINTER__ == 8 +# define SNMALLOC_VA_BITS_64 +#else +# define SNMALLOC_VA_BITS_32 +#endif + +#include +namespace snmalloc +{ + /** + * Loongarch-specific architecture abstraction layer. + */ + class AAL_LoongArch + { + public: + /** + * Bitmap of AalFeature flags + */ + static constexpr uint64_t aal_features = + IntegerPointers | NoCpuCycleCounters; + + static constexpr enum AalName aal_name = LoongArch; + + static constexpr size_t smallest_page_size = 0x1000; + + /** + * On pipelined processors, notify the core that we are in a spin loop and + * that speculative execution past this point may not be a performance gain. + */ + static inline void pause() + { + __asm__ __volatile__("dbar 0" : : : "memory"); + } + + /** + * PRELD reads a cache-line of data from memory in advance into the Cache. + * The access address is the 12bit immediate number of the value in the + * general register rj plus the symbol extension. + * + * The processor learns from the hint in the PRELD instruction what type + * will be acquired and which level of Cache the data to be taken back fill + * in, hint has 32 optional values (0 to 31), 0 represents load to level 1 + * Cache If the Cache attribute of the access address of the PRELD + * instruction is not cached, then the instruction cannot generate a memory + * access action and is treated as a NOP instruction. The PRELD instruction + * will not trigger any exceptions related to MMU or address. + */ + static inline void prefetch(void* ptr) + { + __asm__ volatile("preld 0, %0, 0" : "=r"(ptr)); + } + }; + + using AAL_Arch = AAL_LoongArch; +} // namespace snmalloc \ No newline at end of file diff --git a/src/aal/aal_powerpc.h b/src/snmalloc/aal/aal_powerpc.h similarity index 100% rename from src/aal/aal_powerpc.h rename to src/snmalloc/aal/aal_powerpc.h diff --git a/src/snmalloc/aal/aal_riscv.h b/src/snmalloc/aal/aal_riscv.h new file mode 100644 index 000000000..2d2f7a4f1 --- /dev/null +++ b/src/snmalloc/aal/aal_riscv.h @@ -0,0 +1,54 @@ +#pragma once + +#if __riscv_xlen == 64 +# define SNMALLOC_VA_BITS_64 +#elif __riscv_xlen == 32 +# define SNMALLOC_VA_BITS_32 +#endif + +namespace snmalloc +{ + /** + * RISC-V architecture layer, phrased as generically as possible. Specific + * implementations may need to adjust some of these. + */ + class AAL_RISCV + { + public: + static constexpr uint64_t aal_features = IntegerPointers; + + static constexpr size_t smallest_page_size = 0x1000; + + static constexpr AalName aal_name = RISCV; + + static void inline pause() + { + /* + * The "Zihintpause" extension claims to be the right thing to do here, + * and it is expected to be used in analogous places, e.g., Linux's + * cpu_relax(), but... 
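Whatever encoding RISC-V eventually settles on, the role of `pause()` is the same across the AALs above: it is a hint issued from inside spin loops. A minimal usage sketch (snmalloc's own `FlagLock` spins in essentially this way), assuming the snmalloc headers are available:

```cpp
#include <atomic>

#include <snmalloc/snmalloc.h>

inline void spin_acquire(std::atomic_flag& flag)
{
  while (flag.test_and_set(std::memory_order_acquire))
  {
    snmalloc::Aal::pause(); // tell the core we are busy-waiting
  }
}
```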
+ * + * its specification is somewhat unusual, in that it talks about the rate + * at which a HART's instructions retire rather than the rate at which + * they are dispatched (Intel's PAUSE instruction explicitly promises + * that it "de-pipelines" the spin-wait loop, for example) or anything + * about memory semantics (Intel's PAUSE docs talk about a possible + * memory order violation and pipeline flush upon loop exit). + * + * we don't yet have examples of what implementations have done. + * + * it's not yet understood by C frontends or assembler, meaning we'd have + * to spell it out by hand, as + * __asm__ volatile(".byte 0xF; .byte 0x0; .byte 0x0; .byte 0x1"); + * + * All told, we just leave this function empty for the moment. The good + * news is that, if and when we do add a PAUSE, the instruction is encoded + * by stealing some dead space of the FENCE instruction and so should be + * available everywhere even if it doesn't do anything on a particular + * microarchitecture. + */ + } + }; + + using AAL_Arch = AAL_RISCV; +} diff --git a/src/aal/aal_sparc.h b/src/snmalloc/aal/aal_sparc.h similarity index 100% rename from src/aal/aal_sparc.h rename to src/snmalloc/aal/aal_sparc.h diff --git a/src/aal/aal_x86.h b/src/snmalloc/aal/aal_x86.h similarity index 100% rename from src/aal/aal_x86.h rename to src/snmalloc/aal/aal_x86.h diff --git a/src/aal/aal_x86_sgx.h b/src/snmalloc/aal/aal_x86_sgx.h similarity index 100% rename from src/aal/aal_x86_sgx.h rename to src/snmalloc/aal/aal_x86_sgx.h diff --git a/src/ds/address.h b/src/snmalloc/aal/address.h similarity index 58% rename from src/ds/address.h rename to src/snmalloc/aal/address.h index 56fe12141..9be599814 100644 --- a/src/ds/address.h +++ b/src/snmalloc/aal/address.h @@ -1,35 +1,41 @@ #pragma once -#include "../pal/pal_consts.h" -#include "bits.h" -#include "ptrwrap.h" +#include "../ds_core/ds_core.h" #include namespace snmalloc { /** - * The type used for an address. Currently, all addresses are assumed to be - * provenance-carrying values and so it is possible to cast back from the - * result of arithmetic on an address_t. Eventually, this will want to be - * separated into two types, one for raw addresses and one for addresses that - * can be cast back to pointers. + * The type used for an address. On CHERI, this is not a provenance-carrying + * value and so cannot be converted back to a pointer. */ using address_t = Aal::address_t; + /** + * Perform arithmetic on a uintptr_t. + */ + inline uintptr_t pointer_offset(uintptr_t base, size_t diff) + { + return base + diff; + } + /** * Perform pointer arithmetic and return the adjusted pointer. 
*/ template inline U* pointer_offset(T* base, size_t diff) { - return reinterpret_cast(reinterpret_cast(base) + diff); + SNMALLOC_ASSERT(base != nullptr); /* Avoid UB */ + return unsafe_from_uintptr( + unsafe_to_uintptr(base) + static_cast(diff)); } - template + template inline CapPtr pointer_offset(CapPtr base, size_t diff) { - return CapPtr(pointer_offset(base.unsafe_capptr, diff)); + return CapPtr::unsafe_from( + pointer_offset(base.unsafe_ptr(), diff)); } /** @@ -38,22 +44,23 @@ namespace snmalloc template inline U* pointer_offset_signed(T* base, ptrdiff_t diff) { + SNMALLOC_ASSERT(base != nullptr); /* Avoid UB */ return reinterpret_cast(reinterpret_cast(base) + diff); } - template + template inline CapPtr pointer_offset_signed(CapPtr base, ptrdiff_t diff) { - return CapPtr( - pointer_offset_signed(base.unsafe_capptr, diff)); + return CapPtr::unsafe_from( + pointer_offset_signed(base.unsafe_ptr(), diff)); } /** * Cast from a pointer type to an address. */ template - inline address_t address_cast(T* ptr) + inline SNMALLOC_FAST_PATH address_t address_cast(T* ptr) { return reinterpret_cast(ptr); } @@ -65,11 +72,15 @@ namespace snmalloc * as per above, and uses the wrapper types in its own definition, e.g., of * capptr_bound. */ + template + inline SNMALLOC_FAST_PATH address_t address_cast(CapPtr a) + { + return address_cast(a.unsafe_ptr()); + } - template - inline address_t address_cast(CapPtr a) + inline SNMALLOC_FAST_PATH address_t address_cast(uintptr_t a) { - return address_cast(a.unsafe_capptr); + return static_cast(a); } /** @@ -91,31 +102,45 @@ namespace snmalloc } /** - * Align a pointer down to a statically specified granularity, which must be a - * power of two. + * Align a uintptr_t down to a statically specified granularity, which must be + * a power of two. */ - template - SNMALLOC_FAST_PATH T* pointer_align_down(void* p) + template + inline uintptr_t pointer_align_down(uintptr_t p) { static_assert(alignment > 0); static_assert(bits::is_pow2(alignment)); if constexpr (alignment == 1) - return static_cast(p); + return p; else { #if __has_builtin(__builtin_align_down) - return static_cast(__builtin_align_down(p, alignment)); + return __builtin_align_down(p, alignment); #else - return reinterpret_cast( - bits::align_down(reinterpret_cast(p), alignment)); + return bits::align_down(p, alignment); #endif } } - template + /** + * Align a pointer down to a statically specified granularity, which must be a + * power of two. + */ + template + inline T* pointer_align_down(void* p) + { + return unsafe_from_uintptr( + pointer_align_down(unsafe_to_uintptr(p))); + } + + template< + size_t alignment, + typename T, + SNMALLOC_CONCEPT(capptr::IsBound) bounds> inline CapPtr pointer_align_down(CapPtr p) { - return CapPtr(pointer_align_down(p.unsafe_capptr)); + return CapPtr::unsafe_from( + pointer_align_down(p.unsafe_ptr())); } template @@ -140,16 +165,20 @@ namespace snmalloc #if __has_builtin(__builtin_align_up) return static_cast(__builtin_align_up(p, alignment)); #else - return reinterpret_cast( - bits::align_up(reinterpret_cast(p), alignment)); + return unsafe_from_uintptr( + bits::align_up(unsafe_to_uintptr(p), alignment)); #endif } } - template + template< + size_t alignment, + typename T = void, + SNMALLOC_CONCEPT(capptr::IsBound) bounds> inline CapPtr pointer_align_up(CapPtr p) { - return CapPtr(pointer_align_up(p.unsafe_capptr)); + return CapPtr::unsafe_from( + pointer_align_up(p.unsafe_ptr())); } template @@ -163,18 +192,26 @@ namespace snmalloc * a power of two. 
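The address helpers above are small wrappers, but their intended use is easiest to see in a short sketch (assumes the snmalloc headers; the alignment overloads used here are the compile-time power-of-two ones):

```cpp
#include <snmalloc/snmalloc.h>

void example()
{
  alignas(64) static char buffer[256];

  void* p = buffer;
  snmalloc::address_t a = snmalloc::address_cast(p); // raw address, no provenance
  void* q = snmalloc::pointer_offset(p, 65); // p advanced by 65 bytes

  void* down = snmalloc::pointer_align_down<64>(q); // round down to a 64-byte boundary
  void* up = snmalloc::pointer_align_up<64>(q); // round up to a 64-byte boundary

  (void)a;
  (void)down;
  (void)up;
}
```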
*/ template - SNMALLOC_FAST_PATH T* pointer_align_down(void* p, size_t alignment) + inline T* pointer_align_down(void* p, size_t alignment) { SNMALLOC_ASSERT(alignment > 0); SNMALLOC_ASSERT(bits::is_pow2(alignment)); #if __has_builtin(__builtin_align_down) return static_cast(__builtin_align_down(p, alignment)); #else - return reinterpret_cast( - bits::align_down(reinterpret_cast(p), alignment)); + return unsafe_from_uintptr( + bits::align_down(unsafe_to_uintptr(p), alignment)); #endif } + template + inline CapPtr + pointer_align_down(CapPtr p, size_t alignment) + { + return CapPtr::unsafe_from( + pointer_align_down(p.unsafe_ptr(), alignment)); + } + /** * Align a pointer up to a dynamically specified granularity, which must * be a power of two. @@ -187,16 +224,17 @@ namespace snmalloc #if __has_builtin(__builtin_align_up) return static_cast(__builtin_align_up(p, alignment)); #else - return reinterpret_cast( - bits::align_up(reinterpret_cast(p), alignment)); + return unsafe_from_uintptr( + bits::align_up(unsafe_to_uintptr(p), alignment)); #endif } - template + template inline CapPtr pointer_align_up(CapPtr p, size_t alignment) { - return CapPtr(pointer_align_up(p.unsafe_capptr, alignment)); + return CapPtr::unsafe_from( + pointer_align_up(p.unsafe_ptr(), alignment)); } /** @@ -204,21 +242,21 @@ namespace snmalloc * expected to point to the base of some (sub)allocation into which cursor * points; would-be negative answers trip an assertion in debug builds. */ - inline size_t pointer_diff(void* base, void* cursor) + inline size_t pointer_diff(const void* base, const void* cursor) { SNMALLOC_ASSERT(cursor >= base); return static_cast( - static_cast(cursor) - static_cast(base)); + static_cast(cursor) - static_cast(base)); } template< typename T = void, typename U = void, - enum capptr_bounds Tbounds, - enum capptr_bounds Ubounds> + SNMALLOC_CONCEPT(capptr::IsBound) Tbounds, + SNMALLOC_CONCEPT(capptr::IsBound) Ubounds> inline size_t pointer_diff(CapPtr base, CapPtr cursor) { - return pointer_diff(base.unsafe_capptr, cursor.unsafe_capptr); + return pointer_diff(base.unsafe_ptr(), cursor.unsafe_ptr()); } /** @@ -234,12 +272,12 @@ namespace snmalloc template< typename T = void, typename U = void, - enum capptr_bounds Tbounds, - enum capptr_bounds Ubounds> + SNMALLOC_CONCEPT(capptr::IsBound) Tbounds, + SNMALLOC_CONCEPT(capptr::IsBound) Ubounds> inline ptrdiff_t pointer_diff_signed(CapPtr base, CapPtr cursor) { - return pointer_diff_signed(base.unsafe_capptr, cursor.unsafe_capptr); + return pointer_diff_signed(base.unsafe_ptr(), cursor.unsafe_ptr()); } } // namespace snmalloc diff --git a/src/snmalloc/backend/backend.h b/src/snmalloc/backend/backend.h new file mode 100644 index 000000000..9401e71d6 --- /dev/null +++ b/src/snmalloc/backend/backend.h @@ -0,0 +1,178 @@ +#pragma once +#include "../backend_helpers/backend_helpers.h" + +namespace snmalloc +{ + /** + * This class implements the standard backend for handling allocations. + * It is parameterised by its Pagemap management and + * address space management (LocalState). + */ + template< + SNMALLOC_CONCEPT(IsPAL) PAL, + typename PagemapEntry, + typename Pagemap, + typename LocalState> + class BackendAllocator + { + using GlobalMetaRange = typename LocalState::GlobalMetaRange; + using Stats = typename LocalState::Stats; + + public: + using Pal = PAL; + using SlabMetadata = typename PagemapEntry::SlabMetadata; + +#ifdef __cpp_concepts + static_assert(IsSlabMeta_Arena); +#endif + + public: + /** + * Provide a block of meta-data with size and align. 
+ * + * Backend allocator may use guard pages and separate area of + * address space to protect this from corruption. + * + * The template argument is the type of the metadata being allocated. This + * allows the backend to allocate different types of metadata in different + * places or with different policies. The default implementation, here, + * does not avail itself of this degree of freedom. + */ + template + static capptr::Arena + alloc_meta_data(LocalState* local_state, size_t size) + { + capptr::Arena p; + if (local_state != nullptr) + { + p = local_state->get_meta_range().alloc_range_with_leftover(size); + } + else + { + static_assert( + GlobalMetaRange::ConcurrencySafe, + "Global meta data range needs to be concurrency safe."); + GlobalMetaRange global_state; + p = global_state.alloc_range(bits::next_pow2(size)); + } + + if (p == nullptr) + errno = ENOMEM; + + return p; + } + + /** + * Returns a chunk of memory with alignment and size of `size`, and a + * block containing metadata about the slab. + * + * It additionally set the meta-data for this chunk of memory to + * be + * (remote, sizeclass, slab_metadata) + * where slab_metadata, is the second element of the pair return. + */ + static std::pair, SlabMetadata*> + alloc_chunk(LocalState& local_state, size_t size, uintptr_t ras) + { + SNMALLOC_ASSERT(bits::is_pow2(size)); + SNMALLOC_ASSERT(size >= MIN_CHUNK_SIZE); + + auto meta_cap = + local_state.get_meta_range().alloc_range(sizeof(SlabMetadata)); + + auto meta = meta_cap.template as_reinterpret().unsafe_ptr(); + + if (meta == nullptr) + { + errno = ENOMEM; + return {nullptr, nullptr}; + } + + capptr::Arena p = local_state.get_object_range()->alloc_range(size); + +#ifdef SNMALLOC_TRACING + message<1024>("Alloc chunk: {} ({})", p.unsafe_ptr(), size); +#endif + if (p == nullptr) + { + local_state.get_meta_range().dealloc_range( + meta_cap, sizeof(SlabMetadata)); + errno = ENOMEM; +#ifdef SNMALLOC_TRACING + message<1024>("Out of memory"); +#endif + return {nullptr, nullptr}; + } + + meta->arena_set(p); + typename Pagemap::Entry t(meta, ras); + Pagemap::set_metaentry(address_cast(p), size, t); + + return {Aal::capptr_bound(p, size), meta}; + } + + /** + * Deallocate a chunk of memory of size `size` and base `alloc`. + * The `slab_metadata` is the meta-data block associated with this + * chunk. The backend can recalculate this, but as the callee will + * already have it, we take it for possibly more optimal code. + * + * LocalState contains all the information about the various ranges + * that are used by the backend to manage the address space. + */ + static void dealloc_chunk( + LocalState& local_state, + SlabMetadata& slab_metadata, + capptr::Alloc alloc, + size_t size) + { + /* + * The backend takes possession of these chunks now, by disassociating + * any existing remote allocator and metadata structure. If + * interrogated, the sizeclass reported by the FrontendMetaEntry is 0, + * which has size 0. 
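Stepping back from the individual members, the metadata path of the `BackendAllocator` above is consumed roughly as follows; `MyMeta`, and the use of the default `StandardConfig` aliases, are illustrative assumptions rather than a fixed API:

```cpp
#include <new>

#include <snmalloc/snmalloc.h>

struct MyMeta // hypothetical metadata type
{
  int counter = 0;
};

void* make_meta()
{
  using Config = snmalloc::StandardConfig;

  // Passing nullptr falls back to the concurrency-safe global meta range.
  auto p = Config::Backend::alloc_meta_data<MyMeta>(nullptr, sizeof(MyMeta));
  if (p == nullptr)
    return nullptr; // errno has been set to ENOMEM

  return new (p.unsafe_ptr()) MyMeta();
}
```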
+ */ + typename Pagemap::Entry t(nullptr, 0); + t.claim_for_backend(); + SNMALLOC_ASSERT_MSG( + Pagemap::get_metaentry(address_cast(alloc)).get_slab_metadata() == + &slab_metadata, + "Slab metadata {} passed for address {} does not match the meta entry " + "{} that is used for that address", + &slab_metadata, + address_cast(alloc), + Pagemap::get_metaentry(address_cast(alloc)).get_slab_metadata()); + Pagemap::set_metaentry(address_cast(alloc), size, t); + + /* + * On CHERI, the passed alloc has had its bounds narrowed to just the + * Chunk, and so we retrieve the Arena-bounded cap for use in the + * remainder of the backend. + */ + capptr::Arena arena = slab_metadata.arena_get(alloc); + + local_state.get_meta_range().dealloc_range( + capptr::Arena::unsafe_from(&slab_metadata), sizeof(SlabMetadata)); + + local_state.get_object_range()->dealloc_range(arena, size); + } + + template + SNMALLOC_FAST_PATH static const PagemapEntry& get_metaentry(address_t p) + { + return Pagemap::template get_metaentry(p); + } + + static size_t get_current_usage() + { + Stats stats_state; + return stats_state.get_current_usage(); + } + + static size_t get_peak_usage() + { + Stats stats_state; + return stats_state.get_peak_usage(); + } + }; +} // namespace snmalloc diff --git a/src/snmalloc/backend/base_constants.h b/src/snmalloc/backend/base_constants.h new file mode 100644 index 000000000..92385a960 --- /dev/null +++ b/src/snmalloc/backend/base_constants.h @@ -0,0 +1,21 @@ + + +#pragma once + +#include "../backend/backend.h" + +namespace snmalloc +{ + /** + * Base range configuration contains common parts of other ranges. + */ + struct BaseLocalStateConstants + { + protected: + // Size of requests that the global cache should use + static constexpr size_t GlobalCacheSizeBits = 24; + + // Size of requests that the local cache should use + static constexpr size_t LocalCacheSizeBits = 21; + }; +} // namespace snmalloc \ No newline at end of file diff --git a/src/snmalloc/backend/fixedglobalconfig.h b/src/snmalloc/backend/fixedglobalconfig.h new file mode 100644 index 000000000..b3d42db16 --- /dev/null +++ b/src/snmalloc/backend/fixedglobalconfig.h @@ -0,0 +1,115 @@ +#pragma once + +#include "../backend_helpers/backend_helpers.h" +#include "standard_range.h" + +namespace snmalloc +{ + /** + * A single fixed address range allocator configuration + */ + template + class FixedRangeConfig final : public CommonConfig + { + public: + using PagemapEntry = DefaultPagemapEntry; + + private: + using ConcretePagemap = + FlatPagemap; + + using Pagemap = BasicPagemap; + + public: + using LocalState = StandardLocalState; + + using GlobalPoolState = PoolState>; + + using Backend = BackendAllocator; + using Pal = PAL; + + private: + inline static GlobalPoolState alloc_pool; + + public: + static GlobalPoolState& pool() + { + return alloc_pool; + } + + /* + * The obvious + * `static constexpr Flags Options{.HasDomesticate = true};` fails on + * Ubuntu 18.04 with an error "sorry, unimplemented: non-trivial + * designated initializers not supported". + * The following was copied from domestication.cc test with the following + * comment: + * C++, even as late as C++20, has some really quite strict limitations on + * designated initializers. However, as of C++17, we can have constexpr + * lambdas and so can use more of the power of the statement fragment of + * C++, and not just its initializer fragment, to initialize a non-prefix + * subset of the flags (in any order, at that). 
+ */ + static constexpr Flags Options = []() constexpr + { + Flags opts = {}; + opts.HasDomesticate = true; + return opts; + } + (); + + // This needs to be a forward reference as the + // thread local state will need to know about this. + // This may allocate, so must be called once a thread + // local allocator exists. + static void register_clean_up() + { + snmalloc::register_clean_up(); + } + + static void init(LocalState* local_state, void* base, size_t length) + { + UNUSED(local_state); + + auto [heap_base, heap_length] = + Pagemap::concretePagemap.init(base, length); + + Pagemap::register_range(address_cast(heap_base), heap_length); + + // Push memory into the global range. + range_to_pow_2_blocks( + capptr::Arena::unsafe_from(heap_base), + heap_length, + [&](capptr::Arena p, size_t sz, bool) { + typename LocalState::GlobalR g; + g.dealloc_range(p, sz); + }); + } + + /* Verify that a pointer points into the region managed by this config */ + template + static SNMALLOC_FAST_PATH CapPtr< + T, + typename B::template with_wildness> + capptr_domesticate(LocalState* ls, CapPtr p) + { + static_assert(B::wildness == capptr::dimension::Wildness::Wild); + + static const size_t sz = sizeof( + std::conditional, void>, void*, T>); + + UNUSED(ls); + auto address = address_cast(p); + auto [base, length] = Pagemap::get_bounds(); + if ((address - base > (length - sz)) || (length < sz)) + { + return nullptr; + } + + return CapPtr< + T, + typename B::template with_wildness>:: + unsafe_from(p.unsafe_ptr()); + } + }; +} diff --git a/src/snmalloc/backend/globalconfig.h b/src/snmalloc/backend/globalconfig.h new file mode 100644 index 000000000..322536a1a --- /dev/null +++ b/src/snmalloc/backend/globalconfig.h @@ -0,0 +1,163 @@ +#pragma once +// If you define SNMALLOC_PROVIDE_OWN_CONFIG then you must provide your own +// definition of `snmalloc::Alloc` before including any files that include +// `snmalloc.h` or consume the global allocation APIs. +#ifndef SNMALLOC_PROVIDE_OWN_CONFIG + +# include "../backend_helpers/backend_helpers.h" +# include "backend.h" +# include "meta_protected_range.h" +# include "standard_range.h" + +# if defined(SNMALLOC_CHECK_CLIENT) && !defined(OPEN_ENCLAVE) +/** + * Protect meta data blocks by allocating separate from chunks for + * user allocations. This involves leaving gaps in address space. + * This is less efficient, so should only be applied for the checked + * build. + * + * On Open Enclave the address space is limited, so we disable this + * feature. + */ +# define SNMALLOC_META_PROTECTED +# endif + +namespace snmalloc +{ + // Forward reference to thread local cleanup. + void register_clean_up(); + + /** + * The default configuration for a global snmalloc. It contains all the + * datastructures to manage the memory from the OS. It had several internal + * public types for various aspects of the code. + * The most notable are: + * + * Backend - Manages the memory coming from the platform. + * LocalState - the per-thread/per-allocator state that may perform local + * caching of reserved memory. This also specifies the various Range types + * used to manage the memory. + * + * The Configuration sets up a Pagemap for the backend to use, and the state + * required to build new allocators (GlobalPoolState). 
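Once this default configuration is active (that is, `SNMALLOC_PROVIDE_OWN_CONFIG` is not defined), the front end is reached through the thread-local allocator; a minimal sketch, assuming the 0.6-style `ThreadAlloc::get()` accessor:

```cpp
#include <snmalloc/snmalloc.h>

int main()
{
  auto& alloc = snmalloc::ThreadAlloc::get();

  void* p = alloc.alloc(128); // serviced by the thread-local LocalAllocator
  alloc.dealloc(p);
}
```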
+ */ + class StandardConfig final : public CommonConfig + { + using GlobalPoolState = PoolState>; + + public: + using Pal = DefaultPal; + using PagemapEntry = DefaultPagemapEntry; + + private: + using ConcretePagemap = + FlatPagemap; + + using Pagemap = BasicPagemap; + + /** + * This specifies where this configurations sources memory from. + * + * Takes account of any platform specific constraints like whether + * mmap/virtual alloc calls can be consolidated. + * @{ + */ +# if defined(_WIN32) || defined(__CHERI_PURE_CAPABILITY__) + static constexpr bool CONSOLIDATE_PAL_ALLOCS = false; +# else + static constexpr bool CONSOLIDATE_PAL_ALLOCS = true; +# endif + + using Base = Pipe< + PalRange, + PagemapRegisterRange>; + /** + * @} + */ + public: + /** + * Use one of the default range configurations + */ +# ifdef SNMALLOC_META_PROTECTED + using LocalState = MetaProtectedRangeLocalState; +# else + using LocalState = StandardLocalState; +# endif + + /** + * Use the default backend. + */ + using Backend = BackendAllocator; + + private: + SNMALLOC_REQUIRE_CONSTINIT + inline static GlobalPoolState alloc_pool; + + /** + * Specifies if the Configuration has been initialised. + */ + SNMALLOC_REQUIRE_CONSTINIT + inline static std::atomic initialised{false}; + + /** + * Used to prevent two threads attempting to initialise the configuration + */ + SNMALLOC_REQUIRE_CONSTINIT + inline static FlagWord initialisation_lock{}; + + public: + /** + * Provides the state to create new allocators. + */ + static GlobalPoolState& pool() + { + return alloc_pool; + } + + static constexpr Flags Options{}; + + // Performs initialisation for this configuration + // of allocators. Needs to be idempotent, + // and concurrency safe. + static void ensure_init() + { + FlagLock lock{initialisation_lock}; +# ifdef SNMALLOC_TRACING + message<1024>("Run init_impl"); +# endif + + if (initialised) + return; + + LocalEntropy entropy; + entropy.init(); + // Initialise key for remote deallocation lists + key_global = FreeListKey(entropy.get_free_list_key()); + + // Need to initialise pagemap. + Pagemap::concretePagemap.init(); + + initialised = true; + } + + static bool is_initialised() + { + return initialised; + } + + // This needs to be a forward reference as the + // thread local state will need to know about this. + // This may allocate, so should only be called once + // a thread local allocator is available. + static void register_clean_up() + { + snmalloc::register_clean_up(); + } + }; + + /** + * Create allocator type for this configuration. + */ + using Alloc = snmalloc::LocalAllocator; +} // namespace snmalloc +#endif diff --git a/src/snmalloc/backend/meta_protected_range.h b/src/snmalloc/backend/meta_protected_range.h new file mode 100644 index 000000000..5c5795cc0 --- /dev/null +++ b/src/snmalloc/backend/meta_protected_range.h @@ -0,0 +1,129 @@ +#pragma once + +#include "../backend/backend.h" +#include "base_constants.h" + +namespace snmalloc +{ + /** + * Range that carefully ensures meta-data and object data cannot be in + * the same memory range. Once memory has is used for either meta-data + * or object data it can never be recycled to the other. + * + * This configuration also includes guard pages and randomisation. + * + * PAL is the underlying PAL that is used to Commit memory ranges. + * + * Base is where memory is sourced from. + * + * MinSizeBits is the minimum request size that can be passed to Base. + * On Windows this 16 as VirtualAlloc cannot reserve less than 64KiB. 
+ * Alternative configurations might make this 2MiB so that huge pages + * can be used. + */ + template< + typename PAL, + typename Pagemap, + typename Base, + size_t MinSizeBits = MinBaseSizeBits()> + struct MetaProtectedRangeLocalState : BaseLocalStateConstants + { + private: + // Global range of memory + using GlobalR = Pipe< + Base, + LargeBuddyRange< + GlobalCacheSizeBits, + bits::BITS - 1, + Pagemap, + MinSizeBits>, + LogRange<2>, + GlobalRange>; + + static constexpr size_t page_size_bits = + bits::next_pow2_bits_const(PAL::page_size); + + static constexpr size_t max_page_chunk_size_bits = + bits::max(page_size_bits, MIN_CHUNK_BITS); + + // Central source of object-range, does not pass back to GlobalR as + // that would allow flows from Objects to Meta-data, and thus UAF + // would be able to corrupt meta-data. + using CentralObjectRange = Pipe< + GlobalR, + LargeBuddyRange, + LogRange<3>, + GlobalRange, + CommitRange, + StatsRange>; + + // Controls the padding around the meta-data range. + // The larger the padding range the more randomisation that + // can be used. + static constexpr size_t SubRangeRatioBits = 6; + + // Centralised source of meta-range + using CentralMetaRange = Pipe< + GlobalR, + SubRange, // Use SubRange to introduce guard + // pages. + LargeBuddyRange< + GlobalCacheSizeBits, + bits::BITS - 1, + Pagemap, + page_size_bits>, + CommitRange, + // In case of huge pages, we don't want to give each thread its own huge + // page, so commit in the global range. + LargeBuddyRange< + max_page_chunk_size_bits, + max_page_chunk_size_bits, + Pagemap, + page_size_bits>, + LogRange<4>, + GlobalRange, + StatsRange>; + + // Local caching of object range + using ObjectRange = Pipe< + CentralObjectRange, + LargeBuddyRange< + LocalCacheSizeBits, + LocalCacheSizeBits, + Pagemap, + page_size_bits>, + LogRange<5>>; + + // Local caching of meta-data range + using MetaRange = Pipe< + CentralMetaRange, + LargeBuddyRange< + LocalCacheSizeBits - SubRangeRatioBits, + bits::BITS - 1, + Pagemap>, + SmallBuddyRange>; + + ObjectRange object_range; + + MetaRange meta_range; + + public: + using Stats = StatsCombiner; + + ObjectRange* get_object_range() + { + return &object_range; + } + + MetaRange& get_meta_range() + { + return meta_range; + } + + // Create global range that can service small meta-data requests. + // Don't want to add the SmallBuddyRange to the CentralMetaRange as that + // would require committing memory inside the main global lock. + using GlobalMetaRange = + Pipe; + }; +} // namespace snmalloc diff --git a/src/snmalloc/backend/standard_range.h b/src/snmalloc/backend/standard_range.h new file mode 100644 index 000000000..225a9a3e5 --- /dev/null +++ b/src/snmalloc/backend/standard_range.h @@ -0,0 +1,80 @@ + + +#pragma once + +#include "../backend/backend.h" +#include "base_constants.h" + +namespace snmalloc +{ + /** + * Default configuration that does not provide any meta-data protection. + * + * PAL is the underlying PAL that is used to Commit memory ranges. + * + * Base is where memory is sourced from. + * + * MinSizeBits is the minimum request size that can be passed to Base. + * On Windows this 16 as VirtualAlloc cannot reserve less than 64KiB. + * Alternative configurations might make this 2MiB so that huge pages + * can be used. 
+ */ + template< + typename PAL, + typename Pagemap, + typename Base = EmptyRange<>, + size_t MinSizeBits = MinBaseSizeBits()> + struct StandardLocalState : BaseLocalStateConstants + { + // Global range of memory, expose this so can be filled by init. + using GlobalR = Pipe< + Base, + LargeBuddyRange< + GlobalCacheSizeBits, + bits::BITS - 1, + Pagemap, + MinSizeBits>, + LogRange<2>, + GlobalRange>; + + // Track stats of the committed memory + using Stats = Pipe, StatsRange>; + + private: + static constexpr size_t page_size_bits = + bits::next_pow2_bits_const(PAL::page_size); + + public: + // Source for object allocations and metadata + // Use buddy allocators to cache locally. + using LargeObjectRange = Pipe< + Stats, + LargeBuddyRange< + LocalCacheSizeBits, + LocalCacheSizeBits, + Pagemap, + page_size_bits>>; + + private: + using ObjectRange = Pipe; + + ObjectRange object_range; + + public: + // Expose a global range for the initial allocation of meta-data. + using GlobalMetaRange = Pipe; + + // Where we get user allocations from. + LargeObjectRange* get_object_range() + { + return object_range.template ancestor(); + } + + // Where we get meta-data allocations from. + ObjectRange& get_meta_range() + { + // Use the object range to service meta-data requests. + return object_range; + } + }; +} // namespace snmalloc diff --git a/src/snmalloc/backend_helpers/backend_helpers.h b/src/snmalloc/backend_helpers/backend_helpers.h new file mode 100644 index 000000000..fa4a708f7 --- /dev/null +++ b/src/snmalloc/backend_helpers/backend_helpers.h @@ -0,0 +1,16 @@ +#include "../mem/mem.h" +#include "buddy.h" +#include "commitrange.h" +#include "commonconfig.h" +#include "defaultpagemapentry.h" +#include "empty_range.h" +#include "globalrange.h" +#include "largebuddyrange.h" +#include "logrange.h" +#include "pagemap.h" +#include "pagemapregisterrange.h" +#include "palrange.h" +#include "range_helpers.h" +#include "smallbuddyrange.h" +#include "statsrange.h" +#include "subrange.h" diff --git a/src/snmalloc/backend_helpers/buddy.h b/src/snmalloc/backend_helpers/buddy.h new file mode 100644 index 000000000..1c9b8e4af --- /dev/null +++ b/src/snmalloc/backend_helpers/buddy.h @@ -0,0 +1,120 @@ +#pragma once + +#include "../ds/ds.h" + +namespace snmalloc +{ + /** + * Class representing a buddy allocator + * + * Underlying node `Rep` representation is passed in. + * + * The allocator can handle blocks between inclusive MIN_SIZE_BITS and + * exclusive MAX_SIZE_BITS. + */ + template + class Buddy + { + std::array, MAX_SIZE_BITS - MIN_SIZE_BITS> trees; + + size_t to_index(size_t size) + { + auto log = snmalloc::bits::next_pow2_bits(size); + SNMALLOC_ASSERT(log >= MIN_SIZE_BITS); + SNMALLOC_ASSERT(log < MAX_SIZE_BITS); + + return log - MIN_SIZE_BITS; + } + + void validate_block(typename Rep::Contents addr, size_t size) + { + SNMALLOC_ASSERT(bits::is_pow2(size)); + SNMALLOC_ASSERT(addr == Rep::align_down(addr, size)); + UNUSED(addr, size); + } + + public: + constexpr Buddy() = default; + /** + * Add a block to the buddy allocator. + * + * Blocks needs to be power of two size and aligned to the same power of + * two. + * + * Returns null, if the block is successfully added. Otherwise, returns the + * consolidated block that is MAX_SIZE_BITS big, and hence too large for + * this allocator. 
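The buddy relationship this class relies on is purely arithmetic: for a naturally aligned power-of-two block, the buddy is the address with the size bit flipped, and the consolidated block starts at the address aligned down to twice the size. A standalone sketch with illustrative addresses:

```c++
#include <cassert>
#include <cstddef>
#include <cstdint>

constexpr uintptr_t buddy_of(uintptr_t addr, size_t size)
{
  return addr ^ size; // flip the size bit
}

constexpr uintptr_t align_down(uintptr_t addr, size_t size)
{
  return addr & ~(uintptr_t(size) - 1);
}

int main()
{
  // Two 16KiB blocks at 0x20000 and 0x24000 are buddies; together they form
  // one 32KiB block starting at 0x20000.
  constexpr size_t size = 0x4000;
  assert(buddy_of(0x24000, size) == 0x20000);
  assert(buddy_of(0x20000, size) == 0x24000);
  assert(align_down(0x24000, size * 2) == 0x20000);
  return 0;
}
```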
+ */ + typename Rep::Contents add_block(typename Rep::Contents addr, size_t size) + { + auto idx = to_index(size); + + validate_block(addr, size); + + auto buddy = Rep::buddy(addr, size); + + auto path = trees[idx].get_root_path(); + bool contains_buddy = trees[idx].find(path, buddy); + + if (contains_buddy) + { + // Only check if we can consolidate after we know the buddy is in + // the buddy allocator. This is required to prevent possible segfaults + // from looking at the buddies meta-data, which we only know exists + // once we have found it in the red-black tree. + if (Rep::can_consolidate(addr, size)) + { + trees[idx].remove_path(path); + + // Add to next level cache + size *= 2; + addr = Rep::align_down(addr, size); + if (size == bits::one_at_bit(MAX_SIZE_BITS)) + // Too big for this buddy allocator. + return addr; + return add_block(addr, size); + } + + // Re-traverse as the path was to the buddy, + // but the representation says we cannot combine. + // We must find the correct place for this element. + // Something clever could be done here, but it's not worth it. + // path = trees[idx].get_root_path(); + trees[idx].find(path, addr); + } + trees[idx].insert_path(path, addr); + return Rep::null; + } + + /** + * Removes a block of size from the buddy allocator. + * + * Return Rep::null if this cannot be satisfied. + */ + typename Rep::Contents remove_block(size_t size) + { + auto idx = to_index(size); + + auto addr = trees[idx].remove_min(); + if (addr != Rep::null) + { + validate_block(addr, size); + return addr; + } + + if (size * 2 == bits::one_at_bit(MAX_SIZE_BITS)) + // Too big for this buddy allocator + return Rep::null; + + auto bigger = remove_block(size * 2); + if (bigger == Rep::null) + return Rep::null; + + auto second = Rep::offset(bigger, size); + + // Split large block + add_block(second, size); + return bigger; + } + }; +} // namespace snmalloc diff --git a/src/snmalloc/backend_helpers/cheri_slabmetadata_mixin.h b/src/snmalloc/backend_helpers/cheri_slabmetadata_mixin.h new file mode 100644 index 000000000..28e6f6ded --- /dev/null +++ b/src/snmalloc/backend_helpers/cheri_slabmetadata_mixin.h @@ -0,0 +1,85 @@ +#pragma once +#include "../pal/pal.h" + +namespace snmalloc +{ + /** + * In CHERI, we must retain, internal to the allocator, the authority to + * entire backing arenas, as there is no architectural mechanism to splice + * together two capabilities. Additionally, these capabilities will retain + * the VMAP software permission, conveying our authority to manipulate the + * address space mappings for said arenas. + * + * We stash these pointers inside the SlabMetadata structures for parts of + * the address space for which SlabMetadata exists. (In other parts of the + * system, we will stash them directly in the pagemap.) This requires that + * we inherit from the FrontendSlabMetadata. + */ + template + class StrictProvenanceSlabMetadataMixin : public SlabMetadata + { + template + friend class BackendAllocator; + + capptr::Arena arena; + + /* Set the arena pointer */ + void arena_set(capptr::Arena a) + { + arena = a; + } + + /* + * Retrieve the stashed pointer for a chunk; the caller must ensure that + * this is the correct arena for the indicated chunk. The latter is unused + * except in debug builds, as there is no architectural amplification. 
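To make the recursion in `remove_block` concrete, here is a worked trace for a 16KiB request when only a single 64KiB block is free. The addresses are purely illustrative, and the trace is a paraphrase of the code above rather than real snmalloc calls:

```c++
#include <cassert>
#include <cstdint>

// Suppose the only free block is 64KiB at A = 0x100000 and 16KiB is requested:
//
//   remove_block(16KiB)          // 16KiB tree empty, try the next size up
//     remove_block(32KiB)        // 32KiB tree empty, try the next size up
//       remove_block(64KiB)      // returns A
//     add_block(A + 32KiB, 32KiB); return A   // upper half goes back
//   add_block(A + 16KiB, 16KiB); return A     // upper quarter goes back
//
// The caller receives A; the 32KiB and 16KiB remainders are re-inserted.
int main()
{
  uintptr_t A = 0x100000;
  assert(A + 0x8000 == 0x108000); // re-inserted 32KiB remainder
  assert(A + 0x4000 == 0x104000); // re-inserted 16KiB remainder
  return 0;
}
```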
+ */ + capptr::Arena arena_get(capptr::Alloc c) + { + SNMALLOC_ASSERT(address_cast(arena) == address_cast(c)); + UNUSED(c); + return arena; + } + }; + + /** + * A dummy implementation of StrictProvenanceBackendSlabMetadata that has no + * computational content, for use on non-StrictProvenance architectures. + */ + template + struct LaxProvenanceSlabMetadataMixin : public SlabMetadata + { + /* On non-StrictProvenance architectures, there's nothing to do */ + void arena_set(capptr::Arena) {} + + /* Just a type sleight of hand, "amplifying" the non-existant bounds */ + capptr::Arena arena_get(capptr::Alloc c) + { + return capptr::Arena::unsafe_from(c.unsafe_ptr()); + } + }; + +#ifdef __cpp_concepts + /** + * Rather than having the backend test backend_strict_provenance in several + * places and doing sleights of hand with the type system, we encapsulate + * the amplification + */ + template + concept IsSlabMeta_Arena = requires(T* t, capptr::Arena p) + { + { + t->arena_set(p) + } + ->ConceptSame; + } + &&requires(T* t, capptr::Alloc p) + { + { + t->arena_get(p) + } + ->ConceptSame>; + }; +#endif + +} // namespace snmalloc diff --git a/src/snmalloc/backend_helpers/commitrange.h b/src/snmalloc/backend_helpers/commitrange.h new file mode 100644 index 000000000..2bbd7a583 --- /dev/null +++ b/src/snmalloc/backend_helpers/commitrange.h @@ -0,0 +1,53 @@ +#pragma once +#include "../pal/pal.h" +#include "empty_range.h" +#include "range_helpers.h" + +namespace snmalloc +{ + template + struct CommitRange + { + template + class Type : public ContainsParent + { + using ContainsParent::parent; + + public: + static constexpr bool Aligned = ParentRange::Aligned; + + static constexpr bool ConcurrencySafe = ParentRange::ConcurrencySafe; + + using ChunkBounds = typename ParentRange::ChunkBounds; + static_assert( + ChunkBounds::address_space_control == + capptr::dimension::AddressSpaceControl::Full); + + constexpr Type() = default; + + CapPtr alloc_range(size_t size) + { + SNMALLOC_ASSERT_MSG( + (size % PAL::page_size) == 0, + "size ({}) must be a multiple of page size ({})", + size, + PAL::page_size); + auto range = parent.alloc_range(size); + if (range != nullptr) + PAL::template notify_using(range.unsafe_ptr(), size); + return range; + } + + void dealloc_range(CapPtr base, size_t size) + { + SNMALLOC_ASSERT_MSG( + (size % PAL::page_size) == 0, + "size ({}) must be a multiple of page size ({})", + size, + PAL::page_size); + PAL::notify_not_using(base.unsafe_ptr(), size); + parent.dealloc_range(base, size); + } + }; + }; +} // namespace snmalloc diff --git a/src/snmalloc/backend_helpers/commonconfig.h b/src/snmalloc/backend_helpers/commonconfig.h new file mode 100644 index 000000000..119d6c844 --- /dev/null +++ b/src/snmalloc/backend_helpers/commonconfig.h @@ -0,0 +1,129 @@ +#pragma once + +#include "../mem/mem.h" + +namespace snmalloc +{ + // Forward reference to thread local cleanup. + void register_clean_up(); + + /** + * Options for a specific snmalloc configuration. Every globals object must + * have one `constexpr` instance of this class called `Options`. This should + * be constructed to explicitly override any of the defaults. 
A + * configuration that does not need to override anything would simply declare + * this as a field of the global object: + * + * ```c++ + * constexpr static snmalloc::Flags Options{}; + * ``` + * + * A global configuration that wished to use out-of-line message queues but + * accept the defaults for everything else would instead do this: + * + * ```c++ + * constexpr static snmalloc::Flags Options{.IsQueueInline = false}; + * ``` + * + * To maintain backwards source compatibility in future versions, any new + * option added here should have its default set to be whatever snmalloc was + * doing before the new option was added. + */ + struct Flags + { + /** + * Should allocators have inline message queues? If this is true then + * the `CoreAllocator` is responsible for allocating the + * `RemoteAllocator` that contains its message queue. If this is false + * then the `RemoteAllocator` must be separately allocated and provided + * to the `CoreAllocator` before it is used. + * + * Setting this to `false` currently requires also setting + * `LocalAllocSupportsLazyInit` to false so that the `CoreAllocator` can + * be provided to the `LocalAllocator` fully initialised but in the + * future it may be possible to allocate the `RemoteAllocator` via + * `alloc_meta_data` or a similar API in the back end. + */ + bool IsQueueInline = true; + + /** + * Does the `CoreAllocator` own a `Backend::LocalState` object? If this is + * true then the `CoreAllocator` is responsible for allocating and + * deallocating a local state object, otherwise the surrounding code is + * responsible for creating it. + * + * Use cases that set this to false will probably also need to set + * `LocalAllocSupportsLazyInit` to false so that they can provide the local + * state explicitly during allocator creation. + */ + bool CoreAllocOwnsLocalState = true; + + /** + * Are `CoreAllocator` allocated by the pool allocator? If not then the + * code embedding this snmalloc configuration is responsible for allocating + * `CoreAllocator` instances. + * + * Users setting this flag must also set `LocalAllocSupportsLazyInit` to + * false currently because there is no alternative mechanism for allocating + * core allocators. This may change in future versions. + */ + bool CoreAllocIsPoolAllocated = true; + + /** + * Do `LocalAllocator` instances in this configuration support lazy + * initialisation? If so, then the first exit from a fast path will + * trigger allocation of a `CoreAllocator` and associated state. If not + * then the code embedding this configuration of snmalloc is responsible + * for allocating core allocators. + */ + bool LocalAllocSupportsLazyInit = true; + + /** + * Are the front and back pointers to the message queue in a RemoteAllocator + * considered to be capptr_bounds::Wildness::Tame (as opposed to Wild)? + * That is, is it presumed that clients or other potentialadversaries cannot + * access the front and back pointers themselves, even if they can access + * the queue nodes themselves (which are always considered Wild)? + */ + bool QueueHeadsAreTame = true; + + /** + * Does the backend provide a capptr_domesticate function to sanity check + * pointers? If so it will be called when untrusted pointers are consumed + * (on dealloc and in freelists) otherwise a no-op version is provided. + */ + bool HasDomesticate = false; + }; + + /** + * Class containing definitions that are likely to be used by all except for + * the most unusual back-end implementations. 
This can be subclassed as a + * convenience for back-end implementers, but is not required. + */ + class CommonConfig + { + public: + /** + * Special remote that should never be used as a real remote. + * This is used to initialise allocators that should always hit the + * remote path for deallocation. Hence moving a branch off the critical + * path. + */ + SNMALLOC_REQUIRE_CONSTINIT + inline static RemoteAllocator unused_remote; + }; + + template + static constexpr size_t MinBaseSizeBits() + { + if constexpr (pal_supports) + { + return bits::next_pow2_bits_const(PAL::minimum_alloc_size); + } + else + { + return MIN_CHUNK_BITS; + } + } +} // namespace snmalloc +#include "../mem/remotecache.h" diff --git a/src/snmalloc/backend_helpers/defaultpagemapentry.h b/src/snmalloc/backend_helpers/defaultpagemapentry.h new file mode 100644 index 000000000..b63f26f90 --- /dev/null +++ b/src/snmalloc/backend_helpers/defaultpagemapentry.h @@ -0,0 +1,68 @@ +#pragma once + +#include "../mem/mem.h" +#include "cheri_slabmetadata_mixin.h" + +namespace snmalloc +{ + /** + * Example of type stored in the pagemap. + * The following class could be replaced by: + * + * ``` + * using DefaultPagemapEntry = FrontendMetaEntry; + * ``` + * + * The full form here provides an example of how to extend the pagemap + * entries. It also guarantees that the front end never directly + * constructs meta entries, it only ever reads them or modifies them in + * place. + */ + template + class DefaultPagemapEntryT : public FrontendMetaEntry + { + /** + * The private initialising constructor is usable only by this back end. + */ + template + friend class BackendAllocator; + + /** + * The private default constructor is usable only by the pagemap. + */ + template + friend class FlatPagemap; + + /** + * The only constructor that creates newly initialised meta entries. + * This is callable only by the back end. The front end may copy, + * query, and update these entries, but it may not create them + * directly. This contract allows the back end to store any arbitrary + * metadata in meta entries when they are first constructed. + */ + SNMALLOC_FAST_PATH + DefaultPagemapEntryT(SlabMetadata* meta, uintptr_t ras) + : FrontendMetaEntry(meta, ras) + {} + + /** + * Copy assignment is used only by the pagemap. + */ + DefaultPagemapEntryT& operator=(const DefaultPagemapEntryT& other) + { + FrontendMetaEntry::operator=(other); + return *this; + } + + /** + * Default constructor. This must be callable from the pagemap. 
+ */ + SNMALLOC_FAST_PATH DefaultPagemapEntryT() = default; + }; + + using DefaultPagemapEntry = DefaultPagemapEntryT, + LaxProvenanceSlabMetadataMixin>>; + +} // namespace snmalloc diff --git a/src/snmalloc/backend_helpers/empty_range.h b/src/snmalloc/backend_helpers/empty_range.h new file mode 100644 index 000000000..db91b9dc1 --- /dev/null +++ b/src/snmalloc/backend_helpers/empty_range.h @@ -0,0 +1,23 @@ +#pragma once +#include "../ds_core/ds_core.h" + +namespace snmalloc +{ + template + class EmptyRange + { + public: + static constexpr bool Aligned = true; + + static constexpr bool ConcurrencySafe = true; + + using ChunkBounds = B; + + constexpr EmptyRange() = default; + + CapPtr alloc_range(size_t) + { + return nullptr; + } + }; +} // namespace snmalloc diff --git a/src/snmalloc/backend_helpers/globalrange.h b/src/snmalloc/backend_helpers/globalrange.h new file mode 100644 index 000000000..760ad21d7 --- /dev/null +++ b/src/snmalloc/backend_helpers/globalrange.h @@ -0,0 +1,47 @@ +#pragma once + +#include "../ds/ds.h" +#include "empty_range.h" + +namespace snmalloc +{ + /** + * Makes the supplied ParentRange into a global variable, + * and protects access with a lock. + */ + struct GlobalRange + { + template> + class Type : public StaticParent + { + using StaticParent::parent; + + /** + * This is infrequently used code, a spin lock simplifies the code + * considerably, and should never be on the fast path. + */ + SNMALLOC_REQUIRE_CONSTINIT static inline FlagWord spin_lock{}; + + public: + static constexpr bool Aligned = ParentRange::Aligned; + + static constexpr bool ConcurrencySafe = true; + + using ChunkBounds = typename ParentRange::ChunkBounds; + + constexpr Type() = default; + + CapPtr alloc_range(size_t size) + { + FlagLock lock(spin_lock); + return parent.alloc_range(size); + } + + void dealloc_range(CapPtr base, size_t size) + { + FlagLock lock(spin_lock); + parent.dealloc_range(base, size); + } + }; + }; +} // namespace snmalloc diff --git a/src/snmalloc/backend_helpers/largebuddyrange.h b/src/snmalloc/backend_helpers/largebuddyrange.h new file mode 100644 index 000000000..d1446d725 --- /dev/null +++ b/src/snmalloc/backend_helpers/largebuddyrange.h @@ -0,0 +1,395 @@ +#pragma once + +#include "../ds/ds.h" +#include "../mem/mem.h" +#include "buddy.h" +#include "empty_range.h" +#include "range_helpers.h" + +#include + +namespace snmalloc +{ + /** + * Class for using the pagemap entries for the buddy allocator. + */ + template + class BuddyChunkRep + { + public: + /* + * The values we store in our rbtree are the addresses of (combined spans + * of) chunks of the address space; as such, bits in (MIN_CHUNK_SIZE - 1) + * are unused and so the RED_BIT is packed therein. However, in practice, + * these are not "just any" uintptr_t-s, but specifically the uintptr_t-s + * inside the Pagemap's BackendAllocator::Entry structures. + * + * The BackendAllocator::Entry provides us with helpers that guarantee that + * we use only the bits that we are allowed to. + * @{ + */ + using Handle = MetaEntryBase::BackendStateWordRef; + using Contents = uintptr_t; + ///@} + + /** + * The bit that we will use to mark an entry as red. + * This has constraints in two directions, it must not be one of the + * reserved bits from the perspective of the meta entry and it must not be + * a bit that is a valid part of the address of a chunk. 
+ * @{ + */ + static constexpr address_t RED_BIT = 1 << 8; + + static_assert(RED_BIT < MIN_CHUNK_SIZE); + static_assert(MetaEntryBase::is_backend_allowed_value( + MetaEntryBase::Word::One, RED_BIT)); + static_assert(MetaEntryBase::is_backend_allowed_value( + MetaEntryBase::Word::Two, RED_BIT)); + ///@} + + /// The value of a null node, as returned by `get` + static constexpr Contents null = 0; + /// The value of a null node, as stored in a `uintptr_t`. + static constexpr Contents root = 0; + + /** + * Set the value. Preserve the red/black colour. + */ + static void set(Handle ptr, Contents r) + { + ptr = r | (static_cast(ptr.get()) & RED_BIT); + } + + /** + * Returns the value, stripping out the red/black colour. + */ + static Contents get(const Handle ptr) + { + return ptr.get() & ~RED_BIT; + } + + /** + * Returns a pointer to the tree node for the specified address. + */ + static Handle ref(bool direction, Contents k) + { + // Special case for accessing the null entry. We want to make sure + // that this is never modified by the back end, so we make it point to + // a constant entry and use the MMU to trap even in release modes. + static const Contents null_entry = 0; + if (SNMALLOC_UNLIKELY(address_cast(k) == 0)) + { + return {const_cast(&null_entry)}; + } + auto& entry = Pagemap::template get_metaentry_mut(address_cast(k)); + if (direction) + return entry.get_backend_word(Pagemap::Entry::Word::One); + + return entry.get_backend_word(Pagemap::Entry::Word::Two); + } + + static bool is_red(Contents k) + { + return (ref(true, k).get() & RED_BIT) == RED_BIT; + } + + static void set_red(Contents k, bool new_is_red) + { + if (new_is_red != is_red(k)) + { + auto v = ref(true, k); + v = v.get() ^ RED_BIT; + } + SNMALLOC_ASSERT(is_red(k) == new_is_red); + } + + static Contents offset(Contents k, size_t size) + { + return k + size; + } + + static Contents buddy(Contents k, size_t size) + { + return k ^ size; + } + + static Contents align_down(Contents k, size_t size) + { + return k & ~(size - 1); + } + + static bool compare(Contents k1, Contents k2) + { + return k1 > k2; + } + + static bool equal(Contents k1, Contents k2) + { + return k1 == k2; + } + + static uintptr_t printable(Contents k) + { + return k; + } + + /** + * Convert the pointer wrapper into something that the snmalloc debug + * printing code can print. + */ + static address_t printable(Handle k) + { + return k.printable_address(); + } + + /** + * Returns the name for use in debugging traces. Not used in normal builds + * (release or debug), only when tracing is enabled. + */ + static const char* name() + { + return "BuddyChunkRep"; + } + + static bool can_consolidate(Contents k, size_t size) + { + // Need to know both entries exist in the pagemap. + // This must only be called if that has already been + // ascertained. + // The buddy could be in a part of the pagemap that has + // not been registered and thus could segfault on access. + auto larger = bits::max(k, buddy(k, size)); + auto& entry = + Pagemap::template get_metaentry_mut(address_cast(larger)); + return !entry.is_boundary(); + } + }; + + /** + * Used to represent a consolidating range of memory. Uses a buddy allocator + * to consolidate adjacent blocks. + * + * ParentRange - Represents the range to get memory from to fill this range. + * + * REFILL_SIZE_BITS - Maximum size of a refill, may ask for less during warm + * up phase. + * + * MAX_SIZE_BITS - Maximum size that this range will store. 
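The reason bit 8 is safe to borrow is that the values stored in the tree are chunk addresses, which are always aligned to `MIN_CHUNK_SIZE`, so their low bits are zero. A small sketch of the pack/unpack discipline that `set` and `get` above follow, using a plain `uintptr_t` rather than the pagemap's backend words:

```c++
#include <cstdint>

constexpr uintptr_t MIN_CHUNK_SIZE = uintptr_t(1) << 14; // 16KiB chunks
constexpr uintptr_t RED_BIT = 1 << 8;                    // well below chunk alignment
static_assert(RED_BIT < MIN_CHUNK_SIZE);

// Store a chunk address while preserving the colour already in the word.
constexpr uintptr_t set_value(uintptr_t word, uintptr_t chunk_addr)
{
  return chunk_addr | (word & RED_BIT);
}

// Read the address back, stripping the colour.
constexpr uintptr_t get_value(uintptr_t word)
{
  return word & ~RED_BIT;
}

// A red word keeps its colour across an address update and still yields the
// exact chunk address on read.
constexpr uintptr_t updated = set_value(RED_BIT, 0x40000);
static_assert((updated & RED_BIT) == RED_BIT);
static_assert(get_value(updated) == 0x40000);
```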
+ * + * Pagemap - How to access the pagemap, which is used to store the red black + * tree nodes for the buddy allocators. + * + * MIN_REFILL_SIZE_BITS - The minimum size that the ParentRange can be asked + * for + */ + template< + size_t REFILL_SIZE_BITS, + size_t MAX_SIZE_BITS, + SNMALLOC_CONCEPT(IsWritablePagemap) Pagemap, + size_t MIN_REFILL_SIZE_BITS = 0> + class LargeBuddyRange + { + static_assert( + REFILL_SIZE_BITS <= MAX_SIZE_BITS, "REFILL_SIZE_BITS > MAX_SIZE_BITS"); + static_assert( + MIN_REFILL_SIZE_BITS <= REFILL_SIZE_BITS, + "MIN_REFILL_SIZE_BITS > REFILL_SIZE_BITS"); + + /** + * Maximum size of a refill + */ + static constexpr size_t REFILL_SIZE = bits::one_at_bit(REFILL_SIZE_BITS); + + /** + * Minimum size of a refill + */ + static constexpr size_t MIN_REFILL_SIZE = + bits::one_at_bit(MIN_REFILL_SIZE_BITS); + + public: + template> + class Type : public ContainsParent + { + using ContainsParent::parent; + + /** + * The size of memory requested so far. + * + * This is used to determine the refill size. + */ + size_t requested_total = 0; + + /** + * Buddy allocator used to represent this range of memory. + */ + Buddy, MIN_CHUNK_BITS, MAX_SIZE_BITS> buddy_large; + + /** + * The parent might not support deallocation if this buddy allocator + * covers the whole range. Uses template insanity to make this work. + */ + template + std::enable_if_t + parent_dealloc_range(capptr::Arena base, size_t size) + { + static_assert( + MAX_SIZE_BITS != (bits::BITS - 1), "Don't set SFINAE parameter"); + parent.dealloc_range(base, size); + } + + void dealloc_overflow(capptr::Arena overflow) + { + if constexpr (MAX_SIZE_BITS != (bits::BITS - 1)) + { + if (overflow != nullptr) + { + parent.dealloc_range(overflow, bits::one_at_bit(MAX_SIZE_BITS)); + } + } + else + { + if (overflow != nullptr) + abort(); + } + } + + /** + * Add a range of memory to the address space. + * Divides blocks into power of two sizes with natural alignment + */ + void add_range(capptr::Arena base, size_t length) + { + range_to_pow_2_blocks( + base, length, [this](capptr::Arena base, size_t align, bool) { + auto overflow = + capptr::Arena::unsafe_from(reinterpret_cast( + buddy_large.add_block(base.unsafe_uintptr(), align))); + + dealloc_overflow(overflow); + }); + } + + capptr::Arena refill(size_t size) + { + if (ParentRange::Aligned) + { + // Use amount currently requested to determine refill size. + // This will gradually increase the usage of the parent range. + // So small examples can grow local caches slowly, and larger + // examples will grow them by the refill size. + // + // The heuristic is designed to allocate the following sequence for + // 16KiB requests 16KiB, 16KiB, 32Kib, 64KiB, ..., REFILL_SIZE/2, + // REFILL_SIZE, REFILL_SIZE, ... Hence if this if they are coming from + // a contiguous aligned range, then they could be consolidated. This + // depends on the ParentRange behaviour. + size_t refill_size = bits::min(REFILL_SIZE, requested_total); + refill_size = bits::max(refill_size, MIN_REFILL_SIZE); + refill_size = bits::max(refill_size, size); + refill_size = bits::next_pow2(refill_size); + + auto refill_range = parent.alloc_range(refill_size); + if (refill_range != nullptr) + { + requested_total += refill_size; + add_range(pointer_offset(refill_range, size), refill_size - size); + } + return refill_range; + } + + // Note the unaligned parent path does not use + // requested_total in the heuristic for the initial size + // this is because the request needs to introduce alignment. 
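The aligned-parent refill heuristic above can be simulated directly: each refill asks for the smaller of the total requested so far and `REFILL_SIZE`, clamped below by the current request and `MIN_REFILL_SIZE`, then rounded up to a power of two. A standalone sketch of successive refill calls driven by 16KiB requests, assuming the default `MIN_REFILL_SIZE == 1` and an illustrative 16MiB `REFILL_SIZE` (the real value depends on `GlobalCacheSizeBits`):

```c++
#include <algorithm>
#include <cstddef>
#include <cstdio>

size_t next_pow2(size_t x)
{
  size_t r = 1;
  while (r < x)
    r <<= 1;
  return r;
}

int main()
{
  constexpr size_t REFILL_SIZE = size_t(1) << 24; // assumed for illustration
  constexpr size_t MIN_REFILL_SIZE = 1;
  constexpr size_t size = 16 * 1024;              // each request is 16KiB
  size_t requested_total = 0;

  for (int refill_call = 1; refill_call <= 6; refill_call++)
  {
    size_t refill_size = std::min(REFILL_SIZE, requested_total);
    refill_size = std::max(refill_size, MIN_REFILL_SIZE);
    refill_size = std::max(refill_size, size);
    refill_size = next_pow2(refill_size);
    printf("refill %d: %zu KiB\n", refill_call, refill_size / 1024);
    requested_total += refill_size;
  }
  // Prints 16, 16, 32, 64, 128, 256: refills grow with usage and keep
  // doubling until capped at REFILL_SIZE.
  return 0;
}
```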
+ // Currently the unaligned variant is not used as a local cache. + // So the gradual growing of refill_size is not needed. + + // Need to overallocate to get the alignment right. + bool overflow = false; + size_t needed_size = bits::umul(size, 2, overflow); + if (overflow) + { + return nullptr; + } + + auto refill_size = bits::max(needed_size, REFILL_SIZE); + while (needed_size <= refill_size) + { + auto refill = parent.alloc_range(refill_size); + + if (refill != nullptr) + { + requested_total += refill_size; + add_range(refill, refill_size); + + SNMALLOC_ASSERT(refill_size < bits::one_at_bit(MAX_SIZE_BITS)); + static_assert( + (REFILL_SIZE < bits::one_at_bit(MAX_SIZE_BITS)) || + ParentRange::Aligned, + "Required to prevent overflow."); + + return alloc_range(size); + } + + refill_size >>= 1; + } + + return nullptr; + } + + public: + static constexpr bool Aligned = true; + + static constexpr bool ConcurrencySafe = false; + + /* The large buddy allocator always deals in Arena-bounded pointers. */ + using ChunkBounds = capptr::bounds::Arena; + static_assert( + std::is_same_v); + + constexpr Type() = default; + + capptr::Arena alloc_range(size_t size) + { + SNMALLOC_ASSERT(size >= MIN_CHUNK_SIZE); + SNMALLOC_ASSERT(bits::is_pow2(size)); + + if (size >= (bits::one_at_bit(MAX_SIZE_BITS) - 1)) + { + if (ParentRange::Aligned) + return parent.alloc_range(size); + + return nullptr; + } + + auto result = capptr::Arena::unsafe_from( + reinterpret_cast(buddy_large.remove_block(size))); + + if (result != nullptr) + return result; + + return refill(size); + } + + void dealloc_range(capptr::Arena base, size_t size) + { + SNMALLOC_ASSERT(size >= MIN_CHUNK_SIZE); + SNMALLOC_ASSERT(bits::is_pow2(size)); + + if constexpr (MAX_SIZE_BITS != (bits::BITS - 1)) + { + if (size >= (bits::one_at_bit(MAX_SIZE_BITS) - 1)) + { + parent_dealloc_range(base, size); + return; + } + } + + auto overflow = + capptr::Arena::unsafe_from(reinterpret_cast( + buddy_large.add_block(base.unsafe_uintptr(), size))); + dealloc_overflow(overflow); + } + }; + }; +} // namespace snmalloc diff --git a/src/snmalloc/backend_helpers/logrange.h b/src/snmalloc/backend_helpers/logrange.h new file mode 100644 index 000000000..0a3f907de --- /dev/null +++ b/src/snmalloc/backend_helpers/logrange.h @@ -0,0 +1,61 @@ +#pragma once + +#include "empty_range.h" +#include "range_helpers.h" + +namespace snmalloc +{ + /** + * RangeName is an integer to specify which range is being logged. Strings can + * be used as template parameters. + * + * ParentRange is what the range is logging calls to. + */ + template + struct LogRange + { + template> + class Type : public ContainsParent + { + using ContainsParent::parent; + + public: + static constexpr bool Aligned = ParentRange::Aligned; + + static constexpr bool ConcurrencySafe = ParentRange::ConcurrencySafe; + + using ChunkBounds = typename ParentRange::ChunkBounds; + + constexpr Type() = default; + + CapPtr alloc_range(size_t size) + { +#ifdef SNMALLOC_TRACING + message<1024>("Call alloc_range({}) on {}", size, RangeName); +#endif + auto range = parent.alloc_range(size); +#ifdef SNMALLOC_TRACING + message<1024>( + "{} = alloc_range({}) in {}", range.unsafe_ptr(), size, RangeName); +#endif + return range; + } + + void dealloc_range(CapPtr base, size_t size) + { +#ifdef SNMALLOC_TRACING + message<1024>( + "dealloc_range({}, {}}) on {}", base.unsafe_ptr(), size, RangeName); +#endif + parent.dealloc_range(base, size); +#ifdef SNMALLOC_TRACING + message<1024>( + "Done dealloc_range({}, {}})! 
on {}", + base.unsafe_ptr(), + size, + RangeName); +#endif + } + }; + }; +} // namespace snmalloc diff --git a/src/snmalloc/backend_helpers/pagemap.h b/src/snmalloc/backend_helpers/pagemap.h new file mode 100644 index 000000000..55535a640 --- /dev/null +++ b/src/snmalloc/backend_helpers/pagemap.h @@ -0,0 +1,424 @@ +#pragma once + +#include "../ds/ds.h" +#include "../mem/mem.h" + +#include +#include + +namespace snmalloc +{ + /** + * Simple pagemap that for each GRANULARITY_BITS of the address range + * stores a T. + */ + template + class FlatPagemap + { + private: + static constexpr size_t SHIFT = GRANULARITY_BITS; + + /** + * Before init is called will contain a single entry + * that is the default value. This is needed so that + * various calls do not have to check for nullptr. + * free(nullptr) + * and + * malloc_usable_size(nullptr) + * do not require an allocation to have ocurred before + * they are called. + */ + inline static const T default_value{}; + + /** + * The representation of the page map. + * + * Initially a single element to ensure nullptr operations + * work. + */ + T* body{const_cast(&default_value)}; + + /** + * The representation of the pagemap, but nullptr if it has not been + * initialised. Used to combine init checking and lookup. + */ + T* body_opt{nullptr}; + + /** + * If `has_bounds` is set, then these should contain the + * bounds of the heap that is being managed by this pagemap. + */ + address_t base{0}; + size_t size{0}; + + public: + /** + * Ensure this range of pagemap is accessible + */ + void register_range(address_t p, size_t length) + { + // Calculate range in pagemap that is associated to this space. + auto first = &body[p >> SHIFT]; + auto last = &body[(p + length + bits::one_at_bit(SHIFT) - 1) >> SHIFT]; + + // Commit OS pages associated to the range. + auto page_start = pointer_align_down(first); + auto page_end = pointer_align_up(last); + size_t using_size = pointer_diff(page_start, page_end); + PAL::template notify_using(page_start, using_size); + } + + constexpr FlatPagemap() = default; + + /** + * For pagemaps that cover an entire fixed address space, return the size + * that they must be. This allows the caller to allocate the correct + * amount of memory to be passed to `init`. This is not available for + * fixed-range pagemaps, whose size depends on dynamic configuration. + */ + template + static constexpr std::enable_if_t required_size() + { + static_assert( + has_bounds_ == has_bounds, "Don't set SFINAE template parameter!"); + constexpr size_t COVERED_BITS = PAL::address_bits - GRANULARITY_BITS; + constexpr size_t ENTRIES = bits::one_at_bit(COVERED_BITS); + return ENTRIES * sizeof(T); + } + + /** + * Initialise with pre-allocated memory. + * + * This is currently disabled for bounded pagemaps but may be reenabled if + * `required_size` is enabled for the has-bounds case. + */ + template + std::enable_if_t init(T* address) + { + static_assert( + has_bounds_ == has_bounds, "Don't set SFINAE template parameter!"); + body = address; + body_opt = address; + } + + /** + * Initialise the pagemap with bounds. + * + * Returns usable range after pagemap has been allocated + */ + template + std::enable_if_t> + init(void* b, size_t s) + { + static_assert( + has_bounds_ == has_bounds, "Don't set SFINAE template parameter!"); +#ifdef SNMALLOC_TRACING + message<1024>("Pagemap.init {} ({})", b, s); +#endif + SNMALLOC_ASSERT(s != 0); + // TODO take account of pagemap size in the calculation of how big it + // needs to be. + + // Align the start and end. 
We won't store for the very ends as they + // are not aligned to a chunk boundary. + auto heap_base = pointer_align_up(b, bits::one_at_bit(GRANULARITY_BITS)); + auto end = pointer_align_down( + pointer_offset(b, s), bits::one_at_bit(GRANULARITY_BITS)); + size = pointer_diff(heap_base, end); + + // Put pagemap at start of range. + // TODO CHERI capability bound here! + body = static_cast(b); + body_opt = body; + // Advance by size of pagemap. + // Note that base needs to be aligned to GRANULARITY for the rest of the + // code to work + // TODO CHERI capability bound here! + heap_base = pointer_align_up( + pointer_offset(b, (size >> SHIFT) * sizeof(T)), + bits::one_at_bit(GRANULARITY_BITS)); + base = address_cast(heap_base); + SNMALLOC_ASSERT( + base == bits::align_up(base, bits::one_at_bit(GRANULARITY_BITS))); + + return {heap_base, pointer_diff(heap_base, end)}; + } + + /** + * Initialise the pagemap without bounds. + */ + template + std::enable_if_t init() + { + static_assert( + has_bounds_ == has_bounds, "Don't set SFINAE template parameter!"); + static constexpr size_t REQUIRED_SIZE = required_size(); + +#ifdef SNMALLOC_CHECK_CLIENT + // Allocate a power of two extra to allow the placement of the + // pagemap be difficult to guess. + size_t additional_size = bits::next_pow2(REQUIRED_SIZE) * 4; + size_t request_size = REQUIRED_SIZE + additional_size; +#else + size_t request_size = REQUIRED_SIZE; +#endif + + auto new_body_untyped = PAL::reserve(request_size); + + if (new_body_untyped == nullptr) + { + PAL::error("Failed to initialise snmalloc."); + } + +#ifdef SNMALLOC_CHECK_CLIENT + // Begin pagemap at random offset within the additionally allocated space. + static_assert(bits::is_pow2(sizeof(T)), "Next line assumes this."); + size_t offset = get_entropy64() & (additional_size - sizeof(T)); + auto new_body = pointer_offset(new_body_untyped, offset); + + if constexpr (pal_supports) + { + void* start_page = pointer_align_down(new_body); + void* end_page = pointer_align_up( + pointer_offset(new_body, REQUIRED_SIZE)); + // Only commit readonly memory for this range, if the platform supports + // lazy commit. Otherwise, this would be a lot of memory to have + // mapped. + PAL::notify_using_readonly( + start_page, pointer_diff(start_page, end_page)); + } +#else + auto new_body = static_cast(new_body_untyped); +#endif + // Ensure bottom page is committed + // ASSUME: new memory is zeroed. + PAL::template notify_using( + pointer_align_down(new_body), OS_PAGE_SIZE); + + // Set up zero page + new_body[0] = body[0]; + + body = new_body; + body_opt = new_body; + } + + template + std::enable_if_t> get_bounds() + { + static_assert( + has_bounds_ == has_bounds, "Don't set SFINAE template parameter!"); + + return {base, size}; + } + + /** + * Get the number of entries. + */ + [[nodiscard]] constexpr size_t num_entries() const + { + if constexpr (has_bounds) + { + return size >> GRANULARITY_BITS; + } + else + { + return bits::one_at_bit(PAL::address_bits - GRANULARITY_BITS); + } + } + + /** + * If the location has not been used before, then + * `potentially_out_of_range` should be set to true. + * This will ensure there is a location for the + * read/write. 
+ */ + template + T& get_mut(address_t p) + { + if constexpr (potentially_out_of_range) + { + if (SNMALLOC_UNLIKELY(body_opt == nullptr)) + return const_cast(default_value); + } + + if constexpr (has_bounds) + { + if (p - base > size) + { + if constexpr (potentially_out_of_range) + { + return const_cast(default_value); + } + else + { + // Out of range null should + // still return the default value. + if (p == 0) + return const_cast(default_value); + PAL::error("Internal error: Pagemap read access out of range."); + } + } + p = p - base; + } + + // If this is potentially_out_of_range, then the pages will not have + // been mapped. With Lazy commit they will at least be mapped read-only + // Note that: this means external pointer on Windows will be slow. + if constexpr (potentially_out_of_range && !pal_supports) + { + register_range(p, 1); + } + + if constexpr (potentially_out_of_range) + return body_opt[p >> SHIFT]; + else + return body[p >> SHIFT]; + } + + /** + * If the location has not been used before, then + * `potentially_out_of_range` should be set to true. + * This will ensure there is a location for the + * read/write. + */ + template + const T& get(address_t p) + { + return get_mut(p); + } + + /** + * Check if the pagemap has been initialised. + */ + [[nodiscard]] bool is_initialised() const + { + return body_opt != nullptr; + } + + /** + * Return the starting address corresponding to a given entry within the + * Pagemap. Also checks that the reference actually points to a valid entry. + */ + [[nodiscard]] address_t get_address(const T& t) const + { + address_t entry_offset = address_cast(&t) - address_cast(body); + address_t entry_index = entry_offset / sizeof(T); + SNMALLOC_ASSERT( + entry_offset % sizeof(T) == 0 && entry_index < num_entries()); + return base + (entry_index << GRANULARITY_BITS); + } + + void set(address_t p, const T& t) + { +#ifdef SNMALLOC_TRACING + message<1024>("Pagemap.Set {}", p); +#endif + if constexpr (has_bounds) + { + if (p - base > size) + { + PAL::error("Internal error: Pagemap write access out of range."); + } + p = p - base; + } + + body[p >> SHIFT] = t; + } + }; + + /** + * This is a generic implementation of the backend's interface to the page + * map. It takes a concrete page map implementation (probably FlatPagemap + * above) and entry type. It is friends with the backend passed in as a + * template parameter so that the backend can initialise the concrete page map + * and use set_metaentry which no one else should use. + */ + template< + typename PAL, + typename ConcreteMap, + typename PagemapEntry, + bool fixed_range> + class BasicPagemap + { + public: + /** + * Export the type stored in the pagemap. + */ + using Entry = PagemapEntry; + + /** + * Instance of the concrete pagemap, accessible to the backend so that + * it can call the init method whose type dependent on fixed_range. + */ + SNMALLOC_REQUIRE_CONSTINIT + static inline ConcreteMap concretePagemap; + + /** + * Set the metadata associated with a chunk. + */ + SNMALLOC_FAST_PATH + static void set_metaentry(address_t p, size_t size, const Entry& t) + { + for (address_t a = p; a < p + size; a += MIN_CHUNK_SIZE) + { + concretePagemap.set(a, t); + } + } + + /** + * Get the metadata associated with a chunk. + * + * Set template parameter to true if it not an error + * to access a location that is not backed by a chunk. 
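For the unbounded flat pagemap above, `required_size` is simply the number of covered granules multiplied by the entry size; the result is reserved as address space and only committed (or touched read-only under lazy commit) as chunks are registered. A back-of-the-envelope sketch with illustrative numbers, assuming a 64-bit platform with 48 address bits, 14-bit granularity and a hypothetical 16-byte entry type:

```c++
#include <cstddef>
#include <cstdint>

constexpr size_t address_bits = 48;     // assumed platform address width
constexpr size_t GRANULARITY_BITS = 14; // one entry per 16KiB granule
constexpr size_t entry_size = 16;       // hypothetical pagemap entry size

constexpr size_t COVERED_BITS = address_bits - GRANULARITY_BITS;
constexpr uint64_t ENTRIES = uint64_t(1) << COVERED_BITS;
constexpr uint64_t required = ENTRIES * entry_size;

// 2^34 entries of 16 bytes each: 256GiB of reserved (not committed) address
// space for the flat pagemap.
static_assert(required == uint64_t(1) << 38);
```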
+ */ + template + SNMALLOC_FAST_PATH static const auto& get_metaentry(address_t p) + { + return concretePagemap.template get(p); + } + + /** + * Get the metadata associated with a chunk. + * + * Set template parameter to true if it not an error + * to access a location that is not backed by a chunk. + */ + template + SNMALLOC_FAST_PATH static auto& get_metaentry_mut(address_t p) + { + return concretePagemap.template get_mut(p); + } + + /** + * Register a range in the pagemap as in-use, requiring it to allow writing + * to the underlying memory. + */ + static void register_range(address_t p, size_t sz) + { + concretePagemap.register_range(p, sz); + } + + /** + * Return the bounds of the memory this back-end manages as a pair of + * addresses (start then end). This is available iff this is a + * fixed-range Backend. + */ + template + static SNMALLOC_FAST_PATH + std::enable_if_t> + get_bounds() + { + static_assert(fixed_range_ == fixed_range, "Don't set SFINAE parameter!"); + + return concretePagemap.get_bounds(); + } + + /** + * Return whether the pagemap is initialised, ready for access. + */ + static bool is_initialised() + { + return concretePagemap.is_initialised(); + } + }; +} // namespace snmalloc diff --git a/src/snmalloc/backend_helpers/pagemapregisterrange.h b/src/snmalloc/backend_helpers/pagemapregisterrange.h new file mode 100644 index 000000000..9a0019e33 --- /dev/null +++ b/src/snmalloc/backend_helpers/pagemapregisterrange.h @@ -0,0 +1,46 @@ +#pragma once + +#include "../pal/pal.h" +#include "empty_range.h" +#include "range_helpers.h" + +namespace snmalloc +{ + template< + SNMALLOC_CONCEPT(IsWritablePagemapWithRegister) Pagemap, + bool CanConsolidate = true> + struct PagemapRegisterRange + { + template> + class Type : public ContainsParent + { + using ContainsParent::parent; + + public: + constexpr Type() = default; + + static constexpr bool Aligned = ParentRange::Aligned; + + static constexpr bool ConcurrencySafe = ParentRange::ConcurrencySafe; + + using ChunkBounds = typename ParentRange::ChunkBounds; + + CapPtr alloc_range(size_t size) + { + auto base = parent.alloc_range(size); + + if (base != nullptr) + Pagemap::register_range(address_cast(base), size); + + if (!CanConsolidate) + { + // Mark start of allocation in pagemap. + auto& entry = Pagemap::get_metaentry_mut(address_cast(base)); + entry.set_boundary(); + } + + return base; + } + }; + }; +} // namespace snmalloc diff --git a/src/snmalloc/backend_helpers/palrange.h b/src/snmalloc/backend_helpers/palrange.h new file mode 100644 index 000000000..ade65294a --- /dev/null +++ b/src/snmalloc/backend_helpers/palrange.h @@ -0,0 +1,51 @@ +#pragma once +#include "../pal/pal.h" + +namespace snmalloc +{ + template + class PalRange + { + public: + static constexpr bool Aligned = pal_supports; + + // Note we have always assumed the Pals to provide a concurrency safe + // API. If in the future this changes, then this would + // need to be changed. 
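`set_metaentry` above writes the same entry at every `MIN_CHUNK_SIZE`-aligned address a span covers, so a lookup on any interior address finds the owning metadata. A toy sketch of that replication, where a `std::map` stands in for the flat pagemap purely for illustration:

```c++
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <map>

constexpr uintptr_t MIN_CHUNK_SIZE = uintptr_t(1) << 14;
constexpr unsigned SHIFT = 14;

int main()
{
  std::map<uintptr_t, int> pagemap; // granule index -> hypothetical handle
  const int entry = 42;

  uintptr_t p = 0x200000;
  size_t size = 4 * MIN_CHUNK_SIZE; // a 64KiB span covers four granules

  // Replicate the entry across every granule the span covers.
  for (uintptr_t a = p; a < p + size; a += MIN_CHUNK_SIZE)
    pagemap[a >> SHIFT] = entry;

  // Any address inside the span resolves to the same entry.
  assert(pagemap[(p + 3 * MIN_CHUNK_SIZE + 100) >> SHIFT] == entry);
  return 0;
}
```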
+ static constexpr bool ConcurrencySafe = true; + + using ChunkBounds = capptr::bounds::Arena; + + constexpr PalRange() = default; + + capptr::Arena alloc_range(size_t size) + { + if (bits::next_pow2_bits(size) >= bits::BITS - 1) + { + return nullptr; + } + + if constexpr (pal_supports) + { + SNMALLOC_ASSERT(size >= PAL::minimum_alloc_size); + auto result = capptr::Arena::unsafe_from( + PAL::template reserve_aligned(size)); + +#ifdef SNMALLOC_TRACING + message<1024>("Pal range alloc: {} ({})", result.unsafe_ptr(), size); +#endif + return result; + } + else + { + auto result = capptr::Arena::unsafe_from(PAL::reserve(size)); + +#ifdef SNMALLOC_TRACING + message<1024>("Pal range alloc: {} ({})", result.unsafe_ptr(), size); +#endif + + return result; + } + } + }; +} // namespace snmalloc diff --git a/src/snmalloc/backend_helpers/range_helpers.h b/src/snmalloc/backend_helpers/range_helpers.h new file mode 100644 index 000000000..1534bb8df --- /dev/null +++ b/src/snmalloc/backend_helpers/range_helpers.h @@ -0,0 +1,133 @@ +#pragma once + +#include "../ds_core/ds_core.h" + +namespace snmalloc +{ + template + void range_to_pow_2_blocks(CapPtr base, size_t length, F f) + { + auto end = pointer_offset(base, length); + base = pointer_align_up(base, bits::one_at_bit(MIN_BITS)); + end = pointer_align_down(end, bits::one_at_bit(MIN_BITS)); + length = pointer_diff(base, end); + + bool first = true; + + // Find the minimum set of maximally aligned blocks in this range. + // Each block's alignment and size are equal. + while (length >= sizeof(void*)) + { + size_t base_align_bits = bits::ctz(address_cast(base)); + size_t length_align_bits = (bits::BITS - 1) - bits::clz(length); + size_t align_bits = bits::min(base_align_bits, length_align_bits); + size_t align = bits::one_at_bit(align_bits); + + /* + * Now that we have found a maximally-aligned block, we can set bounds + * and be certain that we won't hit representation imprecision. + */ + f(base, align, first); + first = false; + + base = pointer_offset(base, align); + length -= align; + } + } + + /** + * Forward definition to allow multiple template specialisations. + * + * This struct is used to recursively compose ranges. + */ + template + struct PipeImpl; + + /** + * Base case of one range that needs nothing. + */ + template + struct PipeImpl + { + using result = Only; + }; + + /** + * Recursive case of applying a base range as an argument to the + * next, and then using that as the new base range. + */ + template + struct PipeImpl + { + public: + using result = + typename PipeImpl, Rest...>::result; + }; + + /** + * Nice type so the caller doesn't need to call result explicitly. + */ + template + using Pipe = typename PipeImpl::result; + + /** + * Helper class for allowing a range to be navigated to find an + * ancestor of a specific type. The parent is an instance field. + */ + template + class ContainsParent + { + protected: + Parent parent{}; + + public: + /** + * Returns the outermost Ancestor with the correct type. + * + * Fails to compile if no such ancestor exists. + */ + template + Anc* ancestor() + { + if constexpr (std::is_same_v) + { + return &parent; + } + else + { + return parent.template ancestor(); + } + } + }; + + /** + * Helper class for allowing a range to be navigated to find an + * ancestor of a specific type. The parent is a static field. + */ + template + class StaticParent + { + protected: + SNMALLOC_REQUIRE_CONSTINIT inline static Parent parent{}; + + public: + /** + * Returns the outermost Ancestor with the correct type. 
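`range_to_pow_2_blocks` above carves a range into naturally aligned power-of-two blocks, where each block is as large as both the current alignment of `base` and the remaining `length` allow. A standalone sketch of the same decomposition for an already chunk-aligned range, with GCC/Clang builtins replacing the `bits::` helpers:

```c++
#include <cstdint>
#include <cstdio>

int ctz(uint64_t x) { return __builtin_ctzll(x); }
int floor_log2(uint64_t x) { return 63 - __builtin_clzll(x); }

int main()
{
  uint64_t base = 0x4000, length = 0x1C000; // 112KiB starting at 16KiB

  while (length > 0)
  {
    int align_bits =
      ctz(base) < floor_log2(length) ? ctz(base) : floor_log2(length);
    uint64_t align = uint64_t(1) << align_bits;
    printf("block at 0x%llx, size %lluKiB\n",
           (unsigned long long)base, (unsigned long long)(align / 1024));
    base += align;
    length -= align;
  }
  // Prints a 16KiB block at 0x4000, a 32KiB block at 0x8000 and a 64KiB
  // block at 0x10000: three blocks, each aligned to its own size.
  return 0;
}
```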
+ * + * Fails to compile if no such ancestor exists. + */ + template + Anc* ancestor() + { + if constexpr (std::is_same_v) + { + return &parent; + } + else + { + return parent.template ancestor(); + } + } + }; + +} // namespace snmalloc diff --git a/src/snmalloc/backend_helpers/smallbuddyrange.h b/src/snmalloc/backend_helpers/smallbuddyrange.h new file mode 100644 index 000000000..ce540558f --- /dev/null +++ b/src/snmalloc/backend_helpers/smallbuddyrange.h @@ -0,0 +1,245 @@ +#pragma once + +#include "../pal/pal.h" +#include "empty_range.h" +#include "range_helpers.h" + +namespace snmalloc +{ + /** + * struct for representing the redblack nodes + * directly inside the meta data. + */ + template + struct FreeChunk + { + CapPtr left; + CapPtr right; + }; + + /** + * Class for using the allocations own space to store in the RBTree. + */ + template + class BuddyInplaceRep + { + public: + using Handle = CapPtr, bounds>*; + using Contents = CapPtr, bounds>; + + static constexpr Contents null = nullptr; + static constexpr Contents root = nullptr; + + static constexpr address_t MASK = 1; + static void set(Handle ptr, Contents r) + { + SNMALLOC_ASSERT((address_cast(r) & MASK) == 0); + if (r == nullptr) + *ptr = CapPtr, bounds>::unsafe_from( + reinterpret_cast*>((*ptr).unsafe_uintptr() & MASK)); + else + // Preserve lower bit. + *ptr = pointer_offset(r, (address_cast(*ptr) & MASK)) + .template as_static>(); + } + + static Contents get(Handle ptr) + { + return pointer_align_down<2, FreeChunk>((*ptr).as_void()); + } + + static Handle ref(bool direction, Contents r) + { + if (direction) + return &r->left; + + return &r->right; + } + + static bool is_red(Contents k) + { + if (k == nullptr) + return false; + return (address_cast(*ref(false, k)) & MASK) == MASK; + } + + static void set_red(Contents k, bool new_is_red) + { + if (new_is_red != is_red(k)) + { + auto r = ref(false, k); + auto old_addr = pointer_align_down<2, FreeChunk>(r->as_void()); + + if (new_is_red) + { + if (old_addr == nullptr) + *r = CapPtr, bounds>::unsafe_from( + reinterpret_cast*>(MASK)); + else + *r = pointer_offset(old_addr, MASK) + .template as_static>(); + } + else + { + *r = old_addr; + } + SNMALLOC_ASSERT(is_red(k) == new_is_red); + } + } + + static Contents offset(Contents k, size_t size) + { + return pointer_offset(k, size).template as_static>(); + } + + static Contents buddy(Contents k, size_t size) + { + // This is just doing xor size, but with what API + // exists on capptr. + auto base = pointer_align_down>(k.as_void(), size * 2); + auto offset = (address_cast(k) & size) ^ size; + return pointer_offset(base, offset) + .template as_static>(); + } + + static Contents align_down(Contents k, size_t size) + { + return pointer_align_down>(k.as_void(), size); + } + + static bool compare(Contents k1, Contents k2) + { + return address_cast(k1) > address_cast(k2); + } + + static bool equal(Contents k1, Contents k2) + { + return address_cast(k1) == address_cast(k2); + } + + static address_t printable(Contents k) + { + return address_cast(k); + } + + /** + * Return the holder in some format suitable for printing by snmalloc's + * debug log mechanism. Used only when used in tracing mode, not normal + * debug or release builds. Raw pointers are printable already, so this is + * the identity function. + */ + static Handle printable(Handle k) + { + return k; + } + + /** + * Return a name for use in tracing mode. Unused in any other context. 
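The `Pipe` and `ContainsParent`/`StaticParent` helpers above are what let the back ends be written as `Pipe<Base, Layer1, Layer2, ...>` stacks. A toy sketch of the same composition pattern with simplified stand-in layers (none of these types are snmalloc's):

```c++
#include <cassert>

// Innermost "range": the source of values.
class Base
{
public:
  int alloc() { return 1; }
};

// Each layer exposes Type<Parent>, which contains its parent by value.
struct AddTen
{
  template<typename Parent>
  class Type
  {
    Parent parent{};

  public:
    int alloc() { return parent.alloc() + 10; }
  };
};

struct Double
{
  template<typename Parent>
  class Type
  {
    Parent parent{};

  public:
    int alloc() { return parent.alloc() * 2; }
  };
};

// What Pipe<Base, AddTen, Double> would expand to: layers are applied left
// to right, so the last layer named is the outermost one.
using Stack = Double::Type<AddTen::Type<Base>>;

int main()
{
  Stack s;
  assert(s.alloc() == (1 + 10) * 2); // requests flow outermost -> innermost
  return 0;
}
```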
+ */ + static const char* name() + { + return "BuddyInplaceRep"; + } + + static bool can_consolidate(Contents k, size_t size) + { + UNUSED(k, size); + return true; + } + }; + + struct SmallBuddyRange + { + template> + class Type : public ContainsParent + { + public: + using ChunkBounds = typename ParentRange::ChunkBounds; + + private: + using ContainsParent::parent; + + static constexpr size_t MIN_BITS = + bits::next_pow2_bits_const(sizeof(FreeChunk)); + + Buddy, MIN_BITS, MIN_CHUNK_BITS> buddy_small; + + /** + * Add a range of memory to the address space. + * Divides blocks into power of two sizes with natural alignment + */ + void add_range(CapPtr base, size_t length) + { + range_to_pow_2_blocks( + base, + length, + [this](CapPtr base, size_t align, bool) { + CapPtr overflow = + buddy_small + .add_block( + base.template as_reinterpret>(), align) + .template as_reinterpret(); + if (overflow != nullptr) + parent.dealloc_range(overflow, bits::one_at_bit(MIN_CHUNK_BITS)); + }); + } + + CapPtr refill(size_t size) + { + auto refill = parent.alloc_range(MIN_CHUNK_SIZE); + + if (refill != nullptr) + add_range(pointer_offset(refill, size), MIN_CHUNK_SIZE - size); + + return refill; + } + + public: + static constexpr bool Aligned = true; + static_assert(ParentRange::Aligned, "ParentRange must be aligned"); + + static constexpr bool ConcurrencySafe = false; + + constexpr Type() = default; + + CapPtr alloc_range(size_t size) + { + SNMALLOC_ASSERT(size < MIN_CHUNK_SIZE); + + auto result = buddy_small.remove_block(size); + if (result != nullptr) + { + result->left = nullptr; + result->right = nullptr; + return result.template as_reinterpret(); + } + return refill(size); + } + + CapPtr alloc_range_with_leftover(size_t size) + { + SNMALLOC_ASSERT(size < MIN_CHUNK_SIZE); + + auto rsize = bits::next_pow2(size); + + auto result = alloc_range(rsize); + + if (result == nullptr) + return nullptr; + + auto remnant = pointer_offset(result, size); + + add_range(remnant, rsize - size); + + return result.template as_reinterpret(); + } + + void dealloc_range(CapPtr base, size_t size) + { + SNMALLOC_ASSERT(size < MIN_CHUNK_SIZE); + + add_range(base, size); + } + }; + }; +} // namespace snmalloc diff --git a/src/snmalloc/backend_helpers/statsrange.h b/src/snmalloc/backend_helpers/statsrange.h new file mode 100644 index 000000000..8548be9cb --- /dev/null +++ b/src/snmalloc/backend_helpers/statsrange.h @@ -0,0 +1,83 @@ +#pragma once + +#include "empty_range.h" +#include "range_helpers.h" + +#include + +namespace snmalloc +{ + /** + * Used to measure memory usage. 
+ */ + struct StatsRange + { + template> + class Type : public ContainsParent + { + using ContainsParent::parent; + + static inline std::atomic current_usage{}; + static inline std::atomic peak_usage{}; + + public: + static constexpr bool Aligned = ParentRange::Aligned; + + static constexpr bool ConcurrencySafe = ParentRange::ConcurrencySafe; + + using ChunkBounds = typename ParentRange::ChunkBounds; + + constexpr Type() = default; + + CapPtr alloc_range(size_t size) + { + auto result = parent.alloc_range(size); + if (result != nullptr) + { + auto prev = current_usage.fetch_add(size); + auto curr = peak_usage.load(); + while (curr < prev + size) + { + if (peak_usage.compare_exchange_weak(curr, prev + size)) + break; + } + } + return result; + } + + void dealloc_range(CapPtr base, size_t size) + { + current_usage -= size; + parent.dealloc_range(base, size); + } + + size_t get_current_usage() + { + return current_usage.load(); + } + + size_t get_peak_usage() + { + return peak_usage.load(); + } + }; + }; + + template + class StatsCombiner + { + StatsR1 r1{}; + StatsR2 r2{}; + + public: + size_t get_current_usage() + { + return r1.get_current_usage() + r2.get_current_usage(); + } + + size_t get_peak_usage() + { + return r1.get_peak_usage() + r2.get_peak_usage(); + } + }; +} // namespace snmalloc diff --git a/src/snmalloc/backend_helpers/subrange.h b/src/snmalloc/backend_helpers/subrange.h new file mode 100644 index 000000000..8d886a2b8 --- /dev/null +++ b/src/snmalloc/backend_helpers/subrange.h @@ -0,0 +1,51 @@ +#pragma once +#include "../mem/mem.h" +#include "empty_range.h" + +namespace snmalloc +{ + /** + * Creates an area inside a large allocation that is larger by + * 2^RATIO_BITS. Will not return a the block at the start or + * the end of the large allocation. + */ + template + struct SubRange + { + template> + class Type : public ContainsParent + { + using ContainsParent::parent; + + public: + constexpr Type() = default; + + static constexpr bool Aligned = ParentRange::Aligned; + + static constexpr bool ConcurrencySafe = ParentRange::ConcurrencySafe; + + using ChunkBounds = typename ParentRange::ChunkBounds; + + CapPtr alloc_range(size_t sub_size) + { + SNMALLOC_ASSERT(bits::is_pow2(sub_size)); + + auto full_size = sub_size << RATIO_BITS; + auto overblock = parent.alloc_range(full_size); + if (overblock == nullptr) + return nullptr; + + size_t offset_mask = full_size - sub_size; + // Don't use first or last block in the larger reservation + // Loop required to get uniform distribution. + size_t offset; + do + { + offset = get_entropy64() & offset_mask; + } while ((offset == 0) || (offset == offset_mask)); + + return pointer_offset(overblock, offset); + } + }; + }; +} // namespace snmalloc diff --git a/src/ds/aba.h b/src/snmalloc/ds/aba.h similarity index 84% rename from src/ds/aba.h rename to src/snmalloc/ds/aba.h index 5e6632328..51c447035 100644 --- a/src/ds/aba.h +++ b/src/snmalloc/ds/aba.h @@ -1,7 +1,6 @@ #pragma once -#include "bits.h" -#include "ptrwrap.h" +#include "../aal/aal.h" /** * This file contains an abstraction of ABA protection. This API should be @@ -24,24 +23,20 @@ namespace snmalloc // fall back to locked implementation. 
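The accounting in `StatsRange` above boils down to an atomic add for current usage plus a compare-exchange loop so that concurrent allocations never lose a peak update. A distilled standalone version of that pattern:

```c++
#include <atomic>
#include <cstddef>

std::atomic<size_t> current_usage{0};
std::atomic<size_t> peak_usage{0};

void on_alloc(size_t size)
{
  size_t prev = current_usage.fetch_add(size);
  size_t curr = peak_usage.load();
  // Raise the peak only if our new total is larger; retry if another thread
  // updated it in between.
  while (curr < prev + size)
  {
    if (peak_usage.compare_exchange_weak(curr, prev + size))
      break;
  }
}

void on_dealloc(size_t size)
{
  current_usage -= size;
}
```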
#if defined(PLATFORM_IS_X86) && \ !(defined(GCC_NOT_CLANG) && defined(OPEN_ENCLAVE)) - template< - typename T, - Construction c = RequiresInit, - template typename Ptr = Pointer, - template typename AtomicPtr = AtomicPointer> + template class ABA { public: struct alignas(2 * sizeof(std::size_t)) Linked { - Ptr ptr; - uintptr_t aba; + T* ptr{nullptr}; + uintptr_t aba{0}; }; struct Independent { - AtomicPtr ptr; - std::atomic aba; + std::atomic ptr{nullptr}; + std::atomic aba{0}; }; static_assert( @@ -59,13 +54,9 @@ namespace snmalloc }; public: - ABA() - { - if constexpr (c == RequiresInit) - init(nullptr); - } + constexpr ABA() : independent() {} - void init(Ptr x) + void init(T* x) { independent.ptr.store(x, std::memory_order_relaxed); independent.aba.store(0, std::memory_order_relaxed); @@ -97,12 +88,12 @@ namespace snmalloc */ Cmp(Linked old, ABA* parent) : old(old), parent(parent) {} - Ptr ptr() + T* ptr() { return old.ptr; } - bool store_conditional(Ptr value) + bool store_conditional(T* value) { # if defined(_MSC_VER) && defined(SNMALLOC_VA_BITS_64) auto result = _InterlockedCompareExchange128( @@ -120,7 +111,7 @@ namespace snmalloc std::atomic& addr = parent->linked; auto result = addr.compare_exchange_weak( - old, xchg, std::memory_order_relaxed, std::memory_order_relaxed); + old, xchg, std::memory_order_acq_rel, std::memory_order_relaxed); # endif return result; } @@ -137,7 +128,7 @@ namespace snmalloc }; // This method is used in Verona - Ptr peek() + T* peek() { return independent.ptr.load(std::memory_order_relaxed); } @@ -146,19 +137,15 @@ namespace snmalloc /** * Naive implementation of ABA protection using a spin lock. */ - template< - typename T, - Construction c = RequiresInit, - template typename Ptr = Pointer, - template typename AtomicPtr = AtomicPointer> + template class ABA { - AtomicPtr ptr = nullptr; + std::atomic ptr = nullptr; std::atomic_flag lock = ATOMIC_FLAG_INIT; public: // This method is used in Verona - void init(Ptr x) + void init(T* x) { ptr.store(x, std::memory_order_relaxed); } @@ -184,12 +171,12 @@ namespace snmalloc ABA* parent; public: - Ptr ptr() + T* ptr() { return parent->ptr; } - bool store_conditional(Ptr t) + bool store_conditional(T* t) { parent->ptr = t; return true; @@ -205,7 +192,7 @@ namespace snmalloc }; // This method is used in Verona - Ptr peek() + T* peek() { return ptr.load(std::memory_order_relaxed); } diff --git a/src/snmalloc/ds/allocconfig.h b/src/snmalloc/ds/allocconfig.h new file mode 100644 index 000000000..51d5b415f --- /dev/null +++ b/src/snmalloc/ds/allocconfig.h @@ -0,0 +1,68 @@ +#pragma once + +namespace snmalloc +{ + // 0 intermediate bits results in power of 2 small allocs. 1 intermediate + // bit gives additional sizeclasses at the midpoint between each power of 2. + // 2 intermediate bits gives 3 intermediate sizeclasses, etc. + static constexpr size_t INTERMEDIATE_BITS = +#ifdef USE_INTERMEDIATE_BITS + USE_INTERMEDIATE_BITS +#else + 2 +#endif + ; + + // The remaining values are derived, not configurable. + static constexpr size_t POINTER_BITS = + bits::next_pow2_bits_const(sizeof(uintptr_t)); + + // Used to isolate values on cache lines to prevent false sharing. + static constexpr size_t CACHELINE_SIZE = 64; + + // Minimum allocation size is space for two pointers. + static_assert(bits::next_pow2_const(sizeof(void*)) == sizeof(void*)); + static constexpr size_t MIN_ALLOC_SIZE = 2 * sizeof(void*); + static constexpr size_t MIN_ALLOC_BITS = bits::ctz_const(MIN_ALLOC_SIZE); + + // Minimum slab size. 
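The `INTERMEDIATE_BITS` comment above describes how many evenly spaced sizeclasses sit between consecutive powers of two. A small constexpr illustration of the default value of 2 applied to the 1KiB to 2KiB interval, following the comment's description rather than the real sizeclass tables:

```c++
#include <cstddef>

constexpr size_t INTERMEDIATE_BITS = 2;

// Spacing of sizeclasses directly above a given power of two.
constexpr size_t step_above(size_t pow2)
{
  return pow2 >> INTERMEDIATE_BITS;
}

// Between 1024 and 2048 there are three intermediate sizeclasses:
// 1280, 1536 and 1792, each a quarter of the interval apart.
static_assert(step_above(1024) == 256);
static_assert(1024 + 1 * step_above(1024) == 1280);
static_assert(1024 + 2 * step_above(1024) == 1536);
static_assert(1024 + 3 * step_above(1024) == 1792);
```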
+ static constexpr size_t MIN_CHUNK_BITS = static_cast(14); + static constexpr size_t MIN_CHUNK_SIZE = bits::one_at_bit(MIN_CHUNK_BITS); + + // Minimum number of objects on a slab +#ifdef SNMALLOC_CHECK_CLIENT + static constexpr size_t MIN_OBJECT_COUNT = 13; +#else + static constexpr size_t MIN_OBJECT_COUNT = 4; +#endif + + // Maximum size of an object that uses sizeclasses. + static constexpr size_t MAX_SMALL_SIZECLASS_BITS = 16; + static constexpr size_t MAX_SMALL_SIZECLASS_SIZE = + bits::one_at_bit(MAX_SMALL_SIZECLASS_BITS); + + static_assert( + MAX_SMALL_SIZECLASS_SIZE >= MIN_CHUNK_SIZE, + "Large sizes need to be representable by as a multiple of MIN_CHUNK_SIZE"); + + // Number of slots for remote deallocation. + static constexpr size_t REMOTE_SLOT_BITS = 8; + static constexpr size_t REMOTE_SLOTS = 1 << REMOTE_SLOT_BITS; + static constexpr size_t REMOTE_MASK = REMOTE_SLOTS - 1; + + static_assert( + INTERMEDIATE_BITS < MIN_ALLOC_BITS, + "INTERMEDIATE_BITS must be less than MIN_ALLOC_BITS"); + static_assert( + MIN_ALLOC_SIZE >= (sizeof(void*) * 2), + "MIN_ALLOC_SIZE must be sufficient for two pointers"); + + // Return remote small allocs when the local cache reaches this size. + static constexpr int64_t REMOTE_CACHE = +#ifdef USE_REMOTE_CACHE + USE_REMOTE_CACHE +#else + 1 << MIN_CHUNK_BITS +#endif + ; +} // namespace snmalloc diff --git a/src/snmalloc/ds/ds.h b/src/snmalloc/ds/ds.h new file mode 100644 index 000000000..432277dcb --- /dev/null +++ b/src/snmalloc/ds/ds.h @@ -0,0 +1,11 @@ +/** + * Data structures used by snmalloc. + * + */ +#pragma once +#include "../pal/pal.h" +#include "aba.h" +#include "allocconfig.h" +#include "flaglock.h" +#include "mpmcstack.h" +#include "singleton.h" diff --git a/src/snmalloc/ds/flaglock.h b/src/snmalloc/ds/flaglock.h new file mode 100644 index 000000000..4a539e636 --- /dev/null +++ b/src/snmalloc/ds/flaglock.h @@ -0,0 +1,136 @@ +#pragma once + +#include "../aal/aal.h" +#include "../pal/pal.h" + +#include +#include + +namespace snmalloc +{ + /** + * @brief The DebugFlagWord struct + * Wrapper for std::atomic_flag so that we can examine + * the re-entrancy problem at debug mode. + */ + struct DebugFlagWord + { + using ThreadIdentity = DefaultPal::ThreadIdentity; + + /** + * @brief flag + * The underlying atomic field. + */ + std::atomic_bool flag{false}; + + constexpr DebugFlagWord() = default; + + template + constexpr DebugFlagWord(Args&&... args) : flag(std::forward(args)...) + {} + + /** + * @brief set_owner + * Record the identity of the locker. + */ + void set_owner() + { + SNMALLOC_ASSERT(ThreadIdentity() == owner); + owner = get_thread_identity(); + } + + /** + * @brief clear_owner + * Set the identity to null. + */ + void clear_owner() + { + SNMALLOC_ASSERT(get_thread_identity() == owner); + owner = ThreadIdentity(); + } + + /** + * @brief assert_not_owned_by_current_thread + * Assert the lock should not be held already by current thread. + */ + void assert_not_owned_by_current_thread() + { + SNMALLOC_ASSERT(get_thread_identity() != owner); + } + + private: + /** + * @brief owner + * We use the Pal to provide the ThreadIdentity. + */ + std::atomic owner = ThreadIdentity(); + + /** + * @brief get_thread_identity + * @return The identity of current thread. 
+ */ + static ThreadIdentity get_thread_identity() + { + return DefaultPal::get_tid(); + } + }; + + /** + * @brief The ReleaseFlagWord struct + * The shares the same structure with DebugFlagWord but + * all member functions associated with ownership checkings + * are empty so that they can be optimised out at Release mode. + */ + struct ReleaseFlagWord + { + std::atomic_bool flag{false}; + + constexpr ReleaseFlagWord() = default; + + template + constexpr ReleaseFlagWord(Args&&... args) + : flag(std::forward(args)...) + {} + + void set_owner() {} + void clear_owner() {} + void assert_not_owned_by_current_thread() {} + }; + +#ifdef NDEBUG + using FlagWord = ReleaseFlagWord; +#else + using FlagWord = DebugFlagWord; +#endif + + class FlagLock + { + private: + FlagWord& lock; + + public: + FlagLock(FlagWord& lock) : lock(lock) + { + while (lock.flag.exchange(true, std::memory_order_acquire)) + { + // assert_not_owned_by_current_thread is only called when the first + // acquiring is failed; which means the lock is already held somewhere + // else. + lock.assert_not_owned_by_current_thread(); + // This loop is better for spin-waiting because it won't issue + // expensive write operation (xchg for example). + while (lock.flag.load(std::memory_order_relaxed)) + { + Aal::pause(); + } + } + lock.set_owner(); + } + + ~FlagLock() + { + lock.clear_owner(); + lock.flag.store(false, std::memory_order_release); + } + }; +} // namespace snmalloc diff --git a/src/snmalloc/ds/mpmcstack.h b/src/snmalloc/ds/mpmcstack.h new file mode 100644 index 000000000..cd005e9bf --- /dev/null +++ b/src/snmalloc/ds/mpmcstack.h @@ -0,0 +1,105 @@ +#pragma once + +#include "../ds_core/ds_core.h" +#include "aba.h" +#include "allocconfig.h" + +#if defined(__has_feature) +# if __has_feature(thread_sanitizer) +# define SNMALLOC_THREAD_SANITIZER_ENABLED +# endif +#endif + +namespace snmalloc +{ + template + class MPMCStack + { + using ABAT = ABA; + + private: + alignas(CACHELINE_SIZE) ABAT stack; + +#ifdef SNMALLOC_THREAD_SANITIZER_ENABLED + __attribute__((no_sanitize("thread"))) static T* + racy_read(std::atomic& ptr) + { + // reinterpret_cast is required as TSAN still instruments + // std::atomic operations, even if you disable TSAN on + // the function. + return *reinterpret_cast(&ptr); + } +#else + static T* racy_read(std::atomic& ptr) + { + return ptr.load(std::memory_order_relaxed); + } +#endif + + public: + constexpr MPMCStack() = default; + + void push(T* item) + { + static_assert( + std::is_same>::value, + "T->next must be an std::atomic"); + + return push(item, item); + } + + void push(T* first, T* last) + { + // Pushes an item on the stack. + auto cmp = stack.read(); + + do + { + auto top = cmp.ptr(); + last->next.store(top, std::memory_order_release); + } while (!cmp.store_conditional(first)); + } + + T* pop() + { + // Returns the next item. If the returned value is decommitted, it is + // possible for the read of top->next to segfault. + auto cmp = stack.read(); + T* top; + T* next; + + do + { + top = cmp.ptr(); + + if (top == nullptr) + break; + + // The following read can race with non-atomic accesses + // this is undefined behaviour. There is no way to use + // CAS sensibly that conforms to the standard with optimistic + // concurrency. + next = racy_read(top->next); + } while (!cmp.store_conditional(next)); + + return top; + } + + T* pop_all() + { + // Returns all items as a linked list, leaving an empty stack. 
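+      // The head is swapped to nullptr with a single store_conditional, so
+      // the caller takes ownership of the whole chain in one step; elements
+      // stay linked through their `next` fields.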
+ auto cmp = stack.read(); + T* top; + + do + { + top = cmp.ptr(); + + if (top == nullptr) + break; + } while (!cmp.store_conditional(nullptr)); + + return top; + } + }; +} // namespace snmalloc diff --git a/src/snmalloc/ds/singleton.h b/src/snmalloc/ds/singleton.h new file mode 100644 index 000000000..c85635d39 --- /dev/null +++ b/src/snmalloc/ds/singleton.h @@ -0,0 +1,51 @@ +#pragma once + +#include "../ds_core/ds_core.h" +#include "flaglock.h" + +#include +#include +#include +#include + +namespace snmalloc +{ + /* + * In some use cases we need to run before any of the C++ runtime has been + * initialised. This singleton class is designed to not depend on the + * runtime. + */ + template + class Singleton + { + inline static FlagWord flag; + inline static std::atomic initialised{false}; + inline static Object obj; + + public: + /** + * If argument is non-null, then it is assigned the value + * true, if this is the first call to get. + * At most one call will be first. + */ + inline SNMALLOC_SLOW_PATH static Object& get(bool* first = nullptr) + { + // If defined should be initially false; + SNMALLOC_ASSERT(first == nullptr || *first == false); + + if (SNMALLOC_UNLIKELY(!initialised.load(std::memory_order_acquire))) + { + FlagLock lock(flag); + if (!initialised) + { + init(&obj); + initialised.store(true, std::memory_order_release); + if (first != nullptr) + *first = true; + } + } + return obj; + } + }; + +} // namespace snmalloc diff --git a/src/ds/bits.h b/src/snmalloc/ds_core/bits.h similarity index 82% rename from src/ds/bits.h rename to src/snmalloc/ds_core/bits.h index 777a5234d..f1dc4ffd6 100644 --- a/src/ds/bits.h +++ b/src/snmalloc/ds_core/bits.h @@ -5,13 +5,16 @@ // #define USE_LZCNT -#include "../aal/aal.h" -#include "../pal/pal_consts.h" #include "defines.h" #include +#include #include #include +#if defined(_MSC_VER) +# include +#endif + #ifdef pause # undef pause #endif @@ -30,12 +33,16 @@ namespace snmalloc namespace bits { - static constexpr size_t BITS = sizeof(size_t) * 8; - - static constexpr bool is64() - { - return BITS == 64; - } + /** + * The number of bits in a `size_t`. Many of the functions in the + * `snmalloc::bits` namespace are defined to operate over `size_t`, mapping + * to the correct compiler builtins irrespective of the size. This size + * does *not* imply the number of bits of virtual address space that are + * actually allowed to be set. `Aal::bits` provides an + * architecture-specific definition of the number of bits of address space + * that exist. + */ + static constexpr size_t BITS = sizeof(size_t) * CHAR_BIT; /** * Returns a value of type T that has a single bit set, @@ -52,14 +59,12 @@ namespace snmalloc return (static_cast(1)) << shift; } - static constexpr size_t ADDRESS_BITS = is64() ? 
48 : 32; - - SNMALLOC_FAST_PATH size_t clz(size_t x) + inline SNMALLOC_FAST_PATH size_t clz(size_t x) { SNMALLOC_ASSERT(x != 0); // Calling with 0 is UB on some implementations #if defined(_MSC_VER) # ifdef USE_LZCNT -# ifdef SNMALLOC_VA_BITS_64 +# ifdef _WIN64 return __lzcnt64(x); # else return __lzcnt((uint32_t)x); @@ -67,10 +72,10 @@ namespace snmalloc # else unsigned long index; -# ifdef SNMALLOC_VA_BITS_64 - _BitScanReverse64(&index, x); +# ifdef _WIN64 + _BitScanReverse64(&index, static_cast(x)); # else - _BitScanReverse(&index, (unsigned long)x); + _BitScanReverse(&index, static_cast(x)); # endif return BITS - index - 1; @@ -108,10 +113,10 @@ namespace snmalloc inline size_t rotr(size_t x, size_t n) { #if defined(_MSC_VER) -# ifdef SNMALLOC_VA_BITS_64 - return _rotr64(x, (int)n); +# ifdef _WIN64 + return _rotr64(static_cast(x), static_cast(n)); # else - return _rotr((uint32_t)x, (int)n); + return _rotr(static_cast(x), static_cast(n)); # endif #else return rotr_const(x, n); @@ -121,10 +126,10 @@ namespace snmalloc inline size_t rotl(size_t x, size_t n) { #if defined(_MSC_VER) -# ifdef SNMALLOC_VA_BITS_64 - return _rotl64(x, (int)n); +# ifdef _WIN64 + return _rotl64(static_cast(x), static_cast(n)); # else - return _rotl((uint32_t)x, (int)n); + return _rotl(static_cast(x), static_cast(n)); # endif #else return rotl_const(x, n); @@ -152,11 +157,11 @@ namespace snmalloc { SNMALLOC_ASSERT(x != 0); // Calling with 0 is UB on some implementations -#if defined(_MSC_VER) -# ifdef SNMALLOC_VA_BITS_64 - return _tzcnt_u64(x); +#if defined(_MSC_VER) && !defined(__clang__) +# ifdef _WIN64 + return _tzcnt_u64(static_cast(x)); # else - return _tzcnt_u32((uint32_t)x); + return _tzcnt_u32(static_cast(x)); # endif #else if constexpr (std::is_same_v) @@ -198,14 +203,14 @@ namespace snmalloc overflow = __builtin_mul_overflow(x, y, &prod); return prod; #elif defined(_MSC_VER) -# if defined(SNMALLOC_VA_BITS_64) +# ifdef _WIN64 size_t high_prod; size_t prod = _umul128(x, y, &high_prod); overflow = high_prod != 0; return prod; # else - size_t prod; - overflow = S_OK != UIntMult(x, y, &prod); + UINT prod; + overflow = S_OK != UIntMult(UINT(x), UINT(y), &prod); return prod; # endif #else @@ -220,7 +225,7 @@ namespace snmalloc return (x & (x - 1)) == 0; } - SNMALLOC_FAST_PATH size_t next_pow2(size_t x) + inline SNMALLOC_FAST_PATH size_t next_pow2(size_t x) { // Correct for numbers [0..MAX_SIZE >> 1). // Returns 1 for x > (MAX_SIZE >> 1). 
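// Editor's illustrative sketch, not part of the patch: the rounding helpers in
// bits.h are built on count-leading-zeros.  A standalone equivalent of
// next_pow2 for a 64-bit size_t, valid for x > 1, using the GCC/Clang
// intrinsic (MSVC takes the _BitScanReverse64 path shown above):
#include <climits>
#include <cstddef>

inline std::size_t next_pow2_sketch(std::size_t x)
{
  constexpr std::size_t BITS = sizeof(std::size_t) * CHAR_BIT;
  // Example: x = 48 gives clz(47) = 58, so the result is 1 << 6 = 64.
  // A value that is already a power of two is returned unchanged.
  return std::size_t(1)
    << (BITS - static_cast<std::size_t>(__builtin_clzll(x - 1)));
}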
@@ -247,12 +252,20 @@ namespace snmalloc return one_at_bit(BITS - clz_const(x - 1)); } - constexpr size_t next_pow2_bits_const(size_t x) + constexpr size_t prev_pow2_const(size_t x) + { + if (x <= 2) + return x; + + return one_at_bit(BITS - (clz_const(x + 1) + 1)); + } + + inline constexpr size_t next_pow2_bits_const(size_t x) { return BITS - clz_const(x - 1); } - constexpr SNMALLOC_FAST_PATH size_t + inline constexpr SNMALLOC_FAST_PATH size_t align_down(size_t value, size_t alignment) { SNMALLOC_ASSERT(is_pow2(alignment)); @@ -262,7 +275,8 @@ namespace snmalloc return value; } - constexpr SNMALLOC_FAST_PATH size_t align_up(size_t value, size_t alignment) + inline constexpr SNMALLOC_FAST_PATH size_t + align_up(size_t value, size_t alignment) { SNMALLOC_ASSERT(is_pow2(alignment)); diff --git a/src/snmalloc/ds_core/concept.h b/src/snmalloc/ds_core/concept.h new file mode 100644 index 000000000..2e4824426 --- /dev/null +++ b/src/snmalloc/ds_core/concept.h @@ -0,0 +1,78 @@ +#pragma once + +#include + +/** + * C++20 concepts are referenced as if they were types in declarations within + * template parameters (e.g. "template ..."). That is, they + * take the place of the "typename"/"class" keyword on template parameters. + * If the compiler understands concepts, this macro expands as its argument; + * otherwise, it expands to the keyword "typename", so snmalloc templates that + * use concept-qualified parameters should use this to remain compatible across + * C++ versions: "template" + */ +#ifdef __cpp_concepts +# define SNMALLOC_CONCEPT(c) c +#else +# define SNMALLOC_CONCEPT(c) typename +#endif + +#ifdef __cpp_concepts +namespace snmalloc +{ + /** + * C++20 concepts are more than just new syntax; there's a new support + * library specified as well. As C++20 is quite new, however, there are some + * environments, notably Clang, that understand the syntax but do not yet + * offer the library. Fortunately, alternate pronouciations are possible. + */ +# ifdef _cpp_lib_concepts + /** + * ConceptSame is true if T and U are the same type and false otherwise. + * When specifying a concept, use ConceptSame to indicate that an + * expression must evaluate precisely to the type U. + */ + template + concept ConceptSame = std::same_as; +# else + template + concept ConceptSame = std::is_same::value; +# endif + + /** + * Equivalence mod std::remove_reference + */ + template + concept ConceptSameModRef = + ConceptSame, std::remove_reference_t>; + + /** + * Some of the types in snmalloc are circular in their definition and use + * templating as a lazy language to carefully tie knots and only pull on the + * whole mess once it's assembled. Unfortunately, concepts amount to eagerly + * demanding the result of the computation. If concepts come into play during + * the circular definition, they may see an incomplete type and so fail (with + * "incomplete type ... used in type trait expression" or similar). However, + * it turns out that SFINAE gives us a way to detect whether a template + * parameter refers to an incomplete type, and short circuit evaluation means + * we can bail on concept checking if we find ourselves in this situation. + * + * See https://devblogs.microsoft.com/oldnewthing/20190710-00/?p=102678 + * + * Unfortunately, C++20 concepts are not first-order things and, in + * particular, cannot themselves be template parameters. 
So while we would + * love to write a generic Lazy combinator, + * + * template concept C, typename T> + * concept Lazy = !is_type_complete_v || C(); + * + * this will instead have to be inlined at every definition (and referred to + * explicitly at call sites) until C++23 or later. + */ + template + constexpr bool is_type_complete_v = false; + template + constexpr bool is_type_complete_v> = true; + +} // namespace snmalloc +#endif diff --git a/src/snmalloc/ds_core/defines.h b/src/snmalloc/ds_core/defines.h new file mode 100644 index 000000000..98fbf39aa --- /dev/null +++ b/src/snmalloc/ds_core/defines.h @@ -0,0 +1,225 @@ +#pragma once +#include + +#if defined(_MSC_VER) && !defined(__clang__) +// 28 is FAST_FAIL_INVALID_BUFFER_ACCESS. Not using the symbolic constant to +// avoid depending on winnt.h +# include // for __fastfail +# define SNMALLOC_FAST_FAIL() __fastfail(28) +# define ALWAYSINLINE __forceinline +# define NOINLINE __declspec(noinline) +# define SNMALLOC_LIKELY(x) !!(x) +# define SNMALLOC_UNLIKELY(x) !!(x) +# define SNMALLOC_SLOW_PATH NOINLINE +# define SNMALLOC_FAST_PATH ALWAYSINLINE +/** + * Fast path with inline linkage. MSVC assumes that `__forceinline` implies + * `inline` and complains if you specify `SNMALLOC_FAST_PATH` and `inline`. + */ +# define SNMALLOC_FAST_PATH_INLINE ALWAYSINLINE +# if _MSC_VER >= 1927 && !defined(SNMALLOC_USE_CXX17) +# define SNMALLOC_FAST_PATH_LAMBDA [[msvc::forceinline]] +# else +# define SNMALLOC_FAST_PATH_LAMBDA +# endif +# define SNMALLOC_PURE +# define SNMALLOC_COLD +# define SNMALLOC_REQUIRE_CONSTINIT +# define SNMALLOC_UNUSED_FUNCTION +# define SNMALLOC_USED_FUNCTION +#else +# define SNMALLOC_FAST_FAIL() __builtin_trap() +# define SNMALLOC_LIKELY(x) __builtin_expect(!!(x), 1) +# define SNMALLOC_UNLIKELY(x) __builtin_expect(!!(x), 0) +# define ALWAYSINLINE __attribute__((always_inline)) +# define NOINLINE __attribute__((noinline)) +# define SNMALLOC_SLOW_PATH NOINLINE +# define SNMALLOC_FAST_PATH ALWAYSINLINE +/** + * Fast path with inline linkage. GCC assumes that + * `__attribute__((always_inline))` is orthogonal to `inline` and complains if + * you specify `SNMALLOC_FAST_PATH` and don't specify `inline` in places where + * `inline` would be required for the one definition rule. The error message + * in this case is confusing: always-inline function may not be inlined. If + * you see this error message when using `SNMALLOC_FAST_PATH` then switch to + * `SNMALLOC_FAST_PATH_INLINE`. 
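+ *
+ * For example (editor addition): a free function defined in a header that is
+ * included from multiple translation units needs `inline` for the
+ * one-definition rule and should use `SNMALLOC_FAST_PATH_INLINE`, whereas a
+ * member function defined inside its class is implicitly `inline` and can use
+ * plain `SNMALLOC_FAST_PATH`.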
+ */ +# define SNMALLOC_FAST_PATH_INLINE ALWAYSINLINE inline +# define SNMALLOC_FAST_PATH_LAMBDA SNMALLOC_FAST_PATH +# define SNMALLOC_PURE __attribute__((const)) +# define SNMALLOC_COLD __attribute__((cold)) +# define SNMALLOC_UNUSED_FUNCTION __attribute((unused)) +# define SNMALLOC_USED_FUNCTION __attribute((used)) +# ifdef __clang__ +# define SNMALLOC_REQUIRE_CONSTINIT \ + [[clang::require_constant_initialization]] +# else +# define SNMALLOC_REQUIRE_CONSTINIT +# endif +#endif + +#if defined(__cpp_constinit) && __cpp_constinit >= 201907 +# define SNMALLOC_CONSTINIT_FN constinit +# define SNMALLOC_CONSTINIT_STATIC constinit const +#else +# define SNMALLOC_CONSTINIT_FN constexpr +# define SNMALLOC_CONSTINIT_STATIC constexpr +#endif + +#if defined(__cpp_consteval) +# define SNMALLOC_CONSTEVAL consteval +#else +# define SNMALLOC_CONSTEVAL constexpr +#endif + +#if !defined(__clang__) && defined(__GNUC__) +# define GCC_NOT_CLANG +#endif + +#ifdef GCC_NOT_CLANG +# if __GNUC__ >= 8 +# define GCC_VERSION_EIGHT_PLUS +# endif +#endif + +#ifdef __APPLE__ +# define SNMALLOC_FORCE_BSS __attribute__((section("__DATA,__bss"))) +#elif defined(__ELF__) +# define SNMALLOC_FORCE_BSS __attribute__((section(".bss"))) +#else +# define SNMALLOC_FORCE_BSS +#endif + +#ifndef __has_builtin +# define __has_builtin(x) 0 +#endif + +namespace snmalloc +{ +#ifdef NDEBUG + static constexpr bool DEBUG = false; +#else + static constexpr bool DEBUG = true; +#endif + + // Forwards reference so that the platform can define how to handle errors. + [[noreturn]] SNMALLOC_COLD void error(const char* const str); +} // namespace snmalloc + +#define TOSTRING(expr) TOSTRING2(expr) +#define TOSTRING2(expr) #expr + +#ifdef NDEBUG +# define SNMALLOC_ASSERT_MSG(...) \ + {} +#else +# define SNMALLOC_ASSERT_MSG(expr, fmt, ...) \ + do \ + { \ + if (!(expr)) \ + { \ + snmalloc::report_fatal_error( \ + "assert fail: {} in {} on {} " fmt "\n", \ + #expr, \ + __FILE__, \ + TOSTRING(__LINE__), \ + ##__VA_ARGS__); \ + } \ + } while (0) +#endif +#define SNMALLOC_ASSERT(expr) SNMALLOC_ASSERT_MSG(expr, "") + +#define SNMALLOC_CHECK_MSG(expr, fmt, ...) \ + do \ + { \ + if (!(expr)) \ + { \ + snmalloc::report_fatal_error( \ + "Check fail: {} in {} on {} " fmt "\n", \ + #expr, \ + __FILE__, \ + TOSTRING(__LINE__), \ + ##__VA_ARGS__); \ + } \ + } while (0) + +#define SNMALLOC_CHECK(expr) SNMALLOC_CHECK_MSG(expr, "") + +#ifndef NDEBUG +# define SNMALLOC_ASSUME(x) SNMALLOC_ASSERT(x) +#else +# if __has_builtin(__builtin_assume) +# define SNMALLOC_ASSUME(x) __builtin_assume((x)) +# elif defined(_MSC_VER) +# define SNMALLOC_ASSUME(x) __assume((x)); +# elif defined(__GNUC__) +# define SNMALLOC_ASSUME(x) \ + if (!(x)) \ + __builtin_unreachable(); +# else +# define SNMALLOC_ASSUME(x) \ + do \ + { \ + } while (0) +# endif +#endif + +namespace snmalloc +{ + /** + * Forward declaration so that this can be called before the pal header is + * included. + */ + template + [[noreturn]] inline void report_fatal_error(Args... args); + + /** + * Forward declaration so that this can be called before the pal header is + * included. + */ + template + inline void message(Args... args); + + template + SNMALLOC_FAST_PATH_INLINE void UNUSED(Args&&...) + {} + + template + inline SNMALLOC_FAST_PATH void + check_client_error(const char* const str, Args... args) + { + //[[clang::musttail]] + return snmalloc::report_fatal_error(str, args...); + } + + template + inline SNMALLOC_FAST_PATH void + check_client_impl(bool test, const char* const str, Args... 
args) + { + if (SNMALLOC_UNLIKELY(!test)) + { + if constexpr (!DEBUG) + { + UNUSED(str, args...); + SNMALLOC_FAST_FAIL(); + } + else + { + check_client_error(str, args...); + } + } + } + +#ifdef SNMALLOC_CHECK_CLIENT + static constexpr bool CHECK_CLIENT = true; +#else + static constexpr bool CHECK_CLIENT = false; +#endif +} // namespace snmalloc + +#ifdef SNMALLOC_CHECK_CLIENT +# define snmalloc_check_client(test, str, ...) \ + snmalloc::check_client_impl(test, str, ##__VA_ARGS__) +#else +# define snmalloc_check_client(test, str, ...) +#endif diff --git a/src/snmalloc/ds_core/ds_core.h b/src/snmalloc/ds_core/ds_core.h new file mode 100644 index 000000000..672f7d1b0 --- /dev/null +++ b/src/snmalloc/ds_core/ds_core.h @@ -0,0 +1,16 @@ +#pragma once +/** + * The core definitions for snmalloc. These provide some basic helpers that do + * not depend on anything except for a working C++ implementation. + * + * Files in this directory may not include anything from any other directory in + * snmalloc. + */ + +#include "bits.h" +#include "concept.h" +#include "defines.h" +#include "helpers.h" +#include "ptrwrap.h" +#include "redblacktree.h" +#include "seqset.h" diff --git a/src/snmalloc/ds_core/helpers.h b/src/snmalloc/ds_core/helpers.h new file mode 100644 index 000000000..7b2cca2c6 --- /dev/null +++ b/src/snmalloc/ds_core/helpers.h @@ -0,0 +1,467 @@ +#pragma once + +#include "bits.h" + +#include +#include +#include +#include +#include + +namespace snmalloc +{ + /** + * Wrapper for wrapping values. + * + * Wraps on read. This allows code to trust the value is in range, even when + * there is a memory corruption. + */ + template + class Mod + { + static_assert(bits::is_pow2(length), "Must be a power of two."); + + private: + T value = 0; + + public: + operator T() + { + return static_cast(value & (length - 1)); + } + + Mod& operator=(const T v) + { + value = v; + return *this; + } + }; + +#ifdef SNMALLOC_CHECK_CLIENT + template + class ModArray + { + /** + * Align the elements, so that access is cheaper. + */ + struct alignas(bits::next_pow2_const(sizeof(T))) TWrap + { + T v; + }; + + static constexpr size_t rlength = bits::next_pow2_const(length); + std::array array; + + public: + constexpr const T& operator[](const size_t i) const + { + return array[i & (rlength - 1)].v; + } + + constexpr T& operator[](const size_t i) + { + return array[i & (rlength - 1)].v; + } + }; +#else + template + using ModArray = std::array; +#endif + + /** + * Helper class to execute a specified function on destruction. + */ + template + class OnDestruct + { + F f; + + public: + OnDestruct(F f) : f(f) {} + + ~OnDestruct() + { + f(); + } + }; + + /** + * Non-owning version of std::function. Wraps a reference to a callable object + * (eg. a lambda) and allows calling it through dynamic dispatch, with no + * allocation. This is useful in the allocator code paths, where we can't + * safely use std::function. + * + * Inspired by the C++ proposal: + * http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2018/p0792r2.html + */ + template + struct function_ref; + template + struct function_ref + { + // The enable_if is used to stop this constructor from shadowing the default + // copy / move constructors. + template< + typename Fn, + typename = + std::enable_if_t, function_ref>>> + function_ref(Fn&& fn) + { + data_ = static_cast(&fn); + fn_ = execute; + } + + R operator()(Args... args) const + { + return fn_(data_, args...); + } + + private: + void* data_; + R (*fn_)(void*, Args...); + + template + static R execute(void* p, Args... 
args) + { + return (*static_cast>(p))(args...); + }; + }; + + template typename Ptr> + void ignore(Ptr t) + { + UNUSED(t); + } + + /** + * Sometimes we need atomics with trivial initializer. Unfortunately, this + * became harder to accomplish in C++20. Fortunately, our rules for accessing + * these are at least as strong as those required by C++20's atomic_ref: + * + * * The objects outlive any references to them + * + * * We always access the objects through references (though we'd be allowed + * to access them without if we knew there weren't other references) + * + * * We don't access sub-objects at all, much less concurrently through + * other references. + */ + template + class TrivialInitAtomic + { + static_assert( + std::is_trivially_default_constructible_v, + "TrivialInitAtomic should not attempt to call nontrivial constructors"); + +#ifdef __cpp_lib_atomic_ref + using Val = T; + using Ref = std::atomic_ref; +#else + using Val = std::atomic; + using Ref = std::atomic&; +#endif + Val v; + + public: + /** + * Construct a reference to this value; use .load and .store to manipulate + * the value. + */ + SNMALLOC_FAST_PATH Ref ref() + { +#ifdef __cpp_lib_atomic_ref + return std::atomic_ref(this->v); +#else + return this->v; +#endif + } + + SNMALLOC_FAST_PATH T + load(std::memory_order mo = std::memory_order_seq_cst) noexcept + { + return this->ref().load(mo); + } + + SNMALLOC_FAST_PATH void + store(T n, std::memory_order mo = std::memory_order_seq_cst) noexcept + { + return this->ref().store(n, mo); + } + + SNMALLOC_FAST_PATH bool compare_exchange_strong( + T& exp, T des, std::memory_order mo = std::memory_order_seq_cst) noexcept + { + return this->ref().compare_exchange_strong(exp, des, mo); + } + + SNMALLOC_FAST_PATH T + exchange(T des, std::memory_order mo = std::memory_order_seq_cst) noexcept + { + return this->ref().exchange(des, mo); + } + + template + SNMALLOC_FAST_PATH + typename std::enable_if::value, Q>::type + fetch_add( + Q arg, std::memory_order mo = std::memory_order_seq_cst) noexcept + { + return this->ref().fetch_add(arg, mo); + } + }; + + static_assert(sizeof(TrivialInitAtomic) == sizeof(char)); + static_assert(alignof(TrivialInitAtomic) == alignof(char)); + + /** + * Helper class for building fatal errors. Used by `report_fatal_error` to + * build an on-stack buffer containing the formatted string. + */ + template + class MessageBuilder + { + /** + * The buffer that is used to store the formatted output. + */ + std::array buffer; + + /** + * Space in the buffer, excluding a trailing null terminator. + */ + static constexpr size_t SafeLength = BufferSize - 1; + + /** + * The insert position within `buffer`. + */ + size_t insert = 0; + + /** + * Add argument `i` from the tuple `args` to the output. This is + * implemented recursively because the different tuple elements can have + * different types and so the code for dispatching will depend on the type + * at the index. The compiler will lower this to a jump table in optimised + * builds. + */ + template + void add_tuple_arg(size_t i, const std::tuple& args) + { + if (i == I) + { + append(std::get(args)); + } + else if constexpr (I != 0) + { + add_tuple_arg(i, args); + } + } + + /** + * Append a single character into the buffer. This is the single primitive + * operation permitted on the buffer and performs bounds checks to ensure + * that there is space for the character and for a null terminator. 
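+     * Characters that do not fit are silently dropped rather than written out
+     * of bounds, so an over-long message is truncated instead of overflowing
+     * the on-stack buffer.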
+ */ + void append_char(char c) + { + if (insert < SafeLength) + { + buffer[insert++] = c; + } + } + + /** + * Append a string to the buffer. + */ + void append(std::string_view sv) + { + for (auto c : sv) + { + append_char(c); + } + } + + /* + * TODO: This is not quite the right thing we want to check, but it + * suffices on all currently-supported platforms and CHERI. We'd rather + * compare UINTPTR_WIDTH and ULLONG_WIDTH, I think, but those don't + * exist until C's FP Ext 1 TS (merged into C2x). + */ +#ifdef __CHERI_PURE_CAPABILITY__ + /** + * Append an intptr_t to the buffer as a hex string + */ + void append(intptr_t v) + { + append(reinterpret_cast(v)); + } + + /** + * Append a uintptr_t to the buffer as a hex string + */ + void append(uintptr_t v) + { + append(reinterpret_cast(v)); + } +#endif + + /** + * Append a raw pointer to the buffer as a hex string. + */ + void append(void* ptr) + { + append(static_cast(reinterpret_cast(ptr))); + // TODO: CHERI bits. + } + + /** + * Append a signed integer to the buffer, as a decimal string. + */ + void append(long long s) + { + if (s < 0) + { + append_char('-'); + s = 0 - s; + } + std::array buf; + const char digits[] = "0123456789"; + for (long i = long(buf.size() - 1); i >= 0; i--) + { + buf[static_cast(i)] = digits[s % 10]; + s /= 10; + } + bool skipZero = true; + for (auto c : buf) + { + if (skipZero && (c == '0')) + { + continue; + } + skipZero = false; + append_char(c); + } + if (skipZero) + { + append_char('0'); + } + } + + /** + * Append a size to the buffer, as a hex string. + */ + void append(unsigned long long s) + { + append_char('0'); + append_char('x'); + std::array buf; + const char hexdigits[] = "0123456789abcdef"; + // Length of string including null terminator + static_assert(sizeof(hexdigits) == 0x11); + for (long i = long(buf.size() - 1); i >= 0; i--) + { + buf[static_cast(i)] = hexdigits[s & 0xf]; + s >>= 4; + } + bool skipZero = true; + for (auto c : buf) + { + if (skipZero && (c == '0')) + { + continue; + } + skipZero = false; + append_char(c); + } + if (skipZero) + { + append_char('0'); + } + } + + /** + * Overload to force `long` to be promoted to `long long`. + */ + void append(long x) + { + append(static_cast(x)); + } + + /** + * Overload to force `unsigned long` to be promoted to `unsigned long long`. + */ + void append(unsigned long x) + { + append(static_cast(x)); + } + + /** + * Overload to force `int` to be promoted to `long long`. + */ + void append(int x) + { + append(static_cast(x)); + } + + /** + * Overload to force `unsigned int` to be promoted to `unsigned long long`. + */ + void append(unsigned int x) + { + append(static_cast(x)); + } + + public: + /** + * Constructor. Takes a format string and the arguments to output. + */ + template + SNMALLOC_FAST_PATH MessageBuilder(const char* fmt, Args... args) + { + buffer[SafeLength] = 0; + size_t arg = 0; + auto args_tuple = std::forward_as_tuple(args...); + for (const char* s = fmt; *s != 0; ++s) + { + if (s[0] == '{' && s[1] == '}') + { + add_tuple_arg(arg++, args_tuple); + ++s; + } + else + { + append_char(*s); + } + } + append_char('\0'); + } + + /** + * Constructor for trivial format strings (no arguments). This exists to + * allow `MessageBuilder` to be used with macros without special casing + * the single-argument version. + */ + SNMALLOC_FAST_PATH MessageBuilder(const char* fmt) + { + buffer[SafeLength] = 0; + for (const char* s = fmt; *s != 0; ++s) + { + append_char(*s); + } + append_char('\0'); + } + + /** + * Return the error buffer. 
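+     * The pointer refers to storage owned by this MessageBuilder, so it is
+     * only valid for the builder's lifetime.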
+ */ + const char* get_message() + { + return buffer.data(); + } + }; + + /** + * Convenience type that has no fields / methods. + */ + struct Empty + {}; + +} // namespace snmalloc diff --git a/src/snmalloc/ds_core/ptrwrap.h b/src/snmalloc/ds_core/ptrwrap.h new file mode 100644 index 000000000..612adbd7d --- /dev/null +++ b/src/snmalloc/ds_core/ptrwrap.h @@ -0,0 +1,551 @@ +#pragma once + +#include "concept.h" +#include "defines.h" + +#include + +namespace snmalloc +{ + /* + * reinterpret_cast<> is a powerful primitive that, excitingly, does not + * require the programmer to annotate the expected *source* type. We + * therefore wrap its use to interconvert between uintptr_t and pointer types. + */ + + /** + * Convert a pointer to a uintptr_t. Template argument inference is + * prohibited. + */ + template + SNMALLOC_FAST_PATH_INLINE uintptr_t + unsafe_to_uintptr(std::enable_if_t* p) + { + return reinterpret_cast(p); + } + + /** Convert a uintptr_t to a T*. */ + template + SNMALLOC_FAST_PATH_INLINE T* unsafe_from_uintptr(uintptr_t p) + { + return reinterpret_cast(p); + } + + /** + * To assist in providing a uniform interface regardless of pointer wrapper, + * we also export intrinsic pointer and atomic pointer aliases, as the postfix + * type constructor '*' does not work well as a template parameter and we + * don't have inline type-level functions. + */ + template + using Pointer = T*; + + template + using AtomicPointer = std::atomic; + + /** + * Summaries of StrictProvenance metadata. We abstract away the particular + * size and any offset into the bounds. + */ + + namespace capptr + { + namespace dimension + { + /* + * Describe the spatial extent (intended to be) authorized by a pointer. + * + * Bounds dimensions are sorted so that < reflects authority. + */ + enum class Spatial + { + /** + * Bounded to a particular allocation (which might be Large!) + */ + Alloc, + /** + * Bounded to one or more particular chunk granules + */ + Chunk, + /** + * Unbounded return from the kernel. These correspond, on CHERI + * platforms, to kernel-side address space reservations. + */ + Arena + }; + + /** + * On some platforms (e.g., CHERI), pointers can be checked to see whether + * they authorize control of the address space. See the PAL's + * capptr_to_user_address_control(). + */ + enum class AddressSpaceControl + { + /** + * All intended control constraints have been applied. For example, on + * CheriBSD, the VMMAP permission has been stripped and so this CapPtr<> + * cannot authorize manipulation of the address space itself, though it + * continues to authorize loads and stores. + */ + User, + /** + * No control constraints have been applied. On CheriBSD, specifically, + * this grants control of the address space (via mmap and friends) and + * in Cornucopia exempts the pointer from revocation (as long as the + * mapping remains in place, but snmalloc does not presently tear down + * its own mappings.) + */ + Full + }; + + /** + * Distinguish pointers proximate provenance: pointers given to us by + * clients can be arbitrarily malformed while pointers from the kernel or + * internally can be presumed well-formed. See the Backend's + * capptr_domesticate(). + */ + enum class Wildness + { + /** + * The purported "pointer" here may just be a pile of bits. On CHERI + * architectures, for example, it may not have a set tag or may be out + * of bounds. + */ + Wild, + /** + * Either this pointer has provenance from the kernel or it has been + * checked by capptr_dewild. 
+ */ + Tame + }; + } // namespace dimension + + /** + * The aggregate type of a bound: a Cartesian product of the individual + * dimensions, above. + */ + template< + dimension::Spatial S, + dimension::AddressSpaceControl AS, + dimension::Wildness W> + struct bound + { + static constexpr enum dimension::Spatial spatial = S; + static constexpr enum dimension::AddressSpaceControl + address_space_control = AS; + static constexpr enum dimension::Wildness wildness = W; + + /** + * Set just the spatial component of the bounds + */ + template + using with_spatial = bound; + + /** + * Set just the address space control component of the bounds + */ + template + using with_address_space_control = bound; + + /** + * Set just the wild component of the bounds + */ + template + using with_wildness = bound; + + /* The dimensions here are not used completely orthogonally */ + static_assert( + !(W == dimension::Wildness::Wild) || + (S == dimension::Spatial::Alloc && + AS == dimension::AddressSpaceControl::User), + "Wild pointers must be annotated as tightly bounded"); + static_assert( + (S != dimension::Spatial::Arena) || + (W == dimension::Wildness::Tame && + AS == dimension::AddressSpaceControl::Full), + "Arena pointers must be restricted spatially before other dimensions"); + }; + + // clang-format off +#ifdef __cpp_concepts + /* + * This is spelled a little differently from our other concepts because GCC + * treats "{ T::spatial }" as generating a reference and then complains that + * it isn't "ConceptSame", though clang is perfectly happy + * with that spelling. Both seem happy with this formulation. + */ + template + concept IsBound = + ConceptSame && + ConceptSame && + ConceptSame; +#endif + // clang-format on + + /* + * Several combinations are used often enough that we give convenient + * aliases for them. + */ + namespace bounds + { + /** + * Internal access to an entire Arena. These exist only in the backend. + */ + using Arena = bound< + dimension::Spatial::Arena, + dimension::AddressSpaceControl::Full, + dimension::Wildness::Tame>; + + /** + * Internal access to a Chunk of memory. These flow across the boundary + * between back- and front-ends, for example. + */ + using Chunk = bound< + dimension::Spatial::Chunk, + dimension::AddressSpaceControl::Full, + dimension::Wildness::Tame>; + + /** + * User access to an entire Chunk. Used as an ephemeral state when + * returning a large allocation. See capptr_chunk_is_alloc. + */ + using ChunkUser = + Chunk::with_address_space_control; + + /** + * Internal access to just one allocation (usually, within a slab). + */ + using AllocFull = Chunk::with_spatial; + + /** + * User access to just one allocation (usually, within a slab). + */ + using Alloc = AllocFull::with_address_space_control< + dimension::AddressSpaceControl::User>; + + /** + * A wild (i.e., putative) CBAllocExport pointer handed back by the + * client. See capptr_from_client() and capptr_domesticate(). + */ + using AllocWild = Alloc::with_wildness; + } // namespace bounds + + /** + * Compute the AddressSpaceControl::User variant of a capptr::bound + * annotation. This is used by the PAL's capptr_to_user_address_control + * function to compute its return value's annotation. + */ + template + using user_address_control_type = + typename B::template with_address_space_control< + dimension::AddressSpaceControl::User>; + + /** + * Determine whether BI is a spatial refinement of BO. + * Chunk and ChunkD are considered eqivalent here. 
+ */ + template< + SNMALLOC_CONCEPT(capptr::IsBound) BI, + SNMALLOC_CONCEPT(capptr::IsBound) BO> + SNMALLOC_CONSTEVAL bool is_spatial_refinement() + { + if (BI::address_space_control != BO::address_space_control) + { + return false; + } + + if (BI::wildness != BO::wildness) + { + return false; + } + + return BO::spatial <= BI::spatial; + } + } // namespace capptr + + /** + * A pointer annotated with a "phantom type parameter" carrying a static + * summary of its StrictProvenance metadata. + */ + template + class CapPtr + { + T* unsafe_capptr; + + public: + /** + * nullptr is implicitly constructable at any bounds type + */ + constexpr SNMALLOC_FAST_PATH CapPtr(const std::nullptr_t n) + : unsafe_capptr(n) + {} + + constexpr SNMALLOC_FAST_PATH CapPtr() : CapPtr(nullptr) {} + + private: + /** + * all other constructions must be explicit + * + * Unfortunately, MSVC gets confused if an Allocator is instantiated in a + * way that never needs initialization (as our sandbox test does, for + * example) and, in that case, declares this constructor unreachable, + * presumably after some heroic feat of inlining that has also lost any + * semblance of context. See the blocks tagged "CapPtr-vs-MSVC" for where + * this has been observed. + */ +#ifdef _MSC_VER +# pragma warning(push) +# pragma warning(disable : 4702) +#endif + constexpr explicit SNMALLOC_FAST_PATH CapPtr(T* p) : unsafe_capptr(p) {} +#ifdef _MSC_VER +# pragma warning(pop) +#endif + + public: + /** + * The CapPtr constructor is not sufficiently intimidating, given that it + * can be used to break annotation correctness. Expose it with a better + * name. + */ + static constexpr SNMALLOC_FAST_PATH CapPtr unsafe_from(T* p) + { + return CapPtr(p); + } + + /** + * Allow static_cast<>-s that preserve bounds but vary the target type. + */ + template + [[nodiscard]] SNMALLOC_FAST_PATH CapPtr as_static() const + { + return CapPtr::unsafe_from( + static_cast(this->unsafe_capptr)); + } + + [[nodiscard]] SNMALLOC_FAST_PATH CapPtr as_void() const + { + return this->as_static(); + } + + /** + * A more aggressive bounds-preserving cast, using reinterpret_cast + */ + template + [[nodiscard]] SNMALLOC_FAST_PATH CapPtr as_reinterpret() const + { + return CapPtr::unsafe_from( + reinterpret_cast(this->unsafe_capptr)); + } + + SNMALLOC_FAST_PATH bool operator==(const CapPtr& rhs) const + { + return this->unsafe_capptr == rhs.unsafe_capptr; + } + + SNMALLOC_FAST_PATH bool operator!=(const CapPtr& rhs) const + { + return this->unsafe_capptr != rhs.unsafe_capptr; + } + + SNMALLOC_FAST_PATH bool operator<(const CapPtr& rhs) const + { + return this->unsafe_capptr < rhs.unsafe_capptr; + } + + SNMALLOC_FAST_PATH T* operator->() const + { + static_assert( + bounds::wildness != capptr::dimension::Wildness::Wild, + "Trying to dereference a Wild pointer"); + return this->unsafe_capptr; + } + + [[nodiscard]] SNMALLOC_FAST_PATH T* unsafe_ptr() const + { + return this->unsafe_capptr; + } + + [[nodiscard]] SNMALLOC_FAST_PATH uintptr_t unsafe_uintptr() const + { + return unsafe_to_uintptr(this->unsafe_capptr); + } + }; + + namespace capptr + { + /* + * Aliases for CapPtr<> types with particular bounds. 
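+     * For example, `capptr::Alloc<T>` below abbreviates
+     * `CapPtr<T, bounds::Alloc>`.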
+ */ + + template + using Arena = CapPtr; + + template + using Chunk = CapPtr; + + template + using ChunkUser = CapPtr; + + template + using AllocFull = CapPtr; + + template + using Alloc = CapPtr; + + template + using AllocWild = CapPtr; + + } // namespace capptr + + static_assert(sizeof(capptr::Chunk) == sizeof(void*)); + static_assert(alignof(capptr::Chunk) == alignof(void*)); + + /** + * Sometimes (with large allocations) we really mean the entire chunk (or even + * several chunks) to be the allocation. + */ + template + inline SNMALLOC_FAST_PATH capptr::Alloc + capptr_chunk_is_alloc(capptr::ChunkUser p) + { + return capptr::Alloc::unsafe_from(p.unsafe_ptr()); + } + + /** + * With all the bounds and constraints in place, it's safe to extract a void + * pointer (to reveal to the client). Roughly dual to capptr_from_client(), + * but we stop oursevles from revealing anything not known to be domesticated. + */ + inline SNMALLOC_FAST_PATH void* capptr_reveal(capptr::Alloc p) + { + return p.unsafe_ptr(); + } + + /** + * Given a void* from the client, it's fine to call it AllocWild. + * Roughly dual to capptr_reveal(). + */ + static inline SNMALLOC_FAST_PATH capptr::AllocWild + capptr_from_client(void* p) + { + return capptr::AllocWild::unsafe_from(p); + } + + /** + * It's safe to mark any CapPtr as Wild. + */ + template + static inline SNMALLOC_FAST_PATH CapPtr< + T, + typename B::template with_wildness> + capptr_rewild(CapPtr p) + { + return CapPtr< + T, + typename B::template with_wildness>:: + unsafe_from(p.unsafe_ptr()); + } + + /** + * + * Wrap a std::atomic with bounds annotation and speak in terms of + * bounds-annotated pointers at the interface. + * + * Note the membranous sleight of hand being pulled here: this class puts + * annotations around an un-annotated std::atomic, to appease C++, yet + * will expose or consume only CapPtr with the same bounds annotation. + */ + template + class AtomicCapPtr + { + std::atomic unsafe_capptr; + + public: + /** + * nullptr is constructable at any bounds type + */ + constexpr SNMALLOC_FAST_PATH AtomicCapPtr(const std::nullptr_t n) + : unsafe_capptr(n) + {} + + /** + * Interconversion with CapPtr + */ + constexpr SNMALLOC_FAST_PATH AtomicCapPtr(CapPtr p) + : unsafe_capptr(p.unsafe_capptr) + {} + + operator CapPtr() const noexcept + { + return CapPtr(this->unsafe_capptr); + } + + // Our copy-assignment operator follows std::atomic and returns a copy of + // the RHS. Clang finds this surprising; we suppress the warning. 
+ // NOLINTNEXTLINE(misc-unconventional-assign-operator) + SNMALLOC_FAST_PATH CapPtr operator=(CapPtr p) noexcept + { + this->store(p); + return p; + } + + SNMALLOC_FAST_PATH CapPtr + load(std::memory_order order = std::memory_order_seq_cst) noexcept + { + return CapPtr::unsafe_from(this->unsafe_capptr.load(order)); + } + + SNMALLOC_FAST_PATH void store( + CapPtr desired, + std::memory_order order = std::memory_order_seq_cst) noexcept + { + this->unsafe_capptr.store(desired.unsafe_ptr(), order); + } + + SNMALLOC_FAST_PATH CapPtr exchange( + CapPtr desired, + std::memory_order order = std::memory_order_seq_cst) noexcept + { + return CapPtr::unsafe_from( + this->unsafe_capptr.exchange(desired.unsafe_ptr(), order)); + } + + SNMALLOC_FAST_PATH bool operator==(const AtomicCapPtr& rhs) const + { + return this->unsafe_capptr == rhs.unsafe_capptr; + } + + SNMALLOC_FAST_PATH bool operator!=(const AtomicCapPtr& rhs) const + { + return this->unsafe_capptr != rhs.unsafe_capptr; + } + + SNMALLOC_FAST_PATH bool operator<(const AtomicCapPtr& rhs) const + { + return this->unsafe_capptr < rhs.unsafe_capptr; + } + }; + + namespace capptr + { + /* + * Aliases for AtomicCapPtr<> types with particular bounds. + */ + + template + using AtomicChunk = AtomicCapPtr; + + template + using AtomicChunkUser = AtomicCapPtr; + + template + using AtomicAllocFull = AtomicCapPtr; + + template + using AtomicAlloc = AtomicCapPtr; + + } // namespace capptr + +} // namespace snmalloc diff --git a/src/snmalloc/ds_core/redblacktree.h b/src/snmalloc/ds_core/redblacktree.h new file mode 100644 index 000000000..0d684698d --- /dev/null +++ b/src/snmalloc/ds_core/redblacktree.h @@ -0,0 +1,778 @@ +#pragma once + +#include +#include +#include +#include + +namespace snmalloc +{ +#ifdef __cpp_concepts + /** + * The representation must define two types. `Contents` defines some + * identifier that can be mapped to a node as a value type. `Handle` defines + * a reference to the storage, which can be used to update it. + * + * Conceptually, `Contents` is a node ID and `Handle` is a pointer to a node + * ID. + */ + template + concept RBRepTypes = requires() + { + typename Rep::Handle; + typename Rep::Contents; + }; + + /** + * The representation must define operations on the holder and contents + * types. It must be able to 'dereference' a holder with `get`, assign to it + * with `set`, set and query the red/black colour of a node with `set_red` and + * `is_red`. + * + * The `ref` method provides uniform access to the children of a node, + * returning a holder pointing to either the left or right child, depending on + * the direction parameter. + * + * The backend must also provide two constant values. + * `Rep::null` defines a value that, if returned from `get`, indicates a null + * value. `Rep::root` defines a value that, if constructed directly, indicates + * a null value and can therefore be used as the initial raw bit pattern of + * the root node. 
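+   *
+   * A purely illustrative sketch (editor addition, not part of the patch) of
+   * such a representation over plain heap nodes, with the colour held in a
+   * bool on the node (comparison and printing hooks omitted for brevity):
+   *
+   *   struct Node { size_t key; bool red; Node* child[2]; };
+   *
+   *   struct Rep
+   *   {
+   *     using Contents = Node*;  // node identifier
+   *     using Handle = Node**;   // reference to the storage holding one
+   *     static constexpr Node* null = nullptr;
+   *     static constexpr Node* root = nullptr;
+   *     static Contents get(Handle h) { return *h; }
+   *     static void set(Handle h, Contents c) { *h = c; }
+   *     // Null children are treated as black.
+   *     static bool is_red(Contents c) { return c != null && c->red; }
+   *     static void set_red(Contents c, bool b) { c->red = b; }
+   *     static Handle ref(bool dir, Contents c)
+   *     {
+   *       return &c->child[dir ? 0 : 1]; // true selects the left child
+   *     }
+   *   };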
+ */ + template + concept RBRepMethods = + requires(typename Rep::Handle hp, typename Rep::Contents k, bool b) + { + { + Rep::get(hp) + } + ->ConceptSame; + { + Rep::set(hp, k) + } + ->ConceptSame; + { + Rep::is_red(k) + } + ->ConceptSame; + { + Rep::set_red(k, b) + } + ->ConceptSame; + { + Rep::ref(b, k) + } + ->ConceptSame; + { + Rep::null + } + ->ConceptSameModRef; + { + typename Rep::Handle + { + const_cast< + std::remove_const_t>*>( + &Rep::root) + } + } + ->ConceptSame; + }; + + template + concept RBRep = // + RBRepTypes // + && RBRepMethods // + && ConceptSame< + decltype(Rep::null), + std::add_const_t>; +#endif + + /** + * Contains a self balancing binary tree. + * + * The template parameter Rep provides the representation of the nodes as a + * collection of functions and types that are requires. See the associated + * test for an example. + * + * run_checks enables invariant checking on the tree. Enabled in Debug. + * TRACE prints all the sets of the rebalancing operations. Only enabled by + * the test when debugging a specific failure. + */ + template< + SNMALLOC_CONCEPT(RBRep) Rep, + bool run_checks = DEBUG, + bool TRACE = false> + class RBTree + { + using H = typename Rep::Handle; + using K = typename Rep::Contents; + + // Container that behaves like a C++ Ref type to enable assignment + // to treat left, right and root uniformly. + class ChildRef + { + H ptr; + + public: + ChildRef() : ptr(nullptr) {} + + ChildRef(H p) : ptr(p) {} + + ChildRef(const ChildRef& other) = default; + + operator K() + { + return Rep::get(ptr); + } + + ChildRef& operator=(const ChildRef& other) = default; + + ChildRef& operator=(const K t) + { + // Use representations assigment, so we update the correct bits + // color and other things way also be stored in the Handle. + Rep::set(ptr, t); + return *this; + } + + /** + * Comparison operators. Note that these are nominal comparisons: + * they compare the identities of the references rather than the values + * referenced. + * comparison of the values held in these child references. + * @{ + */ + bool operator==(const ChildRef t) const + { + return ptr == t.ptr; + } + + bool operator!=(const ChildRef t) const + { + return ptr != t.ptr; + } + ///@} + + bool is_null() + { + return Rep::get(ptr) == Rep::null; + } + + /** + * Return the reference in some printable format defined by the + * representation. + */ + auto printable() + { + return Rep::printable(ptr); + } + }; + + // Root field of the tree + typename std::remove_const_t> + root{Rep::root}; + + static ChildRef get_dir(bool direction, K k) + { + return {Rep::ref(direction, k)}; + } + + ChildRef get_root() + { + return {H{&root}}; + } + + void invariant() + { + invariant(get_root()); + } + + /* + * Verify structural invariants. Returns the black depth of the `curr`ent + * node. 
+ */ + int invariant(K curr, K lower = Rep::null, K upper = Rep::null) + { + if constexpr (!run_checks) + { + UNUSED(curr, lower, upper); + return 0; + } + else + { + if (curr == Rep::null) + return 1; + + if ( + ((lower != Rep::null) && Rep::compare(lower, curr)) || + ((upper != Rep::null) && Rep::compare(curr, upper))) + { + report_fatal_error( + "Invariant failed: {} is out of bounds {}..{}", + Rep::printable(curr), + Rep::printable(lower), + Rep::printable(upper)); + } + + if ( + Rep::is_red(curr) && + (Rep::is_red(get_dir(true, curr)) || + Rep::is_red(get_dir(false, curr)))) + { + report_fatal_error( + "Invariant failed: {} is red and has red child", + Rep::printable(curr)); + } + + int left_inv = invariant(get_dir(true, curr), lower, curr); + int right_inv = invariant(get_dir(false, curr), curr, upper); + + if (left_inv != right_inv) + { + report_fatal_error( + "Invariant failed: {} has different black depths", + Rep::printable(curr)); + } + + if (Rep::is_red(curr)) + return left_inv; + + return left_inv + 1; + } + } + + struct RBStep + { + ChildRef node; + bool dir = false; + + /** + * Update the step to point to a new node and direction. + */ + void set(ChildRef r, bool direction) + { + node = r; + dir = direction; + } + + /** + * Update the step to point to a new node and direction. + */ + void set(typename Rep::Handle r, bool direction) + { + set(ChildRef(r), direction); + } + }; + + public: + // Internal representation of a path in the tree. + // Exposed to allow for some composite operations to be defined + // externally. + class RBPath + { + friend class RBTree; + + std::array path; + size_t length = 0; + + RBPath(typename Rep::Handle root) : path{} + { + path[0].set(root, false); + length = 1; + } + + ChildRef ith(size_t n) + { + SNMALLOC_ASSERT(length >= n); + return path[length - n - 1].node; + } + + bool ith_dir(size_t n) + { + SNMALLOC_ASSERT(length >= n); + return path[length - n - 1].dir; + } + + ChildRef curr() + { + return ith(0); + } + + bool curr_dir() + { + return ith_dir(0); + } + + ChildRef parent() + { + return ith(1); + } + + bool parent_dir() + { + return ith_dir(1); + } + + ChildRef grand_parent() + { + return ith(2); + } + + // Extend path in `direction`. + // If `direction` contains `Rep::null`, do not extend the path. + // Returns false if path is not extended. + bool move(bool direction) + { + auto next = get_dir(direction, curr()); + if (next.is_null()) + return false; + path[length].set(next, direction); + length++; + return true; + } + + // Extend path in `direction`. + // If `direction` contains zero, do not extend the path. + // Returns false if path is extended with null. + bool move_inc_null(bool direction) + { + auto next = get_dir(direction, curr()); + path[length].set(next, direction); + length++; + return !(next.is_null()); + } + + // Remove top element from the path. + void pop() + { + SNMALLOC_ASSERT(length > 0); + length--; + } + + // If a path is changed in place, then some references can be stale. + // This rewalks the updated path, and corrects any internal references. + // `expected` is used to run the update, or if `false` used to check + // that no update is required. + void fixup(bool expected = true) + { + if (!run_checks && !expected) + return; + + // During a splice in remove the path can be invalidated, + // this refreshs the path so that the it refers to the spliced + // nodes fields. + // TODO optimise usage to avoid traversing whole path. 
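+        // The loop below re-reads each child reference along the stored path,
+        // starting from the root end; only entries that no longer match are
+        // rewritten.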
+ for (size_t i = 1; i < length; i++) + { + auto parent = path[i - 1].node; + auto& curr = path[i].node; + auto dir = path[i].dir; + auto actual = get_dir(dir, parent); + if (actual != curr) + { + if (!expected) + { + snmalloc::error("Performed an unexpected fixup."); + } + curr = actual; + } + } + } + + void print() + { + if constexpr (TRACE) + { + for (size_t i = 0; i < length; i++) + { + message<1024>( + " -> {} @ {} ({})", + Rep::printable(K(path[i].node)), + path[i].node.printable(), + path[i].dir); + } + } + } + }; + + private: + void debug_log(const char* msg, RBPath& path) + { + debug_log(msg, path, get_root()); + } + + void debug_log(const char* msg, RBPath& path, ChildRef base) + { + if constexpr (TRACE) + { + message<100>("------- {}", Rep::name()); + message<1024>(msg); + path.print(); + print(base); + } + else + { + UNUSED(msg, path, base); + } + } + + public: + constexpr RBTree() = default; + + void print() + { + print(get_root()); + } + + void print(ChildRef curr, const char* indent = "", size_t depth = 0) + { + if constexpr (TRACE) + { + if (curr.is_null()) + { + message<1024>("{}\\_null", indent); + return; + } + +#ifdef _MSC_VER + auto colour = Rep::is_red(curr) ? "R-" : "B-"; + auto reset = ""; +#else + auto colour = Rep::is_red(curr) ? "\e[1;31m" : "\e[1;34m"; + auto reset = "\e[0m"; +#endif + + message<1024>( + "{}\\_{}{}{}@{} ({})", + indent, + colour, + Rep::printable((K(curr))), + reset, + curr.printable(), + depth); + if (!(get_dir(true, curr).is_null() && get_dir(false, curr).is_null())) + { + auto s_indent = std::string(indent); + print(get_dir(true, curr), (s_indent + "|").c_str(), depth + 1); + print(get_dir(false, curr), (s_indent + " ").c_str(), depth + 1); + } + } + } + + bool find(RBPath& path, K value) + { + bool dir; + + if (path.curr().is_null()) + return false; + + do + { + if (Rep::equal(path.curr(), value)) + return true; + dir = Rep::compare(path.curr(), value); + } while (path.move_inc_null(dir)); + + return false; + } + + bool remove_path(RBPath& path) + { + ChildRef splice = path.curr(); + SNMALLOC_ASSERT(!(splice.is_null())); + + debug_log("Removing", path); + + /* + * Find immediately smaller leaf element (rightmost descendant of left + * child) to serve as the replacement for this node. We may not have a + * left subtree, so this may not move the path at all. + */ + path.move(true); + while (path.move(false)) + { + } + + K curr = path.curr(); + + { + // Locally extract right-child-less replacement, replacing it with its + // left child, if any + K child = get_dir(true, path.curr()); + // Unlink target replacing with possible child. + path.curr() = child; + } + + bool leaf_red = Rep::is_red(curr); + + if (path.curr() != splice) + { + // If we had a left child, replace ourselves with the extracted value + // from above + Rep::set_red(curr, Rep::is_red(splice)); + get_dir(true, curr) = K(get_dir(true, splice)); + get_dir(false, curr) = K(get_dir(false, splice)); + splice = curr; + path.fixup(); + } + + debug_log("Splice done", path); + + // TODO: Clear node contents? + + // Red leaf removal requires no rebalancing. + if (leaf_red) + return true; + + // Now in the double black case. + // End of path is considered double black, that is, one black element + // shorter than satisfies the invariant. The following algorithm moves up + // the path until it finds a close red element or the root. If we convert + // the tree to one, in which the root is double black, then the algorithm + // is complete, as there is nothing to be out of balance with. 
Otherwise, + // we are searching for nearby red elements so we can rotate the tree to + // rebalance. The following slides nicely cover the case analysis below + // https://www.cs.purdue.edu/homes/ayg/CS251/slides/chap13c.pdf + while (path.curr() != ChildRef(H{&root})) + { + K parent = path.parent(); + bool cur_dir = path.curr_dir(); + K sibling = get_dir(!cur_dir, parent); + + /* Handle red sibling case. + * This performs a rotation to give a black sibling. + * + * p s(b) + * / \ / \ + * c s(r) --> p(r) m + * / \ / \ + * n m c n + * + * By invariant we know that p, n and m are all initially black. + */ + if (Rep::is_red(sibling)) + { + debug_log("Red sibling", path, path.parent()); + K nibling = get_dir(cur_dir, sibling); + get_dir(!cur_dir, parent) = nibling; + get_dir(cur_dir, sibling) = parent; + Rep::set_red(parent, true); + Rep::set_red(sibling, false); + path.parent() = sibling; + // Manually fix path. Using path.fixup would alter the complexity + // class. + path.pop(); + path.move(cur_dir); + path.move_inc_null(cur_dir); + path.fixup(false); + debug_log("Red sibling - done", path, path.parent()); + continue; + } + + /* Handle red nibling case 1. + *
+         *    p                s
+ * / \ / \ + * c s --> p rn + * / \ / \ + * on rn c on + */ + if (Rep::is_red(get_dir(!cur_dir, sibling))) + { + debug_log("Red nibling 1", path, path.parent()); + K r_nibling = get_dir(!cur_dir, sibling); + K o_nibling = get_dir(cur_dir, sibling); + get_dir(cur_dir, sibling) = parent; + get_dir(!cur_dir, parent) = o_nibling; + path.parent() = sibling; + Rep::set_red(r_nibling, false); + Rep::set_red(sibling, Rep::is_red(parent)); + Rep::set_red(parent, false); + debug_log("Red nibling 1 - done", path, path.parent()); + break; + } + + /* Handle red nibling case 2. + *
+         *    p                    rn
+ * / \ / \ + * c s --> p s + * / \ / \ / \ + * rn on c rno rns on + * / \ + * rno rns + */ + if (Rep::is_red(get_dir(cur_dir, sibling))) + { + debug_log("Red nibling 2", path, path.parent()); + K r_nibling = get_dir(cur_dir, sibling); + K r_nibling_same = get_dir(cur_dir, r_nibling); + K r_nibling_opp = get_dir(!cur_dir, r_nibling); + get_dir(!cur_dir, parent) = r_nibling_same; + get_dir(cur_dir, sibling) = r_nibling_opp; + get_dir(cur_dir, r_nibling) = parent; + get_dir(!cur_dir, r_nibling) = sibling; + path.parent() = r_nibling; + Rep::set_red(r_nibling, Rep::is_red(parent)); + Rep::set_red(parent, false); + debug_log("Red nibling 2 - done", path, path.parent()); + break; + } + + // Handle black sibling and niblings, and red parent. + if (Rep::is_red(parent)) + { + debug_log("Black sibling and red parent case", path, path.parent()); + Rep::set_red(parent, false); + Rep::set_red(sibling, true); + debug_log( + "Black sibling and red parent case - done", path, path.parent()); + break; + } + // Handle black sibling and niblings and black parent. + debug_log( + "Black sibling, niblings and black parent case", path, path.parent()); + Rep::set_red(sibling, true); + path.pop(); + invariant(path.curr()); + debug_log( + "Black sibling, niblings and black parent case - done", + path, + path.curr()); + } + return true; + } + + // Insert an element at the given path. + void insert_path(RBPath& path, K value) + { + SNMALLOC_ASSERT(path.curr().is_null()); + path.curr() = value; + get_dir(true, path.curr()) = Rep::null; + get_dir(false, path.curr()) = Rep::null; + Rep::set_red(value, true); + + debug_log("Insert ", path); + + // Propogate double red up to rebalance. + // These notes were particularly clear for explaining insert + // https://www.cs.cmu.edu/~fp/courses/15122-f10/lectures/17-rbtrees.pdf + while (path.curr() != get_root()) + { + SNMALLOC_ASSERT(Rep::is_red(path.curr())); + if (!Rep::is_red(path.parent())) + { + invariant(); + return; + } + bool curr_dir = path.curr_dir(); + K curr = path.curr(); + K parent = path.parent(); + K grand_parent = path.grand_parent(); + SNMALLOC_ASSERT(!Rep::is_red(grand_parent)); + if (path.parent_dir() == curr_dir) + { + debug_log("Insert - double red case 1", path, path.grand_parent()); + /* Same direction case + * G - grand parent + * P - parent + * C - current + * S - sibling + * + * G P + * / \ / \ + * A P --> G C + * / \ / \ + * S C A S + */ + K sibling = get_dir(!curr_dir, parent); + Rep::set_red(curr, false); + get_dir(curr_dir, grand_parent) = sibling; + get_dir(!curr_dir, parent) = grand_parent; + path.grand_parent() = parent; + debug_log( + "Insert - double red case 1 - done", path, path.grand_parent()); + } + else + { + debug_log("Insert - double red case 2", path, path.grand_parent()); + /* G - grand parent + * P - parent + * C - current + * Cg - Current child for grand parent + * Cp - Current child for parent + * + * G C + * / \ / \ + * A P G P + * / \ --> / \ / \ + * C B A Cg Cp B + * / \ + * Cg Cp + */ + K child_g = get_dir(curr_dir, curr); + K child_p = get_dir(!curr_dir, curr); + + Rep::set_red(parent, false); + path.grand_parent() = curr; + get_dir(curr_dir, curr) = grand_parent; + get_dir(!curr_dir, curr) = parent; + get_dir(curr_dir, parent) = child_p; + get_dir(!curr_dir, grand_parent) = child_g; + debug_log( + "Insert - double red case 2 - done", path, path.grand_parent()); + } + + // Move to what replaced grand parent. 
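Both double-red cases reduce to recolouring plus the single tree rotation sketched below (case 1 is one rotation at the grandparent; case 2 composes two). The `Node` struct is illustrative only; the real code works over `K`/`Rep` handles with direction-indexed children as in `get_dir`:

```cpp
// Illustrative node type with children indexed by direction.
struct Node
{
  Node* child[2] = {nullptr, nullptr}; // child[false] = left, child[true] = right
  bool red = false;
};

// Rotate the subtree rooted at *root towards `dir`: the child on the opposite
// side becomes the new subtree root and the old root becomes its `dir` child.
void rotate(Node*& root, bool dir)
{
  Node* pivot = root->child[!dir];
  root->child[!dir] = pivot->child[dir];
  pivot->child[dir] = root;
  root = pivot;
}
```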
+ path.pop(); + path.pop(); + invariant(path.curr()); + } + Rep::set_red(get_root(), false); + invariant(); + } + + K remove_min() + { + if (get_root().is_null()) + return Rep::null; + + auto path = get_root_path(); + while (path.move(true)) + { + } + + K result = path.curr(); + + remove_path(path); + return result; + } + + bool remove_elem(K value) + { + if (get_root().is_null()) + return false; + + auto path = get_root_path(); + if (!find(path, value)) + return false; + + remove_path(path); + return true; + } + + bool insert_elem(K value) + { + auto path = get_root_path(); + + if (find(path, value)) + return false; + + insert_path(path, value); + return true; + } + + RBPath get_root_path() + { + return RBPath(H{&root}); + } + }; +} // namespace snmalloc diff --git a/src/snmalloc/ds_core/seqset.h b/src/snmalloc/ds_core/seqset.h new file mode 100644 index 000000000..22a0fcd07 --- /dev/null +++ b/src/snmalloc/ds_core/seqset.h @@ -0,0 +1,188 @@ +#pragma once + +#include "../ds_core/ds_core.h" + +#include +#include + +namespace snmalloc +{ + /** + * Simple sequential set of T. + * + * Linked using the T::next field. + * + * Can be used in either Fifo or Lifo mode, which is + * specified by template parameter. + */ + template + class SeqSet + { + /** + * This sequence structure is intrusive, in that it requires the use of a + * `next` field in the elements it manages, but, unlike some other intrusive + * designs, it does not require the use of a `container_of`-like construct, + * because its pointers point to the element, not merely the intrusive + * member. + * + * In some cases, the next pointer is provided by a superclass but the list + * is templated over the subclass. The `SeqSet` enforces the invariant that + * only instances of the subclass can be added to the list and so can safely + * down-cast the type of `.next` to `T*`. As such, we require only that the + * `next` field is a pointer to `T` or some superclass of `T`. + * %{ + */ + using NextPtr = decltype(std::declval().next); + static_assert( + std::is_base_of_v, T>, + "T->next must be a queue pointer to T"); + ///@} + + /** + * Field representation for Fifo behaviour. + */ + struct FieldFifo + { + NextPtr head{nullptr}; + }; + + /** + * Field representation for Lifo behaviour. + */ + struct FieldLifo + { + NextPtr head{nullptr}; + NextPtr* end{&head}; + }; + + /** + * Field indirection to actual representation. + * Different numbers of fields are required for the + * two behaviours. + */ + std::conditional_t v; + + /** + * Check for empty + */ + SNMALLOC_FAST_PATH bool is_empty() + { + if constexpr (Fifo) + { + return v.head == nullptr; + } + else + { + SNMALLOC_ASSERT(v.end != nullptr); + return &(v.head) == v.end; + } + } + + public: + /** + * Empty queue + */ + constexpr SeqSet() = default; + + /** + * Remove an element from the queue + * + * Assumes queue is non-empty + */ + SNMALLOC_FAST_PATH T* pop() + { + SNMALLOC_ASSERT(!this->is_empty()); + auto result = v.head; + if constexpr (Fifo) + { + v.head = result->next; + } + else + { + if (&(v.head->next) == v.end) + v.end = &(v.head); + else + v.head = v.head->next; + } + // This cast is safe if the ->next pointers in all of the objects in the + // list are managed by this class because object types are checked on + // insertion. + return static_cast(result); + } + + /** + * Filter + * + * Removes all elements that f returns true for. + * If f returns true, then filter is not allowed to look at the + * object again, and f is responsible for its lifetime. 
+ */ + template + SNMALLOC_FAST_PATH void filter(Fn&& f) + { + // Check for empty case. + if (is_empty()) + return; + + NextPtr* prev = &(v.head); + + while (true) + { + if constexpr (Fifo) + { + if (*prev == nullptr) + break; + } + + NextPtr curr = *prev; + // Note must read curr->next before calling `f` as `f` is allowed to + // mutate that field. + NextPtr next = curr->next; + if (f(static_cast(curr))) + { + // Remove element; + *prev = next; + } + else + { + // Keep element + prev = &(curr->next); + } + if constexpr (!Fifo) + { + if (&(curr->next) == v.end) + break; + } + } + if constexpr (!Fifo) + { + v.end = prev; + } + } + + /** + * Add an element to the queue. + */ + SNMALLOC_FAST_PATH void insert(T* item) + { + if constexpr (Fifo) + { + item->next = v.head; + v.head = item; + } + else + { + *(v.end) = item; + v.end = &(item->next); + } + } + + /** + * Peek at next element in the set. + */ + SNMALLOC_FAST_PATH const T* peek() + { + return static_cast(v.head); + } + }; +} // namespace snmalloc diff --git a/src/snmalloc/global/bounds_checks.h b/src/snmalloc/global/bounds_checks.h new file mode 100644 index 000000000..378f5439a --- /dev/null +++ b/src/snmalloc/global/bounds_checks.h @@ -0,0 +1,103 @@ +#pragma once +#include "threadalloc.h" + +namespace snmalloc +{ + /** + * Should we check loads? This defaults to on in debug builds, off in + * release (store-only checks) and can be overridden by defining the macro + * `SNMALLOC_CHECK_LOADS` to true or false. + */ + static constexpr bool CheckReads = +#ifdef SNMALLOC_CHECK_LOADS + SNMALLOC_CHECK_LOADS +#else + DEBUG +#endif + ; + + /** + * Should we fail fast when we encounter an error? With this set to true, we + * just issue a trap instruction and crash the process once we detect an + * error. With it set to false we print a helpful error message and then crash + * the process. The process may be in an undefined state by the time the + * check fails, so there are potentially security implications to turning this + * off. It defaults to false and can be overridden by defining the macro + * `SNMALLOC_FAIL_FAST` to true. + * + * Current default to true will help with adoption experience. + */ + static constexpr bool FailFast = +#ifdef SNMALLOC_FAIL_FAST + SNMALLOC_FAIL_FAST +#else + false +#endif + ; + + /** + * Report an error message for a failed bounds check and then abort the + * program. + * `p` is the input pointer and `len` is the offset from this pointer of the + * bounds. `msg` is the message that will be reported along with the + * start and end of the real object's bounds. + * + * Note that this function never returns. We do not mark it [[NoReturn]] + * so as to generate better code, because [[NoReturn]] prevents tailcails + * in GCC and Clang. + * + * The function claims to return a FakeReturn, this is so it can be tail + * called where the bound checked function returns a value, for instance, in + * memcpy it is specialised to void*. 
+ */ + template + SNMALLOC_SLOW_PATH SNMALLOC_UNUSED_FUNCTION inline FakeReturn + report_fatal_bounds_error(const void* ptr, size_t len, const char* msg) + { + if constexpr (FailFast) + { + UNUSED(ptr, len, msg); + SNMALLOC_FAST_FAIL(); + } + else + { + auto& alloc = ThreadAlloc::get(); + void* p = const_cast(ptr); + + auto range_end = pointer_offset(p, len); + auto object_end = alloc.template external_pointer(p); + report_fatal_error( + "Fatal Error!\n{}: \n\trange [{}, {})\n\tallocation [{}, " + "{})\nrange goes beyond allocation by {} bytes \n", + msg, + p, + range_end, + alloc.template external_pointer(p), + object_end, + pointer_diff(object_end, range_end)); + } + } + + /** + * Check whether a pointer + length is in the same object as the pointer. + * + * Returns true if the checks succeeds. + * + * The template parameter indicates whether the check should be performed. It + * defaults to true. If it is false, the check will always succeed. + */ + template + SNMALLOC_FAST_PATH_INLINE bool check_bounds(const void* ptr, size_t len) + { + if constexpr (PerformCheck) + { + auto& alloc = ThreadAlloc::get(); + return alloc.check_bounds(ptr, len); + } + else + { + UNUSED(ptr, len); + return true; + } + } +} // namespace snmalloc diff --git a/src/snmalloc/global/global.h b/src/snmalloc/global/global.h new file mode 100644 index 000000000..a2f1159a1 --- /dev/null +++ b/src/snmalloc/global/global.h @@ -0,0 +1,4 @@ +#include "bounds_checks.h" +#include "memcpy.h" +#include "scopedalloc.h" +#include "threadalloc.h" diff --git a/src/snmalloc/global/memcpy.h b/src/snmalloc/global/memcpy.h new file mode 100644 index 000000000..ee60cd239 --- /dev/null +++ b/src/snmalloc/global/memcpy.h @@ -0,0 +1,331 @@ +#pragma once +#include "../backend/globalconfig.h" +#include "bounds_checks.h" + +namespace snmalloc +{ + /** + * Copy a single element of a specified size. Uses a compiler builtin that + * expands to a single load and store. + */ + template + SNMALLOC_FAST_PATH_INLINE void copy_one(void* dst, const void* src) + { +#if __has_builtin(__builtin_memcpy_inline) + __builtin_memcpy_inline(dst, src, Size); +#else + // Define a structure of size `Size` that has alignment 1 and a default + // copy-assignment operator. We can then copy the data as this type. The + // compiler knows the exact width and so will generate the correct wide + // instruction for us (clang 10 and gcc 12 both generate movups for the + // 16-byte version of this when targeting SSE. + struct Block + { + char data[Size]; + }; + auto* d = static_cast(dst); + auto* s = static_cast(src); + *d = *s; +#endif + } + + /** + * Copy a block using the specified size. This copies as many complete + * chunks of size `Size` as are possible from `len`. + */ + template + SNMALLOC_FAST_PATH_INLINE void + block_copy(void* dst, const void* src, size_t len) + { + for (size_t i = 0; (i + Size) <= len; i += Size) + { + copy_one(pointer_offset(dst, i), pointer_offset(src, i)); + } + } + + /** + * Perform an overlapping copy of the end. This will copy one (potentially + * unaligned) `T` from the end of the source to the end of the destination. + * This may overlap other bits of the copy. + */ + template + SNMALLOC_FAST_PATH_INLINE void + copy_end(void* dst, const void* src, size_t len) + { + copy_one( + pointer_offset(dst, len - Size), pointer_offset(src, len - Size)); + } + + /** + * Predicate indicating whether the source and destination are sufficiently + * aligned to be copied as aligned chunks of `Size` bytes. 
+ */ + template + SNMALLOC_FAST_PATH_INLINE bool is_aligned_memcpy(void* dst, const void* src) + { + return (pointer_align_down(const_cast(src)) == src) && + (pointer_align_down(dst) == dst); + } + + /** + * Copy a small size (`Size` bytes) as a sequence of power-of-two-sized loads + * and stores of decreasing size. `Word` is the largest size to attempt for a + * single copy. + */ + template + SNMALLOC_FAST_PATH_INLINE void small_copy(void* dst, const void* src) + { + static_assert(bits::is_pow2(Word), "Word size must be a power of two!"); + if constexpr (Size != 0) + { + if constexpr (Size >= Word) + { + copy_one(dst, src); + small_copy( + pointer_offset(dst, Word), pointer_offset(src, Word)); + } + else + { + small_copy(dst, src); + } + } + else + { + UNUSED(src); + UNUSED(dst); + } + } + + /** + * Generate small copies for all sizes up to `Size`, using `WordSize` as the + * largest size to copy in a single operation. + */ + template + SNMALLOC_FAST_PATH_INLINE void + small_copies(void* dst, const void* src, size_t len) + { + if (len == Size) + { + small_copy(dst, src); + } + if constexpr (Size > 0) + { + small_copies(dst, src, len); + } + } + + /** + * If the source and destination are the same displacement away from being + * aligned on a `BlockSize` boundary, do a small copy to ensure alignment and + * update `src`, `dst`, and `len` to reflect the remainder that needs + * copying. + * + * Note that this, like memcpy, requires that the source and destination do + * not overlap. It unconditionally copies `BlockSize` bytes, so a subsequent + * copy may not do the right thing. + */ + template + SNMALLOC_FAST_PATH_INLINE void + unaligned_start(void*& dst, const void*& src, size_t& len) + { + constexpr size_t block_mask = BlockSize - 1; + size_t src_addr = static_cast(reinterpret_cast(src)); + size_t dst_addr = static_cast(reinterpret_cast(dst)); + size_t src_offset = src_addr & block_mask; + if ((src_offset > 0) && (src_offset == (dst_addr & block_mask))) + { + size_t disp = BlockSize - src_offset; + small_copies(dst, src, disp); + src = pointer_offset(src, disp); + dst = pointer_offset(dst, disp); + len -= disp; + } + } + + /** + * Default architecture definition. Provides sane defaults. + */ + struct GenericArch + { + /** + * The largest register size that we can use for loads and stores. These + * types are expected to work for overlapping copies: we can always load + * them into a register and store them. Note that this is at the C abstract + * machine level: the compiler may spill temporaries to the stack, just not + * to the source or destination object. + */ + SNMALLOC_UNUSED_FUNCTION + static constexpr size_t LargestRegisterSize = + std::max(sizeof(uint64_t), sizeof(void*)); + + /** + * Hook for architecture-specific optimisations. Does nothing in the + * default case. + */ + static SNMALLOC_FAST_PATH_INLINE void + copy(void* dst, const void* src, size_t len) + { + // If this is a small size, use a jump table for small sizes. + if (len <= LargestRegisterSize) + { + small_copies(dst, src, len); + } + // Otherwise do a simple bulk copy loop. + else + { + block_copy(dst, src, len); + copy_end(dst, src, len); + } + } + }; + +#if defined(__x86_64__) || defined(_M_X64) + /** + * x86-64 architecture. Prefers SSE registers for small and medium copies + * and uses `rep movsb` for large ones. + */ + struct X86_64Arch + { + /** + * The largest register size that we can use for loads and stores. 
These + * types are expected to work for overlapping copies: we can always load + * them into a register and store them. Note that this is at the C abstract + * machine level: the compiler may spill temporaries to the stack, just not + * to the source or destination object. + * + * We set this to 16 unconditionally for now because using AVX registers + * imposes stronger alignment requirements that seem to not be a net win. + */ + static constexpr size_t LargestRegisterSize = 16; + + /** + * Platform-specific copy hook. For large copies, use `rep movsb`. + */ + static SNMALLOC_FAST_PATH_INLINE void + copy(void* dst, const void* src, size_t len) + { + // If this is a small size, use a jump table for small sizes, like on the + // generic architecture case above. + if (len <= LargestRegisterSize) + { + small_copies(dst, src, len); + } + + // The Intel optimisation manual recommends doing this for sizes >256 + // bytes on modern systems and for all sizes on very modern systems. + // Testing shows that this is somewhat overly optimistic. + else if (SNMALLOC_UNLIKELY(len >= 512)) + { + // Align to cache-line boundaries if possible. + unaligned_start<64, LargestRegisterSize>(dst, src, len); + // Bulk copy. This is aggressively optimised on modern x86 cores. +# ifdef __GNUC__ + asm volatile("rep movsb" + : "+S"(src), "+D"(dst), "+c"(len) + : + : "memory"); +# elif defined(_MSC_VER) + __movsb( + static_cast(dst), + static_cast(src), + len); +# else +# error No inline assembly or rep movsb intrinsic for this compiler. +# endif + } + + // Otherwise do a simple bulk copy loop. + else + { + block_copy(dst, src, len); + copy_end(dst, src, len); + } + } + }; +#endif + +#if defined(__powerpc64__) + struct PPC64Arch + { + /** + * Modern POWER machines have vector registers + */ + static constexpr size_t LargestRegisterSize = 16; + + /** + * For large copies (128 bytes or above), use a copy loop that moves up to + * 128 bytes at once with pre-loop alignment up to 64 bytes. + */ + static SNMALLOC_FAST_PATH_INLINE void + copy(void* dst, const void* src, size_t len) + { + if (len < LargestRegisterSize) + { + block_copy<1>(dst, src, len); + } + else if (SNMALLOC_UNLIKELY(len >= 128)) + { + // Eight vector operations per loop + static constexpr size_t block_size = 128; + + // Cache-line align first + unaligned_start<64, LargestRegisterSize>(dst, src, len); + block_copy(dst, src, len); + copy_end(dst, src, len); + } + else + { + block_copy(dst, src, len); + copy_end(dst, src, len); + } + } + }; +#endif + + using DefaultArch = +#ifdef __x86_64__ + X86_64Arch +#elif defined(__powerpc64__) + PPC64Arch +#else + GenericArch +#endif + ; + + /** + * Snmalloc checked memcpy. The `Arch` parameter must provide: + * + * - A `size_t` value `LargestRegisterSize`, describing the largest size to + * use for single copies. + * - A `copy` function that takes (optionally, references to) the arguments + * of `memcpy` and returns `true` if it performs a copy, `false` + * otherwise. This can be used to special-case some or all sizes for a + * particular architecture. + */ + template< + bool Checked, + bool ReadsChecked = CheckReads, + typename Arch = DefaultArch> + SNMALLOC_FAST_PATH_INLINE void* memcpy(void* dst, const void* src, size_t len) + { + auto orig_dst = dst; + // 0 is a very common size for memcpy and we don't need to do external + // pointer checks if we hit it. It's also the fastest case, to encourage + // the compiler to favour the other cases. 
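For callers, the only knob is the `Checked` template parameter. A hypothetical wrapper routing an exported symbol through the checked path might look like the following; the wrapper name, header path, and wiring are assumptions for illustration, and only the `snmalloc::memcpy<Checked>` signature is taken from this patch:

```cpp
#include <cstddef>
#include <snmalloc/snmalloc.h> // assumed umbrella header path

// Hypothetical exported symbol; snmalloc's real libc override lives outside
// this header and is not shown in this patch.
extern "C" void* checked_memcpy(void* dst, const void* src, std::size_t len)
{
  // Checked = true bounds-checks the destination; whether the source is also
  // checked follows the CheckReads default from bounds_checks.h.
  return snmalloc::memcpy<true>(dst, src, len);
}
```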
+ if (SNMALLOC_UNLIKELY(len == 0)) + { + return dst; + } + + // Check the bounds of the arguments. + if (SNMALLOC_UNLIKELY(!check_bounds<(Checked && ReadsChecked)>(src, len))) + return report_fatal_bounds_error( + src, len, "memcpy with source out of bounds of heap allocation"); + if (SNMALLOC_UNLIKELY(!check_bounds(dst, len))) + return report_fatal_bounds_error( + dst, len, "memcpy with destination out of bounds of heap allocation"); + + Arch::copy(dst, src, len); + return orig_dst; + } +} // namespace snmalloc diff --git a/src/mem/slowalloc.h b/src/snmalloc/global/scopedalloc.h similarity index 65% rename from src/mem/slowalloc.h rename to src/snmalloc/global/scopedalloc.h index 87be4d10a..cb9f0fc8b 100644 --- a/src/mem/slowalloc.h +++ b/src/snmalloc/global/scopedalloc.h @@ -1,6 +1,9 @@ #pragma once +#include "../backend/globalconfig.h" -#include "globalalloc.h" +/** + * This header requires that Alloc has been defined. + */ namespace snmalloc { @@ -13,57 +16,68 @@ namespace snmalloc * This does not depend on thread-local storage working, so can be used for * bootstrapping. */ - struct SlowAllocator + struct ScopedAllocator { /** * The allocator that this wrapper will use. */ - Alloc* alloc; + Alloc alloc; + /** * Constructor. Claims an allocator from the global pool */ - SlowAllocator() : alloc(current_alloc_pool()->acquire()) {} + ScopedAllocator() + { + alloc.init(); + }; + /** * Copying is not supported, it could easily lead to accidental sharing of * allocators. */ - SlowAllocator(const SlowAllocator&) = delete; + ScopedAllocator(const ScopedAllocator&) = delete; + /** * Moving is not supported, though it would be easy to add if there's a use * case for it. */ - SlowAllocator(SlowAllocator&&) = delete; + ScopedAllocator(ScopedAllocator&&) = delete; + /** * Copying is not supported, it could easily lead to accidental sharing of * allocators. */ - SlowAllocator& operator=(const SlowAllocator&) = delete; + ScopedAllocator& operator=(const ScopedAllocator&) = delete; + /** * Moving is not supported, though it would be easy to add if there's a use * case for it. */ - SlowAllocator& operator=(SlowAllocator&&) = delete; + ScopedAllocator& operator=(ScopedAllocator&&) = delete; + /** * Destructor. Returns the allocator to the pool. */ - ~SlowAllocator() + ~ScopedAllocator() { - current_alloc_pool()->release(alloc); + alloc.flush(); } + /** * Arrow operator, allows methods exposed by `Alloc` to be called on the * wrapper. */ Alloc* operator->() { - return alloc; + return &alloc; } }; + /** - * Returns a new slow allocator. When the `SlowAllocator` goes out of scope, - * the underlying `Alloc` will be returned to the pool. + * Returns a new scoped allocator. When the `ScopedAllocator` goes out of + * scope, the underlying `Alloc` will be returned to the pool. */ - inline SlowAllocator get_slow_allocator() + inline ScopedAllocator get_scoped_allocator() { return {}; } diff --git a/src/snmalloc/global/threadalloc.h b/src/snmalloc/global/threadalloc.h new file mode 100644 index 000000000..d900fb272 --- /dev/null +++ b/src/snmalloc/global/threadalloc.h @@ -0,0 +1,185 @@ +#pragma once + +#include "../backend/globalconfig.h" + +#if defined(SNMALLOC_EXTERNAL_THREAD_ALLOC) +# define SNMALLOC_THREAD_TEARDOWN_DEFINED +#endif + +#if defined(SNMALLOC_USE_THREAD_CLEANUP) +# if defined(SNMALLOC_THREAD_TEARDOWN_DEFINED) +# error At most one out of method of thread teardown can be specified. 
+# else +# define SNMALLOC_THREAD_TEARDOWN_DEFINED +# endif +#endif + +#if defined(SNMALLOC_USE_PTHREAD_DESTRUCTORS) +# if defined(SNMALLOC_THREAD_TEARDOWN_DEFINED) +# error At most one out of method of thread teardown can be specified. +# else +# include +# define SNMALLOC_THREAD_TEARDOWN_DEFINED +# endif +#endif + +#if !defined(SNMALLOC_THREAD_TEARDOWN_DEFINED) +# define SNMALLOC_USE_CXX_THREAD_DESTRUCTORS +#endif +extern "C" void _malloc_thread_cleanup(); + +namespace snmalloc +{ +#ifdef SNMALLOC_EXTERNAL_THREAD_ALLOC + /** + * Version of the `ThreadAlloc` interface that does no management of thread + * local state. + * + * It assumes that Alloc has been defined, and `ThreadAllocExternal` class + * has access to snmalloc_core.h. + */ + class ThreadAlloc + { + protected: + static void register_cleanup() {} + + public: + static SNMALLOC_FAST_PATH Alloc& get() + { + return ThreadAllocExternal::get(); + } + }; + + /** + * Function passed as a template parameter to `Allocator` to allow lazy + * replacement. There is nothing to initialise in this case, so we expect + * this to never be called. + */ +# ifdef _MSC_VER +// 32Bit Windows release MSVC is determining this as having unreachable code for +// f(nullptr), which is true. But other platforms don't. Disabling the warning +// seems simplist. +# pragma warning(push) +# pragma warning(disable : 4702) +# endif + inline void register_clean_up() + { + error("Critical Error: This should never be called."); + } +# ifdef _MSC_VER +# pragma warning(pop) +# endif +#else + /** + * Holds the thread local state for the allocator. The state is constant + * initialised, and has no direct dectructor. Instead snmalloc will call + * `register_clean_up` on the slow path for bringing up thread local state. + * This is responsible for calling `teardown`, which effectively destructs the + * data structure, but in a way that allow it to still be used. + */ + class ThreadAlloc + { + public: + /** + * Handle on thread local allocator + * + * This structure will self initialise if it has not been called yet. + * It can be used during thread teardown, but its performance will be + * less good. + */ + static SNMALLOC_FAST_PATH Alloc& get() + { + SNMALLOC_REQUIRE_CONSTINIT static thread_local Alloc alloc; + return alloc; + } + }; + +# ifdef SNMALLOC_USE_PTHREAD_DESTRUCTORS + /** + * Used to give correct signature to teardown required by pthread_key. + */ + inline void pthread_cleanup(void*) + { + ThreadAlloc::get().teardown(); + } + + /** + * Used to give correct signature to teardown required by atexit. + */ + inline void pthread_cleanup_main_thread() + { + ThreadAlloc::get().teardown(); + } + + /** + * Used to give correct signature to the pthread call for the Singleton class. + */ + inline void pthread_create(pthread_key_t* key) noexcept + { + pthread_key_create(key, &pthread_cleanup); + // Main thread does not call pthread_cleanup if `main` returns or `exit` is + // called, so use an atexit handler to guarantee that the cleanup is run at + // least once. If the main thread exits with `pthread_exit` then it will be + // called twice but this case is already handled because other destructors + // can cause the per-thread allocator to be recreated. + atexit(&pthread_cleanup_main_thread); + } + + /** + * Performs thread local teardown for the allocator using the pthread library. + * + * This removes the dependence on the C++ runtime. 
+ */ + inline void register_clean_up() + { + Singleton p_key; + // We need to set a non-null value, so that the destructor is called, + // we never look at the value. + static char p_teardown_val = 1; + pthread_setspecific(p_key.get(), &p_teardown_val); +# ifdef SNMALLOC_TRACING + message<1024>("Using pthread clean up"); +# endif + } +# elif defined(SNMALLOC_USE_CXX_THREAD_DESTRUCTORS) + /** + * This function is called by each thread once it starts using the + * thread local allocator. + * + * This implementation depends on nothing outside of a working C++ + * environment and so should be the simplest for initial bringup on an + * unsupported platform. + */ + inline void register_clean_up() + { + static thread_local OnDestruct dummy( + []() { ThreadAlloc::get().teardown(); }); + UNUSED(dummy); +# ifdef SNMALLOC_TRACING + message<1024>("Using C++ destructor clean up"); +# endif + } +# endif +#endif +} // namespace snmalloc + +#ifdef SNMALLOC_USE_THREAD_CLEANUP +/** + * Entry point that allows libc to call into the allocator for per-thread + * cleanup. + */ +SNMALLOC_USED_FUNCTION +inline void _malloc_thread_cleanup() +{ + snmalloc::ThreadAlloc::get().teardown(); +} + +namespace snmalloc +{ + /** + * No-op version of register_clean_up. This is called unconditionally by + * globalconfig but is not necessary when using a libc hook. + */ + inline void register_clean_up() {} +} +#endif diff --git a/src/snmalloc/mem/backend_concept.h b/src/snmalloc/mem/backend_concept.h new file mode 100644 index 000000000..951762704 --- /dev/null +++ b/src/snmalloc/mem/backend_concept.h @@ -0,0 +1,199 @@ +#pragma once + +#ifdef __cpp_concepts +# include "../ds/ds.h" + +# include +namespace snmalloc +{ + /** + * The core of the static pagemap accessor interface: {get,set}_metadata. + * + * get_metadata takes a boolean template parameter indicating whether it may + * be accessing memory that is not known to be committed. + */ + template + concept IsReadablePagemap = + requires(address_t addr, size_t sz, const typename Pagemap::Entry& t) + { + { + Pagemap::template get_metaentry(addr) + } + ->ConceptSame; + + { + Pagemap::template get_metaentry(addr) + } + ->ConceptSame; + }; + + /** + * The core of the static pagemap accessor interface: {get,set}_metadata. + * + * get_metadata_mut takes a boolean template parameter indicating whether it + * may be accessing memory that is not known to be committed. + * + * set_metadata updates the entry in the pagemap. + */ + template + concept IsWritablePagemap = IsReadablePagemap&& requires( + address_t addr, size_t sz, const typename Pagemap::Entry& t) + { + { + Pagemap::template get_metaentry_mut(addr) + } + ->ConceptSame; + + { + Pagemap::template get_metaentry_mut(addr) + } + ->ConceptSame; + + { + Pagemap::set_metaentry(addr, sz, t) + } + ->ConceptSame; + }; + + /** + * The pagemap can also be told to commit backing storage for a range of + * addresses. This is broken out to a separate concept so that we can + * annotate which functions expect to do this vs. which merely use the core + * interface above. In practice, use IsWritablePagemapWithRegister below, + * which combines this and the core concept, above. + */ + template + concept IsPagemapWithRegister = requires(address_t addr, size_t sz) + { + { + Pagemap::register_range(addr, sz) + } + ->ConceptSame; + }; + + /** + * The full pagemap accessor interface, with all of {get,set}_metadata and + * register_range. 
Use this to annotate callers that need the full interface + * and use IsReadablePagemap for callers that merely need {get,set}_metadata, + * but note that the difference is just for humans and not compilers (since + * concept checking is lower bounding and does not constrain the templatized + * code to use only those affordances given by the concept). + */ + template + concept IsWritablePagemapWithRegister = + IsReadablePagemap&& IsPagemapWithRegister; + + /** + * The configuration also defines domestication (that is, the difference + * between Tame and Wild CapPtr bounds). It exports the intended affordance + * for testing a Wild pointer and either returning nullptr or the original + * pointer, now Tame. + */ + template + concept IsConfigDomestication = + requires(typename Config::LocalState* ls, capptr::AllocWild ptr) + { + { + Config::capptr_domesticate(ls, ptr) + } + ->ConceptSame>; + + { + Config::capptr_domesticate(ls, ptr.template as_static()) + } + ->ConceptSame>; + }; + + class CommonConfig; + struct Flags; + + template + concept IsBackend = + requires(LocalState& local_state, size_t size, uintptr_t ras) + { + { + Backend::alloc_chunk(local_state, size, ras) + } + ->ConceptSame< + std::pair, typename Backend::SlabMetadata*>>; + } + &&requires(LocalState* local_state, size_t size) + { + { + Backend::template alloc_meta_data(local_state, size) + } + ->ConceptSame>; + } + &&requires( + LocalState& local_state, + typename Backend::SlabMetadata& slab_metadata, + capptr::Alloc alloc, + size_t size) + { + { + Backend::dealloc_chunk(local_state, slab_metadata, alloc, size) + } + ->ConceptSame; + } + &&requires(address_t p) + { + { + Backend::template get_metaentry(p) + } + ->ConceptSame; + + { + Backend::template get_metaentry(p) + } + ->ConceptSame; + }; + + /** + * Config objects of type T must obey a number of constraints. They + * must... + * + * * inherit from CommonConfig (see commonconfig.h) + * * specify which PAL is in use via T::Pal + * * define a T::LocalState type (and alias it as T::Pagemap::LocalState) + * * define T::Options of type snmalloc::Flags + * * expose the global allocator pool via T::pool() if pool allocation is + * used. + * + */ + template + concept IsConfig = std::is_base_of::value&& + IsPAL&& IsBackend< + typename Config::LocalState, + typename Config::PagemapEntry, + typename Config::Backend>&& requires() + { + typename Config::LocalState; + typename Config::Backend; + typename Config::PagemapEntry; + + { + Config::Options + } + ->ConceptSameModRef; + } + &&( + requires() { + Config::Options.CoreAllocIsPoolAllocated == true; + typename Config::GlobalPoolState; + { + Config::pool() + } + ->ConceptSame; + } || + requires() { Config::Options.CoreAllocIsPoolAllocated == false; }); + + /** + * The lazy version of the above; please see ds_core/concept.h and use + * sparingly. + */ + template + concept IsConfigLazy = !is_type_complete_v || IsConfig; + +} // namespace snmalloc + +#endif diff --git a/src/snmalloc/mem/backend_wrappers.h b/src/snmalloc/mem/backend_wrappers.h new file mode 100644 index 000000000..98320b2f9 --- /dev/null +++ b/src/snmalloc/mem/backend_wrappers.h @@ -0,0 +1,109 @@ +#pragma once +/** + * Several of the functions provided by the back end are optional. This file + * contains helpers that are templated on a back end and either call the + * corresponding function or do nothing. This allows the rest of the front end + * to assume that these functions always exist and avoid the need for `if + * constexpr` clauses everywhere. 
The no-op versions are always inlined and so + * will be optimised away. + */ + +#include "../ds_core/ds_core.h" + +namespace snmalloc +{ + /** + * SFINAE helper. Matched only if `T` implements `is_initialised`. Calls + * it if it exists. + */ + template + SNMALLOC_FAST_PATH auto call_is_initialised(T*, int) + -> decltype(T::is_initialised()) + { + return T::is_initialised(); + } + + /** + * SFINAE helper. Matched only if `T` does not implement `is_initialised`. + * Unconditionally returns true if invoked. + */ + template + SNMALLOC_FAST_PATH auto call_is_initialised(T*, long) + { + return true; + } + + namespace detail + { + /** + * SFINAE helper to detect the presence of capptr_domesticate function in + * backend. Returns true if there is a function with correct name and type. + */ + template< + SNMALLOC_CONCEPT(IsConfigDomestication) Config, + typename T, + SNMALLOC_CONCEPT(capptr::IsBound) B> + constexpr SNMALLOC_FAST_PATH_INLINE auto has_domesticate(int) + -> std::enable_if_t< + std::is_same_v< + decltype(Config::capptr_domesticate( + std::declval(), + std::declval>())), + CapPtr< + T, + typename B::template with_wildness< + capptr::dimension::Wildness::Tame>>>, + bool> + { + return true; + } + + /** + * SFINAE helper to detect the presence of capptr_domesticate function in + * backend. Returns false in case where above template does not match. + */ + template< + SNMALLOC_CONCEPT(IsConfig) Config, + typename T, + SNMALLOC_CONCEPT(capptr::IsBound) B> + constexpr SNMALLOC_FAST_PATH_INLINE bool has_domesticate(long) + { + return false; + } + } // namespace detail + + /** + * Wrapper that calls `Config::capptr_domesticate` if and only if + * Config::Options.HasDomesticate is true. If it is not implemented then + * this assumes that any wild pointer can be domesticated. + */ + template< + SNMALLOC_CONCEPT(IsConfig) Config, + typename T, + SNMALLOC_CONCEPT(capptr::IsBound) B> + SNMALLOC_FAST_PATH_INLINE auto + capptr_domesticate(typename Config::LocalState* ls, CapPtr p) + { + static_assert( + !detail::has_domesticate(0) || + Config::Options.HasDomesticate, + "Back end provides domesticate function but opts out of using it "); + + static_assert( + detail::has_domesticate(0) || + !Config::Options.HasDomesticate, + "Back end does not provide capptr_domesticate and requests its use"); + if constexpr (Config::Options.HasDomesticate) + { + return Config::capptr_domesticate(ls, p); + } + else + { + UNUSED(ls); + return CapPtr< + T, + typename B::template with_wildness>:: + unsafe_from(p.unsafe_ptr()); + } + } +} // namespace snmalloc diff --git a/src/snmalloc/mem/corealloc.h b/src/snmalloc/mem/corealloc.h new file mode 100644 index 000000000..aa6adc5e9 --- /dev/null +++ b/src/snmalloc/mem/corealloc.h @@ -0,0 +1,972 @@ +#pragma once + +#include "../ds/ds.h" +#include "localcache.h" +#include "metadata.h" +#include "pool.h" +#include "remotecache.h" +#include "sizeclasstable.h" +#include "ticker.h" + +namespace snmalloc +{ + /** + * The core, stateful, part of a memory allocator. Each `LocalAllocator` + * owns one `CoreAllocator` once it is initialised. + * + * The template parameter provides all of the global configuration for this + * instantiation of snmalloc. This includes three options that apply to this + * class: + * + * - `CoreAllocIsPoolAllocated` defines whether this `CoreAlloc` + * configuration should support pool allocation. This defaults to true but + * a configuration that allocates allocators eagerly may opt out. 
+ * - `CoreAllocOwnsLocalState` defines whether the `CoreAllocator` owns the + * associated `LocalState` object. If this is true (the default) then + * `CoreAllocator` embeds the LocalState object. If this is set to false + * then a `LocalState` object must be provided to the constructor. This + * allows external code to provide explicit configuration of the address + * range managed by this object. + * - `IsQueueInline` (defaults to true) defines whether the message queue + * (`RemoteAllocator`) for this class is inline or provided externally. If + * provided externally, then it must be set explicitly with + * `init_message_queue`. + */ + template + class CoreAllocator : public std::conditional_t< + Config::Options.CoreAllocIsPoolAllocated, + Pooled>, + Empty> + { + template + friend class LocalAllocator; + + /** + * Define local names for specialised versions of various types that are + * specialised for the back-end that we are using. + * @{ + */ + using BackendSlabMetadata = typename Config::Backend::SlabMetadata; + using PagemapEntry = typename Config::PagemapEntry; + /// }@ + + /** + * Per size class list of active slabs for this allocator. + */ + struct SlabMetadataCache + { +#ifdef SNMALLOC_CHECK_CLIENT + SeqSet available; +#else + // This is slightly faster in some cases, + // but makes memory reuse more predictable. + SeqSet available; +#endif + uint16_t unused = 0; + uint16_t length = 0; + } alloc_classes[NUM_SMALL_SIZECLASSES]; + + /** + * Local entropy source and current version of keys for + * this thread + */ + LocalEntropy entropy; + + /** + * Message queue for allocations being returned to this + * allocator + */ + std::conditional_t< + Config::Options.IsQueueInline, + RemoteAllocator, + RemoteAllocator*> + remote_alloc; + + /** + * The type used local state. This is defined by the back end. + */ + using LocalState = typename Config::LocalState; + + /** + * A local area of address space managed by this allocator. + * Used to reduce calls on the global address space. This is inline if the + * core allocator owns the local state or indirect if it is owned + * externally. + */ + std::conditional_t< + Config::Options.CoreAllocOwnsLocalState, + LocalState, + LocalState*> + backend_state; + + /** + * This is the thread local structure associated to this + * allocator. + */ + LocalCache* attached_cache; + + /** + * Ticker to query the clock regularly at a lower cost. + */ + Ticker ticker; + + /** + * The message queue needs to be accessible from other threads + * + * In the cross trust domain version this is the minimum amount + * of allocator state that must be accessible to other threads. + */ + auto* public_state() + { + if constexpr (Config::Options.IsQueueInline) + { + return &remote_alloc; + } + else + { + return remote_alloc; + } + } + + /** + * Return a pointer to the backend state. + */ + LocalState* backend_state_ptr() + { + if constexpr (Config::Options.CoreAllocOwnsLocalState) + { + return &backend_state; + } + else + { + return backend_state; + } + } + + /** + * Return this allocator's "truncated" ID, an integer useful as a hash + * value of this allocator. + * + * Specifically, this is the address of this allocator's message queue + * with the least significant bits missing, masked by SIZECLASS_MASK. + * This will be unique for Allocs with inline queues; Allocs with + * out-of-line queues must ensure that no two queues' addresses collide + * under this masking. 
+ */ + size_t get_trunc_id() + { + return public_state()->trunc_id(); + } + + /** + * Abstracts access to the message queue to handle different + * layout configurations of the allocator. + */ + auto& message_queue() + { + return *public_state(); + } + + /** + * The message queue has non-trivial initialisation as it needs to + * be non-empty, so we prime it with a single allocation. + */ + void init_message_queue() + { + // Manufacture an allocation to prime the queue + // Using an actual allocation removes a conditional from a critical path. + auto dummy = capptr::Alloc(small_alloc_one(MIN_ALLOC_SIZE)) + .template as_static>(); + if (dummy == nullptr) + { + error("Critical error: Out-of-memory during initialisation."); + } + message_queue().init(dummy); + } + + /** + * There are a few internal corner cases where we need to allocate + * a small object. These are not on the fast path, + * - Allocating stub in the message queue + * Note this is not performance critical as very infrequently called. + */ + capptr::Alloc small_alloc_one(size_t size) + { + SNMALLOC_ASSERT(attached_cache != nullptr); + auto domesticate = + [this](freelist::QueuePtr p) SNMALLOC_FAST_PATH_LAMBDA { + return capptr_domesticate(backend_state_ptr(), p); + }; + // Use attached cache, and fill it if it is empty. + return attached_cache->template alloc( + domesticate, + size, + [&](smallsizeclass_t sizeclass, freelist::Iter<>* fl) { + return small_alloc(sizeclass, *fl); + }); + } + + static SNMALLOC_FAST_PATH void alloc_new_list( + capptr::Chunk& bumpptr, + BackendSlabMetadata* meta, + size_t rsize, + size_t slab_size, + LocalEntropy& entropy) + { + auto slab_end = pointer_offset(bumpptr, slab_size + 1 - rsize); + + auto& key = entropy.get_free_list_key(); + + auto& b = meta->free_queue; + +#ifdef SNMALLOC_CHECK_CLIENT + // Structure to represent the temporary list elements + struct PreAllocObject + { + capptr::AllocFull next; + }; + // The following code implements Sattolo's algorithm for generating + // random cyclic permutations. This implementation is in the opposite + // direction, so that the original space does not need initialising. This + // is described as outside-in without citation on Wikipedia, appears to be + // Folklore algorithm. + + // Note the wide bounds on curr relative to each of the ->next fields; + // curr is not persisted once the list is built. + capptr::Chunk curr = + pointer_offset(bumpptr, 0).template as_static(); + curr->next = Aal::capptr_bound( + curr, rsize); + + uint16_t count = 1; + for (curr = + pointer_offset(curr, rsize).template as_static(); + curr.as_void() < slab_end; + curr = + pointer_offset(curr, rsize).template as_static()) + { + size_t insert_index = entropy.sample(count); + curr->next = std::exchange( + pointer_offset(bumpptr, insert_index * rsize) + .template as_static() + ->next, + Aal::capptr_bound( + curr, rsize)); + count++; + } + + // Pick entry into space, and then build linked list by traversing cycle + // to the start. Use ->next to jump from Chunk to Alloc. 
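The cycle construction is easier to see on a plain index array. Below is a standalone sketch of the same inside-out construction of a random cyclic permutation, with `std::mt19937` standing in for `LocalEntropy::sample` purely for illustration:

```cpp
#include <cstddef>
#include <random>
#include <utility>
#include <vector>

// Build a random cyclic permutation of {0, ..., n-1}: next[i] is the
// successor of i, and following next[] from any index visits every element.
// Array indices stand in for the rsize-spaced objects in the slab.
std::vector<std::size_t> random_cycle(std::size_t n, std::mt19937& rng)
{
  std::vector<std::size_t> next(n);
  if (n == 0)
    return next;

  next[0] = 0; // a single element forms a cycle of length one
  for (std::size_t count = 1; count < n; count++)
  {
    // Pick an element already in the cycle and splice `count` in after it.
    std::uniform_int_distribution<std::size_t> dist(0, count - 1);
    std::size_t j = dist(rng);
    next[count] = std::exchange(next[j], count);
  }
  return next;
}
```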
+ auto start_index = entropy.sample(count); + auto start_ptr = pointer_offset(bumpptr, start_index * rsize) + .template as_static() + ->next; + auto curr_ptr = start_ptr; + do + { + b.add( + // Here begins our treatment of the heap as containing Wild pointers + freelist::Object::make( + capptr_to_user_address_control(curr_ptr.as_void())), + key, + entropy); + curr_ptr = curr_ptr->next; + } while (curr_ptr != start_ptr); +#else + auto p = bumpptr; + do + { + b.add( + // Here begins our treatment of the heap as containing Wild pointers + freelist::Object::make( + capptr_to_user_address_control( + Aal::capptr_bound( + p.as_void(), rsize))), + key); + p = pointer_offset(p, rsize); + } while (p < slab_end); +#endif + // This code consumes everything up to slab_end. + bumpptr = slab_end; + } + + capptr::Alloc + clear_slab(BackendSlabMetadata* meta, smallsizeclass_t sizeclass) + { + auto& key = entropy.get_free_list_key(); + freelist::Iter<> fl; + auto more = meta->free_queue.close(fl, key); + UNUSED(more); + auto local_state = backend_state_ptr(); + auto domesticate = [local_state](freelist::QueuePtr p) + SNMALLOC_FAST_PATH_LAMBDA { + return capptr_domesticate(local_state, p); + }; + capptr::Alloc p = + finish_alloc_no_zero(fl.take(key, domesticate), sizeclass); + +#ifdef SNMALLOC_CHECK_CLIENT + // Check free list is well-formed on platforms with + // integers as pointers. + size_t count = 1; // Already taken one above. + while (!fl.empty()) + { + fl.take(key, domesticate); + count++; + } + // Check the list contains all the elements + SNMALLOC_CHECK( + (count + more) == snmalloc::sizeclass_to_slab_object_count(sizeclass)); + + if (more > 0) + { + auto no_more = meta->free_queue.close(fl, key); + SNMALLOC_ASSERT(no_more == 0); + UNUSED(no_more); + + while (!fl.empty()) + { + fl.take(key, domesticate); + count++; + } + } + SNMALLOC_CHECK( + count == snmalloc::sizeclass_to_slab_object_count(sizeclass)); +#endif + // TODO: This is a capability amplification as we are saying we + // have the whole chunk. + auto start_of_slab = pointer_align_down( + p, snmalloc::sizeclass_to_slab_size(sizeclass)); + +#if defined(__CHERI_PURE_CAPABILITY__) && !defined(SNMALLOC_CHECK_CLIENT) + // Zero the whole slab. For CHERI we at least need to clear the freelist + // pointers to avoid leaking capabilities but we do not need to do it in + // the freelist order as for SNMALLOC_CHECK_CLIENT. Zeroing the whole slab + // may be more friendly to hw because it does not involve pointer chasing + // and is amenable to prefetching. + // FIXME: This should be a back-end method guarded on a feature flag. 
+#endif + +#ifdef SNMALLOC_TRACING + message<1024>( + "Slab {} is unused, Object sizeclass {}", + start_of_slab.unsafe_ptr(), + sizeclass); +#endif + return start_of_slab; + } + + template + SNMALLOC_SLOW_PATH void dealloc_local_slabs(smallsizeclass_t sizeclass) + { + // Return unused slabs of sizeclass_t back to global allocator + alloc_classes[sizeclass].available.filter([this, sizeclass](auto* meta) { + auto domesticate = + [this](freelist::QueuePtr p) SNMALLOC_FAST_PATH_LAMBDA { + auto res = capptr_domesticate(backend_state_ptr(), p); +#ifdef SNMALLOC_TRACING + if (res.unsafe_ptr() != p.unsafe_ptr()) + printf( + "Domesticated %p to %p!\n", p.unsafe_ptr(), res.unsafe_ptr()); +#endif + return res; + }; + + if (meta->needed() != 0) + { + if (check_slabs) + { + meta->free_queue.validate(entropy.get_free_list_key(), domesticate); + } + return false; + } + + alloc_classes[sizeclass].length--; + alloc_classes[sizeclass].unused--; + + // TODO delay the clear to the next user of the slab, or teardown so + // don't touch the cache lines at this point in snmalloc_check_client. + auto start = clear_slab(meta, sizeclass); + + Config::Backend::dealloc_chunk( + get_backend_local_state(), + *meta, + start, + sizeclass_to_slab_size(sizeclass)); + + return true; + }); + } + + /** + * Slow path for deallocating an object locally. + * This is either waking up a slab that was not actively being used + * by this thread, or handling the final deallocation onto a slab, + * so it can be reused by other threads. + */ + SNMALLOC_SLOW_PATH void + dealloc_local_object_slow(capptr::Alloc p, const PagemapEntry& entry) + { + // TODO: Handle message queue on this path? + + auto* meta = entry.get_slab_metadata(); + + if (meta->is_large()) + { + // Handle large deallocation here. + size_t entry_sizeclass = entry.get_sizeclass().as_large(); + size_t size = bits::one_at_bit(entry_sizeclass); + +#ifdef SNMALLOC_TRACING + message<1024>("Large deallocation: {}", size); +#else + UNUSED(size); +#endif + + Config::Backend::dealloc_chunk( + get_backend_local_state(), *meta, p, size); + + return; + } + + smallsizeclass_t sizeclass = entry.get_sizeclass().as_small(); + + UNUSED(entropy); + if (meta->is_sleeping()) + { + // Slab has been woken up add this to the list of slabs with free space. + + // Wake slab up. + meta->set_not_sleeping(sizeclass); + + alloc_classes[sizeclass].available.insert(meta); + alloc_classes[sizeclass].length++; + +#ifdef SNMALLOC_TRACING + message<1024>("Slab is woken up"); +#endif + + ticker.check_tick(); + return; + } + + alloc_classes[sizeclass].unused++; + + // If we have several slabs, and it isn't too expensive as a proportion + // return to the global pool. + if ( + (alloc_classes[sizeclass].unused > 2) && + (alloc_classes[sizeclass].unused > + (alloc_classes[sizeclass].length >> 2))) + { + dealloc_local_slabs(sizeclass); + } + ticker.check_tick(); + } + + /** + * Check if this allocator has messages to deallocate blocks from another + * thread + */ + SNMALLOC_FAST_PATH bool has_messages() + { + return !(message_queue().is_empty()); + } + + /** + * Process remote frees into this allocator. + */ + template + SNMALLOC_SLOW_PATH decltype(auto) + handle_message_queue_inner(Action action, Args... 
args) + { + bool need_post = false; + auto local_state = backend_state_ptr(); + auto domesticate = [local_state](freelist::QueuePtr p) + SNMALLOC_FAST_PATH_LAMBDA { + return capptr_domesticate(local_state, p); + }; + auto cb = [this, + &need_post](freelist::HeadPtr msg) SNMALLOC_FAST_PATH_LAMBDA { +#ifdef SNMALLOC_TRACING + message<1024>("Handling remote"); +#endif + + auto& entry = + Config::Backend::template get_metaentry(snmalloc::address_cast(msg)); + + handle_dealloc_remote(entry, msg.as_void(), need_post); + + return true; + }; + + if constexpr (Config::Options.QueueHeadsAreTame) + { + /* + * The front of the queue has already been validated; just change the + * annotating type. + */ + auto domesticate_first = + [](freelist::QueuePtr p) SNMALLOC_FAST_PATH_LAMBDA { + return freelist::HeadPtr::unsafe_from(p.unsafe_ptr()); + }; + message_queue().dequeue(key_global, domesticate_first, domesticate, cb); + } + else + { + message_queue().dequeue(key_global, domesticate, domesticate, cb); + } + + if (need_post) + { + post(); + } + + return action(args...); + } + + /** + * Dealloc a message either by putting for a forward, or + * deallocating locally. + * + * need_post will be set to true, if capacity is exceeded. + */ + void handle_dealloc_remote( + const PagemapEntry& entry, + CapPtr p, + bool& need_post) + { + // TODO this needs to not double count stats + // TODO this needs to not double revoke if using MTE + // TODO thread capabilities? + + if (SNMALLOC_LIKELY(entry.get_remote() == public_state())) + { + if (SNMALLOC_LIKELY( + dealloc_local_object_fast(entry, p.as_void(), entropy))) + return; + + dealloc_local_object_slow(p, entry); + } + else + { + if ( + !need_post && + !attached_cache->remote_dealloc_cache.reserve_space(entry)) + need_post = true; + attached_cache->remote_dealloc_cache + .template dealloc( + entry.get_remote()->trunc_id(), p.as_void(), key_global); + } + } + + /** + * Initialiser, shared code between the constructors for different + * configurations. + */ + void init() + { +#ifdef SNMALLOC_TRACING + message<1024>("Making an allocator."); +#endif + // Entropy must be first, so that all data-structures can use the key + // it generates. + // This must occur before any freelists are constructed. + entropy.init(); + + // Ignoring stats for now. + // stats().start(); + + if constexpr (Config::Options.IsQueueInline) + { + init_message_queue(); + message_queue().invariant(); + } + + if constexpr (DEBUG) + { + for (smallsizeclass_t i = 0; i < NUM_SMALL_SIZECLASSES; i++) + { + size_t size = sizeclass_to_size(i); + smallsizeclass_t sc1 = size_to_sizeclass(size); + smallsizeclass_t sc2 = size_to_sizeclass_const(size); + size_t size1 = sizeclass_to_size(sc1); + size_t size2 = sizeclass_to_size(sc2); + + SNMALLOC_CHECK(sc1 == i); + SNMALLOC_CHECK(sc1 == sc2); + SNMALLOC_CHECK(size1 == size); + SNMALLOC_CHECK(size1 == size2); + } + } + } + + public: + /** + * Constructor for the case that the core allocator owns the local state. + * SFINAE disabled if the allocator does not own the local state. + */ + template< + typename Config_ = Config, + typename = std::enable_if_t> + CoreAllocator(LocalCache* cache) : attached_cache(cache) + { + init(); + } + + /** + * Constructor for the case that the core allocator does not owns the local + * state. SFINAE disabled if the allocator does own the local state. 
+ */ + template< + typename Config_ = Config, + typename = std::enable_if_t> + CoreAllocator(LocalCache* cache, LocalState* backend = nullptr) + : backend_state(backend), attached_cache(cache) + { + init(); + } + + /** + * If the message queue is not inline, provide it. This will then + * configure the message queue for use. + */ + template + std::enable_if_t init_message_queue(RemoteAllocator* q) + { + remote_alloc = q; + init_message_queue(); + message_queue().invariant(); + } + + /** + * Post deallocations onto other threads. + * + * Returns true if it actually performed a post, + * and false otherwise. + */ + SNMALLOC_FAST_PATH bool post() + { + // stats().remote_post(); // TODO queue not in line! + bool sent_something = + attached_cache->remote_dealloc_cache + .post( + backend_state_ptr(), public_state()->trunc_id(), key_global); + + return sent_something; + } + + template + SNMALLOC_FAST_PATH decltype(auto) + handle_message_queue(Action action, Args... args) + { + // Inline the empty check, but not necessarily the full queue handling. + if (SNMALLOC_LIKELY(!has_messages())) + { + return action(args...); + } + + return handle_message_queue_inner(action, args...); + } + + SNMALLOC_FAST_PATH void + dealloc_local_object(CapPtr p) + { + // PagemapEntry-s seen here are expected to have meaningful Remote + // pointers + auto& entry = + Config::Backend::template get_metaentry(snmalloc::address_cast(p)); + if (SNMALLOC_LIKELY(dealloc_local_object_fast(entry, p, entropy))) + return; + + dealloc_local_object_slow(p, entry); + } + + SNMALLOC_FAST_PATH static bool dealloc_local_object_fast( + const PagemapEntry& entry, + CapPtr p, + LocalEntropy& entropy) + { + auto meta = entry.get_slab_metadata(); + + SNMALLOC_ASSERT(!meta->is_unused()); + + snmalloc_check_client( + is_start_of_object(entry.get_sizeclass(), address_cast(p)), + "Not deallocating start of an object"); + + auto cp = p.as_static>(); + + auto& key = entropy.get_free_list_key(); + + // Update the head and the next pointer in the free list. + meta->free_queue.add(cp, key, entropy); + + return SNMALLOC_LIKELY(!meta->return_object()); + } + + template + SNMALLOC_SLOW_PATH capptr::Alloc + small_alloc(smallsizeclass_t sizeclass, freelist::Iter<>& fast_free_list) + { + // Look to see if we can grab a free list. + auto& sl = alloc_classes[sizeclass].available; + if (SNMALLOC_LIKELY(alloc_classes[sizeclass].length > 0)) + { +#ifdef SNMALLOC_CHECK_CLIENT + // Occassionally don't use the last list. + if (SNMALLOC_UNLIKELY(alloc_classes[sizeclass].length == 1)) + { + // If the slab has a lot of free space, then we shouldn't allocate a + // new slab. + auto min = alloc_classes[sizeclass] + .available.peek() + ->free_queue.min_list_length(); + if ((min * 2) < threshold_for_waking_slab(sizeclass)) + if (entropy.next_bit() == 0) + return small_alloc_slow(sizeclass, fast_free_list); + } +#endif + + auto meta = sl.pop(); + // Drop length of sl, and empty count if it was empty. 
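These `length`/`unused` counters feed the return-to-global heuristic used in `dealloc_local_object_slow` earlier in this file. Restated as an illustrative helper (the struct and function here are not snmalloc code; only the condition is taken from the patch):

```cpp
#include <cstdint>

// Mirror of the two counters kept per size class in SlabMetadataCache.
struct SizeClassStats
{
  uint16_t unused = 0; // slabs with no outstanding allocations
  uint16_t length = 0; // slabs currently available to this allocator
};

// Return unused slabs to the global allocator only once more than two are
// unused and they exceed a quarter of the slabs held for this size class.
bool should_return_unused_slabs(const SizeClassStats& s)
{
  return (s.unused > 2) && (s.unused > (s.length >> 2));
}
```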
+ alloc_classes[sizeclass].length--; + if (meta->needed() == 0) + alloc_classes[sizeclass].unused--; + + auto domesticate = + [this](freelist::QueuePtr p) SNMALLOC_FAST_PATH_LAMBDA { + return capptr_domesticate(backend_state_ptr(), p); + }; + auto [p, still_active] = BackendSlabMetadata::alloc_free_list( + domesticate, meta, fast_free_list, entropy, sizeclass); + + if (still_active) + { + alloc_classes[sizeclass].length++; + sl.insert(meta); + } + + auto r = finish_alloc(p, sizeclass); + return ticker.check_tick(r); + } + return small_alloc_slow(sizeclass, fast_free_list); + } + + /** + * Accessor for the local state. This hides whether the local state is + * stored inline or provided externally from the rest of the code. + */ + SNMALLOC_FAST_PATH + LocalState& get_backend_local_state() + { + if constexpr (Config::Options.CoreAllocOwnsLocalState) + { + return backend_state; + } + else + { + SNMALLOC_ASSERT(backend_state); + return *backend_state; + } + } + + template + SNMALLOC_SLOW_PATH capptr::Alloc small_alloc_slow( + smallsizeclass_t sizeclass, freelist::Iter<>& fast_free_list) + { + size_t rsize = sizeclass_to_size(sizeclass); + + // No existing free list get a new slab. + size_t slab_size = sizeclass_to_slab_size(sizeclass); + +#ifdef SNMALLOC_TRACING + message<1024>("small_alloc_slow rsize={} slab size={}", rsize, slab_size); +#endif + + auto [slab, meta] = Config::Backend::alloc_chunk( + get_backend_local_state(), + slab_size, + PagemapEntry::encode( + public_state(), sizeclass_t::from_small_class(sizeclass))); + + if (slab == nullptr) + { + return nullptr; + } + + // Set meta slab to empty. + meta->initialise(sizeclass); + + // Build a free list for the slab + alloc_new_list(slab, meta, rsize, slab_size, entropy); + + auto domesticate = + [this](freelist::QueuePtr p) SNMALLOC_FAST_PATH_LAMBDA { + return capptr_domesticate(backend_state_ptr(), p); + }; + auto [p, still_active] = BackendSlabMetadata::alloc_free_list( + domesticate, meta, fast_free_list, entropy, sizeclass); + + if (still_active) + { + alloc_classes[sizeclass].length++; + alloc_classes[sizeclass].available.insert(meta); + } + + auto r = finish_alloc(p, sizeclass); + return ticker.check_tick(r); + } + + /** + * Flush the cached state and delayed deallocations + * + * Returns true if messages are sent to other threads. + */ + bool flush(bool destroy_queue = false) + { + SNMALLOC_ASSERT(attached_cache != nullptr); + auto local_state = backend_state_ptr(); + auto domesticate = [local_state](freelist::QueuePtr p) + SNMALLOC_FAST_PATH_LAMBDA { + return capptr_domesticate(local_state, p); + }; + + if (destroy_queue) + { + auto p_wild = message_queue().destroy(); + auto p_tame = domesticate(p_wild); + + while (p_tame != nullptr) + { + bool need_post = true; // Always going to post, so ignore. + auto n_tame = p_tame->atomic_read_next(key_global, domesticate); + const PagemapEntry& entry = + Config::Backend::get_metaentry(snmalloc::address_cast(p_tame)); + handle_dealloc_remote(entry, p_tame.as_void(), need_post); + p_tame = n_tame; + } + } + else + { + // Process incoming message queue + // Loop as normally only processes a batch + while (has_messages()) + handle_message_queue([]() {}); + } + + auto posted = attached_cache->flush( + backend_state_ptr(), + [&](capptr::Alloc p) { dealloc_local_object(p); }); + + // We may now have unused slabs, return to the global allocator. 
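/*
 * Illustrative sketch of what "build a free list for the slab" means in
 * small_alloc_slow above. This is not snmalloc's alloc_new_list (which also
 * signs each link and can randomise the order); it only shows the basic idea
 * of carving a fresh slab into rsize-byte objects. Names are placeholders.
 */
#include <cstddef>

struct FreeNode
{
  FreeNode* next;
};

// Thread every rsize-byte object in [slab, slab + slab_size) onto a list.
// Assumes rsize >= sizeof(FreeNode) and rsize > 0.
inline FreeNode* carve_slab(void* slab, std::size_t slab_size, std::size_t rsize)
{
  char* base = static_cast<char*>(slab);
  FreeNode* head = nullptr;
  // Build back to front so objects are handed out in address order.
  for (std::size_t offset = slab_size; offset >= rsize; offset -= rsize)
  {
    auto* n = reinterpret_cast<FreeNode*>(base + offset - rsize);
    n->next = head;
    head = n;
  }
  return head;
}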
+ for (smallsizeclass_t sizeclass = 0; sizeclass < NUM_SMALL_SIZECLASSES; + sizeclass++) + { + dealloc_local_slabs(sizeclass); + } + + return posted; + } + + // This allows the caching layer to be attached to an underlying + // allocator instance. + void attach(LocalCache* c) + { +#ifdef SNMALLOC_TRACING + message<1024>("Attach cache to {}", this); +#endif + attached_cache = c; + + // Set up secrets. + c->entropy = entropy; + + // Set up remote allocator. + c->remote_allocator = public_state(); + + // Set up remote cache. + c->remote_dealloc_cache.init(); + } + + /** + * Performs the work of checking if empty under the assumption that + * a local cache has been attached. + */ + bool debug_is_empty_impl(bool* result) + { + auto test = [&result](auto& queue, smallsizeclass_t size_class) { + queue.filter([&result, size_class](auto slab_metadata) { + if (slab_metadata->needed() != 0) + { + if (result != nullptr) + *result = false; + else + report_fatal_error( + "debug_is_empty: found non-empty allocator: size={} ({})", + sizeclass_to_size(size_class), + size_class); + } + return false; + }); + }; + + bool sent_something = flush(true); + + smallsizeclass_t size_class = 0; + for (auto& alloc_class : alloc_classes) + { + test(alloc_class.available, size_class); + size_class++; + } + + // Place the static stub message on the queue. + init_message_queue(); + +#ifdef SNMALLOC_TRACING + message<1024>("debug_is_empty - done"); +#endif + return sent_something; + } + + /** + * If result parameter is non-null, then false is assigned into the + * the location pointed to by result if this allocator is non-empty. + * + * If result pointer is null, then this code raises a Pal::error on the + * particular check that fails, if any do fail. + * + * Do not run this while other thread could be deallocating as the + * message queue invariant is temporarily broken. + */ + bool debug_is_empty(bool* result) + { +#ifdef SNMALLOC_TRACING + message<1024>("debug_is_empty"); +#endif + if (attached_cache == nullptr) + { + // We need a cache to perform some operations, so set one up + // temporarily + LocalCache temp(public_state()); + attach(&temp); +#ifdef SNMALLOC_TRACING + message<1024>("debug_is_empty - attach a cache"); +#endif + auto sent_something = debug_is_empty_impl(result); + + // Remove cache from the allocator + flush(); + attached_cache = nullptr; + return sent_something; + } + + return debug_is_empty_impl(result); + } + }; + + /** + * Use this alias to access the pool of allocators throughout snmalloc. 
+ */ + template + using AllocPool = Pool, Config, Config::pool>; +} // namespace snmalloc diff --git a/src/mem/entropy.h b/src/snmalloc/mem/entropy.h similarity index 75% rename from src/mem/entropy.h rename to src/snmalloc/mem/entropy.h index 329e10297..1b590942f 100644 --- a/src/mem/entropy.h +++ b/src/snmalloc/mem/entropy.h @@ -1,4 +1,5 @@ -#include "../ds/address.h" +#pragma once + #include "../pal/pal.h" #include @@ -27,25 +28,48 @@ namespace snmalloc #endif } + struct FreeListKey + { + address_t key1; + address_t key2; + address_t key_next; + + constexpr FreeListKey(uint64_t key1, uint64_t key2, uint64_t key_next) + : key1(static_cast(key1)), + key2(static_cast(key2)), + key_next(static_cast(key_next)) + {} + }; + class LocalEntropy { - uint64_t bit_source; - uint64_t local_key; - uint64_t local_counter; - address_t constant_key; - uint64_t fresh_bits; - uint64_t count; + uint64_t bit_source{0}; + uint64_t local_key{0}; + uint64_t local_counter{0}; + uint64_t fresh_bits{0}; + uint64_t count{0}; + FreeListKey key{0, 0, 0}; public: + constexpr LocalEntropy() = default; + template void init() { local_key = get_entropy64(); local_counter = get_entropy64(); if constexpr (bits::BITS == 64) - constant_key = get_next(); + { + key.key1 = get_next(); + key.key2 = get_next(); + key.key_next = get_next(); + } else - constant_key = get_next() & 0xffff'ffff; + { + key.key1 = get_next() & 0xffff'ffff; + key.key2 = get_next() & 0xffff'ffff; + key.key_next = get_next() & 0xffff'ffff; + } bit_source = get_next(); } @@ -60,17 +84,15 @@ namespace snmalloc { uint64_t bottom_bit = bit_source & 1; bit_source = (bottom_bit << 63) | (bit_source >> 1); - return bottom_bit & 1; + return bit_source & 1; } /** - * A key that is not changed or used to create other keys - * - * This is for use when there is no storage for the key. + * A key for the free lists for this thread. */ - address_t get_constant_key() + const FreeListKey& get_free_list_key() { - return constant_key; + return key; } /** diff --git a/src/mem/external_alloc.h b/src/snmalloc/mem/external_alloc.h similarity index 79% rename from src/mem/external_alloc.h rename to src/snmalloc/mem/external_alloc.h index ae35870fd..4eecfa96f 100644 --- a/src/mem/external_alloc.h +++ b/src/snmalloc/mem/external_alloc.h @@ -52,9 +52,24 @@ namespace snmalloc::external_alloc { inline void* aligned_alloc(size_t alignment, size_t size) { + // TSAN complains if allocation is large than this. + if constexpr (bits::BITS == 64) + { + if (size >= 0x10000000000) + { + errno = ENOMEM; + return nullptr; + } + } + + if (alignment < sizeof(void*)) + alignment = sizeof(void*); + void* result; - if (posix_memalign(&result, alignment, size) != 0) + int err = posix_memalign(&result, alignment, size); + if (err != 0) { + errno = err; result = nullptr; } return result; diff --git a/src/snmalloc/mem/freelist.h b/src/snmalloc/mem/freelist.h new file mode 100644 index 000000000..335f12881 --- /dev/null +++ b/src/snmalloc/mem/freelist.h @@ -0,0 +1,774 @@ +#pragma once +/** + * This file encapsulates the in disused object free lists + * that are used per slab of small objects. The implementation + * can be configured to introduce randomness to the reallocation, + * and also provide signing to detect free list corruption. + * + * # Corruption + * + * The corruption detection works as follows + * + * free Object + * ----------------------------- + * | next | prev_encoded | ... 
| + * ----------------------------- + * A free object contains a pointer to next object in the free list, and + * a prev pointer, but the prev pointer is really a signature with the + * following property + * + * If n = c->next && n != 0, then n->prev_encoded = f(c,n). + * + * If f just returns the first parameter, then this degenerates to a doubly + * linked list. (Note that doing the degenerate case can be useful for + * debugging snmalloc bugs.) By making it a function of both pointers, it + * makes it harder for an adversary to mutate prev_encoded to a valid value. + * + * This provides protection against the free-list being corrupted by memory + * safety issues. + * + * # Randomness + * + * The randomness is introduced by building two free lists simulatenously, + * and randomly deciding which list to add an element to. + */ + +#include "../ds/ds.h" +#include "entropy.h" + +#include + +namespace snmalloc +{ + /** + * This function is used to sign back pointers in the free list. + */ + inline static address_t + signed_prev(address_t curr, address_t next, const FreeListKey& key) + { + auto c = curr; + auto n = next; + return (c + key.key1) * (n + key.key2); + } + + namespace freelist + { + class Object + { + public: + template< + SNMALLOC_CONCEPT(capptr::IsBound) BQueue = capptr::bounds::AllocWild> + class T; + + /** + * This "inductive step" type -- a queue-annotated pointer to a free + * Object containing a queue-annotated pointer -- shows up all over the + * place. Give it a shorter name (Object::BQueuePtr) for + * convenience. + */ + template + using BQueuePtr = CapPtr, BQueue>; + + /** + * As with BQueuePtr, but atomic. + */ + template + using BAtomicQueuePtr = AtomicCapPtr, BQueue>; + + /** + * This is the "base case" of that induction. While we can't get rid of + * the two different type parameters (in general), we can at least get rid + * of a bit of the clutter. "freelist::Object::HeadPtr" + * looks a little nicer than "CapPtr, BView>". + */ + template< + SNMALLOC_CONCEPT(capptr::IsBound) BView, + SNMALLOC_CONCEPT(capptr::IsBound) BQueue> + using BHeadPtr = CapPtr, BView>; + + /** + * As with BHeadPtr, but atomic. + */ + template< + SNMALLOC_CONCEPT(capptr::IsBound) BView, + SNMALLOC_CONCEPT(capptr::IsBound) BQueue> + using BAtomicHeadPtr = AtomicCapPtr, BView>; + + /** + * Free objects within each slab point directly to the next. + * There is an optional second field that is effectively a + * back pointer in a doubly linked list, however, it is encoded + * to prevent corruption. + * + * This is an inner class to avoid the need to specify BQueue when calling + * static methods. + * + * Raw C++ pointers to this type are *assumed to be domesticated*. In + * some cases we still explicitly annotate domesticated free Object*-s as + * CapPtr<>, but more often CapPtr,B> will have B = A. + * + * TODO: Consider putting prev_encoded at the end of the object, would + * require size to be threaded through, but would provide more OOB + * detection. + */ + template + class T + { + template< + bool, + bool, + SNMALLOC_CONCEPT(capptr::IsBound), + SNMALLOC_CONCEPT(capptr::IsBound)> + friend class Builder; + + friend class Object; + + union + { + BQueuePtr next_object; + // TODO: Should really use C++20 atomic_ref rather than a union. + BAtomicQueuePtr atomic_next_object; + }; +#ifdef SNMALLOC_CHECK_CLIENT + // Encoded representation of a back pointer. + // Hard to fake, and provides consistency on + // the next pointers. 
+ address_t prev_encoded; +#endif + + public: + template< + SNMALLOC_CONCEPT(capptr::IsBound) BView = typename BQueue:: + template with_wildness, + typename Domesticator> + BHeadPtr + atomic_read_next(const FreeListKey& key, Domesticator domesticate) + { + auto n_wild = Object::decode_next( + address_cast(&this->next_object), + this->atomic_next_object.load(std::memory_order_acquire), + key); + auto n_tame = domesticate(n_wild); +#ifdef SNMALLOC_CHECK_CLIENT + if (n_tame != nullptr) + { + n_tame->check_prev( + signed_prev(address_cast(this), address_cast(n_tame), key)); + } +#endif + return n_tame; + } + + /** + * Read the next pointer + */ + template< + SNMALLOC_CONCEPT(capptr::IsBound) BView = typename BQueue:: + template with_wildness, + typename Domesticator> + BHeadPtr + read_next(const FreeListKey& key, Domesticator domesticate) + { + return domesticate(Object::decode_next( + address_cast(&this->next_object), this->next_object, key)); + } + + /** + * Check the signature of this free Object + */ + void check_prev(address_t signed_prev) + { + UNUSED(signed_prev); + snmalloc_check_client( + signed_prev == this->prev_encoded, + "Heap corruption - free list corrupted!"); + } + + /** + * Clean up this object when removing it from the list. This is + * important on CHERI to avoid leaking capabilities. On CHECK_CLIENT + * builds it might increase the difficulty to bypass the checks. + */ + void cleanup() + { +#if defined(__CHERI_PURE_CAPABILITY__) || defined(SNMALLOC_CHECK_CLIENT) + this->next_object = nullptr; +# ifdef SNMALLOC_CHECK_CLIENT + this->prev_encoded = 0; +# endif +#endif + } + }; + + // Note the inverted template argument order, since BView is inferable. + template< + SNMALLOC_CONCEPT(capptr::IsBound) BQueue, + SNMALLOC_CONCEPT(capptr::IsBound) BView> + static BHeadPtr make(CapPtr p) + { + return p.template as_static>(); + } + + /** + * A container-of operation to convert &f->next_object to f + */ + template + static Object::T* + from_next_ptr(CapPtr, BQueue>* ptr) + { + static_assert(offsetof(Object::T, next_object) == 0); + return reinterpret_cast*>(ptr); + } + + private: + /** + * Involutive encryption with raw pointers + */ + template + inline static Object::T* + code_next(address_t curr, Object::T* next, const FreeListKey& key) + { + // Note we can consider other encoding schemes here. + // * XORing curr and next. This doesn't require any key material + // * XORing (curr * key). This makes it harder to guess the underlying + // key, as each location effectively has its own key. + // Curr is not used in the current encoding scheme. + UNUSED(curr); + + if constexpr (CHECK_CLIENT && !aal_supports) + { + return unsafe_from_uintptr>( + unsafe_to_uintptr>(next) ^ key.key_next); + } + else + { + UNUSED(key); + return next; + } + } + + public: + /** + * Encode next. We perform two convenient little bits of type-level + * sleight of hand here: + * + * 1) We convert the provided HeadPtr to a QueuePtr, forgetting BView in + * the result; all the callers write the result through a pointer to a + * QueuePtr, though, strictly, the result itself is no less domesticated + * than the input (even if it is obfuscated). + * + * 2) Speaking of obfuscation, we continue to use a CapPtr<> type even + * though the result is likely not safe to dereference, being an + * obfuscated bundle of bits (on non-CHERI architectures, anyway). That's + * additional motivation to consider the result BQueue-bounded, as that + * is likely (but not necessarily) Wild. 
+ */ + template< + SNMALLOC_CONCEPT(capptr::IsBound) BView, + SNMALLOC_CONCEPT(capptr::IsBound) BQueue> + inline static BQueuePtr encode_next( + address_t curr, BHeadPtr next, const FreeListKey& key) + { + return BQueuePtr::unsafe_from( + code_next(curr, next.unsafe_ptr(), key)); + } + + /** + * Decode next. While traversing a queue, BView and BQueue here will + * often be equal (i.e., AllocUserWild) rather than dichotomous. However, + * we do occasionally decode an actual head pointer, so be polymorphic + * here. + * + * TODO: We'd like, in some sense, to more tightly couple or integrate + * this into to the domestication process. We could introduce an + * additional state in the capptr_bounds::wild taxonomy (e.g, Obfuscated) + * so that the Domesticator-s below have to call through this function to + * get the Wild pointer they can then make Tame. It's not yet entirely + * clear what that would look like and whether/how the encode_next side of + * things should be exposed. For the moment, obfuscation is left + * encapsulated within Object and we do not capture any of it statically. + */ + template< + SNMALLOC_CONCEPT(capptr::IsBound) BView, + SNMALLOC_CONCEPT(capptr::IsBound) BQueue> + inline static BHeadPtr decode_next( + address_t curr, BHeadPtr next, const FreeListKey& key) + { + return BHeadPtr::unsafe_from( + code_next(curr, next.unsafe_ptr(), key)); + } + + template< + SNMALLOC_CONCEPT(capptr::IsBound) BView, + SNMALLOC_CONCEPT(capptr::IsBound) BQueue> + static void assert_view_queue_bounds() + { + static_assert( + BView::wildness == capptr::dimension::Wildness::Tame, + "Free Object View must be domesticated, justifying raw pointers"); + + static_assert( + std::is_same_v< + typename BQueue::template with_wildness< + capptr::dimension::Wildness::Tame>, + BView>, + "Free Object Queue bounds must match View bounds (but may be Wild)"); + } + + /** + * Assign next_object and update its prev_encoded if + * SNMALLOC_CHECK_CLIENT. Static so that it can be used on reference to a + * free Object. + * + * Returns a pointer to the next_object field of the next parameter as an + * optimization for repeated snoc operations (in which + * next->next_object is nullptr). + */ + template< + SNMALLOC_CONCEPT(capptr::IsBound) BView, + SNMALLOC_CONCEPT(capptr::IsBound) BQueue> + static BQueuePtr* store_next( + BQueuePtr* curr, + BHeadPtr next, + const FreeListKey& key) + { + assert_view_queue_bounds(); + +#ifdef SNMALLOC_CHECK_CLIENT + next->prev_encoded = + signed_prev(address_cast(curr), address_cast(next), key); +#else + UNUSED(key); +#endif + *curr = encode_next(address_cast(curr), next, key); + return &(next->next_object); + } + + template + static void store_null(BQueuePtr* curr, const FreeListKey& key) + { + *curr = + encode_next(address_cast(curr), BQueuePtr(nullptr), key); + } + + /** + * Assign next_object and update its prev_encoded if SNMALLOC_CHECK_CLIENT + * + * Uses the atomic view of next, so can be used in the message queues. + */ + template< + SNMALLOC_CONCEPT(capptr::IsBound) BView, + SNMALLOC_CONCEPT(capptr::IsBound) BQueue> + static void atomic_store_next( + BHeadPtr curr, + BHeadPtr next, + const FreeListKey& key) + { + static_assert(BView::wildness == capptr::dimension::Wildness::Tame); + +#ifdef SNMALLOC_CHECK_CLIENT + next->prev_encoded = + signed_prev(address_cast(curr), address_cast(next), key); +#else + UNUSED(key); +#endif + // Signature needs to be visible before item is linked in + // so requires release semantics. 
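/*
 * Sketch of the pointer obfuscation applied by code_next above (shared by
 * encode_next and decode_next): on builds where obfuscation is enabled, the
 * next pointer is XORed with key_next. XOR with a fixed key is an involution,
 * so the same function serves as both encoder and decoder. Standalone
 * illustration with plain integers (names and constants are placeholders):
 */
#include <cassert>
#include <cstdint>

inline std::uint64_t code(std::uint64_t p, std::uint64_t key_next)
{
  return p ^ key_next;
}

inline void involution_demo()
{
  std::uint64_t key_next = 0x9e3779b97f4a7c15ULL; // arbitrary example key
  std::uint64_t p = 0x0000700012345640ULL;        // stand-in for a pointer value
  // Applying the same operation twice recovers the original value.
  assert(code(code(p, key_next), key_next) == p);
}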
+ curr->atomic_next_object.store( + encode_next(address_cast(&curr->next_object), next, key), + std::memory_order_release); + } + + template< + SNMALLOC_CONCEPT(capptr::IsBound) BView, + SNMALLOC_CONCEPT(capptr::IsBound) BQueue> + static void + atomic_store_null(BHeadPtr curr, const FreeListKey& key) + { + static_assert(BView::wildness == capptr::dimension::Wildness::Tame); + + curr->atomic_next_object.store( + encode_next( + address_cast(&curr->next_object), BQueuePtr(nullptr), key), + std::memory_order_relaxed); + } + }; + + static_assert( + sizeof(Object) <= MIN_ALLOC_SIZE, + "Needs to be able to fit in smallest allocation."); + + /** + * External code almost always uses Alloc and AllocWild for its free lists. + * Give them a convenient alias. + */ + using HeadPtr = + Object::BHeadPtr; + + /** + * Like HeadPtr, but atomic + */ + using AtomicHeadPtr = + Object::BAtomicHeadPtr; + + /** + * External code's inductive cases almost always use AllocWild. + */ + using QueuePtr = Object::BQueuePtr; + + /** + * Like QueuePtr, but atomic + */ + using AtomicQueuePtr = Object::BAtomicQueuePtr; + + /** + * Used to iterate a free list in object space. + * + * Checks signing of pointers + */ + template< + SNMALLOC_CONCEPT(capptr::IsBound) BView = capptr::bounds::Alloc, + SNMALLOC_CONCEPT(capptr::IsBound) BQueue = capptr::bounds::AllocWild> + class Iter + { + Object::BHeadPtr curr{nullptr}; +#ifdef SNMALLOC_CHECK_CLIENT + address_t prev{0}; +#endif + + public: + constexpr Iter(Object::BHeadPtr head, address_t prev_value) + : curr(head) + { +#ifdef SNMALLOC_CHECK_CLIENT + prev = prev_value; +#endif + UNUSED(prev_value); + } + + constexpr Iter() = default; + + /** + * Checks if there are any more values to iterate. + */ + bool empty() + { + return curr == nullptr; + } + + /** + * Returns current head without affecting the iterator. + */ + Object::BHeadPtr peek() + { + return curr; + } + + /** + * Moves the iterator on, and returns the current value. + */ + template + Object::BHeadPtr + take(const FreeListKey& key, Domesticator domesticate) + { + auto c = curr; + auto next = curr->read_next(key, domesticate); + + Aal::prefetch(next.unsafe_ptr()); + curr = next; +#ifdef SNMALLOC_CHECK_CLIENT + c->check_prev(prev); + prev = signed_prev(address_cast(c), address_cast(next), key); +#else + UNUSED(key); +#endif + c->cleanup(); + return c; + } + }; + + /** + * Used to build a free list in object space. + * + * Adds signing of pointers in the SNMALLOC_CHECK_CLIENT mode + * + * If RANDOM is enabled, the builder uses two queues, and + * "randomly" decides to add to one of the two queues. This + * means that we will maintain a randomisation of the order + * between allocations. + * + * The fields are paired up to give better codegen as then they are offset + * by a power of 2, and the bit extract from the interleaving seed can + * be shifted to calculate the relevant offset to index the fields. + * + * If RANDOM is set to false, then the code does not perform any + * randomisation. + */ + template< + bool RANDOM, + bool INIT = true, + SNMALLOC_CONCEPT(capptr::IsBound) BView = capptr::bounds::Alloc, + SNMALLOC_CONCEPT(capptr::IsBound) BQueue = capptr::bounds::AllocWild> + class Builder + { + static constexpr size_t LENGTH = RANDOM ? 2 : 1; + + /* + * We use native pointers below so that we don't run afoul of strict + * aliasing rules. 
head is a Object::HeadPtr -- that is, a + * known-domesticated pointer to a queue of wild pointers -- and it's + * usually the case that end is a Object::BQueuePtr* -- that is, a + * known-domesticated pointer to a wild pointer to a queue of wild + * pointers. However, in order to do branchless inserts, we set end = + * &head, which breaks strict aliasing rules with the types as given. + * Fortunately, these are private members and so we can use native + * pointers and just expose a more strongly typed interface. + */ + + // Pointer to the first element. + std::array head{nullptr}; + // Pointer to the reference to the last element. + // In the empty case end[i] == &head[i] + // This enables branch free enqueuing. + std::array end{nullptr}; + + Object::BQueuePtr* cast_end(uint32_t ix) + { + return reinterpret_cast*>(end[ix]); + } + + void set_end(uint32_t ix, Object::BQueuePtr* p) + { + end[ix] = reinterpret_cast(p); + } + + Object::BHeadPtr cast_head(uint32_t ix) + { + return Object::BHeadPtr::unsafe_from( + static_cast*>(head[ix])); + } + + std::array length{}; + + public: + constexpr Builder() + { + if (INIT) + { + init(); + } + } + + /** + * Checks if the builder contains any elements. + */ + bool empty() + { + for (size_t i = 0; i < LENGTH; i++) + { + if (end[i] != &head[i]) + { + return false; + } + } + return true; + } + + /** + * Adds an element to the builder + */ + void add( + Object::BHeadPtr n, + const FreeListKey& key, + LocalEntropy& entropy) + { + uint32_t index; + if constexpr (RANDOM) + index = entropy.next_bit(); + else + index = 0; + + set_end(index, Object::store_next(cast_end(index), n, key)); + if constexpr (RANDOM) + { + length[index]++; + } + } + + /** + * Adds an element to the builder, if we are guaranteed that + * RANDOM is false. This is useful in certain construction + * cases that do not need to introduce randomness, such as + * during the initialisation construction of a free list, which + * uses its own algorithm, or during building remote deallocation + * lists, which will be randomised at the other end. + */ + template + std::enable_if_t + add(Object::BHeadPtr n, const FreeListKey& key) + { + static_assert(RANDOM_ == RANDOM, "Don't set template parameter"); + set_end(0, Object::store_next(cast_end(0), n, key)); + } + + /** + * Makes a terminator to a free list. + */ + SNMALLOC_FAST_PATH void + terminate_list(uint32_t index, const FreeListKey& key) + { + Object::store_null(cast_end(index), key); + } + + /** + * Read head removing potential encoding + * + * Although, head does not require meta-data protection + * as it is not stored in an object allocation. For uniformity + * it is treated like the next_object field in a free Object + * and is thus subject to encoding if the next_object pointers + * encoded. + */ + Object::BHeadPtr + read_head(uint32_t index, const FreeListKey& key) + { + return Object::decode_next( + address_cast(&head[index]), cast_head(index), key); + } + + address_t get_fake_signed_prev(uint32_t index, const FreeListKey& key) + { + return signed_prev( + address_cast(&head[index]), address_cast(read_head(index, key)), key); + } + + /** + * Close a free list, and set the iterator parameter + * to iterate it. + * + * In the RANDOM case, it may return only part of the freelist. + * + * The return value is how many entries are still contained in the + * builder. 
+ */ + SNMALLOC_FAST_PATH uint16_t + close(Iter& fl, const FreeListKey& key) + { + uint32_t i; + if constexpr (RANDOM) + { + SNMALLOC_ASSERT(end[1] != &head[0]); + SNMALLOC_ASSERT(end[0] != &head[1]); + + // Select longest list. + i = length[0] > length[1] ? 0 : 1; + } + else + { + i = 0; + } + + terminate_list(i, key); + + fl = {read_head(i, key), get_fake_signed_prev(i, key)}; + + end[i] = &head[i]; + + if constexpr (RANDOM) + { + length[i] = 0; + return length[1 - i]; + } + else + { + return 0; + } + } + + /** + * Set the builder to a not building state. + */ + constexpr void init() + { + for (size_t i = 0; i < LENGTH; i++) + { + end[i] = &head[i]; + if (RANDOM) + { + length[i] = 0; + } + } + } + + template + std::enable_if_t< + !RANDOM_, + std::pair< + Object::BHeadPtr, + Object::BHeadPtr>> + extract_segment(const FreeListKey& key) + { + static_assert(RANDOM_ == RANDOM, "Don't set SFINAE parameter!"); + SNMALLOC_ASSERT(!empty()); + + auto first = read_head(0, key); + // end[0] is pointing to the first field in the object, + // this is doing a CONTAINING_RECORD like cast to get back + // to the actual object. This isn't true if the builder is + // empty, but you are not allowed to call this in the empty case. + auto last = Object::BHeadPtr::unsafe_from( + Object::from_next_ptr(cast_end(0))); + init(); + return {first, last}; + } + + template + SNMALLOC_FAST_PATH void + validate(const FreeListKey& key, Domesticator domesticate) + { +#ifdef SNMALLOC_CHECK_CLIENT + for (uint32_t i = 0; i < LENGTH; i++) + { + if (&head[i] == end[i]) + { + SNMALLOC_CHECK(length[i] == 0); + continue; + } + + size_t count = 1; + auto curr = read_head(i, key); + auto prev = get_fake_signed_prev(i, key); + while (true) + { + curr->check_prev(prev); + if (address_cast(&(curr->next_object)) == address_cast(end[i])) + break; + count++; + auto next = curr->read_next(key, domesticate); + prev = signed_prev(address_cast(curr), address_cast(next), key); + curr = next; + } + SNMALLOC_CHECK(count == length[i]); + } +#else + UNUSED(key); + UNUSED(domesticate); +#endif + } + + /** + * Returns length of the shorter free list. + * + * This method is only usable if the free list is adding randomisation + * as that is when it has two lists. + */ + template + [[nodiscard]] std::enable_if_t min_list_length() const + { + static_assert(RANDOM_ == RANDOM, "Don't set SFINAE parameter!"); + + return length[0] < length[1] ? length[0] : length[1]; + } + }; + } // namespace freelist +} // namespace snmalloc diff --git a/src/snmalloc/mem/globalalloc.h b/src/snmalloc/mem/globalalloc.h new file mode 100644 index 000000000..dc9528f66 --- /dev/null +++ b/src/snmalloc/mem/globalalloc.h @@ -0,0 +1,137 @@ +#pragma once + +#include "../ds_core/ds_core.h" +#include "localalloc.h" + +namespace snmalloc +{ + template + inline static void cleanup_unused() + { +#ifndef SNMALLOC_PASS_THROUGH + static_assert( + Config::Options.CoreAllocIsPoolAllocated, + "Global cleanup is available only for pool-allocated configurations"); + // Call this periodically to free and coalesce memory allocated by + // allocators that are not currently in use by any thread. + // One atomic operation to extract the stack, another to restore it. + // Handling the message queue for each stack is non-atomic. 
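/*
 * Usage sketch (not part of this patch): cleanup_unused is intended to be
 * called periodically, for example from a maintenance thread in the embedding
 * application. MyConfig is a placeholder for whichever backend configuration
 * the embedder instantiates, and the ten-second period is arbitrary.
 */
#include <chrono>
#include <thread>

void maintenance_thread()
{
  for (;;)
  {
    std::this_thread::sleep_for(std::chrono::seconds(10));
    // Flush and return memory held by allocators no thread currently owns.
    snmalloc::cleanup_unused<MyConfig>();
  }
}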
+ auto* first = AllocPool::extract(); + auto* alloc = first; + decltype(alloc) last; + + if (alloc != nullptr) + { + while (alloc != nullptr) + { + alloc->flush(); + last = alloc; + alloc = AllocPool::extract(alloc); + } + + AllocPool::restore(first, last); + } +#endif + } + + /** + If you pass a pointer to a bool, then it returns whether all the + allocators are empty. If you don't pass a pointer to a bool, then will + raise an error all the allocators are not empty. + */ + template + inline static void debug_check_empty(bool* result = nullptr) + { +#ifndef SNMALLOC_PASS_THROUGH + static_assert( + Config::Options.CoreAllocIsPoolAllocated, + "Global status is available only for pool-allocated configurations"); + // This is a debugging function. It checks that all memory from all + // allocators has been freed. + auto* alloc = AllocPool::iterate(); + +# ifdef SNMALLOC_TRACING + message<1024>("debug check empty: first {}", alloc); +# endif + bool done = false; + bool okay = true; + + while (!done) + { +# ifdef SNMALLOC_TRACING + message<1024>("debug_check_empty: Check all allocators!"); +# endif + done = true; + alloc = AllocPool::iterate(); + okay = true; + + while (alloc != nullptr) + { +# ifdef SNMALLOC_TRACING + message<1024>("debug check empty: {}", alloc); +# endif + // Check that the allocator has freed all memory. + // repeat the loop if empty caused message sends. + if (alloc->debug_is_empty(&okay)) + { + done = false; +# ifdef SNMALLOC_TRACING + message<1024>("debug check empty: sent messages {}", alloc); +# endif + } + +# ifdef SNMALLOC_TRACING + message<1024>("debug check empty: okay = {}", okay); +# endif + alloc = AllocPool::iterate(alloc); + } + } + + if (result != nullptr) + { + *result = okay; + return; + } + + // Redo check so abort is on allocator with allocation left. + if (!okay) + { + alloc = AllocPool::iterate(); + while (alloc != nullptr) + { + alloc->debug_is_empty(nullptr); + alloc = AllocPool::iterate(alloc); + } + } +#else + UNUSED(result); +#endif + } + + template + inline static void debug_in_use(size_t count) + { + static_assert( + Config::Options.CoreAllocIsPoolAllocated, + "Global status is available only for pool-allocated configurations"); + auto alloc = AllocPool::iterate(); + while (alloc != nullptr) + { + if (alloc->debug_is_in_use()) + { + if (count == 0) + { + error("ERROR: allocator in use."); + } + count--; + } + alloc = AllocPool::iterate(alloc); + + if (count != 0) + { + error("Error: two few allocators in use."); + } + } + } + +} // namespace snmalloc diff --git a/src/snmalloc/mem/localalloc.h b/src/snmalloc/mem/localalloc.h new file mode 100644 index 000000000..592625eb5 --- /dev/null +++ b/src/snmalloc/mem/localalloc.h @@ -0,0 +1,839 @@ +#pragma once + +#ifdef _MSC_VER +# define ALLOCATOR __declspec(allocator) +#else +# define ALLOCATOR +#endif + +#include "../ds/ds.h" +#include "corealloc.h" +#include "freelist.h" +#include "localcache.h" +#include "pool.h" +#include "remotecache.h" +#include "sizeclasstable.h" + +#ifdef SNMALLOC_PASS_THROUGH +# include "external_alloc.h" +#endif + +#include +#include +namespace snmalloc +{ + enum Boundary + { + /** + * The location of the first byte of this allocation. + */ + Start, + /** + * The location of the last byte of the allocation. + */ + End, + /** + * The location one past the end of the allocation. This is mostly useful + * for bounds checking, where anything less than this value is safe. 
+ */ + OnePastEnd + }; + + /** + * A local allocator contains the fast-path allocation routines and + * encapsulates all of the behaviour of an allocator that is local to some + * context, typically a thread. This delegates to a `CoreAllocator` for all + * slow-path operations, including anything that requires claiming new chunks + * of address space. + * + * The template parameter defines the configuration of this allocator and is + * passed through to the associated `CoreAllocator`. The `Options` structure + * of this defines one property that directly affects the behaviour of the + * local allocator: `LocalAllocSupportsLazyInit`, which defaults to true, + * defines whether the local allocator supports lazy initialisation. If this + * is true then the local allocator will construct a core allocator the first + * time it needs to perform a slow-path operation. If this is false then the + * core allocator must be provided externally by invoking the `init` method + * on this class *before* any allocation-related methods are called. + */ + template + class LocalAllocator + { + public: + using Config = Config_; + + private: + /** + * Define local names for specialised versions of various types that are + * specialised for the back-end that we are using. + * @{ + */ + using CoreAlloc = CoreAllocator; + using PagemapEntry = typename Config::PagemapEntry; + /// }@ + + // Free list per small size class. These are used for + // allocation on the fast path. This part of the code is inspired by + // mimalloc. + // Also contains remote deallocation cache. + LocalCache local_cache{&Config::unused_remote}; + + // Underlying allocator for most non-fast path operations. + CoreAlloc* core_alloc{nullptr}; + + // As allocation and deallocation can occur during thread teardown + // we need to record if we are already in that state as we will not + // receive another teardown call, so each operation needs to release + // the underlying data structures after the call. + bool post_teardown{false}; + + /** + * Checks if the core allocator has been initialised, and runs the + * `action` with the arguments, args. + * + * If the core allocator is not initialised, then first initialise it, + * and then perform the action using the core allocator. + * + * This is an abstraction of the common pattern of check initialisation, + * and then performing the operations. It is carefully crafted to tail + * call the continuations, and thus generate good code for the fast path. + */ + template + SNMALLOC_FAST_PATH decltype(auto) check_init(Action action, Args... args) + { + if (SNMALLOC_LIKELY(core_alloc != nullptr)) + { + return core_alloc->handle_message_queue(action, core_alloc, args...); + } + return lazy_init(action, args...); + } + + /** + * This initialises the fast allocator by acquiring a core allocator, and + * setting up its local copy of data structures. + * + * If the allocator does not support lazy initialisation then this assumes + * that initialisation has already taken place and invokes the action + * immediately. + */ + template + SNMALLOC_SLOW_PATH decltype(auto) lazy_init(Action action, Args... args) + { + SNMALLOC_ASSERT(core_alloc == nullptr); + if constexpr (!Config::Options.LocalAllocSupportsLazyInit) + { + SNMALLOC_CHECK( + false && + "lazy_init called on an allocator that doesn't support lazy " + "initialisation"); + // Unreachable, but needed to keep the type checker happy in deducing + // the return type of this function. 
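/*
 * A standalone illustration of why the unreachable return just below is
 * needed (simplified: a runtime `if` instead of the `if constexpr` above, and
 * `work` is a placeholder assumed to return a pointer type). With a deduced
 * return type, the compiler must be able to deduce a type from the visible
 * return statements, and they must all agree, so even the branch that aborts
 * needs a suitably cast return.
 */
#include <cstdlib>

template<typename F>
decltype(auto) guarded_call(bool supported, F work)
{
  if (!supported)
  {
    std::abort();                                  // never returns at runtime...
    return static_cast<decltype(work())>(nullptr); // ...but still aids deduction
  }
  return work();
}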
+ return static_cast(nullptr); + } + else + { + // Initialise the thread local allocator + if constexpr (Config::Options.CoreAllocOwnsLocalState) + { + init(); + } + + // register_clean_up must be called after init. register clean up may + // be implemented with allocation, so need to ensure we have a valid + // allocator at this point. + if (!post_teardown) + // Must be called at least once per thread. + // A pthread implementation only calls the thread destruction handle + // if the key has been set. + Config::register_clean_up(); + + // Perform underlying operation + auto r = action(core_alloc, args...); + + // After performing underlying operation, in the case of teardown + // already having begun, we must flush any state we just acquired. + if (post_teardown) + { +#ifdef SNMALLOC_TRACING + message<1024>("post_teardown flush()"); +#endif + // We didn't have an allocator because the thread is being torndown. + // We need to return any local state, so we don't leak it. + flush(); + } + + return r; + } + } + + /** + * Allocation that are larger than are handled by the fast allocator must be + * passed to the core allocator. + */ + template + SNMALLOC_SLOW_PATH capptr::Alloc alloc_not_small(size_t size) + { + if (size == 0) + { + // Deal with alloc zero of with a small object here. + // Alternative semantics giving nullptr is also allowed by the + // standard. + return small_alloc(1); + } + + return check_init([&](CoreAlloc* core_alloc) { + // Grab slab of correct size + // Set remote as large allocator remote. + auto [chunk, meta] = Config::Backend::alloc_chunk( + core_alloc->get_backend_local_state(), + large_size_to_chunk_size(size), + PagemapEntry::encode( + core_alloc->public_state(), size_to_sizeclass_full(size))); + // set up meta data so sizeclass is correct, and hence alloc size, and + // external pointer. +#ifdef SNMALLOC_TRACING + message<1024>("size {} pow2size {}", size, bits::next_pow2_bits(size)); +#endif + + // Initialise meta data for a successful large allocation. + if (meta != nullptr) + meta->initialise_large(); + + if (zero_mem == YesZero && chunk.unsafe_ptr() != nullptr) + { + Config::Pal::template zero( + chunk.unsafe_ptr(), bits::next_pow2(size)); + } + + return capptr_chunk_is_alloc(capptr_to_user_address_control(chunk)); + }); + } + + template + SNMALLOC_FAST_PATH capptr::Alloc small_alloc(size_t size) + { + auto domesticate = + [this](freelist::QueuePtr p) SNMALLOC_FAST_PATH_LAMBDA { + return capptr_domesticate(core_alloc->backend_state_ptr(), p); + }; + auto slowpath = [&]( + smallsizeclass_t sizeclass, + freelist::Iter<>* fl) SNMALLOC_FAST_PATH_LAMBDA { + if (SNMALLOC_LIKELY(core_alloc != nullptr)) + { + return core_alloc->handle_message_queue( + []( + CoreAlloc* core_alloc, + smallsizeclass_t sizeclass, + freelist::Iter<>* fl) { + return core_alloc->template small_alloc(sizeclass, *fl); + }, + core_alloc, + sizeclass, + fl); + } + return lazy_init( + [&](CoreAlloc*, smallsizeclass_t sizeclass) { + return small_alloc(sizeclass_to_size(sizeclass)); + }, + sizeclass); + }; + + return local_cache.template alloc( + domesticate, size, slowpath); + } + + /** + * Send all remote deallocation to other threads. + */ + void post_remote_cache() + { + core_alloc->post(); + } + + /** + * Slow path for deallocation we do not have space for this remote + * deallocation. This could be because, + * - we actually don't have space for this remote deallocation, + * and need to send them on; or + * - the allocator was not already initialised. 
+ * In the second case we need to recheck if this is a remote deallocation, + * as we might acquire the originating allocator. + */ + SNMALLOC_SLOW_PATH void dealloc_remote_slow(capptr::Alloc p) + { + if (core_alloc != nullptr) + { +#ifdef SNMALLOC_TRACING + message<1024>( + "Remote dealloc post {} ({})", + p.unsafe_ptr(), + alloc_size(p.unsafe_ptr())); +#endif + const PagemapEntry& entry = + Config::Backend::template get_metaentry(address_cast(p)); + local_cache.remote_dealloc_cache.template dealloc( + entry.get_remote()->trunc_id(), p, key_global); + post_remote_cache(); + return; + } + + // Recheck what kind of dealloc we should do in case the allocator we get + // from lazy_init is the originating allocator. (TODO: but note that this + // can't suddenly become a large deallocation; the only distinction is + // between being ours to handle and something to post to a Remote.) + lazy_init( + [&](CoreAlloc*, CapPtr p) { + dealloc(p.unsafe_ptr()); // TODO don't double count statistics + return nullptr; + }, + p); + } + + /** + * Abstracts access to the message queue to handle different + * layout configurations of the allocator. + */ + auto& message_queue() + { + return local_cache.remote_allocator; + } + + /** + * Call `Config::is_initialised()` if it is implemented, + * unconditionally returns true otherwise. + */ + SNMALLOC_FAST_PATH + bool is_initialised() + { + return call_is_initialised(nullptr, 0); + } + + /** + * SFINAE helper. Matched only if `T` implements `ensure_init`. Calls it + * if it exists. + */ + template + SNMALLOC_FAST_PATH auto call_ensure_init(T*, int) + -> decltype(T::ensure_init()) + { + T::ensure_init(); + } + + /** + * SFINAE helper. Matched only if `T` does not implement `ensure_init`. + * Does nothing if called. + */ + template + SNMALLOC_FAST_PATH auto call_ensure_init(T*, long) + {} + + /** + * Call `Config::ensure_init()` if it is implemented, do + * nothing otherwise. + */ + SNMALLOC_FAST_PATH + void ensure_init() + { + call_ensure_init(nullptr, 0); + } + + public: + constexpr LocalAllocator() = default; + /** + * Remove copy constructors and assignment operators. + * Once initialised the CoreAlloc will take references to the internals + * of this allocators, and thus copying/moving it is very unsound. + */ + LocalAllocator(const LocalAllocator&) = delete; + LocalAllocator& operator=(const LocalAllocator&) = delete; + + /** + * Initialise the allocator. For allocators that support local + * initialisation, this is called with a core allocator that this class + * allocates (from a pool allocator) the first time it encounters a slow + * path. If this class is configured without lazy initialisation support + * then this must be called externally + */ + void init(CoreAlloc* c) + { + // Initialise the global allocator structures + ensure_init(); + + // Should only be called if the allocator has not been initialised. + SNMALLOC_ASSERT(core_alloc == nullptr); + + // Attach to it. + c->attach(&local_cache); + core_alloc = c; +#ifdef SNMALLOC_TRACING + message<1024>("init(): core_alloc={} @ {}", core_alloc, &local_cache); +#endif + // local_cache.stats.sta rt(); + } + + // This is effectively the constructor for the LocalAllocator, but due to + // not wanting initialisation checks on the fast path, it is initialised + // lazily. + void init() + { + // Initialise the global allocator structures + ensure_init(); + // Grab an allocator for this thread. 
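/*
 * The call_ensure_init overload pair above uses the classic int/long ranking
 * trick: the literal 0 prefers the `int` overload, which is viable only when
 * the expression T::ensure_init() is well-formed; otherwise resolution falls
 * back to the `long` overload. Standalone sketch (the types below are
 * placeholders):
 */
#include <iostream>

struct HasInit
{
  static void ensure_init()
  {
    std::cout << "ensure_init called\n";
  }
};

struct NoInit
{};

template<typename T>
auto call_ensure_init_sketch(T*, int) -> decltype(T::ensure_init())
{
  T::ensure_init(); // chosen when T::ensure_init() exists
}

template<typename T>
void call_ensure_init_sketch(T*, long)
{
  // Fallback: 0 -> long needs a conversion, so this only wins when the
  // int overload has been removed by SFINAE.
}

int main()
{
  call_ensure_init_sketch<HasInit>(nullptr, 0); // prints "ensure_init called"
  call_ensure_init_sketch<NoInit>(nullptr, 0);  // silently does nothing
}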
+ init(AllocPool::acquire(&(this->local_cache))); + } + + // Return all state in the fast allocator and release the underlying + // core allocator. This is used during teardown to empty the thread + // local state. + void flush() + { + // Detached thread local state from allocator. + if (core_alloc != nullptr) + { + core_alloc->flush(); + + // core_alloc->stats().add(local_cache.stats); + // // Reset stats, required to deal with repeated flushing. + // new (&local_cache.stats) Stats(); + + // Detach underlying allocator + core_alloc->attached_cache = nullptr; + // Return underlying allocator to the system. + if constexpr (Config::Options.CoreAllocOwnsLocalState) + { + AllocPool::release(core_alloc); + } + + // Set up thread local allocator to look like + // it is new to hit slow paths. + core_alloc = nullptr; +#ifdef SNMALLOC_TRACING + message<1024>("flush(): core_alloc={}", core_alloc); +#endif + local_cache.remote_allocator = &Config::unused_remote; + local_cache.remote_dealloc_cache.capacity = 0; + } + } + + /** + * Allocate memory of a dynamically known size. + */ + template + SNMALLOC_FAST_PATH ALLOCATOR void* alloc(size_t size) + { +#ifdef SNMALLOC_PASS_THROUGH + // snmalloc guarantees a lot of alignment, so we can depend on this + // make pass through call aligned_alloc with the alignment snmalloc + // would guarantee. + void* result = external_alloc::aligned_alloc( + natural_alignment(size), round_size(size)); + if (zero_mem == YesZero && result != nullptr) + memset(result, 0, size); + return result; +#else + // Perform the - 1 on size, so that zero wraps around and ends up on + // slow path. + if (SNMALLOC_LIKELY( + (size - 1) <= (sizeclass_to_size(NUM_SMALL_SIZECLASSES - 1) - 1))) + { + // Small allocations are more likely. Improve + // branch prediction by placing this case first. + return capptr_reveal(small_alloc(size)); + } + + return capptr_reveal(alloc_not_small(size)); +#endif + } + + /** + * Allocate memory of a statically known size. + */ + template + SNMALLOC_FAST_PATH ALLOCATOR void* alloc() + { + return alloc(size); + } + + /* + * Many of these tests come with an "or is null" branch that they'd need to + * add if we did them up front. Instead, defer them until we're past the + * point where we know, from the pagemap, or by explicitly testing, that the + * pointer under test is not nullptr. + */ +#if defined(__CHERI_PURE_CAPABILITY__) && defined(SNMALLOC_CHECK_CLIENT) + SNMALLOC_SLOW_PATH void dealloc_cheri_checks(void* p) + { + /* + * Enforce the use of an unsealed capability. + * + * TODO In CHERI+MTE, this, is part of the CAmoCDecVersion instruction; + * elide this test in that world. + */ + snmalloc_check_client( + !__builtin_cheri_sealed_get(p), "Sealed capability in deallocation"); + + /* + * Enforce permissions on the returned pointer. These pointers end up in + * free queues and will be cycled out to clients again, so try to catch + * erroneous behavior now, rather than later. + * + * TODO In the CHERI+MTE case, we must reconstruct the pointer for the + * free queues as part of the discovery of the start of the object (so + * that it has the correct version), and the CAmoCDecVersion call imposes + * its own requirements on the permissions (to ensure that it's at least + * not zero). They are somewhat more lax than we might wish, so this test + * may remain, guarded by SNMALLOC_CHECK_CLIENT, but no explicit + * permissions checks are required in the non-SNMALLOC_CHECK_CLIENT case + * to defend ourselves or other clients against a misbehaving client. 
+ */ + static const size_t reqperm = CHERI_PERM_LOAD | CHERI_PERM_STORE | + CHERI_PERM_LOAD_CAP | CHERI_PERM_STORE_CAP; + snmalloc_check_client( + (__builtin_cheri_perms_get(p) & reqperm) == reqperm, + "Insufficient permissions on capability in deallocation"); + + /* + * We check for a valid tag here, rather than in domestication, because + * domestication might be answering a slightly different question, about + * the plausibility of addresses rather than of exact pointers. + * + * TODO Further, in the CHERI+MTE case, the tag check will be implicit in + * a future CAmoCDecVersion instruction, and there should be no harm in + * the lookups we perform along the way to get there. In that world, + * elide this test. + */ + snmalloc_check_client( + __builtin_cheri_tag_get(p), "Untagged capability in deallocation"); + + /* + * Verify that the capability is not zero-length, ruling out the other + * edge case around monotonicity. + */ + snmalloc_check_client( + __builtin_cheri_length_get(p) > 0, + "Zero-length capability in deallocation"); + + /* + * At present we check for the pointer also being the start of an + * allocation closer to dealloc; for small objects, that happens in + * dealloc_local_object_fast, either below or *on the far end of message + * receipt*. For large objects, it happens below by directly rounding to + * power of two rather than using the is_start_of_object helper. + * (XXX This does mean that we might end up threading our remote queue + * state somewhere slightly unexpected rather than at the head of an + * object. That is perhaps fine for now?) + */ + + /* + * TODO + * + * We could enforce other policies here, including that the length exactly + * match the sizeclass. At present, we bound caps we give for allocations + * to the underlying sizeclass, so even malloc(0) will have a non-zero + * length. Monotonicity would then imply that the pointer must be the + * head of an object (modulo, perhaps, temporal aliasing if we somehow + * introduced phase shifts in heap layout like some allocators do). + * + * If we switched to bounding with upwards-rounded representable bounds + * (c.f., CRRL) rather than underlying object size, then we should, + * instead, in general require plausibility of p_raw by checking that its + * length is nonzero and the snmalloc size class associated with its + * length is the one for the slab in question... except for the added + * challenge of malloc(0). Since 0 rounds up to 0, we might end up + * constructing zero-length caps to hand out, which we would then reject + * upon receipt. Instead, as part of introducing CRRL bounds, we should + * introduce a sizeclass for slabs holding zero-size objects. All told, + * we would want to check that + * + * size_to_sizeclass(length) == entry.get_sizeclass() + * + * I believe a relaxed CRRL test of + * + * length > 0 || (length == sizeclass_to_size(entry.get_sizeclass())) + * + * would also suffice and may be slightly less expensive than the test + * above, at the cost of not catching as many misbehaving clients. + * + * In either case, having bounded by CRRL bounds, we would need to be + * *reconstructing* the capabilities headed to our free lists to be given + * out to clients again; there are many more CRRL classes than snmalloc + * sizeclasses (this is the same reason that we can always get away with + * CSetBoundsExact in capptr_bound). Switching to CRRL bounds, if that's + * ever a thing we want to do, will be easier after we've done the + * plumbing for CHERI+MTE. 
+ */ + + /* + * TODO: Unsurprisingly, the CHERI+MTE case once again has something to + * say here. In that world, again, we are certain to be reconstructing + * the capability for the free queue anyway, and so exactly what we wish + * to enforce, length-wise, of the provided capability, is somewhat more + * flexible. Using the provided capability bounds when recoloring memory + * could be a natural way to enforce that it covers the entire object, at + * the cost of a more elaborate recovery story (as we risk aborting with a + * partially recolored object). On non-SNMALLOC_CHECK_CLIENT builds, it + * likely makes sense to just enforce that length > 0 (*not* enforced by + * the CAmoCDecVersion instruction) and say that any authority-bearing + * interior pointer suffices to free the object. I believe that to be an + * acceptable security posture for the allocator and between clients; + * misbehavior is confined to the misbehaving client. + */ + } +#endif + + SNMALLOC_FAST_PATH void dealloc(void* p_raw) + { +#ifdef SNMALLOC_PASS_THROUGH + external_alloc::free(p_raw); +#else + // Care is needed so that dealloc(nullptr) works before init + // The backend allocator must ensure that a minimal page map exists + // before init, that maps null to a remote_deallocator that will never + // be in thread local state. + +# ifdef __CHERI_PURE_CAPABILITY__ + /* + * On CHERI platforms, snap the provided pointer to its base, ignoring + * any client-provided offset, which may have taken the pointer out of + * bounds and so appear to designate a different object. The base is + * is guaranteed by monotonicity either... + * * to be within the bounds originally returned by alloc(), or + * * one past the end (in which case, the capability length must be 0). + * + * Setting the offset does not trap on untagged capabilities, so the tag + * might be clear after this, as well. + * + * For a well-behaved client, this is a no-op: the base is already at the + * start of the allocation and so the offset is zero. + */ + p_raw = __builtin_cheri_offset_set(p_raw, 0); +# endif + + capptr::AllocWild p_wild = capptr_from_client(p_raw); + + /* + * p_tame may be nullptr, even if p_raw/p_wild are not, in the case + * where domestication fails. We exclusively use p_tame below so that + * such failures become no ops; in the nullptr path, which should be + * well off the fast path, we could be slightly more aggressive and test + * that p_raw is also nullptr and Pal::error() if not. (TODO) + * + * We do not rely on the bounds-checking ability of domestication here, + * and just check the address (and, on other architectures, perhaps + * well-formedness) of this pointer. The remainder of the logic will + * deal with the object's extent. 
+ */ + capptr::Alloc p_tame = + capptr_domesticate(core_alloc->backend_state_ptr(), p_wild); + + const PagemapEntry& entry = + Config::Backend::get_metaentry(address_cast(p_tame)); + if (SNMALLOC_LIKELY(local_cache.remote_allocator == entry.get_remote())) + { +# if defined(__CHERI_PURE_CAPABILITY__) && defined(SNMALLOC_CHECK_CLIENT) + dealloc_cheri_checks(p_tame.unsafe_ptr()); +# endif + if (SNMALLOC_LIKELY(CoreAlloc::dealloc_local_object_fast( + entry, p_tame, local_cache.entropy))) + return; + core_alloc->dealloc_local_object_slow(p_tame, entry); + return; + } + + RemoteAllocator* remote = entry.get_remote(); + if (SNMALLOC_LIKELY(remote != nullptr)) + { +# if defined(__CHERI_PURE_CAPABILITY__) && defined(SNMALLOC_CHECK_CLIENT) + dealloc_cheri_checks(p_tame.unsafe_ptr()); +# endif + // Check if we have space for the remote deallocation + if (local_cache.remote_dealloc_cache.reserve_space(entry)) + { + local_cache.remote_dealloc_cache.template dealloc( + remote->trunc_id(), p_tame, key_global); +# ifdef SNMALLOC_TRACING + message<1024>( + "Remote dealloc fast {} ({})", p_raw, alloc_size(p_raw)); +# endif + return; + } + + dealloc_remote_slow(p_tame); + return; + } + + // If p_tame is not null, then dealloc has been call on something + // it shouldn't be called on. + // TODO: Should this be tested even in the !CHECK_CLIENT case? + snmalloc_check_client(p_tame == nullptr, "Not allocated by snmalloc."); + +# ifdef SNMALLOC_TRACING + message<1024>("nullptr deallocation"); +# endif + return; +#endif + } + + void check_size(void* p, size_t size) + { +#ifdef SNMALLOC_CHECK_CLIENT + size = size == 0 ? 1 : size; + auto sc = size_to_sizeclass_full(size); + auto pm_sc = + Config::Backend::get_metaentry(address_cast(p)).get_sizeclass(); + auto rsize = sizeclass_full_to_size(sc); + auto pm_size = sizeclass_full_to_size(pm_sc); + snmalloc_check_client( + sc == pm_sc, "Dealloc rounded size mismatch: {} != {}", rsize, pm_size); +#else + UNUSED(p, size); +#endif + } + + SNMALLOC_FAST_PATH void dealloc(void* p, size_t s) + { + check_size(p, s); + dealloc(p); + } + + template + SNMALLOC_FAST_PATH void dealloc(void* p) + { + check_size(p, size); + dealloc(p); + } + + void teardown() + { +#ifdef SNMALLOC_TRACING + message<1024>("Teardown: core_alloc={} @ {}", core_alloc, &local_cache); +#endif + post_teardown = true; + if (core_alloc != nullptr) + { + flush(); + } + } + + SNMALLOC_FAST_PATH size_t alloc_size(const void* p_raw) + { +#ifdef SNMALLOC_PASS_THROUGH + return external_alloc::malloc_usable_size(const_cast(p_raw)); +#else + // TODO What's the domestication policy here? At the moment we just + // probe the pagemap with the raw address, without checks. There could + // be implicit domestication through the `Config::Pagemap` or + // we could just leave well enough alone. + + // Note that alloc_size should return 0 for nullptr. + // Other than nullptr, we know the system will be initialised as it must + // be called with something we have already allocated. + // + // To handle this case we require the uninitialised pagemap contain an + // entry for the first chunk of memory, that states it represents a + // large object, so we can pull the check for null off the fast path. 
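/*
 * Expected behaviour, as a usage sketch (assumes a LocalAllocator instance
 * over a placeholder configuration MyConfig; the exact rounded size depends
 * on the size-class table):
 */
#include <cstddef>

void alloc_size_example(snmalloc::LocalAllocator<MyConfig>& alloc)
{
  void* p = alloc.alloc(20);
  std::size_t s = alloc.alloc_size(p);       // rounded-up size class, so s >= 20
  std::size_t z = alloc.alloc_size(nullptr); // 0: the default pagemap entry covers null
  alloc.dealloc(p);
  (void)s;
  (void)z;
}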
+ const PagemapEntry& entry = + Config::Backend::template get_metaentry(address_cast(p_raw)); + + return sizeclass_full_to_size(entry.get_sizeclass()); +#endif + } + + /** + * Returns the Start/End of an object allocated by this allocator + * + * It is valid to pass any pointer, if the object was not allocated + * by this allocator, then it give the start and end as the whole of + * the potential pointer space. + */ + template + void* external_pointer(void* p) + { + /* + * Note that: + * * each case uses `pointer_offset`, so that on CHERI, our behaviour is + * monotone with respect to the capability `p`. + * + * * the returned pointer could be outside the CHERI bounds of `p`, and + * thus not something that can be followed. + * + * * we don't use capptr_from_client()/capptr_reveal(), to avoid the + * syntactic clutter. By inspection, `p` flows only to address_cast + * and pointer_offset, and so there's no risk that we follow or act + * to amplify the rights carried by `p`. + */ + if constexpr (location == Start) + { + size_t index = index_in_object(address_cast(p)); + return pointer_offset(p, 0 - index); + } + else if constexpr (location == End) + { + return pointer_offset(p, remaining_bytes(address_cast(p)) - 1); + } + else + { + return pointer_offset(p, remaining_bytes(address_cast(p))); + } + } + + /** + * Returns the number of remaining bytes in an object. + * + * auto p = (char*)malloc(size) + * remaining_bytes(p + n) == size - n provided n < size + */ + size_t remaining_bytes(address_t p) + { +#ifndef SNMALLOC_PASS_THROUGH + const PagemapEntry& entry = + Config::Backend::template get_metaentry(p); + + auto sizeclass = entry.get_sizeclass(); + return snmalloc::remaining_bytes(sizeclass, p); +#else + return reinterpret_cast( + std::numeric_limits::max() - p); +#endif + } + + bool check_bounds(const void* p, size_t s) + { + if (SNMALLOC_LIKELY(Config::is_initialised())) + { + return remaining_bytes(address_cast(p)) >= s; + } + return true; + } + + /** + * Returns the byte offset into an object. + * + * auto p = (char*)malloc(size) + * index_in_object(p + n) == n provided n < size + */ + size_t index_in_object(address_t p) + { +#ifndef SNMALLOC_PASS_THROUGH + const PagemapEntry& entry = + Config::Backend::template get_metaentry(p); + + auto sizeclass = entry.get_sizeclass(); + return snmalloc::index_in_object(sizeclass, p); +#else + return reinterpret_cast(p); +#endif + } + + /** + * Accessor, returns the local cache. If embedding code is allocating the + * core allocator for use by this local allocator then it needs to access + * this field. 
+ */ + LocalCache& get_local_cache() + { + return local_cache; + } + }; +} // namespace snmalloc diff --git a/src/snmalloc/mem/localcache.h b/src/snmalloc/mem/localcache.h new file mode 100644 index 000000000..68b232e4e --- /dev/null +++ b/src/snmalloc/mem/localcache.h @@ -0,0 +1,112 @@ +#pragma once + +#include "../ds/ds.h" +#include "freelist.h" +#include "remotecache.h" +#include "sizeclasstable.h" + +#include + +namespace snmalloc +{ + inline static SNMALLOC_FAST_PATH capptr::Alloc + finish_alloc_no_zero(freelist::HeadPtr p, smallsizeclass_t sizeclass) + { + SNMALLOC_ASSERT(is_start_of_object( + sizeclass_t::from_small_class(sizeclass), address_cast(p))); + UNUSED(sizeclass); + + return p.as_void(); + } + + template + inline static SNMALLOC_FAST_PATH capptr::Alloc + finish_alloc(freelist::HeadPtr p, smallsizeclass_t sizeclass) + { + auto r = finish_alloc_no_zero(p, sizeclass); + + if constexpr (zero_mem == YesZero) + Config::Pal::zero(r.unsafe_ptr(), sizeclass_to_size(sizeclass)); + + // TODO: Should this be zeroing the free Object state, in the non-zeroing + // case? + + return r; + } + + // This is defined on its own, so that it can be embedded in the + // thread local fast allocator, but also referenced from the + // thread local core allocator. + struct LocalCache + { + // Free list per small size class. These are used for + // allocation on the fast path. This part of the code is inspired by + // mimalloc. + freelist::Iter<> small_fast_free_lists[NUM_SMALL_SIZECLASSES] = {}; + + // This is the entropy for a particular thread. + LocalEntropy entropy; + + // Pointer to the remote allocator message_queue, used to check + // if a deallocation is local. + RemoteAllocator* remote_allocator; + + /** + * Remote deallocations for other threads + */ + RemoteDeallocCache remote_dealloc_cache; + + constexpr LocalCache(RemoteAllocator* remote_allocator) + : remote_allocator(remote_allocator) + {} + + /** + * Return all the free lists to the allocator. Used during thread teardown. + */ + template + bool flush(typename Config::LocalState* local_state, DeallocFun dealloc) + { + auto& key = entropy.get_free_list_key(); + auto domesticate = [local_state](freelist::QueuePtr p) + SNMALLOC_FAST_PATH_LAMBDA { + return capptr_domesticate(local_state, p); + }; + + for (size_t i = 0; i < NUM_SMALL_SIZECLASSES; i++) + { + // TODO could optimise this, to return the whole list in one append + // call. 
+ while (!small_fast_free_lists[i].empty()) + { + auto p = small_fast_free_lists[i].take(key, domesticate); + SNMALLOC_ASSERT(is_start_of_object( + sizeclass_t::from_small_class(i), address_cast(p))); + dealloc(p.as_void()); + } + } + + return remote_dealloc_cache.post( + local_state, remote_allocator->trunc_id(), key_global); + } + + template< + ZeroMem zero_mem, + typename Config, + typename Slowpath, + typename Domesticator> + SNMALLOC_FAST_PATH capptr::Alloc + alloc(Domesticator domesticate, size_t size, Slowpath slowpath) + { + auto& key = entropy.get_free_list_key(); + smallsizeclass_t sizeclass = size_to_sizeclass(size); + auto& fl = small_fast_free_lists[sizeclass]; + if (SNMALLOC_LIKELY(!fl.empty())) + { + auto p = fl.take(key, domesticate); + return finish_alloc(p, sizeclass); + } + return slowpath(sizeclass, &fl); + } + }; + +} // namespace snmalloc diff --git a/src/snmalloc/mem/mem.h b/src/snmalloc/mem/mem.h new file mode 100644 index 000000000..9fb29a985 --- /dev/null +++ b/src/snmalloc/mem/mem.h @@ -0,0 +1,16 @@ +#include "backend_concept.h" +#include "backend_wrappers.h" +#include "corealloc.h" +#include "entropy.h" +#include "external_alloc.h" +#include "freelist.h" +#include "globalalloc.h" +#include "localalloc.h" +#include "localcache.h" +#include "metadata.h" +#include "pool.h" +#include "pooled.h" +#include "remoteallocator.h" +#include "remotecache.h" +#include "sizeclasstable.h" +#include "ticker.h" diff --git a/src/snmalloc/mem/metadata.h b/src/snmalloc/mem/metadata.h new file mode 100644 index 000000000..391650e6d --- /dev/null +++ b/src/snmalloc/mem/metadata.h @@ -0,0 +1,637 @@ +#pragma once + +#include "../ds/ds.h" +#include "freelist.h" +#include "sizeclasstable.h" + +namespace snmalloc +{ + struct RemoteAllocator; + + /** + * Remotes need to be aligned enough that the bottom bits have enough room for + * all the size classes, both large and small. An additional bit is required + * to separate backend uses. + */ + static constexpr size_t REMOTE_MIN_ALIGN = + bits::max(CACHELINE_SIZE, SIZECLASS_REP_SIZE) << 1; + + /** + * Base class for the templated FrontendMetaEntry. This exists to avoid + * needing a template parameter to access constants that are independent of + * the template parameter and contains all of the state that is agnostic to + * the types used for storing per-slab metadata. This class should never be + * instantiated directly (and its protected constructor guarantees that), + * only the templated subclass should be use. The subclass provides + * convenient accessors. + * + * A back end may also subclass `FrontendMetaEntry` to provide other + * back-end-specific information. The front end never directly instantiates + * these. + */ + class MetaEntryBase + { + protected: + /** + * This bit is set in remote_and_sizeclass to discriminate between the case + * that it is in use by the frontend (0) or by the backend (1). For the + * former case, see other methods on this and the subclass + * `FrontendMetaEntry`; for the latter, see backend/backend.h and + * backend/largebuddyrange.h. + * + * This value is statically checked by the frontend to ensure that its + * bit packing does not conflict; see mem/remoteallocator.h + */ + static constexpr address_t REMOTE_BACKEND_MARKER = 1 << 7; + + /** + * Bit used to indicate this should not be considered part of the previous + * PAL allocation. + * + * Some platforms cannot treat different PalAllocs as a single allocation. + * This is true on CHERI as the combined permission might not be + * representable. 
It is also true on Windows as you cannot Commit across + * multiple continuous VirtualAllocs. + */ + static constexpr address_t META_BOUNDARY_BIT = 1 << 0; + + /** + * The bit above the sizeclass is always zero unless this is used + * by the backend to represent another datastructure such as the buddy + * allocator entries. + */ + static constexpr size_t REMOTE_WITH_BACKEND_MARKER_ALIGN = + MetaEntryBase::REMOTE_BACKEND_MARKER; + static_assert( + (REMOTE_MIN_ALIGN >> 1) == MetaEntryBase::REMOTE_BACKEND_MARKER); + + /** + * In common cases, the pointer to the slab metadata. See + * docs/AddressSpace.md for additional details. + * + * The bottom bit is used to indicate if this is the first chunk in a PAL + * allocation, that cannot be combined with the preceeding chunk. + */ + uintptr_t meta{0}; + + /** + * In common cases, a bit-packed pointer to the owning allocator (if any), + * and the sizeclass of this chunk. See `encode` for + * details of this case and docs/AddressSpace.md for further details. + */ + uintptr_t remote_and_sizeclass{0}; + + /** + * Constructor from two pointer-sized words. The subclass is responsible + * for ensuring that accesses to these are type-safe. + */ + constexpr MetaEntryBase(uintptr_t m, uintptr_t ras) + : meta(m), remote_and_sizeclass(ras) + {} + + /** + * Default constructor, zero initialises. + */ + constexpr MetaEntryBase() : MetaEntryBase(0, 0) {} + + /** + * When a meta entry is in use by the back end, it exposes two words of + * state. The low bits in both are reserved. Bits in this bitmask must + * not be set by the back end in either word. + * + * During a major release, this constraint may be weakened, allowing the + * back end to set more bits. We don't currently use all of these bits in + * both words, but we reserve them all to make access uniform. If more + * bits are required by a back end then we could make this asymmetric. + * + * `REMOTE_BACKEND_MARKER` is the highest bit that we reserve, so this is + * currently every bit including that bit and all lower bits. + */ + static constexpr address_t BACKEND_RESERVED_MASK = + (REMOTE_BACKEND_MARKER << 1) - 1; + + public: + /** + * Does the back end currently own this entry? Note that freshly + * allocated entries are owned by the front end until explicitly + * claimed by the back end and so this will return `false` if neither + * the front nor back end owns this entry. + */ + [[nodiscard]] bool is_backend_owned() const + { + return (REMOTE_BACKEND_MARKER & remote_and_sizeclass) == + REMOTE_BACKEND_MARKER; + } + + /** + * Returns true if this metaentry has not been claimed by the front or back + * ends. + */ + [[nodiscard]] bool is_unowned() const + { + return ((meta == 0) || (meta == META_BOUNDARY_BIT)) && + (remote_and_sizeclass == 0); + } + + /** + * Encode the remote and the sizeclass. + */ + [[nodiscard]] static SNMALLOC_FAST_PATH uintptr_t + encode(RemoteAllocator* remote, sizeclass_t sizeclass) + { + /* remote might be nullptr; cast to uintptr_t before offsetting */ + return pointer_offset( + reinterpret_cast(remote), sizeclass.raw()); + } + + /** + * Return the remote and sizeclass in an implementation-defined encoding. + * This is not guaranteed to be stable across snmalloc releases and so the + * only safe use for this is to pass it to the two-argument constructor of + * this class. 
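 *
 * A minimal sketch of that intended use (illustrative; `slab_meta` and
 * `MySlabMetadata` are placeholders, and `existing_entry` is assumed to be
 * owned by the front end):
 *
 *   uintptr_t ras = existing_entry.get_remote_and_sizeclass();
 *   FrontendMetaEntry<MySlabMetadata> copy{slab_meta, ras};
 *   SNMALLOC_ASSERT(copy.get_remote() == existing_entry.get_remote());
 *   SNMALLOC_ASSERT(copy.get_sizeclass() == existing_entry.get_sizeclass());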
+ */ + [[nodiscard]] SNMALLOC_FAST_PATH uintptr_t get_remote_and_sizeclass() const + { + return remote_and_sizeclass; + } + + /** + * Explicit assignment operator, copies the data preserving the boundary bit + * in the target if it is set. + */ + MetaEntryBase& operator=(const MetaEntryBase& other) + { + // Don't overwrite the boundary bit with the other's + meta = (other.meta & ~META_BOUNDARY_BIT) | + address_cast(meta & META_BOUNDARY_BIT); + remote_and_sizeclass = other.remote_and_sizeclass; + return *this; + } + + /** + * On some platforms, allocations originating from the OS may not be + * combined. The boundary bit indicates whether this is meta entry + * corresponds to the first chunk in such a range and so may not be combined + * with anything before it in the address space. + * @{ + */ + void set_boundary() + { + meta |= META_BOUNDARY_BIT; + } + + [[nodiscard]] bool is_boundary() const + { + return meta & META_BOUNDARY_BIT; + } + + bool clear_boundary_bit() + { + return meta &= ~META_BOUNDARY_BIT; + } + ///@} + + /** + * Returns the remote. + * + * If the meta entry is owned by the back end then this returns an + * undefined value and will abort in debug builds. + */ + [[nodiscard]] SNMALLOC_FAST_PATH RemoteAllocator* get_remote() const + { + SNMALLOC_ASSERT(!is_backend_owned()); + return reinterpret_cast( + pointer_align_down( + get_remote_and_sizeclass())); + } + + /** + * Return the sizeclass. + * + * This can be called irrespective of whether the corresponding meta entry + * is owned by the front or back end (and is, for example, called by + * `external_pointer`). In the future, it may provide some stronger + * guarantees on the value that is returned in this case. + */ + [[nodiscard]] SNMALLOC_FAST_PATH sizeclass_t get_sizeclass() const + { + // TODO: perhaps remove static_cast with resolution of + // https://github.com/CTSRD-CHERI/llvm-project/issues/588 + return sizeclass_t::from_raw( + static_cast(get_remote_and_sizeclass()) & + (REMOTE_WITH_BACKEND_MARKER_ALIGN - 1)); + } + + /** + * Claim the meta entry for use by the back end. This preserves the + * boundary bit, if it is set, but otherwise resets the meta entry to a + * pristine state. + */ + void claim_for_backend() + { + meta = is_boundary() ? META_BOUNDARY_BIT : 0; + remote_and_sizeclass = REMOTE_BACKEND_MARKER; + } + + /** + * When used by the back end, the two words in a meta entry have no + * semantics defined by the front end and are identified by enumeration + * values. + */ + enum class Word + { + /** + * The first word. + */ + One, + + /** + * The second word. + */ + Two + }; + + static constexpr bool is_backend_allowed_value(Word, uintptr_t val) + { + return (val & BACKEND_RESERVED_MASK) == 0; + } + + /** + * Proxy class that allows setting and reading back the bits in each word + * that are exposed for the back end. + * + * The back end must not keep instances of this class after returning the + * corresponding meta entry to the front end. + */ + class BackendStateWordRef + { + /** + * A pointer to the relevant word. + */ + uintptr_t* val; + + public: + /** + * Constructor, wraps a `uintptr_t`. Note that this may be used outside + * of the meta entry by code wishing to provide uniform storage to things + * that are either in a meta entry or elsewhere. + */ + constexpr BackendStateWordRef(uintptr_t* v) : val(v) {} + + /** + * Copy constructor. Aliases the underlying storage. 
Note that this is + * not thread safe: two `BackendStateWordRef` instances sharing access to + * the same storage must not be used from different threads without + * explicit synchronisation. + */ + constexpr BackendStateWordRef(const BackendStateWordRef& other) = default; + + /** + * Read the value. This zeroes any bits in the underlying storage that + * the back end is not permitted to access. + */ + [[nodiscard]] uintptr_t get() const + { + return (*val) & ~BACKEND_RESERVED_MASK; + } + + /** + * Default copy assignment. See the copy constructor for constraints on + * using this. + */ + BackendStateWordRef& + operator=(const BackendStateWordRef& other) = default; + + /** + * Assignment operator. Zeroes the bits in the provided value that the + * back end is not permitted to use and then stores the result in the + * value that this class manages. + */ + BackendStateWordRef& operator=(uintptr_t v) + { + SNMALLOC_ASSERT_MSG( + ((v & BACKEND_RESERVED_MASK) == 0), + "The back end is not permitted to use the low bits in the meta " + "entry. ({} & {}) == {}.", + v, + BACKEND_RESERVED_MASK, + (v & BACKEND_RESERVED_MASK)); + *val = v | (static_cast(*val) & BACKEND_RESERVED_MASK); + return *this; + } + + /** + * Comparison operator. Performs address comparison *not* value + * comparison. + */ + bool operator!=(const BackendStateWordRef& other) const + { + return val != other.val; + } + + /** + * Returns the address of the underlying storage in a form that can be + * passed to `snmalloc::message` for printing. + */ + address_t printable_address() + { + return address_cast(val); + } + }; + + /** + * Get a proxy that allows the back end to read from and write to (some bits + * of) a word in the meta entry. The meta entry must either be unowned or + * explicitly claimed by the back end before calling this. + */ + BackendStateWordRef get_backend_word(Word w) + { + if (!is_backend_owned()) + { + SNMALLOC_ASSERT_MSG( + is_unowned(), + "Meta entry is owned by the front end. Meta: {}, " + "remote_and_sizeclass:{}", + meta, + remote_and_sizeclass); + claim_for_backend(); + } + return {w == Word::One ? &meta : &remote_and_sizeclass}; + } + }; + + /** + * The FrontendSlabMetadata represent the metadata associated with a single + * slab. + */ + class alignas(CACHELINE_SIZE) FrontendSlabMetadata + { + public: + /** + * Used to link slab metadata together in various other data-structures. + * This is intended to be used with `SeqSet` and so may actually hold a + * subclass of this class provided by the back end. The `SeqSet` is + * responsible for maintaining that invariant. While an instance of this + * class is in a `SeqSet`, the `next` field should not be assigned to by + * anything that doesn't enforce the invariant that `next` stores a `T*`, + * where `T` is a subclass of `FrontendSlabMetadata`. + */ + FrontendSlabMetadata* next{nullptr}; + + constexpr FrontendSlabMetadata() = default; + + /** + * Data-structure for building the free list for this slab. + */ +#ifdef SNMALLOC_CHECK_CLIENT + freelist::Builder free_queue; +#else + freelist::Builder free_queue; +#endif + + /** + * The number of deallocation required until we hit a slow path. This + * counts down in two different ways that are handled the same on the + * fast path. The first is + * - deallocations until the slab has sufficient entries to be considered + * useful to allocate from. This could be as low as 1, or when we have + * a requirement for entropy then it could be much higher. + * - deallocations until the slab is completely unused. 
This is needed + * to be detected, so that the statistics can be kept up to date, and + * potentially return memory to the a global pool of slabs/chunks. + */ + uint16_t needed_ = 0; + + /** + * Flag that is used to indicate that the slab is currently not active. + * I.e. it is not in a CoreAllocator cache for the appropriate sizeclass. + */ + bool sleeping_ = false; + + /** + * Flag to indicate this is actually a large allocation rather than a slab + * of small allocations. + */ + bool large_ = false; + + uint16_t& needed() + { + return needed_; + } + + bool& sleeping() + { + return sleeping_; + } + + /** + * Initialise FrontendSlabMetadata for a slab. + */ + void initialise(smallsizeclass_t sizeclass) + { + free_queue.init(); + // Set up meta data as if the entire slab has been turned into a free + // list. This means we don't have to check for special cases where we have + // returned all the elements, but this is a slab that is still being bump + // allocated from. Hence, the bump allocator slab will never be returned + // for use in another size class. + set_sleeping(sizeclass, 0); + + large_ = false; + } + + /** + * Make this a chunk represent a large allocation. + * + * Set needed so immediately moves to slow path. + */ + void initialise_large() + { + // We will push to this just to make the fast path clean. + free_queue.init(); + + // Flag to detect that it is a large alloc on the slow path + large_ = true; + + // Jump to slow path on first deallocation. + needed() = 1; + } + + /** + * Updates statistics for adding an entry to the free list, if the + * slab is either + * - empty adding the entry to the free list, or + * - was full before the subtraction + * this returns true, otherwise returns false. + */ + bool return_object() + { + return (--needed()) == 0; + } + + bool is_unused() + { + return needed() == 0; + } + + bool is_sleeping() + { + return sleeping(); + } + + bool is_large() + { + return large_; + } + + /** + * Try to set this slab metadata to sleep. If the remaining elements are + * fewer than the threshold, then it will actually be set to the sleeping + * state, and will return true, otherwise it will return false. + */ + SNMALLOC_FAST_PATH bool + set_sleeping(smallsizeclass_t sizeclass, uint16_t remaining) + { + auto threshold = threshold_for_waking_slab(sizeclass); + if (remaining >= threshold) + { + // Set needed to at least one, possibly more so we only use + // a slab when it has a reasonable amount of free elements + auto allocated = sizeclass_to_slab_object_count(sizeclass); + needed() = allocated - remaining; + sleeping() = false; + return false; + } + + sleeping() = true; + needed() = threshold - remaining; + return true; + } + + SNMALLOC_FAST_PATH void set_not_sleeping(smallsizeclass_t sizeclass) + { + auto allocated = sizeclass_to_slab_object_count(sizeclass); + needed() = allocated - threshold_for_waking_slab(sizeclass); + + // Design ensures we can't move from full to empty. + // There are always some more elements to free at this + // point. This is because the threshold is always less + // than the count for the slab + SNMALLOC_ASSERT(needed() != 0); + + sleeping() = false; + } + + /** + * Allocates a free list from the meta data. + * + * Returns a freshly allocated object of the correct size, and a bool that + * specifies if the slab metadata should be placed in the queue for that + * sizeclass. 
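 *
 * A worked example of the sleeping logic used here (numbers are
 * illustrative, not normative): for a sizeclass with capacity 64 and a
 * waking threshold of 16, set_sleeping(sc, 20) keeps the slab awake
 * (20 >= 16) with needed() == 44, while set_sleeping(sc, 3) puts it to
 * sleep with needed() == 13; the second component returned below is true
 * in the first case and false in the second.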
+ * + * If Randomisation is not used, it will always return false for the second + * component, but with randomisation, it may only return part of the + * available objects for this slab metadata. + */ + template + static SNMALLOC_FAST_PATH std::pair + alloc_free_list( + Domesticator domesticate, + FrontendSlabMetadata* meta, + freelist::Iter<>& fast_free_list, + LocalEntropy& entropy, + smallsizeclass_t sizeclass) + { + auto& key = entropy.get_free_list_key(); + + std::remove_reference_t tmp_fl; + auto remaining = meta->free_queue.close(tmp_fl, key); + auto p = tmp_fl.take(key, domesticate); + fast_free_list = tmp_fl; + +#ifdef SNMALLOC_CHECK_CLIENT + entropy.refresh_bits(); +#else + UNUSED(entropy); +#endif + + // This marks the slab as sleeping, and sets a wakeup + // when sufficient deallocations have occurred to this slab. + // Takes how many deallocations were not grabbed on this call + // This will be zero if there is no randomisation. + auto sleeping = meta->set_sleeping(sizeclass, remaining); + + return {p, !sleeping}; + } + }; + + /** + * Entry stored in the pagemap. See docs/AddressSpace.md for the full + * FrontendMetaEntry lifecycle. + */ + template + class FrontendMetaEntry : public MetaEntryBase + { + /** + * Ensure that the template parameter is valid. + */ + static_assert( + std::is_convertible_v, + "The front end requires that the back end provides slab metadata that is " + "compatible with the front-end's structure"); + + public: + using SlabMetadata = BackendSlabMetadata; + + constexpr FrontendMetaEntry() = default; + + /** + * Constructor, provides the remote and sizeclass embedded in a single + * pointer-sized word. This format is not guaranteed to be stable and so + * the second argument of this must always be the return value from + * `get_remote_and_sizeclass`. + */ + SNMALLOC_FAST_PATH + FrontendMetaEntry(BackendSlabMetadata* meta, uintptr_t remote_and_sizeclass) + : MetaEntryBase( + unsafe_to_uintptr(meta), remote_and_sizeclass) + { + SNMALLOC_ASSERT_MSG( + (REMOTE_BACKEND_MARKER & remote_and_sizeclass) == 0, + "Setting a backend-owned value ({}) via the front-end interface is not " + "allowed", + remote_and_sizeclass); + remote_and_sizeclass &= ~REMOTE_BACKEND_MARKER; + } + + /** + * Implicit copying of meta entries is almost certainly a bug and so the + * copy constructor is deleted to statically catch these problems. + */ + FrontendMetaEntry(const FrontendMetaEntry&) = delete; + + /** + * Explicit assignment operator, copies the data preserving the boundary bit + * in the target if it is set. + */ + FrontendMetaEntry& operator=(const FrontendMetaEntry& other) + { + MetaEntryBase::operator=(other); + return *this; + } + + /** + * Return the FrontendSlabMetadata metadata associated with this chunk, + * guarded by an assert that this chunk is being used as a slab (i.e., has + * an associated owning allocator). + */ + [[nodiscard]] SNMALLOC_FAST_PATH BackendSlabMetadata* + get_slab_metadata() const + { + SNMALLOC_ASSERT(get_remote() != nullptr); + return unsafe_from_uintptr( + meta & ~META_BOUNDARY_BIT); + } + }; + +} // namespace snmalloc diff --git a/src/snmalloc/mem/pool.h b/src/snmalloc/mem/pool.h new file mode 100644 index 000000000..0513c141d --- /dev/null +++ b/src/snmalloc/mem/pool.h @@ -0,0 +1,197 @@ +#pragma once + +#include "../ds/ds.h" +#include "pooled.h" + +#include + +namespace snmalloc +{ + /** + * Pool of a particular type of object. + * + * This pool will never return objects to the OS. 
It maintains a list of all + * objects ever allocated that can be iterated (not concurrency safe). Pooled + * types can be acquired from the pool, and released back to the pool. This is + * concurrency safe. + * + * This is used to bootstrap the allocation of allocators. + */ + template + class PoolState + { + template< + typename TT, + SNMALLOC_CONCEPT(IsConfig) Config, + PoolState& get_state()> + friend class Pool; + + private: + MPMCStack stack; + FlagWord lock{}; + capptr::Alloc list{nullptr}; + + public: + constexpr PoolState() = default; + }; + + /** + * Helper class used to instantiate a global PoolState. + * + * SingletonPoolState::pool is the default provider for the PoolState within + * the Pool class. + */ + template + class SingletonPoolState + { + /** + * SFINAE helper. Matched only if `T` implements `ensure_init`. Calls it + * if it exists. + */ + template + SNMALLOC_FAST_PATH static auto call_ensure_init(SharedStateHandle_*, int) + -> decltype(SharedStateHandle_::ensure_init()) + { + static_assert( + std::is_same::value, + "SFINAE parameter, should only be used with Config"); + SharedStateHandle_::ensure_init(); + } + + /** + * SFINAE helper. Matched only if `T` does not implement `ensure_init`. + * Does nothing if called. + */ + template + SNMALLOC_FAST_PATH static auto call_ensure_init(SharedStateHandle_*, long) + { + static_assert( + std::is_same::value, + "SFINAE parameter, should only be used with Config"); + } + + /** + * Call `Config::ensure_init()` if it is implemented, do nothing + * otherwise. + */ + SNMALLOC_FAST_PATH static void ensure_init() + { + call_ensure_init(nullptr, 0); + } + + static void make_pool(PoolState*) noexcept + { + ensure_init(); + // Default initializer already called on PoolState, no need to use + // placement new. + } + + public: + /** + * Returns a reference for the global PoolState for the given type. + * Also forces the initialization of the backend state, if needed. + */ + SNMALLOC_FAST_PATH static PoolState& pool() + { + return Singleton, &make_pool>::get(); + } + }; + + /** + * Wrapper class to access a pool of a particular type of object. + * + * The third template argument is a method to retrieve the actual PoolState. + * + * For the pool of allocators, refer to the AllocPool alias defined in + * corealloc.h. + * + * For a pool of another type, it is recommended to leave the + * third template argument with its default value. The SingletonPoolState + * class is used as a helper to provide a default PoolState management for + * this use case. + */ + template< + typename T, + SNMALLOC_CONCEPT(IsConfig) Config, + PoolState& get_state() = SingletonPoolState::pool> + class Pool + { + public: + template + static T* acquire(Args&&... args) + { + PoolState& pool = get_state(); + auto p = capptr::Alloc::unsafe_from(pool.stack.pop()); + + if (p != nullptr) + { + p->set_in_use(); + return p.unsafe_ptr(); + } + + auto raw = + Config::Backend::template alloc_meta_data(nullptr, sizeof(T)); + + if (raw == nullptr) + { + Config::Pal::error("Failed to initialise thread local allocator."); + } + + p = capptr_to_user_address_control( + Aal::capptr_bound( + capptr::Arena::unsafe_from(new (raw.unsafe_ptr()) + T(std::forward(args)...)), + sizeof(T))); + + FlagLock f(pool.lock); + p->list_next = pool.list; + pool.list = p; + + p->set_in_use(); + return p.unsafe_ptr(); + } + + /** + * Return to the pool an object previously retrieved by `acquire` + * + * Do not return objects from `extract`. 
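 *
 * A minimal usage sketch (illustrative; `MyPooled` stands for a type
 * derived from Pooled<MyPooled> and `Config` for the back-end
 * configuration):
 *
 *   auto* o = Pool<MyPooled, Config>::acquire();
 *   // ... use o ...
 *   Pool<MyPooled, Config>::release(o);  // o may be handed out again later,
 *                                        // without its destructor running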
+ */ + static void release(T* p) + { + // The object's destructor is not run. If the object is "reallocated", it + // is returned without the constructor being run, so the object is reused + // without re-initialisation. + p->reset_in_use(); + get_state().stack.push(p); + } + + static T* extract(T* p = nullptr) + { + // Returns a linked list of all objects in the stack, emptying the stack. + if (p == nullptr) + return get_state().stack.pop_all(); + + return p->next; + } + + /** + * Return to the pool a list of object previously retrieved by `extract` + * + * Do not return objects from `acquire`. + */ + static void restore(T* first, T* last) + { + // Pushes a linked list of objects onto the stack. Use to put a linked + // list returned by extract back onto the stack. + get_state().stack.push(first, last); + } + + static T* iterate(T* p = nullptr) + { + if (p == nullptr) + return get_state().list.unsafe_ptr(); + + return p->list_next.unsafe_ptr(); + } + }; +} // namespace snmalloc diff --git a/src/mem/pooled.h b/src/snmalloc/mem/pooled.h similarity index 67% rename from src/mem/pooled.h rename to src/snmalloc/mem/pooled.h index a4ffa1e3b..51fc3515d 100644 --- a/src/mem/pooled.h +++ b/src/snmalloc/mem/pooled.h @@ -1,28 +1,29 @@ #pragma once -#include "../ds/bits.h" +#include "../ds/ds.h" +#include "backend_concept.h" namespace snmalloc { + template + class PoolState; + template class Pooled { - private: - template - friend class Pool; + public: template< - class a, - Construction c, - template - typename P, - template - typename AP> + typename TT, + SNMALLOC_CONCEPT(IsConfig) Config, + PoolState& get_state()> + friend class Pool; + template friend class MPMCStack; /// Used by the pool for chaining together entries when not in use. std::atomic next{nullptr}; /// Used by the pool to keep the list of all entries ever created. - T* list_next; + capptr::Alloc list_next; std::atomic_flag in_use = ATOMIC_FLAG_INIT; public: @@ -45,4 +46,4 @@ namespace snmalloc return result; } }; -} // namespace snmalloc \ No newline at end of file +} // namespace snmalloc diff --git a/src/snmalloc/mem/remoteallocator.h b/src/snmalloc/mem/remoteallocator.h new file mode 100644 index 000000000..2b92e9f6e --- /dev/null +++ b/src/snmalloc/mem/remoteallocator.h @@ -0,0 +1,185 @@ +#pragma once + +#include "../ds/ds.h" +#include "freelist.h" +#include "metadata.h" +#include "sizeclasstable.h" + +#include +#include + +namespace snmalloc +{ + /** + * Global key for all remote lists. + */ + inline static FreeListKey key_global(0xdeadbeef, 0xbeefdead, 0xdeadbeef); + + /** + * + * A RemoteAllocator is the message queue of freed objects. It exposes a MPSC + * append-only atomic queue that uses one xchg per append. + * + * The internal pointers are considered QueuePtr-s to support deployment + * scenarios in which the RemoteAllocator itself is exposed to the client. + * This is excessively paranoid in the common case that the RemoteAllocator-s + * are as "hard" for the client to reach as the Pagemap, which we trust to + * store not just Tame CapPtr<>s but raw C++ pointers. + * + * While we could try to condition the types used here on a flag in the + * backend's `struct Flags Options` value, we instead expose two domesticator + * callbacks at the interface and are careful to use one for the front and + * back values and the other for pointers read from the queue itself. 
That's
+   * not ideal, but it lets the client condition its behavior appropriately and
+   * prevents us from accidentally following either of these pointers in generic
+   * code.
+   *
+   * `domesticate_head` is used for the pointer used to reach the head of the
+   * queue, while `domesticate_queue` is used to traverse the first link in the
+   * queue itself. In the case that the RemoteAllocator is not easily accessible
+   * to the client, `domesticate_head` can just be a type coercion, and
+   * `domesticate_queue` should perform actual validation. If the
+   * RemoteAllocator is exposed to the client, both Domesticators should perform
+   * validation.
+   */
+  struct alignas(REMOTE_MIN_ALIGN) RemoteAllocator
+  {
+    using alloc_id_t = address_t;
+
+    // Store the message queue on a separate cacheline. It is mutable data that
+    // is read by other threads.
+    alignas(CACHELINE_SIZE) freelist::AtomicQueuePtr back{nullptr};
+    // Store the two ends on different cache lines as they are accessed by
+    // different threads.
+    alignas(CACHELINE_SIZE) freelist::QueuePtr front{nullptr};
+
+    constexpr RemoteAllocator() = default;
+
+    void invariant()
+    {
+      SNMALLOC_ASSERT(back != nullptr);
+    }
+
+    void init(freelist::HeadPtr stub)
+    {
+      freelist::Object::atomic_store_null(stub, key_global);
+      front = capptr_rewild(stub);
+      back.store(front, std::memory_order_relaxed);
+      invariant();
+    }
+
+    freelist::QueuePtr destroy()
+    {
+      freelist::QueuePtr fnt = front;
+      back.store(nullptr, std::memory_order_relaxed);
+      front = nullptr;
+      return fnt;
+    }
+
+    inline bool is_empty()
+    {
+      freelist::QueuePtr bk = back.load(std::memory_order_relaxed);
+
+      return bk == front;
+    }
+
+    /**
+     * Pushes a list of messages to the queue. Each message from first to
+     * last should be linked together through their next pointers.
+     *
+     * The Domesticator here is used only on pointers read from the head. See
+     * the commentary on the class.
+     */
+    template<typename Domesticator_head>
+    void enqueue(
+      freelist::HeadPtr first,
+      freelist::HeadPtr last,
+      const FreeListKey& key,
+      Domesticator_head domesticate_head)
+    {
+      invariant();
+      freelist::Object::atomic_store_null(last, key);
+
+      // Exchange needs to be acq_rel.
+      // * It needs to be a release, so nullptr in next is visible.
+      // * Needs to be acquire, so linking into the list does not race with
+      //   the other threads' nullptr init of the next field.
+      freelist::QueuePtr prev =
+        back.exchange(capptr_rewild(last), std::memory_order_acq_rel);
+
+      freelist::Object::atomic_store_next(domesticate_head(prev), first, key);
+    }
+
+    freelist::QueuePtr peek()
+    {
+      return front;
+    }
+
+    /**
+     * Destructively iterate the queue. Each queue element is removed and fed
+     * to the callback in turn. The callback may return false to stop iteration
+     * early (but must have processed the element it was given!).
+     *
+     * Takes a domestication callback for each of "pointers read from head" and
+     * "pointers read from queue". See the commentary on the class.
+     */
+    template<
+      typename Domesticator_head,
+      typename Domesticator_queue,
+      typename Cb>
+    void dequeue(
+      const FreeListKey& key,
+      Domesticator_head domesticate_head,
+      Domesticator_queue domesticate_queue,
+      Cb cb)
+    {
+      invariant();
+      SNMALLOC_ASSERT(front != nullptr);
+
+      // Use back to bound, so we don't handle new entries.
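      // A sketch of a typical consumer-side drain (illustrative; `queue` is a
      // RemoteAllocator and `handle` a placeholder for per-message handling):
      //
      //   queue.dequeue(key_global, domesticate_head, domesticate_queue,
      //     [&](freelist::HeadPtr msg) {
      //       handle(msg);
      //       return true;  // return false to stop early, after handling msg
      //     });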
+ auto b = back.load(std::memory_order_relaxed); + freelist::HeadPtr curr = domesticate_head(front); + + while (address_cast(curr) != address_cast(b)) + { + freelist::HeadPtr next = curr->atomic_read_next(key, domesticate_queue); + // We have observed a non-linearisable effect of the queue. + // Just go back to allocating normally. + if (SNMALLOC_UNLIKELY(next == nullptr)) + break; + // We want this element next, so start it loading. + Aal::prefetch(next.unsafe_ptr()); + if (SNMALLOC_UNLIKELY(!cb(curr))) + { + /* + * We've domesticate_queue-d next so that we can read through it, but + * we're storing it back into client-accessible memory in + * !QueueHeadsAreTame builds, so go ahead and consider it Wild again. + * On QueueHeadsAreTame builds, the subsequent domesticate_head call + * above will also be a type-level sleight of hand, but we can still + * justify it by the domesticate_queue that happened in this + * dequeue(). + */ + front = capptr_rewild(next); + invariant(); + return; + } + + curr = next; + } + + /* + * Here, we've hit the end of the queue: next is nullptr and curr has not + * been handed to the callback. The same considerations about Wildness + * above hold here. + */ + front = capptr_rewild(curr); + invariant(); + } + + alloc_id_t trunc_id() + { + return address_cast(this); + } + }; +} // namespace snmalloc diff --git a/src/snmalloc/mem/remotecache.h b/src/snmalloc/mem/remotecache.h new file mode 100644 index 000000000..a415a1daa --- /dev/null +++ b/src/snmalloc/mem/remotecache.h @@ -0,0 +1,180 @@ +#pragma once + +#include "../ds/ds.h" +#include "backend_wrappers.h" +#include "freelist.h" +#include "metadata.h" +#include "remoteallocator.h" +#include "sizeclasstable.h" + +#include +#include + +namespace snmalloc +{ + /** + * Stores the remote deallocation to batch them before sending + */ + struct RemoteDeallocCache + { + std::array, REMOTE_SLOTS> list; + + /** + * The total amount of memory we are waiting for before we will dispatch + * to other allocators. Zero can mean we have not initialised the allocator + * yet. This is initialised to the 0 so that we always hit a slow path to + * start with, when we hit the slow path and need to dispatch everything, we + * can check if we are a real allocator and lazily provide a real allocator. + */ + int64_t capacity{0}; + +#ifndef NDEBUG + bool initialised = false; +#endif + + /// Used to find the index into the array of queues for remote + /// deallocation + /// r is used for which round of sending this is. + template + inline size_t get_slot(size_t i, size_t r) + { + constexpr size_t initial_shift = + bits::next_pow2_bits_const(allocator_size); + // static_assert( + // initial_shift >= 8, + // "Can't embed sizeclass_t into allocator ID low bits"); + SNMALLOC_ASSERT((initial_shift + (r * REMOTE_SLOT_BITS)) < 64); + return (i >> (initial_shift + (r * REMOTE_SLOT_BITS))) & REMOTE_MASK; + } + + /** + * Checks if the capacity has enough to cache an entry from this + * slab. Returns true, if this does not overflow the budget. + * + * This does not require initialisation to be safely called. 
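 *
 * A minimal sketch of how the owning allocator is expected to drive this
 * cache (mirrors the deallocation fast path in localalloc.h; `cache`, `p`
 * and `entry` are placeholders and `allocator_size` is the owning
 * allocator's size):
 *
 *   if (cache.reserve_space(entry))
 *     cache.dealloc<allocator_size>(
 *       entry.get_remote()->trunc_id(), p, key_global);
 *   else
 *     ; // take a slow path that eventually calls post() to flush batches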
+ */ + template + SNMALLOC_FAST_PATH bool reserve_space(const Entry& entry) + { + auto size = + static_cast(sizeclass_full_to_size(entry.get_sizeclass())); + + bool result = capacity > size; + if (result) + capacity -= size; + return result; + } + + template + SNMALLOC_FAST_PATH void dealloc( + RemoteAllocator::alloc_id_t target_id, + capptr::Alloc p, + const FreeListKey& key) + { + SNMALLOC_ASSERT(initialised); + auto r = p.template as_reinterpret>(); + + list[get_slot(target_id, 0)].add(r, key); + } + + template + bool post( + typename Config::LocalState* local_state, + RemoteAllocator::alloc_id_t id, + const FreeListKey& key) + { + SNMALLOC_ASSERT(initialised); + size_t post_round = 0; + bool sent_something = false; + auto domesticate = [local_state](freelist::QueuePtr p) + SNMALLOC_FAST_PATH_LAMBDA { + return capptr_domesticate(local_state, p); + }; + + while (true) + { + auto my_slot = get_slot(id, post_round); + + for (size_t i = 0; i < REMOTE_SLOTS; i++) + { + if (i == my_slot) + continue; + + if (!list[i].empty()) + { + auto [first, last] = list[i].extract_segment(key); + const auto& entry = + Config::Backend::get_metaentry(address_cast(first)); + auto remote = entry.get_remote(); + // If the allocator is not correctly aligned, then the bit that is + // set implies this is used by the backend, and we should not be + // deallocating memory here. + snmalloc_check_client( + !entry.is_backend_owned(), + "Delayed detection of attempt to free internal structure."); + if constexpr (Config::Options.QueueHeadsAreTame) + { + auto domesticate_nop = [](freelist::QueuePtr p) { + return freelist::HeadPtr::unsafe_from(p.unsafe_ptr()); + }; + remote->enqueue(first, last, key, domesticate_nop); + } + else + { + remote->enqueue(first, last, key, domesticate); + } + sent_something = true; + } + } + + if (list[my_slot].empty()) + break; + + // Entries could map back onto the "resend" list, + // so take copy of the head, mark the last element, + // and clear the original list. + freelist::Iter<> resend; + list[my_slot].close(resend, key); + + post_round++; + + while (!resend.empty()) + { + // Use the next N bits to spread out remote deallocs in our own + // slot. + auto r = resend.take(key, domesticate); + const auto& entry = Config::Backend::get_metaentry(address_cast(r)); + auto i = entry.get_remote()->trunc_id(); + size_t slot = get_slot(i, post_round); + list[slot].add(r, key); + } + } + + // Reset capacity as we have empty everything + capacity = REMOTE_CACHE; + + return sent_something; + } + + /** + * Constructor design to allow constant init + */ + constexpr RemoteDeallocCache() = default; + + /** + * Must be called before anything else to ensure actually initialised + * not just zero init. + */ + void init() + { +#ifndef NDEBUG + initialised = true; +#endif + for (auto& l : list) + { + l.init(); + } + capacity = REMOTE_CACHE; + } + }; +} // namespace snmalloc diff --git a/src/snmalloc/mem/sizeclasstable.h b/src/snmalloc/mem/sizeclasstable.h new file mode 100644 index 000000000..4590ebb55 --- /dev/null +++ b/src/snmalloc/mem/sizeclasstable.h @@ -0,0 +1,508 @@ +#pragma once + +#include "../ds/ds.h" + +/** + * This file contains all the code for transforming transforming sizes to + * sizeclasses and back. It also contains various sizeclass pre-calculated + * tables for operations based on size class such as `modulus` and `divisible + * by`, and constants for the slab based allocator. 
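 *
 * An illustrative round trip through these helpers (concrete values depend
 * on the configuration and are not normative):
 *
 *   smallsizeclass_t sc = size_to_sizeclass(24);
 *   size_t rsize = sizeclass_to_size(sc);       // smallest class size >= 24
 *   SNMALLOC_ASSERT(rsize >= 24);
 *   SNMALLOC_ASSERT(round_size(24) == rsize);
 *   SNMALLOC_ASSERT(size_to_sizeclass(rsize) == sc);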
+ * + * TODO: Due to the current structure for constexpr evaluation this file does + * not well delimit internal versus external APIs. Some refactoring should be + * done. + */ + +namespace snmalloc +{ + using smallsizeclass_t = size_t; + using chunksizeclass_t = size_t; + + constexpr static inline smallsizeclass_t size_to_sizeclass_const(size_t size) + { + // Don't use sizeclasses that are not a multiple of the alignment. + // For example, 24 byte allocations can be + // problematic for some data due to alignment issues. + auto sc = static_cast( + bits::to_exp_mant_const(size)); + + SNMALLOC_ASSERT(sc == static_cast(sc)); + + return sc; + } + + static constexpr size_t NUM_SMALL_SIZECLASSES = + size_to_sizeclass_const(MAX_SMALL_SIZECLASS_SIZE); + + // Large classes range from [MAX_SMALL_SIZECLASS_SIZE, ADDRESS_SPACE). + static constexpr size_t NUM_LARGE_CLASSES = + DefaultPal::address_bits - MAX_SMALL_SIZECLASS_BITS; + + // How many bits are required to represent either a large or a small + // sizeclass. + static constexpr size_t TAG_SIZECLASS_BITS = bits::max( + bits::next_pow2_bits_const(NUM_SMALL_SIZECLASSES + 1), + bits::next_pow2_bits_const(NUM_LARGE_CLASSES + 1)); + + // Number of bits required to represent a tagged sizeclass that can be + // either small or large. + static constexpr size_t SIZECLASS_REP_SIZE = + bits::one_at_bit(TAG_SIZECLASS_BITS + 1); + + /** + * Encapsulates a tagged union of large and small sizeclasses. + * + * Used in various lookup tables to make efficient code that handles + * all objects allocated by snmalloc. + */ + class sizeclass_t + { + static constexpr size_t TAG = bits::one_at_bit(TAG_SIZECLASS_BITS); + + size_t value{0}; + + constexpr sizeclass_t(size_t value) : value(value) {} + + public: + constexpr sizeclass_t() = default; + + constexpr static sizeclass_t from_small_class(smallsizeclass_t sc) + { + SNMALLOC_ASSERT(sc < TAG); + // Note could use `+` or `|`. Using `+` as will combine nicely with array + // offset. + return {TAG + sc}; + } + + /** + * Takes the number of leading zero bits from the actual large size-1. + * See size_to_sizeclass_full + */ + constexpr static sizeclass_t from_large_class(size_t large_class) + { + SNMALLOC_ASSERT(large_class < TAG); + return {large_class}; + } + + constexpr static sizeclass_t from_raw(size_t raw) + { + return {raw}; + } + + constexpr size_t index() + { + return value & (TAG - 1); + } + + constexpr smallsizeclass_t as_small() + { + SNMALLOC_ASSERT(is_small()); + return value & (TAG - 1); + } + + constexpr chunksizeclass_t as_large() + { + SNMALLOC_ASSERT(!is_small()); + return bits::BITS - (value & (TAG - 1)); + } + + constexpr size_t raw() + { + return value; + } + + constexpr bool is_small() + { + return (value & TAG) != 0; + } + + constexpr bool is_default() + { + return value == 0; + } + + constexpr bool operator==(sizeclass_t other) + { + return value == other.value; + } + }; + + using sizeclass_compress_t = uint8_t; + + inline SNMALLOC_FAST_PATH static size_t + aligned_size(size_t alignment, size_t size) + { + // Client responsible for checking alignment is not zero + SNMALLOC_ASSERT(alignment != 0); + // Client responsible for checking alignment is a power of two + SNMALLOC_ASSERT(bits::is_pow2(alignment)); + + return ((alignment - 1) | (size - 1)) + 1; + } + + /** + * This structure contains the fields required for fast paths for sizeclasses. + */ + struct sizeclass_data_fast + { + size_t size; + // We store the mask as it is used more on the fast path, and the size of + // the slab. 
+ size_t slab_mask; + // Table of constants for reciprocal division for each sizeclass. + size_t div_mult; + // Table of constants for reciprocal modulus for each sizeclass. + size_t mod_zero_mult; + }; + + /** + * This structure contains the remaining fields required for slow paths for + * sizeclasses. + */ + struct sizeclass_data_slow + { + uint16_t capacity; + uint16_t waking; + }; + + struct SizeClassTable + { + ModArray fast_; + ModArray slow_; + + size_t DIV_MULT_SHIFT{0}; + + [[nodiscard]] constexpr sizeclass_data_fast& fast(sizeclass_t index) + { + return fast_[index.raw()]; + } + + [[nodiscard]] constexpr sizeclass_data_fast fast(sizeclass_t index) const + { + return fast_[index.raw()]; + } + + [[nodiscard]] constexpr sizeclass_data_fast& fast_small(smallsizeclass_t sc) + { + return fast_[sizeclass_t::from_small_class(sc).raw()]; + } + + [[nodiscard]] constexpr sizeclass_data_fast + fast_small(smallsizeclass_t sc) const + { + return fast_[sizeclass_t::from_small_class(sc).raw()]; + } + + [[nodiscard]] constexpr sizeclass_data_slow& slow(sizeclass_t index) + { + return slow_[index.raw()]; + } + + [[nodiscard]] constexpr sizeclass_data_slow slow(sizeclass_t index) const + { + return slow_[index.raw()]; + } + + constexpr SizeClassTable() : fast_(), slow_(), DIV_MULT_SHIFT() + { + size_t max_capacity = 0; + + for (sizeclass_compress_t sizeclass = 0; + sizeclass < NUM_SMALL_SIZECLASSES; + sizeclass++) + { + auto& meta = fast_small(sizeclass); + + size_t rsize = + bits::from_exp_mant(sizeclass); + meta.size = rsize; + size_t slab_bits = bits::max( + bits::next_pow2_bits_const(MIN_OBJECT_COUNT * rsize), MIN_CHUNK_BITS); + + meta.slab_mask = bits::one_at_bit(slab_bits) - 1; + + auto& meta_slow = slow(sizeclass_t::from_small_class(sizeclass)); + meta_slow.capacity = + static_cast((meta.slab_mask + 1) / rsize); + + meta_slow.waking = +#ifdef SNMALLOC_CHECK_CLIENT + static_cast(meta_slow.capacity / 4); +#else + static_cast(bits::min((meta_slow.capacity / 4), 32)); +#endif + + if (meta_slow.capacity > max_capacity) + { + max_capacity = meta_slow.capacity; + } + } + + // Get maximum precision to calculate largest division range. + DIV_MULT_SHIFT = bits::BITS - bits::next_pow2_bits_const(max_capacity); + + for (sizeclass_compress_t sizeclass = 0; + sizeclass < NUM_SMALL_SIZECLASSES; + sizeclass++) + { + // Calculate reciprocal division constant. + auto& meta = fast_small(sizeclass); + meta.div_mult = + ((bits::one_at_bit(DIV_MULT_SHIFT) - 1) / meta.size) + 1; + + size_t zero = 0; + meta.mod_zero_mult = (~zero / meta.size) + 1; + } + + for (size_t sizeclass = 0; sizeclass < bits::BITS; sizeclass++) + { + auto lsc = sizeclass_t::from_large_class(sizeclass); + auto& meta = fast(lsc); + meta.size = sizeclass == 0 ? 0 : bits::one_at_bit(lsc.as_large()); + meta.slab_mask = meta.size - 1; + // The slab_mask will do all the necessary work, so + // perform identity multiplication for the test. + meta.mod_zero_mult = 1; + // The slab_mask will do all the necessary work for division + // so collapse the calculated offset. 
+ meta.div_mult = 0; + } + } + }; + + static inline constexpr SizeClassTable sizeclass_metadata = SizeClassTable(); + + static constexpr size_t DIV_MULT_SHIFT = sizeclass_metadata.DIV_MULT_SHIFT; + + constexpr static inline size_t sizeclass_to_size(smallsizeclass_t sizeclass) + { + return sizeclass_metadata.fast_small(sizeclass).size; + } + + static inline size_t sizeclass_full_to_size(sizeclass_t sizeclass) + { + return sizeclass_metadata.fast(sizeclass).size; + } + + inline static size_t sizeclass_full_to_slab_size(sizeclass_t sizeclass) + { + return sizeclass_metadata.fast(sizeclass).slab_mask + 1; + } + + inline static size_t sizeclass_to_slab_size(smallsizeclass_t sizeclass) + { + return sizeclass_metadata.fast_small(sizeclass).slab_mask + 1; + } + + /** + * Only wake slab if we have this many free allocations + * + * This helps remove bouncing around empty to non-empty cases. + * + * It also increases entropy, when we have randomisation. + */ + inline uint16_t threshold_for_waking_slab(smallsizeclass_t sizeclass) + { + return sizeclass_metadata.slow(sizeclass_t::from_small_class(sizeclass)) + .waking; + } + + inline static size_t sizeclass_to_slab_sizeclass(smallsizeclass_t sizeclass) + { + size_t ssize = sizeclass_to_slab_size(sizeclass); + + return bits::next_pow2_bits(ssize) - MIN_CHUNK_BITS; + } + + inline static size_t slab_sizeclass_to_size(chunksizeclass_t sizeclass) + { + return bits::one_at_bit(MIN_CHUNK_BITS + sizeclass); + } + + /** + * For large allocations, the metaentry stores the raw log_2 of the size, + * which must be shifted into the index space of slab_sizeclass-es. + */ + inline static size_t + metaentry_chunk_sizeclass_to_slab_sizeclass(chunksizeclass_t sizeclass) + { + return sizeclass - MIN_CHUNK_BITS; + } + + inline constexpr static uint16_t + sizeclass_to_slab_object_count(smallsizeclass_t sizeclass) + { + return sizeclass_metadata.slow(sizeclass_t::from_small_class(sizeclass)) + .capacity; + } + + inline static address_t start_of_object(sizeclass_t sc, address_t addr) + { + auto meta = sizeclass_metadata.fast(sc); + address_t slab_start = addr & ~meta.slab_mask; + size_t offset = addr & meta.slab_mask; + size_t size = meta.size; + + if constexpr (sizeof(addr) >= 8) + { + // Only works for 64 bit multiplication, as the following will overflow in + // 32bit. + // Based on + // https://lemire.me/blog/2019/02/20/more-fun-with-fast-remainders-when-the-divisor-is-a-constant/ + // We are using an adaptation of the "indirect" method. By using the + // indirect method we can handle the large power of two classes just with + // the slab_mask by making the `div_mult` zero. The link uses 128 bit + // multiplication, we have shrunk the range of the calculation to remove + // this dependency. 
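      // Restating the identity relied on below: div_mult is built in the
      // SizeClassTable constructor as ((2^DIV_MULT_SHIFT - 1) / size) + 1,
      // i.e. ceil(2^DIV_MULT_SHIFT / size), and DIV_MULT_SHIFT is chosen so
      // that, for every offset that can occur within a slab,
      //   (offset * div_mult) >> DIV_MULT_SHIFT == offset / size,
      // making offset_start equal to (offset / size) * size, the start of
      // the object containing addr.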
+ size_t offset_start = ((offset * meta.div_mult) >> DIV_MULT_SHIFT) * size; + return slab_start + offset_start; + } + else + { + if (size == 0) + return 0; + return slab_start + (offset / size) * size; + } + } + + inline static size_t index_in_object(sizeclass_t sc, address_t addr) + { + return addr - start_of_object(sc, addr); + } + + inline static size_t remaining_bytes(sizeclass_t sc, address_t addr) + { + return sizeclass_metadata.fast(sc).size - index_in_object(sc, addr); + } + + inline static bool is_start_of_object(sizeclass_t sc, address_t addr) + { + size_t offset = addr & (sizeclass_full_to_slab_size(sc) - 1); + + // Only works up to certain offsets, exhaustively tested by rounding.cc + if constexpr (sizeof(offset) >= 8) + { + // Only works for 64 bit multiplication, as the following will overflow in + // 32bit. + // This is based on: + // https://lemire.me/blog/2019/02/20/more-fun-with-fast-remainders-when-the-divisor-is-a-constant/ + auto mod_zero_mult = sizeclass_metadata.fast(sc).mod_zero_mult; + return (offset * mod_zero_mult) < mod_zero_mult; + } + else + // Use 32-bit division as considerably faster than 64-bit, and + // everything fits into 32bits here. + return static_cast(offset % sizeclass_full_to_size(sc)) == 0; + } + + inline static size_t large_size_to_chunk_size(size_t size) + { + return bits::next_pow2(size); + } + + inline static size_t large_size_to_chunk_sizeclass(size_t size) + { + return bits::next_pow2_bits(size) - MIN_CHUNK_BITS; + } + + constexpr static SNMALLOC_PURE size_t sizeclass_lookup_index(const size_t s) + { + // We subtract and shift to reduce the size of the table, i.e. we don't have + // to store a value for every size. + return (s - 1) >> MIN_ALLOC_BITS; + } + + static inline smallsizeclass_t size_to_sizeclass(size_t size) + { + constexpr static size_t sizeclass_lookup_size = + sizeclass_lookup_index(MAX_SMALL_SIZECLASS_SIZE); + + /** + * This struct is used to statically initialise a table for looking up + * the correct sizeclass. + */ + struct SizeClassLookup + { + sizeclass_compress_t table[sizeclass_lookup_size] = {{}}; + + constexpr SizeClassLookup() + { + size_t curr = 1; + for (sizeclass_compress_t sizeclass = 0; + sizeclass < NUM_SMALL_SIZECLASSES; + sizeclass++) + { + for (; curr <= sizeclass_metadata.fast_small(sizeclass).size; + curr += 1 << MIN_ALLOC_BITS) + { + auto i = sizeclass_lookup_index(curr); + if (i == sizeclass_lookup_size) + break; + table[i] = sizeclass; + } + } + } + }; + + static constexpr SizeClassLookup sizeclass_lookup = SizeClassLookup(); + + auto index = sizeclass_lookup_index(size); + if (index < sizeclass_lookup_size) + { + return sizeclass_lookup.table[index]; + } + + // Check this is not called on large sizes. + SNMALLOC_ASSERT(size == 0); + // Map size == 0 to the first sizeclass. + return 0; + } + + /** + * A compressed size representation, + * either a small size class with the 7th bit set + * or a large class with the 7th bit not set. + * Large classes are stored as a mask shift. + * size = (~0 >> lc) + 1; + * Thus large size class 0, has size 0. + * And large size class 33, has size 2^31 + */ + static inline sizeclass_t size_to_sizeclass_full(size_t size) + { + if ((size - 1) < sizeclass_to_size(NUM_SMALL_SIZECLASSES - 1)) + { + return sizeclass_t::from_small_class(size_to_sizeclass(size)); + } + // bits::clz is undefined on 0, but we have size == 1 has already been + // handled here. We conflate 0 and sizes larger than we can allocate. 
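    // Worked example on a 64-bit platform: size == 2^31 gives
    // clz(2^31 - 1) == 33, i.e. large class 33, whose size is
    // (~0 >> 33) + 1 == 2^31, matching the encoding described above.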
+ return sizeclass_t::from_large_class(bits::clz(size - 1)); + } + + inline SNMALLOC_FAST_PATH static size_t round_size(size_t size) + { + if (size > sizeclass_to_size(NUM_SMALL_SIZECLASSES - 1)) + { + return bits::next_pow2(size); + } + // If realloc(ptr, 0) returns nullptr, some consumers treat this as a + // reallocation failure and abort. To avoid this, we round up the size of + // requested allocations to the smallest size class. This can be changed + // on any platform that's happy to return nullptr from realloc(ptr,0) and + // should eventually become a configuration option. + if (size == 0) + { + return sizeclass_to_size(size_to_sizeclass(1)); + } + return sizeclass_to_size(size_to_sizeclass(size)); + } + + /// Returns the alignment that this size naturally has, that is + /// all allocations of size `size` will be aligned to the returned value. + inline SNMALLOC_FAST_PATH static size_t natural_alignment(size_t size) + { + auto rsize = round_size(size); + if (size == 0) + return 1; + return bits::one_at_bit(bits::ctz(rsize)); + } +} // namespace snmalloc diff --git a/src/snmalloc/mem/ticker.h b/src/snmalloc/mem/ticker.h new file mode 100644 index 000000000..2bce041e0 --- /dev/null +++ b/src/snmalloc/mem/ticker.h @@ -0,0 +1,100 @@ +#pragma once + +#include "../ds_core/ds_core.h" + +#include + +namespace snmalloc +{ + /** + * This class will attempt to call the PAL every 50ms to check the time. + * If the caller of check_tick, does so more frequently, it will attempt + * to back-off to only query the time, every n calls to check_tick, where + * `n` adapts to the current frequency of calling. + * + * The aim is to reduce the time spent querying the time as this might be + * an expensive operation if time has been virtualised. + */ + template + class Ticker + { + /** + * Calls to check_tick required before the time is next queried + */ + uint64_t count_down = 1; + + /** + * Number of ticks next time we check the time. + * That is, + * counted - count_down + * Is how many ticks, since last_epoch_ms was updated. + */ + uint64_t counted = 1; + + /** + * Last time we queried the clock. + */ + uint64_t last_query_ms = 0; + + /** + * Slow path that actually queries clock and sets up + * how many calls for the next time we hit the slow path. + */ + template + SNMALLOC_SLOW_PATH T check_tick_slow(T p = nullptr) + { + uint64_t now_ms = PAL::time_in_ms(); + + // Set up clock. + if (last_query_ms == 0) + { + last_query_ms = now_ms; + count_down = 1; + counted = 1; + return p; + } + + uint64_t duration_ms = now_ms - last_query_ms; + last_query_ms = now_ms; + + // Check is below clock resolution + if (duration_ms == 0) + { + // Exponential back off + count_down = counted; + counted *= 2; + return p; + } + + constexpr size_t deadline_in_ms = 50; + + // Estimate number of ticks to get to the new deadline, based on the + // current interval + auto new_deadline_in_ticks = + ((1 + counted) * deadline_in_ms) / duration_ms; + + counted = new_deadline_in_ticks; + count_down = new_deadline_in_ticks; + + return p; + } + + public: + template + SNMALLOC_FAST_PATH T check_tick(T p = nullptr) + { + if constexpr (pal_supports) + { + // Check before decrement, so that later calcations can use + // count_down == 0 for check on the next call. + // This is used if the ticks are way below the frequency of + // heart beat. 
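      // Illustrative behaviour when check_tick is called far more often than
      // the clock advances: each slow-path visit observes duration_ms == 0,
      // so the gap between clock queries roughly doubles (1, 2, 4, 8, ...
      // calls) until a query finally sees time passing, at which point the
      // 50ms deadline estimate above takes over.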
+ if (--count_down == 0) + { + return check_tick_slow(p); + } + } + return p; + } + }; +} // namespace snmalloc diff --git a/src/snmalloc/override/jemalloc_compat.cc b/src/snmalloc/override/jemalloc_compat.cc new file mode 100644 index 000000000..a65554630 --- /dev/null +++ b/src/snmalloc/override/jemalloc_compat.cc @@ -0,0 +1,388 @@ +#include "override.h" + +#include +#include + +using namespace snmalloc; +namespace +{ + /** + * Helper for JEMalloc-compatible non-standard APIs. These take a flags + * argument as an `int`. This class provides a wrapper for extracting the + * fields embedded in this API. + */ + class JEMallocFlags + { + /** + * The raw flags. + */ + int flags; + + public: + /** + * Constructor, takes a `flags` parameter from one of the `*allocx()` + * JEMalloc APIs. + */ + constexpr JEMallocFlags(int flags) : flags(flags) {} + + /** + * Jemalloc's *allocx APIs store the alignment in the low 6 bits of the + * flags, allowing any alignment up to 2^63. + */ + constexpr int log2align() + { + return flags & 0x3f; + } + + /** + * Jemalloc's *allocx APIs use bit 6 to indicate whether memory should be + * zeroed. + */ + constexpr bool should_zero() + { + return (flags & 0x40) == 0x40; + } + + /** + * Jemalloc's *allocm APIs use bit 7 to indicate whether reallocation may + * move. This is ignored by the `*allocx` functions. + */ + constexpr bool may_not_move() + { + return (flags & 0x80) == 0x80; + } + + size_t aligned_size(size_t size) + { + return ::aligned_size(bits::one_at_bit(log2align()), size); + } + }; + + /** + * Error codes from Jemalloc 3's experimental API. + */ + enum JEMalloc3Result + { + /** + * Allocation succeeded. + */ + allocm_success = 0, + + /** + * Allocation failed because memory was not available. + */ + allocm_err_oom = 1, + + /** + * Reallocation failed because it would have required moving. + */ + allocm_err_not_moved = 2 + }; +} // namespace + +extern "C" +{ + // Stub implementations for jemalloc compatibility. + // These are called by FreeBSD's libthr (pthreads) to notify malloc of + // various events. They are currently unused, though we may wish to reset + // statistics on fork if built with statistics. + + SNMALLOC_EXPORT SNMALLOC_USED_FUNCTION inline void _malloc_prefork(void) {} + SNMALLOC_EXPORT SNMALLOC_USED_FUNCTION inline void _malloc_postfork(void) {} + SNMALLOC_EXPORT SNMALLOC_USED_FUNCTION inline void _malloc_first_thread(void) + {} + + /** + * Jemalloc API provides a way of avoiding name lookup when calling + * `mallctl`. For now, always return an error. + */ + int SNMALLOC_NAME_MANGLE(mallctlnametomib)(const char*, size_t*, size_t*) + { + return ENOENT; + } + + /** + * Jemalloc API provides a generic entry point for various functions. For + * now, this is always implemented to return an error. + */ + int SNMALLOC_NAME_MANGLE(mallctlbymib)( + const size_t*, size_t, void*, size_t*, void*, size_t) + { + return ENOENT; + } + + /** + * Jemalloc API provides a generic entry point for various functions. For + * now, this is always implemented to return an error. + */ + SNMALLOC_EXPORT int + SNMALLOC_NAME_MANGLE(mallctl)(const char*, void*, size_t*, void*, size_t) + { + return ENOENT; + } + +#ifdef SNMALLOC_JEMALLOC3_EXPERIMENTAL + /** + * Jemalloc 3 experimental API. Allocates at least `size` bytes and returns + * the result in `*ptr`, if `rsize` is not null then writes the allocated size + * into `*rsize`. `flags` controls whether the memory is zeroed and what + * alignment is requested. 
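 *
 * A worked example of the flags decoding used here: with flags == 0x44,
 * log2align() == 4 (16-byte alignment) and should_zero() is true, so a
 * 20-byte request becomes aligned_size(16, 20) == 32 zeroed bytes.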
+ */ + int SNMALLOC_NAME_MANGLE(allocm)( + void** ptr, size_t* rsize, size_t size, int flags) + { + auto f = JEMallocFlags(flags); + size = f.aligned_size(size); + if (rsize != nullptr) + { + *rsize = round_size(size); + } + if (f.should_zero()) + { + *ptr = ThreadAlloc::get().alloc(size); + } + else + { + *ptr = ThreadAlloc::get().alloc(size); + } + return (*ptr != nullptr) ? allocm_success : allocm_err_oom; + } + + /** + * Jemalloc 3 experimental API. Reallocates the allocation in `*ptr` to be at + * least `size` bytes and returns the result in `*ptr`, if `rsize` is not null + * then writes the allocated size into `*rsize`. `flags` controls whether the + * memory is zeroed and what alignment is requested and whether reallocation + * is permitted. If reallocating, the size will be at least `size` + `extra` + * bytes. + */ + int SNMALLOC_NAME_MANGLE(rallocm)( + void** ptr, size_t* rsize, size_t size, size_t extra, int flags) + { + auto f = JEMallocFlags(flags); + auto alloc_size = f.aligned_size(size); + + auto& a = ThreadAlloc::get(); + size_t sz = a.alloc_size(*ptr); + // Keep the current allocation if the given size is in the same sizeclass. + if (sz == round_size(alloc_size)) + { + if (rsize != nullptr) + { + *rsize = sz; + } + return allocm_success; + } + + if (f.may_not_move()) + { + return allocm_err_not_moved; + } + + if (std::numeric_limits::max() - size > extra) + { + alloc_size = f.aligned_size(size + extra); + } + + void* p = + f.should_zero() ? a.alloc(alloc_size) : a.alloc(alloc_size); + if (SNMALLOC_LIKELY(p != nullptr)) + { + sz = bits::min(alloc_size, sz); + // Guard memcpy as GCC is assuming not nullptr for ptr after the memcpy + // otherwise. + if (sz != 0) + { + memcpy(p, *ptr, sz); + } + a.dealloc(*ptr); + *ptr = p; + if (rsize != nullptr) + { + *rsize = alloc_size; + } + return allocm_success; + } + return allocm_err_oom; + } + + /** + * Jemalloc 3 experimental API. Sets `*rsize` to the size of the allocation + * at `*ptr`. The third argument contains some flags relating to arenas that + * we ignore. + */ + int SNMALLOC_NAME_MANGLE(sallocm)(const void* ptr, size_t* rsize, int) + { + *rsize = ThreadAlloc::get().alloc_size(ptr); + return allocm_success; + } + + /** + * Jemalloc 3 experimental API. Deallocates the allocation + * at `*ptr`. The second argument contains some flags relating to arenas that + * we ignore. + */ + int SNMALLOC_NAME_MANGLE(dallocm)(void* ptr, int) + { + ThreadAlloc::get().dealloc(ptr); + return allocm_success; + } + + /** + * Jemalloc 3 experimental API. Returns in `*rsize` the size of the + * allocation that would be returned if `size` and `flags` are passed to + * `allocm`. + */ + int SNMALLOC_NAME_MANGLE(nallocm)(size_t* rsize, size_t size, int flags) + { + *rsize = round_size(JEMallocFlags(flags).aligned_size(size)); + return allocm_success; + } +#endif + +#ifdef SNMALLOC_JEMALLOC_NONSTANDARD + /** + * Jemalloc function that provides control over alignment and zeroing + * behaviour via the `flags` argument. This argument also includes control + * over the thread cache and arena to use. These don't translate directly to + * snmalloc and so are ignored. + */ + SNMALLOC_EXPORT void* SNMALLOC_NAME_MANGLE(mallocx)(size_t size, int flags) + { + auto f = JEMallocFlags(flags); + size = f.aligned_size(size); + if (f.should_zero()) + { + return ThreadAlloc::get().alloc(size); + } + return ThreadAlloc::get().alloc(size); + } + + /** + * Jemalloc non-standard function that is similar to `realloc`. 
This can + * request zeroed memory for any newly allocated memory, though only if the + * object grows (which, for snmalloc, means if it's copied). The flags + * controlling the thread cache and arena are ignored. + */ + SNMALLOC_EXPORT void* + SNMALLOC_NAME_MANGLE(rallocx)(void* ptr, size_t size, int flags) + { + auto f = JEMallocFlags(flags); + size = f.aligned_size(size); + + auto& a = ThreadAlloc::get(); + size_t sz = round_size(a.alloc_size(ptr)); + // Keep the current allocation if the given size is in the same sizeclass. + if (sz == size) + { + return ptr; + } + + if (size == (size_t)-1) + { + return nullptr; + } + + // We have a choice here of either asking for zeroed memory, or trying to + // zero the remainder. The former is *probably* faster for large + // allocations, because we get zeroed memory from the PAL and don't zero it + // twice. This is not profiled and so should be considered for refactoring + // if anyone cares about the performance of these APIs. + void* p = f.should_zero() ? a.alloc(size) : a.alloc(size); + if (SNMALLOC_LIKELY(p != nullptr)) + { + sz = bits::min(size, sz); + // Guard memcpy as GCC is assuming not nullptr for ptr after the memcpy + // otherwise. + if (sz != 0) + memcpy(p, ptr, sz); + a.dealloc(ptr); + } + return p; + } + + /** + * Jemalloc non-standard API that performs a `realloc` only if it can do so + * without copying and returns the size of the underlying object. With + * snmalloc, this simply returns the size of the sizeclass backing the + * object. + */ + size_t SNMALLOC_NAME_MANGLE(xallocx)(void* ptr, size_t, size_t, int) + { + auto& a = ThreadAlloc::get(); + return a.alloc_size(ptr); + } + + /** + * Jemalloc non-standard API that queries the underlying size of the + * allocation. + */ + size_t SNMALLOC_NAME_MANGLE(sallocx)(const void* ptr, int) + { + auto& a = ThreadAlloc::get(); + return a.alloc_size(ptr); + } + + /** + * Jemalloc non-standard API that frees `ptr`. The second argument allows + * specifying a thread cache or arena but this is currently unused in + * snmalloc. + */ + void SNMALLOC_NAME_MANGLE(dallocx)(void* ptr, int) + { + ThreadAlloc::get().dealloc(ptr); + } + + /** + * Jemalloc non-standard API that frees `ptr`. The second argument specifies + * a size, which is intended to speed up the operation. This could improve + * performance for snmalloc, if we could guarantee that this is allocated by + * the current thread but is otherwise not helpful. The third argument allows + * specifying a thread cache or arena but this is currently unused in + * snmalloc. + */ + void SNMALLOC_NAME_MANGLE(sdallocx)(void* ptr, size_t, int) + { + ThreadAlloc::get().dealloc(ptr); + } + + /** + * Jemalloc non-standard API that returns the size of memory that would be + * allocated if the same arguments were passed to `mallocx`. + */ + size_t SNMALLOC_NAME_MANGLE(nallocx)(size_t size, int flags) + { + return round_size(JEMallocFlags(flags).aligned_size(size)); + } +#endif + +#if !defined(__PIC__) && defined(SNMALLOC_BOOTSTRAP_ALLOCATOR) + // The following functions are required to work before TLS is set up, in + // statically-linked programs. These temporarily grab an allocator from the + // pool and return it. + + void* __je_bootstrap_malloc(size_t size) + { + return get_scoped_allocator()->alloc(size); + } + + void* __je_bootstrap_calloc(size_t nmemb, size_t size) + { + bool overflow = false; + size_t sz = bits::umul(size, nmemb, overflow); + if (overflow) + { + errno = ENOMEM; + return nullptr; + } + // Include size 0 in the first sizeclass. 
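+ // Branch-free bump of a zero-sized request to one byte: for sz == 0,
+ // (sz - 1) is all ones, so its top bit (shifted down) contributes 1;
+ // for any realistic non-zero size the top bit of (sz - 1) is clear and
+ // sz is left unchanged.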
+ sz = ((sz - 1) >> (bits::BITS - 1)) + sz; + return get_scoped_allocator()->alloc(sz); + } + + void __je_bootstrap_free(void* ptr) + { + get_scoped_allocator()->dealloc(ptr); + } +#endif +} diff --git a/src/snmalloc/override/malloc-extensions.cc b/src/snmalloc/override/malloc-extensions.cc new file mode 100644 index 000000000..d84210ef8 --- /dev/null +++ b/src/snmalloc/override/malloc-extensions.cc @@ -0,0 +1,13 @@ +#include "malloc-extensions.h" + +#include "../snmalloc.h" + +using namespace snmalloc; + +void get_malloc_info_v1(malloc_info_v1* stats) +{ + auto curr = StandardConfig::Backend::get_current_usage(); + auto peak = StandardConfig::Backend::get_peak_usage(); + stats->current_memory_usage = curr; + stats->peak_memory_usage = peak; +} diff --git a/src/override/malloc-extensions.h b/src/snmalloc/override/malloc-extensions.h similarity index 100% rename from src/override/malloc-extensions.h rename to src/snmalloc/override/malloc-extensions.h diff --git a/src/snmalloc/override/malloc.cc b/src/snmalloc/override/malloc.cc new file mode 100644 index 000000000..5830f9454 --- /dev/null +++ b/src/snmalloc/override/malloc.cc @@ -0,0 +1,228 @@ +#include "override.h" + +#include +#include + +using namespace snmalloc; + +#ifndef MALLOC_USABLE_SIZE_QUALIFIER +# define MALLOC_USABLE_SIZE_QUALIFIER +#endif + +extern "C" +{ + SNMALLOC_EXPORT void* SNMALLOC_NAME_MANGLE(__malloc_end_pointer)(void* ptr) + { + return ThreadAlloc::get().external_pointer(ptr); + } + + SNMALLOC_EXPORT void* SNMALLOC_NAME_MANGLE(malloc)(size_t size) + { + return ThreadAlloc::get().alloc(size); + } + + SNMALLOC_EXPORT void SNMALLOC_NAME_MANGLE(free)(void* ptr) + { + ThreadAlloc::get().dealloc(ptr); + } + + SNMALLOC_EXPORT void SNMALLOC_NAME_MANGLE(cfree)(void* ptr) + { + ThreadAlloc::get().dealloc(ptr); + } + + /** + * Clang was helpfully inlining the constant return value, and + * thus converting from a tail call to an ordinary call. + */ + SNMALLOC_EXPORT inline void* snmalloc_not_allocated = nullptr; + + static SNMALLOC_SLOW_PATH void* SNMALLOC_NAME_MANGLE(snmalloc_set_error)() + { + errno = ENOMEM; + return snmalloc_not_allocated; + } + + SNMALLOC_EXPORT void* SNMALLOC_NAME_MANGLE(calloc)(size_t nmemb, size_t size) + { + bool overflow = false; + size_t sz = bits::umul(size, nmemb, overflow); + if (SNMALLOC_UNLIKELY(overflow)) + { + return SNMALLOC_NAME_MANGLE(snmalloc_set_error)(); + } + return ThreadAlloc::get().alloc(sz); + } + + SNMALLOC_EXPORT + size_t SNMALLOC_NAME_MANGLE(malloc_usable_size)( + MALLOC_USABLE_SIZE_QUALIFIER void* ptr) + { + return ThreadAlloc::get().alloc_size(ptr); + } + + SNMALLOC_EXPORT + size_t SNMALLOC_NAME_MANGLE(malloc_good_size)(size_t size) + { + return round_size(size); + } + + SNMALLOC_EXPORT void* SNMALLOC_NAME_MANGLE(realloc)(void* ptr, size_t size) + { + auto& a = ThreadAlloc::get(); + size_t sz = a.alloc_size(ptr); + // Keep the current allocation if the given size is in the same sizeclass. + if (sz == round_size(size)) + { +#ifdef SNMALLOC_PASS_THROUGH + // snmallocs alignment guarantees can be broken by realloc in pass-through + // this is not exercised, by existing clients, but is tested. + if (pointer_align_up(ptr, natural_alignment(size)) == ptr) + return ptr; +#else + return ptr; +#endif + } + + if (size == (size_t)-1) + { + errno = ENOMEM; + return nullptr; + } + + void* p = a.alloc(size); + if (SNMALLOC_LIKELY(p != nullptr)) + { + sz = bits::min(size, sz); + // Guard memcpy as GCC is assuming not nullptr for ptr after the memcpy + // otherwise. 
+ if (sz != 0) + memcpy(p, ptr, sz); + a.dealloc(ptr); + } + else if (SNMALLOC_LIKELY(size == 0)) + { + a.dealloc(ptr); + } + else + { + errno = ENOMEM; + } + return p; + } + +#if !defined(SNMALLOC_NO_REALLOCARRAY) + SNMALLOC_EXPORT void* + SNMALLOC_NAME_MANGLE(reallocarray)(void* ptr, size_t nmemb, size_t size) + { + bool overflow = false; + size_t sz = bits::umul(size, nmemb, overflow); + if (overflow) + { + errno = ENOMEM; + return nullptr; + } + return SNMALLOC_NAME_MANGLE(realloc)(ptr, sz); + } +#endif + +#if !defined(SNMALLOC_NO_REALLOCARR) + SNMALLOC_EXPORT int + SNMALLOC_NAME_MANGLE(reallocarr)(void* ptr_, size_t nmemb, size_t size) + { + int err = errno; + auto& a = ThreadAlloc::get(); + bool overflow = false; + size_t sz = bits::umul(size, nmemb, overflow); + if (sz == 0) + { + errno = err; + return 0; + } + if (overflow) + { + errno = err; + return EOVERFLOW; + } + + void** ptr = reinterpret_cast(ptr_); + void* p = a.alloc(sz); + if (p == nullptr) + { + errno = ENOMEM; + return ENOMEM; + } + + sz = bits::min(sz, a.alloc_size(*ptr)); + // Guard memcpy as GCC is assuming not nullptr for ptr after the memcpy + // otherwise. + if (sz != 0) + memcpy(p, *ptr, sz); + errno = err; + a.dealloc(*ptr); + *ptr = p; + return 0; + } +#endif + + SNMALLOC_EXPORT void* + SNMALLOC_NAME_MANGLE(memalign)(size_t alignment, size_t size) + { + if ((alignment == 0) || (alignment == size_t(-1))) + { + errno = EINVAL; + return nullptr; + } + + if ((size + alignment) < size) + { + errno = ENOMEM; + return nullptr; + } + + return SNMALLOC_NAME_MANGLE(malloc)(aligned_size(alignment, size)); + } + + SNMALLOC_EXPORT void* + SNMALLOC_NAME_MANGLE(aligned_alloc)(size_t alignment, size_t size) + { + SNMALLOC_ASSERT((size % alignment) == 0); + return SNMALLOC_NAME_MANGLE(memalign)(alignment, size); + } + + SNMALLOC_EXPORT int SNMALLOC_NAME_MANGLE(posix_memalign)( + void** memptr, size_t alignment, size_t size) + { + if ((alignment < sizeof(uintptr_t) || ((alignment & (alignment - 1)) != 0))) + { + return EINVAL; + } + + void* p = SNMALLOC_NAME_MANGLE(memalign)(alignment, size); + if (SNMALLOC_UNLIKELY(p == nullptr)) + { + if (size != 0) + return ENOMEM; + } + *memptr = p; + return 0; + } + +#if !defined(__FreeBSD__) && !defined(__OpenBSD__) + SNMALLOC_EXPORT void* SNMALLOC_NAME_MANGLE(valloc)(size_t size) + { + return SNMALLOC_NAME_MANGLE(memalign)(OS_PAGE_SIZE, size); + } +#endif + + SNMALLOC_EXPORT void* SNMALLOC_NAME_MANGLE(pvalloc)(size_t size) + { + if (size == size_t(-1)) + { + errno = ENOMEM; + return nullptr; + } + return SNMALLOC_NAME_MANGLE(memalign)( + OS_PAGE_SIZE, (size + OS_PAGE_SIZE - 1) & ~(OS_PAGE_SIZE - 1)); + } +} diff --git a/src/snmalloc/override/memcpy.cc b/src/snmalloc/override/memcpy.cc new file mode 100644 index 000000000..c2283ec1e --- /dev/null +++ b/src/snmalloc/override/memcpy.cc @@ -0,0 +1,13 @@ +#include "override.h" + +extern "C" +{ + /** + * Snmalloc checked memcpy. 
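+ * When snmalloc is built with its bounds-checking options, the
+ * destination (and, if load checks are enabled, the source) range is
+ * validated against the bounds of its backing allocation before the
+ * copy, so an overflowing memcpy is reported rather than silently
+ * corrupting adjacent objects.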
+ */ + SNMALLOC_EXPORT void* + SNMALLOC_NAME_MANGLE(memcpy)(void* dst, const void* src, size_t len) + { + return snmalloc::memcpy(dst, src, len); + } +} diff --git a/src/override/new.cc b/src/snmalloc/override/new.cc similarity index 56% rename from src/override/new.cc rename to src/snmalloc/override/new.cc index 70083da37..c0acc953a 100644 --- a/src/override/new.cc +++ b/src/snmalloc/override/new.cc @@ -1,9 +1,11 @@ -#include "../mem/alloc.h" -#include "../mem/threadalloc.h" -#include "../snmalloc.h" +#include "malloc.cc" #ifdef _WIN32 -# define EXCEPTSPEC +# ifdef __clang__ +# define EXCEPTSPEC noexcept +# else +# define EXCEPTSPEC +# endif #else # ifdef _GLIBCXX_USE_NOEXCEPT # define EXCEPTSPEC _GLIBCXX_USE_NOEXCEPT @@ -18,54 +20,54 @@ using namespace snmalloc; void* operator new(size_t size) { - return ThreadAlloc::get_noncachable()->alloc(size); + return ThreadAlloc::get().alloc(size); } void* operator new[](size_t size) { - return ThreadAlloc::get_noncachable()->alloc(size); + return ThreadAlloc::get().alloc(size); } void* operator new(size_t size, std::nothrow_t&) { - return ThreadAlloc::get_noncachable()->alloc(size); + return ThreadAlloc::get().alloc(size); } void* operator new[](size_t size, std::nothrow_t&) { - return ThreadAlloc::get_noncachable()->alloc(size); + return ThreadAlloc::get().alloc(size); } void operator delete(void* p)EXCEPTSPEC { - ThreadAlloc::get_noncachable()->dealloc(p); + ThreadAlloc::get().dealloc(p); } void operator delete(void* p, size_t size)EXCEPTSPEC { if (p == nullptr) return; - ThreadAlloc::get_noncachable()->dealloc(p, size); + ThreadAlloc::get().dealloc(p, size); } void operator delete(void* p, std::nothrow_t&) { - ThreadAlloc::get_noncachable()->dealloc(p); + ThreadAlloc::get().dealloc(p); } void operator delete[](void* p) EXCEPTSPEC { - ThreadAlloc::get_noncachable()->dealloc(p); + ThreadAlloc::get().dealloc(p); } void operator delete[](void* p, size_t size) EXCEPTSPEC { if (p == nullptr) return; - ThreadAlloc::get_noncachable()->dealloc(p, size); + ThreadAlloc::get().dealloc(p, size); } void operator delete[](void* p, std::nothrow_t&) { - ThreadAlloc::get_noncachable()->dealloc(p); + ThreadAlloc::get().dealloc(p); } diff --git a/src/snmalloc/override/override.h b/src/snmalloc/override/override.h new file mode 100644 index 000000000..0ca70bc11 --- /dev/null +++ b/src/snmalloc/override/override.h @@ -0,0 +1,15 @@ +#pragma once + +#include "../global/global.h" + +#ifndef SNMALLOC_EXPORT +# define SNMALLOC_EXPORT +#endif +#ifdef SNMALLOC_STATIC_LIBRARY_PREFIX +# define __SN_CONCAT(a, b) a##b +# define __SN_EVALUATE(a, b) __SN_CONCAT(a, b) +# define SNMALLOC_NAME_MANGLE(a) \ + __SN_EVALUATE(SNMALLOC_STATIC_LIBRARY_PREFIX, a) +#elif !defined(SNMALLOC_NAME_MANGLE) +# define SNMALLOC_NAME_MANGLE(a) a +#endif diff --git a/src/snmalloc/override/rust.cc b/src/snmalloc/override/rust.cc new file mode 100644 index 000000000..64da984ca --- /dev/null +++ b/src/snmalloc/override/rust.cc @@ -0,0 +1,53 @@ +#define SNMALLOC_NAME_MANGLE(a) sn_##a +#include "malloc.cc" + +#include + +#ifndef SNMALLOC_EXPORT +# define SNMALLOC_EXPORT +#endif + +using namespace snmalloc; + +extern "C" SNMALLOC_EXPORT void* + SNMALLOC_NAME_MANGLE(rust_alloc)(size_t alignment, size_t size) +{ + return ThreadAlloc::get().alloc(aligned_size(alignment, size)); +} + +extern "C" SNMALLOC_EXPORT void* + SNMALLOC_NAME_MANGLE(rust_alloc_zeroed)(size_t alignment, size_t size) +{ + return ThreadAlloc::get().alloc(aligned_size(alignment, size)); +} + +extern "C" SNMALLOC_EXPORT void + 
SNMALLOC_NAME_MANGLE(rust_dealloc)(void* ptr, size_t alignment, size_t size) +{ + ThreadAlloc::get().dealloc(ptr, aligned_size(alignment, size)); +} + +extern "C" SNMALLOC_EXPORT void* SNMALLOC_NAME_MANGLE(rust_realloc)( + void* ptr, size_t alignment, size_t old_size, size_t new_size) +{ + size_t aligned_old_size = aligned_size(alignment, old_size), + aligned_new_size = aligned_size(alignment, new_size); + if ( + size_to_sizeclass_full(aligned_old_size).raw() == + size_to_sizeclass_full(aligned_new_size).raw()) + return ptr; + void* p = ThreadAlloc::get().alloc(aligned_new_size); + if (p) + { + std::memcpy(p, ptr, old_size < new_size ? old_size : new_size); + ThreadAlloc::get().dealloc(ptr, aligned_old_size); + } + return p; +} + +extern "C" SNMALLOC_EXPORT void SNMALLOC_NAME_MANGLE(rust_statistics)( + size_t* current_memory_usage, size_t* peak_memory_usage) +{ + *current_memory_usage = StandardConfig::Backend::get_current_usage(); + *peak_memory_usage = StandardConfig::Backend::get_peak_usage(); +} \ No newline at end of file diff --git a/src/snmalloc/pal/pal.h b/src/snmalloc/pal/pal.h new file mode 100644 index 000000000..47dde5e49 --- /dev/null +++ b/src/snmalloc/pal/pal.h @@ -0,0 +1,185 @@ +/** + * The platform abstraction layer. This defines an abstraction that exposes + * services from the operating system or any equivalent environment. + * + * It is possible to have multiple PALs in a single snmalloc instance. For + * example, one may provide the base operating system functionality, another + * may hide some of this or provide its own abstractions for sandboxing or + * isolating heaps. + * + * Files in this directory may depend on the architecture abstraction and core + * layers (`aal` and `ds_core`, respectively) but nothing else in snmalloc. + */ +#pragma once + +#include "../aal/aal.h" +#include "pal_concept.h" +#include "pal_consts.h" + +// If simulating OE, then we need the underlying platform +#if defined(OPEN_ENCLAVE) +# include "pal_open_enclave.h" +#endif +#if !defined(OPEN_ENCLAVE) || defined(OPEN_ENCLAVE_SIMULATION) +# include "pal_apple.h" +# include "pal_dragonfly.h" +# include "pal_freebsd.h" +# include "pal_freebsd_kernel.h" +# include "pal_haiku.h" +# include "pal_linux.h" +# include "pal_netbsd.h" +# include "pal_openbsd.h" +# include "pal_solaris.h" +# include "pal_windows.h" +#endif +#include "pal_noalloc.h" +#include "pal_plain.h" + +namespace snmalloc +{ + using DefaultPal = +#if defined(SNMALLOC_MEMORY_PROVIDER) + SNMALLOC_MEMORY_PROVIDER; +#elif defined(OPEN_ENCLAVE) + PALOpenEnclave; +#elif defined(_WIN32) + PALWindows; +#elif defined(__APPLE__) + PALApple<>; +#elif defined(__linux__) + PALLinux; +#elif defined(FreeBSD_KERNEL) + PALFreeBSDKernel; +#elif defined(__FreeBSD__) + PALFreeBSD; +#elif defined(__HAIKU__) + PALHaiku; +#elif defined(__NetBSD__) + PALNetBSD; +#elif defined(__OpenBSD__) + PALOpenBSD; +#elif defined(__sun) + PALSolaris; +#elif defined(__DragonFly__) + PALDragonfly; +#else +# error Unsupported platform +#endif + + [[noreturn]] SNMALLOC_SLOW_PATH inline void error(const char* const str) + { + DefaultPal::error(str); + } + + // Used to keep Superslab metadata committed. + static constexpr size_t OS_PAGE_SIZE = DefaultPal::page_size; + + /** + * Perform platform-specific adjustment of return pointers. + * + * This is here, rather than in every PAL proper, merely to minimize + * disruption to PALs for platforms that do not support StrictProvenance AALs. 
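+ * The overload below is selected when the AAL does not support
+ * StrictProvenance; it only rebinds the pointer's static bounds type and
+ * leaves the pointer value untouched. The second overload forwards to
+ * the PAL's own implementation.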
+ */ + template< + typename PAL = DefaultPal, + typename AAL = Aal, + typename T, + SNMALLOC_CONCEPT(capptr::IsBound) B> + static inline typename std::enable_if_t< + !aal_supports, + CapPtr>> + capptr_to_user_address_control(CapPtr p) + { + return CapPtr>::unsafe_from( + p.unsafe_ptr()); + } + + template< + typename PAL = DefaultPal, + typename AAL = Aal, + typename T, + SNMALLOC_CONCEPT(capptr::IsBound) B> + static SNMALLOC_FAST_PATH typename std::enable_if_t< + aal_supports, + CapPtr>> + capptr_to_user_address_control(CapPtr p) + { + return PAL::capptr_to_user_address_control(p); + } + + /** + * A convenience wrapper that avoids the need to litter unsafe accesses with + * every call to PAL::zero. + * + * We do this here rather than plumb CapPtr further just to minimize + * disruption and avoid code bloat. This wrapper ought to compile down to + * nothing if SROA is doing its job. + */ + template< + typename PAL, + bool page_aligned = false, + typename T, + SNMALLOC_CONCEPT(capptr::IsBound) B> + static SNMALLOC_FAST_PATH void pal_zero(CapPtr p, size_t sz) + { + static_assert( + !page_aligned || B::spatial >= capptr::dimension::Spatial::Chunk); + PAL::template zero(p.unsafe_ptr(), sz); + } + + static_assert( + bits::is_pow2(OS_PAGE_SIZE), "OS_PAGE_SIZE must be a power of two"); + static_assert( + OS_PAGE_SIZE % Aal::smallest_page_size == 0, + "The smallest architectural page size must divide OS_PAGE_SIZE"); + + // Some system headers (e.g. Linux' sys/user.h, FreeBSD's machine/param.h) + // define `PAGE_SIZE` as a macro, while others (e.g. macOS 11's + // mach/machine/vm_param.h) define `PAGE_SIZE` as an extern. We don't use + // `PAGE_SIZE` as our variable name, to avoid conflicts, but if we do see a + // macro definition evaluates to a constant then check that our value matches + // the platform's expected value. +#ifdef PAGE_SIZE + static_assert( +# if __has_builtin(__builtin_constant_p) + !__builtin_constant_p(PAGE_SIZE) || (PAGE_SIZE == OS_PAGE_SIZE), +# else + true, +# endif + "Page size from system header does not match snmalloc config page size."); +#endif + + /** + * Report a fatal error via a PAL-specific error reporting mechanism. This + * takes a format string and a set of arguments. The format string indicates + * the remaining arguments with "{}". This could be extended later to + * support indexing fairly easily, if we ever want to localise these error + * messages. + * + * The following are supported as arguments: + * + * - Characters (`char`), printed verbatim. + * - Strings (anything convertible to `std::string_view`), typically string + * literals because nothing on this path should be performing heap + * allocations. Printed verbatim. + * - Raw pointers (void*), printed as hex strings. + * - Integers (convertible to `size_t`), printed as hex strings. + * + * These types should be sufficient for allocator-related error messages. + */ + template + [[noreturn]] inline void report_fatal_error(Args... args) + { + MessageBuilder msg{std::forward(args)...}; + DefaultPal::error(msg.get_message()); + } + + template + inline void message(Args... 
args) + { + MessageBuilder msg{std::forward(args)...}; + MessageBuilder msg_tid{ + "{}: {}", DefaultPal::get_tid(), msg.get_message()}; + DefaultPal::message(msg_tid.get_message()); + } +} // namespace snmalloc diff --git a/src/pal/pal_apple.h b/src/snmalloc/pal/pal_apple.h similarity index 91% rename from src/pal/pal_apple.h rename to src/snmalloc/pal/pal_apple.h index 685ae9353..69f4e5da0 100644 --- a/src/pal/pal_apple.h +++ b/src/snmalloc/pal/pal_apple.h @@ -28,7 +28,7 @@ namespace snmalloc * The features exported by this PAL. */ static constexpr uint64_t pal_features = - AlignedAllocation | LazyCommit | Entropy; + AlignedAllocation | LazyCommit | Entropy | Time; /* * `page_size` @@ -113,9 +113,8 @@ namespace snmalloc { SNMALLOC_ASSERT(is_aligned_block(p, size)); -# ifdef USE_POSIX_COMMIT_CHECKS - memset(p, 0x5a, size); -# endif + if constexpr (DEBUG) + memset(p, 0x5a, size); // `MADV_FREE_REUSABLE` can only be applied to writable pages, // otherwise it's an error. @@ -126,12 +125,13 @@ namespace snmalloc while (madvise(p, size, MADV_FREE_REUSABLE) == -1 && errno == EAGAIN) ; -# ifdef USE_POSIX_COMMIT_CHECKS - // This must occur after `MADV_FREE_REUSABLE`. - // - // `mach_vm_protect` is observably slower in benchmarks. - mprotect(p, size, PROT_NONE); -# endif + if constexpr (PalEnforceAccess) + { + // This must occur after `MADV_FREE_REUSABLE`. + // + // `mach_vm_protect` is observably slower in benchmarks. + mprotect(p, size, PROT_NONE); + } } /** @@ -160,6 +160,7 @@ namespace snmalloc template static void notify_using(void* p, size_t size) noexcept { + KeepErrno e; SNMALLOC_ASSERT( is_aligned_block(p, size) || (zero_mem == NoZero)); @@ -173,18 +174,19 @@ namespace snmalloc anonymous_memory_fd, 0); - if (likely(r != MAP_FAILED)) + if (SNMALLOC_LIKELY(r != MAP_FAILED)) { return; } } -# ifdef USE_POSIX_COMMIT_CHECKS - // Mark pages as writable for `madvise` below. - // - // `mach_vm_protect` is observably slower in benchmarks. - mprotect(p, size, PROT_READ | PROT_WRITE); -# endif + if constexpr (PalEnforceAccess) + { + // Mark pages as writable for `madvise` below. + // + // `mach_vm_protect` is observably slower in benchmarks. + mprotect(p, size, PROT_READ | PROT_WRITE); + } // `MADV_FREE_REUSE` can only be applied to writable pages, // otherwise it's an error. @@ -204,7 +206,7 @@ namespace snmalloc // Apple's `mmap` doesn't support user-specified alignment and only // guarantees mappings are aligned to the system page size, so we use // `mach_vm_map` instead. - template + template static void* reserve_aligned(size_t size) noexcept { SNMALLOC_ASSERT(bits::is_pow2(size)); @@ -218,11 +220,9 @@ namespace snmalloc // must be initialized to 0 or addr is interepreted as a lower-bound. mach_vm_address_t addr = 0; -# ifdef USE_POSIX_COMMIT_CHECKS - vm_prot_t prot = committed ? VM_PROT_READ | VM_PROT_WRITE : VM_PROT_NONE; -# else - vm_prot_t prot = VM_PROT_READ | VM_PROT_WRITE; -# endif + vm_prot_t prot = (state_using || !PalEnforceAccess) ? 
+ VM_PROT_READ | VM_PROT_WRITE : + VM_PROT_NONE; kern_return_t kr = mach_vm_map( mach_task_self(), @@ -237,9 +237,9 @@ namespace snmalloc VM_PROT_READ | VM_PROT_WRITE, VM_INHERIT_COPY); - if (unlikely(kr != KERN_SUCCESS)) + if (SNMALLOC_UNLIKELY(kr != KERN_SUCCESS)) { - error("Failed to allocate memory\n"); + return nullptr; } return reinterpret_cast(addr); diff --git a/src/pal/pal_bsd.h b/src/snmalloc/pal/pal_bsd.h similarity index 81% rename from src/pal/pal_bsd.h rename to src/snmalloc/pal/pal_bsd.h index 1c3577623..4689b43c0 100644 --- a/src/pal/pal_bsd.h +++ b/src/snmalloc/pal/pal_bsd.h @@ -10,8 +10,8 @@ namespace snmalloc * Generic *BSD PAL mixin. This provides features that are common to the BSD * family. */ - template - class PALBSD : public PALPOSIX + template + class PALBSD : public PALPOSIX { public: /** @@ -34,14 +34,16 @@ namespace snmalloc static void notify_not_using(void* p, size_t size) noexcept { SNMALLOC_ASSERT(is_aligned_block(p, size)); - // Call this Pal to simulate the Windows decommit in CI. -#ifdef USE_POSIX_COMMIT_CHECKS - memset(p, 0x5a, size); -#endif + + if constexpr (DEBUG) + memset(p, 0x5a, size); + madvise(p, size, MADV_FREE); -#ifdef USE_POSIX_COMMIT_CHECKS - mprotect(p, size, PROT_NONE); -#endif + + if constexpr (PalEnforceAccess) + { + mprotect(p, size, PROT_NONE); + } } }; } // namespace snmalloc diff --git a/src/pal/pal_bsd_aligned.h b/src/snmalloc/pal/pal_bsd_aligned.h similarity index 57% rename from src/pal/pal_bsd_aligned.h rename to src/snmalloc/pal/pal_bsd_aligned.h index bac3e8284..4c88287f3 100644 --- a/src/pal/pal_bsd_aligned.h +++ b/src/snmalloc/pal/pal_bsd_aligned.h @@ -10,8 +10,8 @@ namespace snmalloc * This adds aligned allocation using `MAP_ALIGNED` to the generic BSD * implementation. This flag is supported by NetBSD and FreeBSD. */ - template - class PALBSD_Aligned : public PALBSD + template + class PALBSD_Aligned : public PALBSD { public: /** @@ -28,7 +28,7 @@ namespace snmalloc /** * Reserve memory at a specific alignment. */ - template + template static void* reserve_aligned(size_t size) noexcept { // Alignment must be a power of 2. @@ -37,30 +37,22 @@ namespace snmalloc int log2align = static_cast(bits::next_pow2_bits(size)); + auto prot = + state_using || !PalEnforceAccess ? PROT_READ | PROT_WRITE : PROT_NONE; + void* p = mmap( nullptr, size, - PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS | MAP_ALIGNED(log2align), + prot, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_ALIGNED(log2align) | + OS::extra_mmap_flags(state_using), -1, 0); if (p == MAP_FAILED) - PALBSD::error("Out of memory"); + return nullptr; return p; } - - /** - * Explicitly deleted method for returning non-aligned memory. This causes - * incorrect use of `constexpr if` to fail on platforms with aligned - * allocation. Without this, this PAL and its subclasses exported both - * allocation functions and so callers would type-check if they called - * either in `constexpr if` branches and then fail on platforms such as - * Linux or Windows, which expose only unaligned or aligned allocations, - * respectively. 
- */ - static std::pair - reserve_at_least(size_t size) noexcept = delete; }; } // namespace snmalloc diff --git a/src/snmalloc/pal/pal_concept.h b/src/snmalloc/pal/pal_concept.h new file mode 100644 index 000000000..44dec410a --- /dev/null +++ b/src/snmalloc/pal/pal_concept.h @@ -0,0 +1,168 @@ +#pragma once + +#ifdef __cpp_concepts +# include "pal_consts.h" +# include "pal_ds.h" + +# include + +namespace snmalloc +{ + /* + * These concepts enforce that these are indeed constants that fit in the + * desired types. (This is subtly different from saying that they are the + * required types; C++ may handle constants without much regard for their + * claimed type.) + */ + + /** + * PALs must advertize the bit vector of their supported features. + */ + template + concept IsPAL_static_features = requires() + { + typename std::integral_constant; + }; + + /** + * PALs must advertise the size of the address space and their page size + */ + template + concept IsPAL_static_sizes = requires() + { + typename std::integral_constant; + typename std::integral_constant; + }; + + /** + * PALs expose an error reporting function which takes a const C string. + */ + template + concept IsPAL_error = requires(const char* const str) + { + { + PAL::error(str) + } + ->ConceptSame; + }; + + /** + * PALs expose a basic library of memory operations. + */ + template + concept IsPAL_memops = requires(void* vp, std::size_t sz) + { + { + PAL::notify_not_using(vp, sz) + } + noexcept->ConceptSame; + + { + PAL::template notify_using(vp, sz) + } + noexcept->ConceptSame; + { + PAL::template notify_using(vp, sz) + } + noexcept->ConceptSame; + + { + PAL::template zero(vp, sz) + } + noexcept->ConceptSame; + { + PAL::template zero(vp, sz) + } + noexcept->ConceptSame; + }; + + /** + * The Pal must provide a thread id for debugging. It should not return + * the default value of ThreadIdentity, as that is used as not an tid in some + * places. + */ + template + concept IsPAL_tid = requires() + { + { + PAL::get_tid() + } + noexcept->ConceptSame; + }; + + /** + * Absent any feature flags, the PAL must support a crude primitive allocator + */ + template + concept IsPAL_reserve = requires(PAL p, std::size_t sz) + { + { + PAL::reserve(sz) + } + noexcept->ConceptSame; + }; + + /** + * Some PALs expose a richer allocator which understands aligned allocations + */ + template + concept IsPAL_reserve_aligned = requires(std::size_t sz) + { + { + PAL::template reserve_aligned(sz) + } + noexcept->ConceptSame; + { + PAL::template reserve_aligned(sz) + } + noexcept->ConceptSame; + }; + + /** + * Some PALs can provide memory pressure callbacks. + */ + template + concept IsPAL_mem_low_notify = requires(PalNotificationObject* pno) + { + { + PAL::expensive_low_memory_check() + } + ->ConceptSame; + { + PAL::register_for_low_memory_callback(pno) + } + ->ConceptSame; + }; + + template + concept IsPAL_get_entropy64 = requires() + { + { + PAL::get_entropy64() + } + ->ConceptSame; + }; + + /** + * PALs ascribe to the conjunction of several concepts. These are broken + * out by the shape of the requires() quantifiers required and by any + * requisite claimed pal_features. PALs not claiming particular features + * are, naturally, not bound by the corresponding concept. 
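+ * For example, a PAL advertising NoAllocation is exempt from the reserve
+ * requirements entirely, whereas one advertising AlignedAllocation must
+ * provide both reserve_aligned<true> and reserve_aligned<false> in
+ * addition to the plain reserve requirement.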
+ */ + // clang-format off + template + concept IsPAL = + IsPAL_static_features && + IsPAL_static_sizes && + IsPAL_error && + IsPAL_memops && + IsPAL_tid && + (!pal_supports || IsPAL_get_entropy64) && + (!pal_supports || IsPAL_mem_low_notify) && + (pal_supports || + ((!pal_supports || IsPAL_reserve_aligned) && + IsPAL_reserve)); + // clang-format on + +} // namespace snmalloc +#endif diff --git a/src/pal/pal_consts.h b/src/snmalloc/pal/pal_consts.h similarity index 58% rename from src/pal/pal_consts.h rename to src/snmalloc/pal/pal_consts.h index 84823fed6..8de4cb09c 100644 --- a/src/pal/pal_consts.h +++ b/src/snmalloc/pal/pal_consts.h @@ -1,11 +1,32 @@ #pragma once -#include "../ds/defines.h" +#include "../ds_core/ds_core.h" #include +#include namespace snmalloc { + /** + * Pal implementations should query this flag to see whether they + * are allowed to optimise memory access, or that they must provide + * exceptions/segfaults if accesses do not obey the + * - using + * - using_readonly + * - not_using + * model. + * + * TODO: There is a known bug in CheriBSD that means round-tripping through + * PROT_NONE sheds capability load and store permissions (while restoring data + * read/write, for added excitement). For the moment, just force this down on + * CHERI. + */ +#if defined(SNMALLOC_CHECK_CLIENT) && !defined(__CHERI_PURE_CAPABILITY__) + static constexpr bool PalEnforceAccess = true; +#else + static constexpr bool PalEnforceAccess = false; +#endif + /** * Flags in a bitfield of optional features that a PAL may support. These * should be set in the PAL's `pal_features` static constexpr field. @@ -20,6 +41,7 @@ namespace snmalloc * whether low memory conditions are still in effect. */ LowMemoryNotification = (1 << 0), + /** * This PAL natively supports allocation with a guaranteed alignment. If * this is not supported, then we will over-allocate and round the @@ -30,22 +52,31 @@ namespace snmalloc * `request()` method that takes only a size. */ AlignedAllocation = (1 << 1), + /** * This PAL natively supports lazy commit of pages. This means have large * allocations and not touching them does not increase memory usage. This is * exposed in the Pal. */ LazyCommit = (1 << 2), + /** * This Pal does not support allocation. All memory used with this Pal * should be pre-allocated. */ NoAllocation = (1 << 3), + /** * This Pal provides a source of Entropy */ Entropy = (1 << 4), + + /** + * This Pal provides a millisecond time source + */ + Time = (1 << 5), }; + /** * Flag indicating whether requested memory should be zeroed. */ @@ -55,6 +86,7 @@ namespace snmalloc * Memory should not be zeroed, contents are undefined. */ NoZero, + /** * Memory must be zeroed. This can be lazily allocated via a copy-on-write * mechanism as long as any load from the memory returns zero. @@ -65,70 +97,7 @@ namespace snmalloc /** * Default Tag ID for the Apple class */ - static const uint8_t PALAnonDefaultID = 241; - - /** - * This struct is used to represent callbacks for notification from the - * platform. It contains a next pointer as client is responsible for - * allocation as we cannot assume an allocator at this point. 
- */ - struct PalNotificationObject - { - std::atomic pal_next = nullptr; - - void (*pal_notify)(PalNotificationObject* self); - - PalNotificationObject(void (*pal_notify)(PalNotificationObject* self)) - : pal_notify(pal_notify) - {} - }; - - /*** - * Wrapper for managing notifications for PAL events - */ - class PalNotifier - { - /** - * List of callbacks to notify - */ - std::atomic callbacks{nullptr}; - - public: - /** - * Register a callback object to be notified - * - * The object should never be deallocated by the client after calling - * this. - */ - void register_notification(PalNotificationObject* callback) - { - callback->pal_next = nullptr; - - auto prev = &callbacks; - auto curr = prev->load(); - do - { - while (curr != nullptr) - { - prev = &(curr->pal_next); - curr = prev->load(); - } - } while (!prev->compare_exchange_weak(curr, callback)); - } - - /** - * Calls the pal_notify of all the registered objects. - */ - void notify_all() - { - PalNotificationObject* curr = callbacks; - while (curr != nullptr) - { - curr->pal_notify(curr); - curr = curr->pal_next; - } - } - }; + static const int PALAnonDefaultID = 241; /** * Query whether the PAL supports a specific feature. diff --git a/src/pal/pal_dragonfly.h b/src/snmalloc/pal/pal_dragonfly.h similarity index 100% rename from src/pal/pal_dragonfly.h rename to src/snmalloc/pal/pal_dragonfly.h diff --git a/src/snmalloc/pal/pal_ds.h b/src/snmalloc/pal/pal_ds.h new file mode 100644 index 000000000..3da37cf46 --- /dev/null +++ b/src/snmalloc/pal/pal_ds.h @@ -0,0 +1,162 @@ +#pragma once + +#include "../ds_core/ds_core.h" + +#include +#include + +namespace snmalloc +{ + template + class PalList + { + /** + * List of callbacks to notify + */ + std::atomic elements{nullptr}; + + static_assert( + std::is_same>::value, + "Required pal_next type."); + + public: + /** + * Add an element to the list + */ + void add(T* element) + { + auto prev = &elements; + auto curr = prev->load(); + do + { + while (curr != nullptr) + { + prev = &(curr->pal_next); + curr = prev->load(); + } + } while (!prev->compare_exchange_weak(curr, element)); + } + + /** + * Applies function to all the elements of the list + */ + void apply_all(function_ref func) + { + T* curr = elements; + while (curr != nullptr) + { + func(curr); + curr = curr->pal_next; + } + } + }; + + /** + * This struct is used to represent callbacks for notification from the + * platform. It contains a next pointer as client is responsible for + * allocation as we cannot assume an allocator at this point. + */ + struct PalNotificationObject + { + std::atomic pal_next = nullptr; + + void (*pal_notify)(PalNotificationObject* self); + + PalNotificationObject(void (*pal_notify)(PalNotificationObject* self)) + : pal_notify(pal_notify) + {} + }; + + /*** + * Wrapper for managing notifications for PAL events + */ + class PalNotifier + { + /** + * List of callbacks to notify + */ + PalList callbacks; + + public: + /** + * Register a callback object to be notified + * + * The object should never be deallocated by the client after calling + * this. + */ + void register_notification(PalNotificationObject* callback) + { + callbacks.add(callback); + } + + /** + * Calls the pal_notify of all the registered objects. 
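+ * Callbacks are invoked synchronously on the thread that calls
+ * notify_all, so they must be safe to run from any thread that can
+ * trigger a notification.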
+ */ + void notify_all() + { + callbacks.apply_all([](auto curr) { curr->pal_notify(curr); }); + } + }; + + class PalTimerObject + { + friend class PalTimer; + template + friend class PalList; + + std::atomic pal_next; + + void (*pal_notify)(PalTimerObject* self); + + uint64_t last_run = 0; + uint64_t repeat; + + public: + PalTimerObject(void (*pal_notify)(PalTimerObject* self), uint64_t repeat) + : pal_notify(pal_notify), repeat(repeat) + {} + }; + + /** + * Simple mechanism for handling timers. + * + * Note: This is really designed for a very small number of timers, + * and this design should be changed if that is no longer the case. + */ + class PalTimer + { + /** + * List of callbacks to notify + */ + PalList timers; + + public: + /** + * Register a callback to be called every repeat milliseconds. + */ + void register_timer(PalTimerObject* timer) + { + timers.add(timer); + } + + void check(uint64_t time_ms) + { + static std::atomic_bool lock{false}; + + // Deduplicate calls into here, and make single threaded. + if (lock.exchange(true, std::memory_order_acquire)) + return; + + timers.apply_all([time_ms](PalTimerObject* curr) { + if ( + (curr->last_run == 0) || ((time_ms - curr->last_run) > curr->repeat)) + { + curr->last_run = time_ms; + curr->pal_notify(curr); + } + }); + + lock.store(false, std::memory_order_release); + } + }; +} // namespace snmalloc diff --git a/src/snmalloc/pal/pal_freebsd.h b/src/snmalloc/pal/pal_freebsd.h new file mode 100644 index 000000000..5d208c3b4 --- /dev/null +++ b/src/snmalloc/pal/pal_freebsd.h @@ -0,0 +1,145 @@ +#pragma once + +#if defined(__FreeBSD__) && !defined(_KERNEL) +# include "pal_bsd_aligned.h" + +// On CHERI platforms, we need to know the value of CHERI_PERM_SW_VMEM. +// This pollutes the global namespace a little, sadly, but I think only with +// symbols that begin with CHERI_, which is as close to namespaces as C offers. +# if defined(__CHERI_PURE_CAPABILITY__) +# include +# if !defined(CHERI_PERM_SW_VMEM) +# define CHERI_PERM_SW_VMEM CHERI_PERM_CHERIABI_VMMAP +# endif +# endif + +/** + * Direct system-call wrappers so that we can skip libthr interception, which + * won't work if malloc is broken. + * @{ + */ +extern "C" ssize_t __sys_writev(int fd, const struct iovec* iov, int iovcnt); +extern "C" int __sys_fsync(int fd); +/// @} + +namespace snmalloc +{ + /** + * FreeBSD-specific platform abstraction layer. + * + * This adds FreeBSD-specific aligned allocation to the generic BSD + * implementation. + */ + class PALFreeBSD + : public PALBSD_Aligned + { + public: + /** + * Bitmap of PalFeatures flags indicating the optional features that this + * PAL supports. + * + * The FreeBSD PAL does not currently add any features beyond those of a + * generic BSD with support for arbitrary alignment from `mmap`. This + * field is declared explicitly to remind anyone modifying this class to + * add new features that they should add any required feature flags. + */ + static constexpr uint64_t pal_features = PALBSD_Aligned::pal_features; + + /** + * FreeBSD uses atypically small address spaces on its 64 bit RISC machines. + * Problematically, these are so small that if we used the default + * address_bits (48), we'd try to allocate the whole AS (or larger!) for the + * Pagemap itself! + */ + static constexpr size_t address_bits = (Aal::bits == 32) ? + Aal::address_bits : + (Aal::aal_name == RISCV ? 38 : Aal::address_bits); + // TODO, if we ever backport to MIPS, this should yield 39 there. + + /** + * Extra mmap flags. 
Exclude mappings from core files if they are + * read-only or pure reservations. + */ + static int extra_mmap_flags(bool state_using) + { + return state_using ? 0 : MAP_NOCORE; + } + + /** + * Notify platform that we will not be using these pages. + * + * We use the `MADV_FREE` and `NADV_NOCORE` flags to `madvise`. The first + * allows the system to discard the page and replace it with a CoW mapping + * of the zero page. The second prevents this mapping from appearing in + * core files. + */ + static void notify_not_using(void* p, size_t size) noexcept + { + SNMALLOC_ASSERT(is_aligned_block(p, size)); + + if constexpr (DEBUG) + memset(p, 0x5a, size); + + madvise(p, size, MADV_NOCORE); + madvise(p, size, MADV_FREE); + + if constexpr (PalEnforceAccess) + { + mprotect(p, size, PROT_NONE); + } + } + + /** + * Notify platform that we will be using these pages for reading. + * + * This is used only for pages full of zeroes and so we exclude them from + * core dumps. + */ + static void notify_using_readonly(void* p, size_t size) noexcept + { + PALBSD_Aligned::notify_using_readonly(p, size); + madvise(p, size, MADV_NOCORE); + } + + /** + * Notify platform that we will be using these pages. + * + * We may have previously marked this memory as not being included in core + * files, so mark it for inclusion again. + */ + template + static void notify_using(void* p, size_t size) noexcept + { + PALBSD_Aligned::notify_using(p, size); + madvise(p, size, MADV_CORE); + } + +# if defined(__CHERI_PURE_CAPABILITY__) + static_assert( + aal_supports, + "CHERI purecap support requires StrictProvenance AAL"); + + /** + * On CheriBSD, exporting a pointer means stripping it of the authority to + * manage the address space it references by clearing the SW_VMEM + * permission bit. + */ + template + static SNMALLOC_FAST_PATH CapPtr> + capptr_to_user_address_control(CapPtr p) + { + if constexpr (Aal::aal_cheri_features & Aal::AndPermsTrapsUntagged) + { + if (p == nullptr) + { + return nullptr; + } + } + return CapPtr>::unsafe_from( + __builtin_cheri_perms_and( + p.unsafe_ptr(), ~static_cast(CHERI_PERM_SW_VMEM))); + } +# endif + }; +} // namespace snmalloc +#endif diff --git a/src/pal/pal_freebsd_kernel.h b/src/snmalloc/pal/pal_freebsd_kernel.h similarity index 89% rename from src/pal/pal_freebsd_kernel.h rename to src/snmalloc/pal/pal_freebsd_kernel.h index 084b38b39..11b951ce5 100644 --- a/src/pal/pal_freebsd_kernel.h +++ b/src/snmalloc/pal/pal_freebsd_kernel.h @@ -1,6 +1,6 @@ #pragma once -#include "../ds/bits.h" +#include "../ds_core/ds_core.h" #if defined(FreeBSD_KERNEL) extern "C" @@ -28,6 +28,15 @@ namespace snmalloc * PAL supports. */ static constexpr uint64_t pal_features = AlignedAllocation; + + /** + * Report a message to console, followed by a newline. 
+ */ + static void message(const char* const str) noexcept + { + printf("%s\n", str); + } + [[noreturn]] void error(const char* const str) { panic("snmalloc error: %s", str); @@ -59,7 +68,7 @@ namespace snmalloc ::bzero(p, size); } - template + template static void* reserve_aligned(size_t size) noexcept { SNMALLOC_ASSERT(bits::is_pow2(size)); @@ -80,7 +89,7 @@ namespace snmalloc { return nullptr; } - if (committed) + if (state_using) { if ( kmem_back(kernel_object, addr, size, M_ZERO | M_WAITOK) != diff --git a/src/pal/pal_haiku.h b/src/snmalloc/pal/pal_haiku.h similarity index 81% rename from src/pal/pal_haiku.h rename to src/snmalloc/pal/pal_haiku.h index 9cc2b8d48..bafe23c48 100644 --- a/src/pal/pal_haiku.h +++ b/src/snmalloc/pal/pal_haiku.h @@ -20,7 +20,7 @@ namespace snmalloc * PAL supports. * */ - static constexpr uint64_t pal_features = PALPOSIX::pal_features; + static constexpr uint64_t pal_features = PALPOSIX::pal_features | Entropy; /** * Haiku requires an explicit no-reserve flag in `mmap` to guarantee lazy @@ -37,6 +37,15 @@ namespace snmalloc SNMALLOC_ASSERT(is_aligned_block(p, size)); posix_madvise(p, size, POSIX_MADV_DONTNEED); } + + /** + * Hopefully a temporary workaround until the kernel random feature + * is exposed properly in the userspace ? + */ + static uint64_t get_entropy64() + { + return PALPOSIX::dev_urandom(); + } }; } // namespace snmalloc #endif diff --git a/src/snmalloc/pal/pal_linux.h b/src/snmalloc/pal/pal_linux.h new file mode 100644 index 000000000..5e1289a13 --- /dev/null +++ b/src/snmalloc/pal/pal_linux.h @@ -0,0 +1,244 @@ +#pragma once + +#if defined(__linux__) +# include "../ds_core/ds_core.h" +# include "pal_posix.h" + +# include +# include +# include +# include +// __has_include does not reliably determine if we actually have linux/random.h +// available +# if defined(SNMALLOC_HAS_LINUX_RANDOM_H) +# include +# endif + +extern "C" int puts(const char* str); + +namespace snmalloc +{ + class PALLinux : public PALPOSIX + { + public: + /** + * Bitmap of PalFeatures flags indicating the optional features that this + * PAL supports. + * + * We always make sure that linux has entropy support. + */ + static constexpr uint64_t pal_features = PALPOSIX::pal_features | Entropy; + + static constexpr size_t page_size = + Aal::aal_name == PowerPC ? 0x10000 : PALPOSIX::page_size; + + /** + * Linux requires an explicit no-reserve flag in `mmap` to guarantee lazy + * commit if /proc/sys/vm/overcommit_memory is set to `heuristic` (0). + * + * https://www.kernel.org/doc/html/latest/vm/overcommit-accounting.html + */ + static constexpr int default_mmap_flags = MAP_NORESERVE; + + /** + * MADV_FREE is only available since Linux 4.5. 
+ * + * Fallback to MADV_DONTNEED on older kernels + */ + static constexpr int madvise_free_flags = +# if defined(MADV_FREE) + MADV_FREE +# else + MADV_DONTNEED +# endif + ; + + static void* reserve(size_t size) noexcept + { + void* p = PALPOSIX::reserve(size); + if (p) + { + madvise(p, size, MADV_DONTDUMP); +# ifdef SNMALLOC_PAGEID +# ifndef PR_SET_VMA +# define PR_SET_VMA 0x53564d41 +# define PR_SET_VMA_ANON_NAME 0 +# endif + + /** + * + * If the kernel is set with CONFIG_ANON_VMA_NAME + * the reserved pages would appear as follow + * + * 7fa5f0ceb000-7fa5f0e00000 rw-p 00000000 00:00 0 [anon:snmalloc] + * 7fa5f0e00000-7fa5f1800000 rw-p 00000000 00:00 0 [anon:snmalloc] + * + */ + + prctl( + PR_SET_VMA, + PR_SET_VMA_ANON_NAME, + (unsigned long)p, + size, + (unsigned long)"snmalloc"); +# endif + } + return p; + } + + /** + * OS specific function for zeroing memory. + * + * Linux implements an unusual interpretation of `MADV_DONTNEED`, which + * immediately resets the pages to the zero state (rather than marking them + * as sensible ones to swap out in high memory pressure). We use this to + * clear the underlying memory range. + */ + template + static void zero(void* p, size_t size) noexcept + { + // QEMU does not seem to be giving the desired behaviour for + // MADV_DONTNEED. switch back to memset only for QEMU. +# ifndef SNMALLOC_QEMU_WORKAROUND + if ( + (page_aligned || is_aligned_block(p, size)) && + (size > 16 * page_size)) + { + // Only use this on large allocations as memset faster, and doesn't + // introduce IPI so faster for small allocations. + SNMALLOC_ASSERT(is_aligned_block(p, size)); + madvise(p, size, MADV_DONTNEED); + } + else +# endif + { + ::memset(p, 0, size); + } + } + + static void notify_not_using(void* p, size_t size) noexcept + { + SNMALLOC_ASSERT(is_aligned_block(p, size)); + + // Fill memory so that when we switch the pages back on we don't make + // assumptions on the content. + if constexpr (DEBUG) + memset(p, 0x5a, size); + + madvise(p, size, MADV_DONTDUMP); + madvise(p, size, madvise_free_flags); + + if constexpr (PalEnforceAccess) + { + mprotect(p, size, PROT_NONE); + } + } + + /** + * Notify platform that we will be using these pages for reading. + * + * This is used only for pages full of zeroes and so we exclude them from + * core dumps. + */ + static void notify_using_readonly(void* p, size_t size) noexcept + { + PALPOSIX::notify_using_readonly(p, size); + madvise(p, size, MADV_DONTDUMP); + } + + /** + * Notify platform that we will be using these pages. + */ + template + static void notify_using(void* p, size_t size) noexcept + { + PALPOSIX::notify_using(p, size); + madvise(p, size, MADV_DODUMP); + } + + static uint64_t get_entropy64() + { + // TODO: If the system call fails then the POSIX PAL calls libc + // functions that can require malloc, which may result in deadlock. + + // SYS_getrandom API stablized since 3.17. + // This fallback implementation is to aid some environments + // where SYS_getrandom is provided in kernel but the libc + // is not providing getentropy interface. + + union + { + uint64_t result; + char buffer[sizeof(uint64_t)]; + }; + ssize_t ret; + + // give a try to SYS_getrandom +# ifdef SYS_getrandom + static std::atomic_bool syscall_not_working = false; + // Relaxed ordering should be fine here. This function will be called + // during early initialisation, which will examine the availability in a + // protected routine. 
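+ // If the probe below finds the syscall unavailable (ENOSYS, or EPERM
+ // under seccomp, for example), syscall_not_working is set and every
+ // subsequent call goes straight to the /dev/urandom fallback, so only
+ // the first call pays for the failed attempt.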
+ if (false == syscall_not_working.load(std::memory_order_relaxed)) + { + auto current = std::begin(buffer); + auto target = std::end(buffer); + while (auto length = target - current) + { + // Reading data via syscall from system entropy pool. + // According to both MUSL and GLIBC implementation, getentropy uses + // /dev/urandom (blocking API). + // + // The third argument here indicates: + // 1. `GRND_RANDOM` bit is not set, so the source of entropy will be + // `urandom`. + // 2. `GRND_NONBLOCK` bit is set. Since we are reading from + // `urandom`, this means if the entropy pool is + // not initialised, we will get a EAGAIN. + ret = syscall(SYS_getrandom, current, length, GRND_NONBLOCK); + // check whether are interrupt by a signal + if (SNMALLOC_UNLIKELY(ret < 0)) + { + if (SNMALLOC_UNLIKELY(errno == EAGAIN)) + { + // the system is going through early initialisation: at this stage + // it is very likely that snmalloc is being used in some system + // programs and we do not want to block it. + return reinterpret_cast(&result) ^ + reinterpret_cast(&error); + } + if (errno != EINTR) + { + break; + } + } + else + { + current += ret; + } + } + if (SNMALLOC_UNLIKELY(target != current)) + { + // in this routine, the only possible situations should be ENOSYS + // or EPERM (forbidden by seccomp, for example). + SNMALLOC_ASSERT(errno == ENOSYS || errno == EPERM); + syscall_not_working.store(true, std::memory_order_relaxed); + } + else + { + return result; + } + } +# endif + + // Syscall is not working. + // In this case, it is not a good idea to fallback to std::random_device: + // 1. it may want to use malloc to create a buffer, which causes + // reentrancy problem during initialisation routine. + // 2. some implementations also require libstdc++ to be linked since + // its APIs are not exception-free. + return dev_urandom(); + } + }; +} // namespace snmalloc +#endif diff --git a/src/pal/pal_netbsd.h b/src/snmalloc/pal/pal_netbsd.h similarity index 53% rename from src/pal/pal_netbsd.h rename to src/snmalloc/pal/pal_netbsd.h index e91c4b278..75394704f 100644 --- a/src/pal/pal_netbsd.h +++ b/src/snmalloc/pal/pal_netbsd.h @@ -3,6 +3,18 @@ #ifdef __NetBSD__ # include "pal_bsd_aligned.h" +# include + +/** + * We skip the pthread cancellation checkpoints by reaching directly + * the following syscalls so we avoid the possible pthread + * allocation initialization timing issues. + * @{ + */ +extern "C" ssize_t _sys_writev(int fd, const struct iovec* iov, int iovcnt); +extern "C" int _sys_fsync(int fd); +/// @} + namespace snmalloc { /** @@ -11,7 +23,7 @@ namespace snmalloc * This adds NetBSD-specific aligned allocation to the generic BSD * implementation. */ - class PALNetBSD : public PALBSD_Aligned + class PALNetBSD : public PALBSD_Aligned { public: /** @@ -26,7 +38,17 @@ namespace snmalloc * As NetBSD does not have the getentropy call, get_entropy64 will * currently fallback to C++ libraries std::random_device. 
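+ * (With this change the PAL reads from /dev/urandom directly instead;
+ * see the get_entropy64 override below.)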
*/ - static constexpr uint64_t pal_features = PALBSD_Aligned::pal_features; + static constexpr uint64_t pal_features = + PALBSD_Aligned::pal_features | Entropy; + + /** + * Temporary solution while waiting getrandom support for the next release + * random_device seems unimplemented in clang for this platform + */ + static uint64_t get_entropy64() + { + return PALPOSIX::dev_urandom(); + } }; } // namespace snmalloc #endif diff --git a/src/pal/pal_noalloc.h b/src/snmalloc/pal/pal_noalloc.h similarity index 68% rename from src/pal/pal_noalloc.h rename to src/snmalloc/pal/pal_noalloc.h index c35530cbe..94bc61e02 100644 --- a/src/pal/pal_noalloc.h +++ b/src/snmalloc/pal/pal_noalloc.h @@ -3,22 +3,32 @@ #pragma once +#include "../aal/aal.h" +#include "pal_concept.h" +#include "pal_consts.h" +#include "pal_timer_default.h" + #include namespace snmalloc { +#ifdef __cpp_concepts + /** + * The minimal subset of a PAL that we need for delegation + */ + template + concept PALNoAllocBase = IsPAL_static_sizes&& IsPAL_error; +#endif + /** * Platform abstraction layer that does not allow allocation. * * This is a minimal PAL for pre-reserved memory regions, where the * address-space manager is initialised with all of the memory that it will * ever use. - * - * It takes an error handler delegate as a template argument. This is - * expected to forward to the default PAL in most cases. */ - template - struct PALNoAlloc + template + struct PALNoAlloc : public BasePAL { /** * Bitmap of PalFeatures flags indicating the optional features that this @@ -26,14 +36,24 @@ namespace snmalloc */ static constexpr uint64_t pal_features = NoAllocation; - static constexpr size_t page_size = Aal::smallest_page_size; + static constexpr size_t page_size = BasePAL::page_size; + + static constexpr size_t address_bits = BasePAL::address_bits; /** * Print a stack trace. */ static void print_stack_trace() { - ErrorHandler::print_stack_trace(); + BasePAL::print_stack_trace(); + } + + /** + * Report a message to the user. + */ + static void message(const char* const str) noexcept + { + BasePAL::message(str); } /** @@ -41,7 +61,7 @@ namespace snmalloc */ [[noreturn]] static void error(const char* const str) noexcept { - ErrorHandler::error(str); + BasePAL::error(str); } /** @@ -65,8 +85,7 @@ namespace snmalloc } else { - UNUSED(p); - UNUSED(size); + UNUSED(p, size); } } diff --git a/src/snmalloc/pal/pal_open_enclave.h b/src/snmalloc/pal/pal_open_enclave.h new file mode 100644 index 000000000..be0f141be --- /dev/null +++ b/src/snmalloc/pal/pal_open_enclave.h @@ -0,0 +1,57 @@ +#pragma once + +#include "pal_noalloc.h" +#include "pal_tid_default.h" + +#ifdef OPEN_ENCLAVE +extern "C" void* oe_memset_s(void* p, size_t p_size, int c, size_t size); +extern "C" int oe_random(void* data, size_t size); +extern "C" [[noreturn]] void oe_abort(); + +namespace snmalloc +{ + class OpenEnclaveErrorHandler + { + public: + static void print_stack_trace() {} + + [[noreturn]] static void error(const char* const str) + { + UNUSED(str); + oe_abort(); + } + static constexpr size_t address_bits = Aal::address_bits; + static constexpr size_t page_size = Aal::smallest_page_size; + }; + + using OpenEnclaveBasePAL = PALNoAlloc; + + class PALOpenEnclave : public OpenEnclaveBasePAL, public PalTidDefault + { + public: + /** + * Bitmap of PalFeatures flags indicating the optional features that this + * PAL supports. 
+ */ + static constexpr uint64_t pal_features = + OpenEnclaveBasePAL::pal_features | Entropy; + + template + static void zero(void* p, size_t size) noexcept + { + oe_memset_s(p, size, 0, size); + } + + /** + * Source of Entropy + */ + static uint64_t get_entropy64() + { + uint64_t result = 0; + if (oe_random(&result, sizeof(result)) != OE_OK) + error("Failed to get system randomness"); + return result; + } + }; +} +#endif diff --git a/src/pal/pal_openbsd.h b/src/snmalloc/pal/pal_openbsd.h similarity index 100% rename from src/pal/pal_openbsd.h rename to src/snmalloc/pal/pal_openbsd.h diff --git a/src/pal/pal_plain.h b/src/snmalloc/pal/pal_plain.h similarity index 78% rename from src/pal/pal_plain.h rename to src/snmalloc/pal/pal_plain.h index 9d8c8d9ec..005ef98fa 100644 --- a/src/pal/pal_plain.h +++ b/src/snmalloc/pal/pal_plain.h @@ -1,13 +1,14 @@ #pragma once -#include "../ds/bits.h" +#include "../ds_core/ds_core.h" +#include "pal_timer_default.h" namespace snmalloc { // Can be extended // Will require a reserve method in subclasses. template - class PALPlainMixin : public State + class PALPlainMixin : public State, public PalTimerDefaultImpl { public: // Notify platform that we will not be using these pages @@ -23,8 +24,7 @@ namespace snmalloc } else { - UNUSED(p); - UNUSED(size); + UNUSED(p, size); } } }; diff --git a/src/pal/pal_posix.h b/src/snmalloc/pal/pal_posix.h similarity index 54% rename from src/pal/pal_posix.h rename to src/snmalloc/pal/pal_posix.h index 9622ec8c5..8ad79958d 100644 --- a/src/pal/pal_posix.h +++ b/src/snmalloc/pal/pal_posix.h @@ -1,23 +1,24 @@ #pragma once -#include "../ds/address.h" -#if defined(BACKTRACE_HEADER) -# include BACKTRACE_HEADER +#include "../aal/aal.h" +#include "pal_tid_default.h" +#include "pal_timer_default.h" +#if defined(SNMALLOC_BACKTRACE_HEADER) +# include SNMALLOC_BACKTRACE_HEADER #endif #include +#include #include #include #include #include #include +#include #include #include #if __has_include() # include #endif -#if __has_include() -# include -#endif extern "C" int puts(const char* str); @@ -31,12 +32,15 @@ namespace snmalloc * efficient implementation. Subclasses should provide more efficient * implementations using platform-specific functionality. * - * The template parameter for this is the subclass and is used for explicit - * up casts to allow this class to call non-virtual methods on the templated - * version. + * The first template parameter for this is the subclass and is used for + * explicit up casts to allow this class to call non-virtual methods on the + * templated version. The next two allow subclasses to provide `writev` and + * `fsync` implementations that bypass any libc machinery that might not be + * working when an early-malloc error appears. */ - template - class PALPOSIX + template + class PALPOSIX : public PalTimerDefaultImpl>, + public PalTidDefault { /** * Helper class to access the `default_mmap_flags` field of `OS` if one @@ -96,12 +100,13 @@ namespace snmalloc static const int fd = T::anonymous_memory_fd; }; + protected: /** * A RAII class to capture and restore errno */ class KeepErrno { - decltype(errno) cached_errno; + int cached_errno; public: KeepErrno() : cached_errno(errno) {} @@ -120,7 +125,7 @@ namespace snmalloc * POSIX systems are assumed to support lazy commit. The build system checks * getentropy is available, only then this PAL supports Entropy. 
   */
-    static constexpr uint64_t pal_features = LazyCommit
+    static constexpr uint64_t pal_features = LazyCommit | Time
 #if defined(SNMALLOC_PLATFORM_HAS_GETENTROPY)
       | Entropy
 #endif
@@ -128,25 +133,61 @@
     static constexpr size_t page_size = Aal::smallest_page_size;
 
+    /**
+     * Address bits are potentially mediated by some POSIX OSes, but generally
+     * default to the architecture's.
+     *
+     * Unlike the AALs, which are composited by explicitly delegating to their
+     * template parameters and so play a SFINAE-based game to achieve similar
+     * ends, for the PALPOSIX<> classes we instead use more traditional
+     * inheritance (e.g., PALLinux is a subtype of PALPOSIX<PALLinux>) and so
+     * we can just use that mechanism here, too.
+     */
+    static constexpr size_t address_bits = Aal::address_bits;
+
     static void print_stack_trace()
     {
-#ifdef BACKTRACE_HEADER
+      // TODO: the backtrace mechanism does not yet work on CHERI, and causes
+      // tests which expect to be able to hook abort() to fail.  Skip it until
+      // https://github.com/CTSRD-CHERI/cheribsd/issues/962 is fixed.
+#if defined(SNMALLOC_BACKTRACE_HEADER) && !defined(__CHERI_PURE_CAPABILITY__)
       constexpr int SIZE = 1024;
       void* buffer[SIZE];
       auto nptrs = backtrace(buffer, SIZE);
-      fflush(stdout);
-      backtrace_symbols_fd(buffer, nptrs, STDOUT_FILENO);
-      puts("");
-      fflush(stdout);
+      backtrace_symbols_fd(buffer, nptrs, STDERR_FILENO);
+      UNUSED(write(STDERR_FILENO, "\n", 1));
+      UNUSED(fsync(STDERR_FILENO));
 #endif
     }
 
+    /**
+     * Report a message to standard error, followed by a newline.
+     */
+    static void message(const char* const str) noexcept
+    {
+      // We don't want logging to affect the errno behaviour of the program.
+      auto hold = KeepErrno();
+
+      void* nl = const_cast<char*>("\n");
+      struct iovec iov[] = {{const_cast<char*>(str), strlen(str)}, {nl, 1}};
+      UNUSED(writev(STDERR_FILENO, iov, sizeof(iov) / sizeof(struct iovec)));
+      UNUSED(fsync(STDERR_FILENO));
+    }
+
     /**
      * Report a fatal error and exit.
      */
     [[noreturn]] static void error(const char* const str) noexcept
     {
-      puts(str);
+      /// By this point, the allocator has failed, so we cannot assume that
+      /// subsequent allocations will work.
+      /// @attention Since the program is failing, we do not guarantee that
+      /// previously written bytes on stdout have been flushed.
+      void* nl = const_cast<char*>("\n");
+      struct iovec iov[] = {
+        {nl, 1}, {const_cast<char*>(str), strlen(str)}, {nl, 1}};
+      UNUSED(writev(STDERR_FILENO, iov, sizeof(iov) / sizeof(struct iovec)));
+      UNUSED(fsync(STDERR_FILENO));
       print_stack_trace();
       abort();
     }
@@ -162,15 +203,20 @@ namespace snmalloc
     static void notify_not_using(void* p, size_t size) noexcept
     {
       SNMALLOC_ASSERT(is_aligned_block<page_size>(p, size));
-#ifdef USE_POSIX_COMMIT_CHECKS
-      // Fill memory so that when we switch the pages back on we don't make
-      // assumptions on the content.
-      memset(p, 0x5a, size);
-      mprotect(p, size, PROT_NONE);
-#else
-      UNUSED(p);
-      UNUSED(size);
-#endif
+
+      if constexpr (PalEnforceAccess)
+      {
+        // Fill memory so that when we switch the pages back on we don't make
+        // assumptions on the content.
+        if constexpr (DEBUG)
+          memset(p, 0x5a, size);
+
+        mprotect(p, size, PROT_NONE);
+      }
+      else
+      {
+        UNUSED(p, size);
+      }
     }
 
     /**
@@ -178,7 +224,7 @@ namespace snmalloc
      *
      * On POSIX platforms, lazy commit means that this is a no-op, unless we
      * are also zeroing the pages in which case we call the platform's `zero`
-     * function.
+     * function, or we have initially mapped the pages as PROT_NONE.
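+     *
+     * An illustration (not part of this header; names as declared here): when
+     * PalEnforceAccess is set, a sequence such as
+     *
+     *   notify_not_using(p, size);     // poisoned in debug builds, PROT_NONE
+     *   // ... any access to the range now faults ...
+     *   notify_using<NoZero>(p, size); // mprotect(PROT_READ | PROT_WRITE)
+     *
+     * turns use-after-decommit into a visible fault rather than silent reuse;
+     * when it is not set, both calls reduce to UNUSED(p, size).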
*/ template static void notify_using(void* p, size_t size) noexcept @@ -186,17 +232,35 @@ namespace snmalloc SNMALLOC_ASSERT( is_aligned_block(p, size) || (zero_mem == NoZero)); -#ifdef USE_POSIX_COMMIT_CHECKS - mprotect(p, size, PROT_READ | PROT_WRITE); -#else - UNUSED(p); - UNUSED(size); -#endif + if constexpr (PalEnforceAccess) + mprotect(p, size, PROT_READ | PROT_WRITE); + else + { + UNUSED(p, size); + } if constexpr (zero_mem == YesZero) zero(p, size); } + /** + * Notify platform that we will be using these pages for reading. + * + * On POSIX platforms, lazy commit means that this is a no-op, unless + * we have initially mapped the pages as PROT_NONE. + */ + static void notify_using_readonly(void* p, size_t size) noexcept + { + SNMALLOC_ASSERT(is_aligned_block(p, size)); + + if constexpr (PalEnforceAccess) + mprotect(p, size, PROT_READ); + else + { + UNUSED(p, size); + } + } + /** * OS specific function for zeroing memory. * @@ -239,6 +303,15 @@ namespace snmalloc bzero(p, size); } + /** + * Extension point to allow subclasses to provide extra mmap flags. The + * argument indicates whether the memory should be in use or not. + */ + static int extra_mmap_flags(bool) + { + return 0; + } + /** * Reserve memory. * @@ -248,32 +321,31 @@ namespace snmalloc * POSIX does not define a portable interface for specifying alignment * greater than a page. */ - static std::pair reserve_at_least(size_t size) noexcept + static void* reserve(size_t size) noexcept { - SNMALLOC_ASSERT(bits::is_pow2(size)); + // If enforcing access, map pages initially as None, and then + // add permissions as required. Otherwise, immediately give all + // access as this is the most efficient to implement. + auto prot = PalEnforceAccess ? PROT_NONE : PROT_READ | PROT_WRITE; - // Magic number for over-allocating chosen by the Pal - // These should be further refined based on experiments. - constexpr size_t min_size = - bits::is64() ? 
bits::one_at_bit(32) : bits::one_at_bit(28);
+      void* p = mmap(
+        nullptr,
+        size,
+        prot,
+        MAP_PRIVATE | MAP_ANONYMOUS | DefaultMMAPFlags<OS>::flags |
+          OS::extra_mmap_flags(false),
+        AnonFD<OS>::fd,
+        0);
 
-      for (size_t size_request = bits::max(size, min_size);
-           size_request >= size;
-           size_request = size_request / 2)
+      if (p != MAP_FAILED)
       {
-        void* p = mmap(
-          nullptr,
-          size_request,
-          PROT_READ | PROT_WRITE,
-          MAP_PRIVATE | MAP_ANONYMOUS | DefaultMMAPFlags<OS>::flags,
-          AnonFD<OS>::fd,
-          0);
-
-        if (p != MAP_FAILED)
-          return {p, size_request};
+#ifdef SNMALLOC_TRACING
+        snmalloc::message<1024>("Pal_posix reserved: {} ({})", p, size);
+#endif
+        return p;
       }
 
-      OS::error("Out of memory");
+      return nullptr;
     }
 
     /**
@@ -304,5 +376,62 @@ namespace snmalloc
       }
       error("Entropy requested on platform that does not provide entropy");
     }
+
+    static uint64_t internal_time_in_ms()
+    {
+      auto hold = KeepErrno();
+
+      struct timespec ts;
+      if (clock_gettime(CLOCK_MONOTONIC, &ts) == -1)
+      {
+        error("Failed to get time");
+      }
+
+      return (static_cast<uint64_t>(ts.tv_sec) * 1000) +
+        (static_cast<uint64_t>(ts.tv_nsec) / 1000000);
+    }
+
+    static uint64_t dev_urandom()
+    {
+      union
+      {
+        uint64_t result;
+        char buffer[sizeof(uint64_t)];
+      };
+      ssize_t ret;
+      int flags = O_RDONLY;
+#if defined(O_CLOEXEC)
+      flags |= O_CLOEXEC;
+#endif
+      auto fd = open("/dev/urandom", flags, 0);
+      if (fd > 0)
+      {
+        auto current = std::begin(buffer);
+        auto target = std::end(buffer);
+        while (auto length = static_cast<size_t>(target - current))
+        {
+          ret = read(fd, current, length);
+          if (ret <= 0)
+          {
+            if (errno != EAGAIN && errno != EINTR)
+            {
+              break;
+            }
+          }
+          else
+          {
+            current += ret;
+          }
+        }
+        ret = close(fd);
+        SNMALLOC_ASSERT(0 == ret);
+        if (SNMALLOC_LIKELY(target == current))
+        {
+          return result;
+        }
+      }
+
+      error("Failed to get system randomness");
+    }
   };
 } // namespace snmalloc
diff --git a/src/pal/pal_solaris.h b/src/snmalloc/pal/pal_solaris.h
similarity index 100%
rename from src/pal/pal_solaris.h
rename to src/snmalloc/pal/pal_solaris.h
diff --git a/src/snmalloc/pal/pal_tid_default.h b/src/snmalloc/pal/pal_tid_default.h
new file mode 100644
index 000000000..678af98b0
--- /dev/null
+++ b/src/snmalloc/pal/pal_tid_default.h
@@ -0,0 +1,30 @@
+#pragma once
+
+#include <atomic>
+
+namespace snmalloc
+{
+  class PalTidDefault
+  {
+  public:
+    using ThreadIdentity = size_t;
+
+    /**
+     * @brief Get an id for the current thread.
+     *
+     * @return The thread id; this should never be the default value of
+     * ThreadIdentity.  Callers can assume it is a non-default value.
+ */ + static inline ThreadIdentity get_tid() noexcept + { + static thread_local size_t tid{0}; + static std::atomic tid_source{0}; + + if (tid == 0) + { + tid = ++tid_source; + } + return tid; + } + }; +} // namespace snmalloc \ No newline at end of file diff --git a/src/snmalloc/pal/pal_timer_default.h b/src/snmalloc/pal/pal_timer_default.h new file mode 100644 index 000000000..c7761effe --- /dev/null +++ b/src/snmalloc/pal/pal_timer_default.h @@ -0,0 +1,32 @@ + +#pragma once + +#include "pal_consts.h" +#include "pal_ds.h" + +#include + +namespace snmalloc +{ + template + class PalTimerDefaultImpl + { + inline static PalTimer timers{}; + + public: + static uint64_t time_in_ms() + { + auto time = PalTime::internal_time_in_ms(); + + // Process timers + timers.check(time); + + return time; + } + + static void register_timer(PalTimerObject* timer) + { + timers.register_timer(timer); + } + }; +} // namespace snmalloc diff --git a/src/pal/pal_windows.h b/src/snmalloc/pal/pal_windows.h similarity index 74% rename from src/pal/pal_windows.h rename to src/snmalloc/pal/pal_windows.h index 847a04ba6..2ab0bfc1f 100644 --- a/src/pal/pal_windows.h +++ b/src/snmalloc/pal/pal_windows.h @@ -1,7 +1,8 @@ #pragma once -#include "../ds/address.h" -#include "../ds/bits.h" +#include "../aal/aal.h" +#include "pal_tid_default.h" +#include "pal_timer_default.h" #ifdef _WIN32 # ifndef _MSC_VER @@ -22,9 +23,12 @@ # endif # endif +# include + namespace snmalloc { - class PALWindows + class PALWindows : public PalTimerDefaultImpl, + public PalTidDefault { /** * A flag indicating that we have tried to register for low-memory @@ -52,7 +56,8 @@ namespace snmalloc * Bitmap of PalFeatures flags indicating the optional features that this * PAL supports. This PAL supports low-memory notifications. */ - static constexpr uint64_t pal_features = LowMemoryNotification | Entropy + static constexpr uint64_t pal_features = LowMemoryNotification | Entropy | + Time # if defined(PLATFORM_HAS_VIRTUALALLOC2) && !defined(USE_SYSTEMATIC_TESTING) | AlignedAllocation # endif @@ -62,6 +67,11 @@ namespace snmalloc static constexpr size_t page_size = 0x1000; + /** + * Windows always inherits its underlying architecture's full address range. + */ + static constexpr size_t address_bits = Aal::address_bits; + /** * Check whether the low memory state is still in effect. This is an * expensive operation and should not be on any fast paths. @@ -107,10 +117,16 @@ namespace snmalloc low_memory_callbacks.register_notification(callback); } + static void message(const char* const str) + { + fputs(str, stderr); + fputc('\n', stderr); + fflush(stderr); + } + [[noreturn]] static void error(const char* const str) { - puts(str); - fflush(stdout); + message(str); abort(); } @@ -135,7 +151,8 @@ namespace snmalloc void* r = VirtualAlloc(p, size, MEM_COMMIT, PAGE_READWRITE); if (r == nullptr) - error("out of memory"); + report_fatal_error( + "out of memory: {} ({}) could not be committed", p, size); } /// OS specific function for zeroing memory @@ -152,39 +169,8 @@ namespace snmalloc ::memset(p, 0, size); } -# ifdef USE_SYSTEMATIC_TESTING - static size_t& systematic_bump_ptr() - { - static size_t bump_ptr = (size_t)0x4000'0000'0000; - return bump_ptr; - } - - static std::pair reserve_at_least(size_t size) noexcept - { - // Magic number for over-allocating chosen by the Pal - // These should be further refined based on experiments. - constexpr size_t min_size = - bits::is64() ? 
bits::one_at_bit(32) : bits::one_at_bit(28); - auto size_request = bits::max(size, min_size); - - DWORD flags = MEM_RESERVE; - - size_t retries = 1000; - void* p; - - do - { - p = VirtualAlloc( - (void*)systematic_bump_ptr(), size_request, flags, PAGE_READWRITE); - - systematic_bump_ptr() += size_request; - retries--; - } while (p == nullptr && retries > 0); - - return {p, size_request}; - } -# elif defined(PLATFORM_HAS_VIRTUALALLOC2) - template +# ifdef PLATFORM_HAS_VIRTUALALLOC2 + template static void* reserve_aligned(size_t size) noexcept { SNMALLOC_ASSERT(bits::is_pow2(size)); @@ -192,7 +178,7 @@ namespace snmalloc DWORD flags = MEM_RESERVE; - if (committed) + if (state_using) flags |= MEM_COMMIT; // If we're on Windows 10 or newer, we can use the VirtualAlloc2 @@ -209,34 +195,18 @@ namespace snmalloc void* ret = VirtualAlloc2FromApp( nullptr, nullptr, size, flags, PAGE_READWRITE, ¶m, 1); if (ret == nullptr) - { - error("Failed to allocate memory\n"); - } + errno = ENOMEM; return ret; } -# else - static std::pair reserve_at_least(size_t size) noexcept - { - SNMALLOC_ASSERT(bits::is_pow2(size)); +# endif - // Magic number for over-allocating chosen by the Pal - // These should be further refined based on experiments. - constexpr size_t min_size = - bits::is64() ? bits::one_at_bit(32) : bits::one_at_bit(28); - for (size_t size_request = bits::max(size, min_size); - size_request >= size; - size_request = size_request / 2) - { - void* ret = - VirtualAlloc(nullptr, size_request, MEM_RESERVE, PAGE_READWRITE); - if (ret != nullptr) - { - return std::pair(ret, size_request); - } - } - error("Failed to allocate memory\n"); + static void* reserve(size_t size) noexcept + { + void* ret = VirtualAlloc(nullptr, size, MEM_RESERVE, PAGE_READWRITE); + if (ret == nullptr) + errno = ENOMEM; + return ret; } -# endif /** * Source of Entropy @@ -253,6 +223,14 @@ namespace snmalloc error("Failed to get entropy."); return result; } + + static uint64_t internal_time_in_ms() + { + return static_cast( + std::chrono::duration_cast( + std::chrono::steady_clock::now().time_since_epoch()) + .count()); + } }; } #endif diff --git a/src/snmalloc/snmalloc.h b/src/snmalloc/snmalloc.h new file mode 100644 index 000000000..47bd6e78a --- /dev/null +++ b/src/snmalloc/snmalloc.h @@ -0,0 +1,10 @@ +#pragma once + +// Core implementation of snmalloc independent of the configuration mode +#include "snmalloc_core.h" + +// If the user has defined SNMALLOC_PROVIDE_OWN_CONFIG, this include does +// nothing. Otherwise, it provide a default configuration of snmalloc::Alloc. +#include "backend/globalconfig.h" +// User facing API surface, needs to know what `Alloc` is. 
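+//
+// When SNMALLOC_PROVIDE_OWN_CONFIG is defined, the embedder is expected to
+// have supplied that alias itself by this point, roughly as follows (an
+// illustrative sketch only; `MyConfig` is a hypothetical name):
+//
+//   namespace snmalloc
+//   {
+//     using Alloc = LocalAllocator<MyConfig>;
+//   }
+//
+// This is the pattern the domestication test in this change follows with its
+// CustomConfig.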
+#include "snmalloc_front.h" diff --git a/src/snmalloc/snmalloc_core.h b/src/snmalloc/snmalloc_core.h new file mode 100644 index 000000000..633b7abea --- /dev/null +++ b/src/snmalloc/snmalloc_core.h @@ -0,0 +1,3 @@ +#pragma once + +#include "backend_helpers/backend_helpers.h" diff --git a/src/snmalloc/snmalloc_front.h b/src/snmalloc/snmalloc_front.h new file mode 100644 index 000000000..4c5aa60ca --- /dev/null +++ b/src/snmalloc/snmalloc_front.h @@ -0,0 +1 @@ +#include "global/global.h" diff --git a/src/test/func/bits/bits.cc b/src/test/func/bits/bits.cc index f78be0b37..0046516a3 100644 --- a/src/test/func/bits/bits.cc +++ b/src/test/func/bits/bits.cc @@ -3,7 +3,7 @@ */ #include -#include +#include #include void test_ctz() @@ -35,11 +35,10 @@ void test_clz() int main(int argc, char** argv) { - UNUSED(argc); - UNUSED(argv); + snmalloc::UNUSED(argc, argv); setup(); test_clz(); test_ctz(); -} \ No newline at end of file +} diff --git a/src/test/func/cheri/cheri.cc b/src/test/func/cheri/cheri.cc new file mode 100644 index 000000000..d40f0b8da --- /dev/null +++ b/src/test/func/cheri/cheri.cc @@ -0,0 +1,153 @@ +#include + +#if defined(SNMALLOC_PASS_THROUGH) || !defined(__CHERI_PURE_CAPABILITY__) +// This test does not make sense in pass-through or w/o CHERI +int main() +{ + return 0; +} +#else + +// # define SNMALLOC_TRACING + +# include +# include +# include + +# if defined(__FreeBSD__) +# include +# endif + +using namespace snmalloc; + +bool cap_len_is(void* cap, size_t expected) +{ + return __builtin_cheri_length_get(cap) == expected; +} + +bool cap_vmem_perm_is(void* cap, bool expected) +{ +# if defined(CHERI_PERM_SW_VMEM) + return !!(__builtin_cheri_perms_get(cap) & CHERI_PERM_SW_VMEM) == expected; +# else +# warning "Don't know how to check VMEM permission bit" +# endif +} + +int main() +{ + +# if defined(__FreeBSD__) + { + size_t pagesize[8]; + int err = getpagesizes(pagesize, sizeof(pagesize) / sizeof(pagesize[0])); + SNMALLOC_CHECK(err > 0); + SNMALLOC_CHECK(pagesize[0] == OS_PAGE_SIZE); + } +# endif + + auto alloc = get_scoped_allocator(); + + message("Grab small object"); + { + static const size_t sz = 128; + void* o1 = alloc->alloc(sz); + SNMALLOC_CHECK(cap_len_is(o1, sz)); + SNMALLOC_CHECK(cap_vmem_perm_is(o1, false)); + alloc->dealloc(o1); + } + + /* + * This large object is sized to end up in our alloc's local buddy allocators + * when it's released. + */ + message("Grab large object"); + ptraddr_t alarge; + { + static const size_t sz = 1024 * 1024; + void* olarge = alloc->alloc(sz); + alarge = address_cast(olarge); + SNMALLOC_CHECK(cap_len_is(olarge, sz)); + SNMALLOC_CHECK(cap_vmem_perm_is(olarge, false)); + + static_cast(olarge)[128] = 'x'; + static_cast(olarge)[128 + OS_PAGE_SIZE] = 'y'; + +# if defined(__FreeBSD__) + static constexpr int irm = + MINCORE_INCORE | MINCORE_REFERENCED | MINCORE_MODIFIED; + char ic[2]; + int err = mincore(olarge, 2 * OS_PAGE_SIZE, ic); + SNMALLOC_CHECK(err == 0); + SNMALLOC_CHECK((ic[0] & irm) == irm); + SNMALLOC_CHECK((ic[1] & irm) == irm); + message("Large object in core; good"); +# endif + + alloc->dealloc(olarge); + } + + message("Grab large object again, verify reuse"); + { + static const size_t sz = 1024 * 1024; + errno = 0; + void* olarge = alloc->alloc(sz); + int err = errno; + + SNMALLOC_CHECK(alarge == address_cast(olarge)); + SNMALLOC_CHECK(err == 0); + +# if defined(__FreeBSD__) + /* + * Verify that the zeroing took place by mmap, which should mean that the + * first two pages are not in core. 
This implies that snmalloc successfully + * re-derived a Chunk- or Arena-bounded pointer and used that, and its VMAP + * permission, to tear pages out of the address space. + */ + static constexpr int irm = + MINCORE_INCORE | MINCORE_REFERENCED | MINCORE_MODIFIED; + char ic[2]; + err = mincore(olarge, 2 * OS_PAGE_SIZE, ic); + SNMALLOC_CHECK(err == 0); + SNMALLOC_CHECK((ic[0] & irm) == 0); + SNMALLOC_CHECK((ic[1] & irm) == 0); + message("Large object not in core; good"); +# endif + + SNMALLOC_CHECK(static_cast(olarge)[128] == '\0'); + SNMALLOC_CHECK(static_cast(olarge)[128 + OS_PAGE_SIZE] == '\0'); + SNMALLOC_CHECK(cap_len_is(olarge, sz)); + SNMALLOC_CHECK(cap_vmem_perm_is(olarge, false)); + + alloc->dealloc(olarge); + } + + /* + * Grab another CoreAlloc pointer from the pool and examine it. + * + * CoreAlloc-s come from the metadata pools of snmalloc, and so do not flow + * through the usual allocation machinery. + */ + message("Grab CoreAlloc from pool for inspection"); + { + static_assert( + std::is_same_v>); + + LocalCache lc{&StandardConfig::unused_remote}; + auto* ca = AllocPool::acquire(&lc); + + SNMALLOC_CHECK(cap_len_is(ca, sizeof(*ca))); + SNMALLOC_CHECK(cap_vmem_perm_is(ca, false)); + + /* + * Putting ca back into the pool would require unhooking our local cache, + * and that requires accessing privates. Since it's pretty harmless to do + * so here at the end of our test, just leak it. + */ + } + + message("CHERI checks OK"); + return 0; +} + +#endif diff --git a/src/test/func/domestication/domestication.cc b/src/test/func/domestication/domestication.cc new file mode 100644 index 000000000..5fbff09f5 --- /dev/null +++ b/src/test/func/domestication/domestication.cc @@ -0,0 +1,182 @@ +#include + +#ifdef SNMALLOC_PASS_THROUGH +// This test does not make sense in pass-through +int main() +{ + return 0; +} +#else + +// # define SNMALLOC_TRACING + +# include +# include +# include +# include + +// Specify type of allocator +# define SNMALLOC_PROVIDE_OWN_CONFIG +namespace snmalloc +{ + class CustomConfig : public CommonConfig + { + public: + using Pal = DefaultPal; + using PagemapEntry = DefaultPagemapEntry; + + private: + using ConcretePagemap = + FlatPagemap; + + public: + using Pagemap = BasicPagemap; + + public: + using LocalState = StandardLocalState< + Pal, + Pagemap, + Pipe, PagemapRegisterRange>>; + + using GlobalPoolState = PoolState>; + + using Backend = BackendAllocator; + + private: + SNMALLOC_REQUIRE_CONSTINIT + inline static GlobalPoolState alloc_pool; + + public: + /* + * C++, even as late as C++20, has some really quite strict limitations on + * designated initializers. However, as of C++17, we can have constexpr + * lambdas and so can use more of the power of the statement fragment of + * C++, and not just its initializer fragment, to initialize a non-prefix + * subset of the flags (in any order, at that). 
+ */ + static constexpr Flags Options = []() constexpr + { + Flags opts = {}; + opts.QueueHeadsAreTame = false; + opts.HasDomesticate = true; + return opts; + } + (); + + static GlobalPoolState& pool() + { + return alloc_pool; + } + + static void register_clean_up() + { + snmalloc::register_clean_up(); + } + + static inline bool domesticate_trace; + static inline size_t domesticate_count; + static inline uintptr_t* domesticate_patch_location; + static inline uintptr_t domesticate_patch_value; + + /* Verify that a pointer points into the region managed by this config */ + template + static CapPtr< + T, + typename B::template with_wildness> + capptr_domesticate(LocalState*, CapPtr p) + { + domesticate_count++; + + if (domesticate_trace) + { + std::cout << "Domesticating " << p.unsafe_ptr() +# if __has_builtin(__builtin_return_address) + << " from " << __builtin_return_address(0) +# endif + << std::endl; + } + + if ( + domesticate_patch_location != nullptr && + p.template as_reinterpret().unsafe_ptr() == + domesticate_patch_location) + { + std::cout << "Patching over corruption" << std::endl; + *domesticate_patch_location = domesticate_patch_value; + snmalloc::CustomConfig::domesticate_patch_location = nullptr; + } + + return CapPtr< + T, + typename B::template with_wildness>:: + unsafe_from(p.unsafe_ptr()); + } + }; + + using Alloc = LocalAllocator; +} + +# define SNMALLOC_NAME_MANGLE(a) test_##a +# include + +int main() +{ + snmalloc::CustomConfig::Pagemap::concretePagemap.init(); // init pagemap + snmalloc::CustomConfig::domesticate_count = 0; + + LocalEntropy entropy; + entropy.init(); + key_global = FreeListKey(entropy.get_free_list_key()); + + auto alloc1 = new Alloc(); + + // Allocate from alloc1; the size doesn't matter a whole lot, it just needs to + // be a small object and so definitely owned by this allocator rather. + auto p = alloc1->alloc(48); + std::cout << "Allocated p " << p << std::endl; + + // Put that free object on alloc1's remote queue + auto alloc2 = new Alloc(); + alloc2->dealloc(p); + alloc2->flush(); + + // Clobber the linkage but not the back pointer + snmalloc::CustomConfig::domesticate_patch_location = + static_cast(p); + snmalloc::CustomConfig::domesticate_patch_value = *static_cast(p); + memset(p, 0xA5, sizeof(void*)); + + snmalloc::CustomConfig::domesticate_trace = true; + snmalloc::CustomConfig::domesticate_count = 0; + + // Open a new slab, so that slow path will pick up the message queue. That + // means this should be a sizeclass we've not used before, even internally. + auto q = alloc1->alloc(512); + std::cout << "Allocated q " << q << std::endl; + + snmalloc::CustomConfig::domesticate_trace = false; + + /* + * Expected domestication calls in the above message passing: + * + * - On !QueueHeadsAreTame builds only, RemoteAllocator::dequeue + * domesticating the front pointer (to the initial stub) + * + * - RemoteAllocator::dequeue domesticating the stub's next pointer (p) + * + * - FrontendMetaData::alloc_free_list, domesticating the successor object + * in the newly minted freelist::Iter (i.e., the thing that would be allocated + * after q). + */ + static constexpr size_t expected_count = + snmalloc::CustomConfig::Options.QueueHeadsAreTame ? 
2 : 3; + SNMALLOC_CHECK(snmalloc::CustomConfig::domesticate_count == expected_count); + + // Prevent the allocators from going out of scope during the above test + alloc1->flush(); + alloc2->flush(); + + return 0; +} + +#endif diff --git a/src/test/func/external_pagemap/external_pagemap.cc b/src/test/func/external_pagemap/external_pagemap.cc index 75dc1976a..5e79c6a1a 100644 --- a/src/test/func/external_pagemap/external_pagemap.cc +++ b/src/test/func/external_pagemap/external_pagemap.cc @@ -1,4 +1,5 @@ -#if defined(SNMALLOC_PASS_THROUGH) || defined(_WIN32) +#if defined(SNMALLOC_PASS_THROUGH) || defined(_WIN32) || \ + !defined(TODO_REINSTATE_POSSIBLY) // This test does not make sense with malloc pass-through, skip it. // The malloc definitions are also currently incompatible with Windows headers // so skip this test on Windows as well. @@ -8,7 +9,7 @@ int main() } #else # define SNMALLOC_EXPOSE_PAGEMAP 1 -# include +# include using ExternalChunkmap = ExternalGlobalPagemapTemplate; @@ -19,7 +20,7 @@ int main() auto& global = GlobalChunkmap::pagemap(); SNMALLOC_CHECK(&p == &global); // Get a valid heap address - uintptr_t addr = reinterpret_cast(malloc(42)); + uintptr_t addr = unsafe_to_uintptr(malloc(42)); // Make this very strongly aligned addr &= ~0xfffffULL; void* page = p.page_for_address(addr); diff --git a/src/test/func/first_operation/first_operation.cc b/src/test/func/first_operation/first_operation.cc index 05092f12a..629027fc9 100644 --- a/src/test/func/first_operation/first_operation.cc +++ b/src/test/func/first_operation/first_operation.cc @@ -7,67 +7,35 @@ #include "test/setup.h" -#include +#include +#include #include -/** - * This test is checking lazy init is correctly done with `get`. - * - * The test is written so platforms that do not do lazy init can satify the - * test. - */ -void get_test() -{ - // This should get the GlobalPlaceHolder if using lazy init - auto a1 = snmalloc::ThreadAlloc::get_noncachable(); - - // This should get a real allocator - auto a2 = snmalloc::ThreadAlloc::get(); - - // Trigger potential lazy_init if `get` didn't (shouldn't happen). - a2->dealloc(a2->alloc(5)); - - // Get an allocated allocator. - auto a3 = snmalloc::ThreadAlloc::get_noncachable(); - - if (a1 != a3) - { - printf("Lazy test!\n"); - // If the allocators are different then lazy_init has occurred. - // This should have been caused by the call to `get` rather than - // the allocations. 
- if (a2 != a3) - { - abort(); - } - } -} - void alloc1(size_t size) { - void* r = snmalloc::ThreadAlloc::get_noncachable()->alloc(size); - snmalloc::ThreadAlloc::get_noncachable()->dealloc(r); + void* r = snmalloc::ThreadAlloc::get().alloc(size); + snmalloc::ThreadAlloc::get().dealloc(r); } void alloc2(size_t size) { - auto a = snmalloc::ThreadAlloc::get_noncachable(); - void* r = a->alloc(size); - a->dealloc(r); + auto& a = snmalloc::ThreadAlloc::get(); + void* r = a.alloc(size); + a.dealloc(r); } void alloc3(size_t size) { - auto a = snmalloc::ThreadAlloc::get_noncachable(); - void* r = a->alloc(size); - a->dealloc(r, size); + auto& a = snmalloc::ThreadAlloc::get(); + void* r = a.alloc(size); + a.dealloc(r, size); } void alloc4(size_t size) { - auto a = snmalloc::ThreadAlloc::get(); - void* r = a->alloc(size); - a->dealloc(r); + auto& a = snmalloc::ThreadAlloc::get(); + void* r = a.alloc(size); + a.dealloc(r); } void check_calloc(void* p, size_t size) @@ -77,7 +45,16 @@ void check_calloc(void* p, size_t size) for (size_t i = 0; i < size; i++) { if (((uint8_t*)p)[i] != 0) + { + std::cout << "Calloc contents:" << std::endl; + for (size_t j = 0; j < size; j++) + { + std::cout << std::hex << (size_t)((uint8_t*)p)[j] << " "; + if (j % 32 == 0) + std::cout << std::endl; + } abort(); + } // ((uint8_t*)p)[i] = 0x5a; } } @@ -86,54 +63,53 @@ void check_calloc(void* p, size_t size) void calloc1(size_t size) { void* r = - snmalloc::ThreadAlloc::get_noncachable()->alloc( - size); + snmalloc::ThreadAlloc::get().alloc(size); check_calloc(r, size); - snmalloc::ThreadAlloc::get_noncachable()->dealloc(r); + snmalloc::ThreadAlloc::get().dealloc(r); } void calloc2(size_t size) { - auto a = snmalloc::ThreadAlloc::get_noncachable(); - void* r = a->alloc(size); + auto& a = snmalloc::ThreadAlloc::get(); + void* r = a.alloc(size); check_calloc(r, size); - a->dealloc(r); + a.dealloc(r); } void calloc3(size_t size) { - auto a = snmalloc::ThreadAlloc::get_noncachable(); - void* r = a->alloc(size); + auto& a = snmalloc::ThreadAlloc::get(); + void* r = a.alloc(size); check_calloc(r, size); - a->dealloc(r, size); + a.dealloc(r, size); } void calloc4(size_t size) { - auto a = snmalloc::ThreadAlloc::get(); - void* r = a->alloc(size); + auto& a = snmalloc::ThreadAlloc::get(); + void* r = a.alloc(size); check_calloc(r, size); - a->dealloc(r); + a.dealloc(r); } void dealloc1(void* p, size_t) { - snmalloc::ThreadAlloc::get_noncachable()->dealloc(p); + snmalloc::ThreadAlloc::get().dealloc(p); } void dealloc2(void* p, size_t size) { - snmalloc::ThreadAlloc::get_noncachable()->dealloc(p, size); + snmalloc::ThreadAlloc::get().dealloc(p, size); } void dealloc3(void* p, size_t) { - snmalloc::ThreadAlloc::get()->dealloc(p); + snmalloc::ThreadAlloc::get().dealloc(p); } void dealloc4(void* p, size_t size) { - snmalloc::ThreadAlloc::get()->dealloc(p, size); + snmalloc::ThreadAlloc::get().dealloc(p, size); } void f(size_t size) @@ -148,31 +124,32 @@ void f(size_t size) auto t7 = std::thread(calloc3, size); auto t8 = std::thread(calloc4, size); - auto a = snmalloc::current_alloc_pool()->acquire(); - auto p1 = a->alloc(size); - auto p2 = a->alloc(size); - auto p3 = a->alloc(size); - auto p4 = a->alloc(size); - - auto t9 = std::thread(dealloc1, p1, size); - auto t10 = std::thread(dealloc2, p2, size); - auto t11 = std::thread(dealloc3, p3, size); - auto t12 = std::thread(dealloc4, p4, size); - - t1.join(); - t2.join(); - t3.join(); - t4.join(); - t5.join(); - t6.join(); - t7.join(); - t8.join(); - t9.join(); - t10.join(); - t11.join(); - 
t12.join(); - snmalloc::current_alloc_pool()->release(a); - snmalloc::current_alloc_pool()->debug_in_use(0); + { + auto a = snmalloc::get_scoped_allocator(); + auto p1 = a->alloc(size); + auto p2 = a->alloc(size); + auto p3 = a->alloc(size); + auto p4 = a->alloc(size); + + auto t9 = std::thread(dealloc1, p1, size); + auto t10 = std::thread(dealloc2, p2, size); + auto t11 = std::thread(dealloc3, p3, size); + auto t12 = std::thread(dealloc4, p4, size); + + t1.join(); + t2.join(); + t3.join(); + t4.join(); + t5.join(); + t6.join(); + t7.join(); + t8.join(); + t9.join(); + t10.join(); + t11.join(); + t12.join(); + } // Drops a. + // snmalloc::current_alloc_pool()->debug_in_use(0); printf("."); fflush(stdout); } @@ -183,16 +160,13 @@ int main(int, char**) printf("."); fflush(stdout); - std::thread t(get_test); - t.join(); - f(0); f(1); f(3); f(5); f(7); printf("\n"); - for (size_t exp = 1; exp < snmalloc::SUPERSLAB_BITS; exp++) + for (size_t exp = 1; exp < snmalloc::MAX_SMALL_SIZECLASS_BITS; exp++) { auto shifted = [exp](size_t v) { return v << exp; }; diff --git a/src/test/func/fixed_region/fixed_region.cc b/src/test/func/fixed_region/fixed_region.cc index 2059fc958..2c00c7b8c 100644 --- a/src/test/func/fixed_region/fixed_region.cc +++ b/src/test/func/fixed_region/fixed_region.cc @@ -1,67 +1,72 @@ -#define SNMALLOC_SGX -#define OPEN_ENCLAVE -#define OE_OK 0 -#define OPEN_ENCLAVE_SIMULATION +#include "test/setup.h" + #include -#include +#include +#include #ifdef assert # undef assert #endif #define assert please_use_SNMALLOC_ASSERT -extern "C" void* oe_memset_s(void* p, size_t p_size, int c, size_t size) -{ - UNUSED(p_size); - return memset(p, c, size); -} - -extern "C" int oe_random(void* p, size_t p_size) -{ - UNUSED(p_size); - UNUSED(p); - // Stub for random data. - return 0; -} +using namespace snmalloc; -extern "C" void oe_abort() -{ - abort(); -} +using CustomGlobals = FixedRangeConfig>; +using FixedAlloc = LocalAllocator; -using namespace snmalloc; int main() { #ifndef SNMALLOC_PASS_THROUGH // Depends on snmalloc specific features - auto& mp = *MemoryProviderStateMixin< - DefaultPal, - DefaultArenaMap>::make(); + setup(); // 28 is large enough to produce a nested allocator. // It is also large enough for the example to run in. // For 1MiB superslabs, SUPERSLAB_BITS + 4 is not big enough for the example. - size_t large_class = 28 - SUPERSLAB_BITS; - size_t size = bits::one_at_bit(SUPERSLAB_BITS + large_class); - void* oe_base = mp.reserve(large_class).unsafe_capptr; - void* oe_end = (uint8_t*)oe_base + size; - PALOpenEnclave::setup_initial_range(oe_base, oe_end); - std::cout << "Allocated region " << oe_base << " - " << oe_end << std::endl; + auto size = bits::one_at_bit(28); + auto oe_base = DefaultPal::reserve(size); + DefaultPal::notify_using(oe_base, size); + auto oe_end = pointer_offset(oe_base, size); + std::cout << "Allocated region " << oe_base << " - " + << pointer_offset(oe_base, size) << std::endl; - auto a = ThreadAlloc::get(); + CustomGlobals::init(nullptr, oe_base, size); + FixedAlloc a; + size_t object_size = 128; + size_t count = 0; + size_t i = 0; while (true) { - auto r1 = a->alloc(100); + auto r1 = a.alloc(object_size); + count += object_size; + i++; + if (i == 1024) + { + i = 0; + std::cout << "."; + } // Run until we exhaust the fixed region. // This should return null. 
if (r1 == nullptr) - return 0; + break; if (oe_base > r1) + { + std::cout << "Allocated: " << r1 << std::endl; abort(); + } if (oe_end < r1) + { + std::cout << "Allocated: " << r1 << std::endl; abort(); + } } + + std::cout << "Total allocated: " << count << " out of " << size << std::endl; + std::cout << "Overhead: 1/" << (double)size / (double)(size - count) + << std::endl; + + a.teardown(); #endif } diff --git a/src/test/func/jemalloc/jemalloc.cc b/src/test/func/jemalloc/jemalloc.cc new file mode 100644 index 000000000..5baddd76e --- /dev/null +++ b/src/test/func/jemalloc/jemalloc.cc @@ -0,0 +1,370 @@ +#include +#include +#include +#include + +#define SNMALLOC_NAME_MANGLE(a) our_##a +#undef SNMALLOC_NO_REALLOCARRAY +#undef SNMALLOC_NO_REALLOCARR +#define SNMALLOC_BOOTSTRAP_ALLOCATOR +#define SNMALLOC_JEMALLOC3_EXPERIMENTAL +#define SNMALLOC_JEMALLOC_NONSTANDARD +#include +#include + +#if __has_include() +# include +#endif + +#ifdef __FreeBSD__ +/** + * Enable testing against the versions that we get from libc or elsewhere. + * Enabled by default on FreeBSD where all of the jemalloc functions are + * exported from libc. + */ +# define TEST_JEMALLOC_MALLOCX +#endif + +#define OUR_MALLOCX_LG_ALIGN(la) (static_cast(la)) +#define OUR_MALLOCX_ZERO (one_at_bit(6)) + +#define OUR_ALLOCM_NO_MOVE (one_at_bit(7)) + +#define OUR_ALLOCM_SUCCESS 0 +#define OUR_ALLOCM_ERR_OOM 1 +#define OUR_ALLOCM_ERR_NOT_MOVED 2 + +#ifndef MALLOCX_LG_ALIGN +# define MALLOCX_LG_ALIGN(la) OUR_MALLOCX_LG_ALIGN(la) +#endif +#ifndef MALLOCX_ZERO +# define MALLOCX_ZERO OUR_MALLOCX_ZERO +#endif + +#ifndef ALLOCM_LG_ALIGN +# define ALLOCM_LG_ALIGN(la) OUR_MALLOCX_LG_ALIGN(la) +#endif +#ifndef ALLOCM_ZERO +# define ALLOCM_ZERO OUR_MALLOCX_ZERO +#endif +#ifndef ALLOCM_NO_MOVE +# define ALLOCM_NO_MOVE OUR_ALLOCM_NO_MOVE +#endif +#ifndef ALLOCM_SUCCESS +# define ALLOCM_SUCCESS OUR_ALLOCM_SUCCESS +#endif +#ifndef ALLOCM_ERR_OOM +# define ALLOCM_ERR_OOM OUR_ALLOCM_ERR_OOM +#endif +#ifndef ALLOCM_ERR_NOT_MOVED +# define ALLOCM_ERR_NOT_MOVED OUR_ALLOCM_ERR_NOT_MOVED +#endif + +using namespace snmalloc; +using namespace snmalloc::bits; + +namespace +{ + /** + * Test whether the MALLOCX_LG_ALIGN macro is defined correctly. This test + * will pass trivially if we don't have the malloc_np.h header from + * jemalloc, but at least the FreeBSD action runners in CI do have this + * header. + */ + template + void check_lg_align_macro() + { + static_assert( + OUR_MALLOCX_LG_ALIGN(Size) == MALLOCX_LG_ALIGN(Size), + "Our definition of MALLOCX_LG_ALIGN is wrong"); + static_assert( + OUR_MALLOCX_LG_ALIGN(Size) == ALLOCM_LG_ALIGN(Size), + "Our definition of ALLOCM_LG_ALIGN is wrong"); + static_assert( + JEMallocFlags(Size).log2align() == Size, "Out log2 align mask is wrong"); + if constexpr (Size > 0) + { + check_lg_align_macro(); + } + } + + /** + * The default maximum number of bits of address space to use for tests. + * This is clamped on platforms without lazy commit because this much RAM + * (or, at least, commit charge) will be used on such systems. + * + * Thread sanitizer makes these tests *very* slow, so reduce the size + * significantly when it's enabled. + */ + constexpr size_t DefaultMax = 22; + + /** + * Run a test with a range of sizes and alignments. The `test` argument is + * called with a size and log2 alignment as parameters. 
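+   *
+   * A worked illustration (not exercised by the test itself): a call such as
+   *   mallocx(100, MALLOCX_LG_ALIGN(4) | MALLOCX_ZERO)
+   * requests at least 100 zeroed bytes aligned to 1 << 4 = 16; with the macro
+   * definitions above the flags word is 4 | 64 = 68, i.e. the low six bits
+   * carry the log2 alignment and bit 6 requests zeroing.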
+ */ + template + void test_sizes_and_alignments(std::function test) + { + constexpr size_t low = 5; + for (size_t base = low; base < Log2MaxSize; base++) + { + INFO("\tTrying {}-byte allocations\n", one_at_bit(base)); + for (size_t i = 0; i < one_at_bit(low); i++) + { + for (int align = 1; align < 20; align++) + { + test(one_at_bit(base) + (i << (base - low)), align); + } + } + } + } + + /** + * Test that the size reported by nallocx corresponds to the size reported by + * sallocx on the return value from mallocx. + */ + template< + void*(Mallocx)(size_t, int), + void(Dallocx)(void*, int), + size_t(Sallocx)(const void*, int), + size_t(Nallocx)(size_t, int)> + void test_size() + { + START_TEST("nallocx and mallocx return the same size"); + test_sizes_and_alignments([](size_t size, int align) { + int flags = MALLOCX_LG_ALIGN(align); + size_t expected = Nallocx(size, flags); + void* ptr = Mallocx(size, flags); + EXPECT( + ptr != nullptr, + "Failed to allocate {} bytes with {}-bit alignment", + size, + align); + size_t allocated = Sallocx(ptr, 0); + EXPECT( + allocated == expected, + "Expected to have allocated {} bytes, got {} bytes", + expected, + allocated); + Dallocx(ptr, 0); + }); + } + + /** + * Test that, when we request zeroing in rallocx, we get zeroed memory. + */ + template< + void*(Mallocx)(size_t, int), + void(Dallocx)(void*, int), + void*(Rallocx)(void*, size_t, int)> + void test_zeroing() + { + START_TEST("rallocx can zero the remaining space."); + // The Rallocx call will copy everything in the first malloc, so stay + // fairly small. + auto test = [](size_t size, int align) { + int flags = MALLOCX_LG_ALIGN(align) | MALLOCX_ZERO; + char* ptr = static_cast(Mallocx(size, flags)); + ptr = static_cast(Rallocx(ptr, size * 2, flags)); + EXPECT( + ptr != nullptr, + "Failed to reallocate for {} byte allocation", + size * 2); + EXPECT( + ptr[size] == 0, + "Memory not zero initialised for {} byte reallocation from {} " + "byte allocation", + size * 2, + size); + // The second time we run this test, we if we're allocating from a free + // list then we will reuse this, so make sure it requires explicit + // zeroing. + ptr[size] = 12; + Dallocx(ptr, 0); + }; + test_sizes_and_alignments<22>(test); + test_sizes_and_alignments<22>(test); + } + + /** + * Test that xallocx reports a size that is at least the requested amount. + */ + template< + void*(Mallocx)(size_t, int), + void(Dallocx)(void*, int), + size_t(Xallocx)(void*, size_t, size_t, int)> + void test_xallocx() + { + START_TEST("xallocx returns a sensible value."); + // The Rallocx call will copy all of these, so stay fairly small. 
+ auto test = [](size_t size, int align) { + int flags = MALLOCX_LG_ALIGN(align); + void* ptr = Mallocx(size, flags); + EXPECT(ptr != nullptr, "Failed to allocate for zx byte allocation", size); + size_t sz = Xallocx(ptr, size, 1024, flags); + EXPECT(sz >= size, "xalloc returned {}, expected at least {}", sz, size); + Dallocx(ptr, 0); + }; + test_sizes_and_alignments(test); + } + + template< + int(Allocm)(void**, size_t*, size_t, int), + int(Sallocm)(const void*, size_t*, int), + int(Dallocm)(void*, int), + int(Nallocm)(size_t*, size_t, int)> + void test_nallocm_size() + { + START_TEST("nallocm and allocm return the same size"); + test_sizes_and_alignments([](size_t size, int align) { + int flags = ALLOCM_LG_ALIGN(align); + size_t expected; + int ret = Nallocm(&expected, size, flags); + EXPECT( + (ret == ALLOCM_SUCCESS), + "nallocm({}, {}) failed with error {}", + size, + flags, + ret); + void* ptr; + size_t allocated; + ret = Allocm(&ptr, &allocated, size, flags); + EXPECT( + (ptr != nullptr) && (ret == ALLOCM_SUCCESS), + "Failed to allocate {} bytes with {} bit alignment", + size, + align); + EXPECT( + allocated == expected, + "Expected to have allocated {} bytes, got {} bytes", + expected, + allocated); + ret = Sallocm(ptr, &expected, 0); + EXPECT( + (ret == ALLOCM_SUCCESS) && (allocated == expected), + "Expected to have allocated {} bytes, got {} bytes", + expected, + allocated); + + Dallocm(ptr, 0); + }); + } + + template< + int(Allocm)(void**, size_t*, size_t, int), + int(Rallocm)(void**, size_t*, size_t, size_t, int), + int(Dallocm)(void*, int)> + void test_rallocm_nomove() + { + START_TEST("rallocm non-moving behaviour"); + test_sizes_and_alignments([](size_t size, int align) { + int flags = ALLOCM_LG_ALIGN(align); + void* ptr; + size_t allocated; + int ret = Allocm(&ptr, &allocated, size, flags); + void* orig = ptr; + EXPECT( + (ptr != nullptr) && (ret == ALLOCM_SUCCESS), + "Failed to allocate {} bytes with {} bit alignment", + size, + align); + ret = Rallocm(&ptr, nullptr, allocated + 1, 12, flags | ALLOCM_NO_MOVE); + EXPECT( + (ret == ALLOCM_ERR_NOT_MOVED) || (ptr == orig), + "Expected rallocm not to be able to move or reallocate, but return was " + "{}\n", + ret); + Dallocm(ptr, 0); + }); + } + + template< + int(Allocm)(void**, size_t*, size_t, int), + int(Rallocm)(void**, size_t*, size_t, size_t, int), + int(Sallocm)(const void*, size_t*, int), + int(Dallocm)(void*, int), + int(Nallocm)(size_t*, size_t, int)> + void test_legacy_experimental_apis() + { + START_TEST("allocm out-of-memory behaviour"); + void* ptr = nullptr; + int ret = Allocm(&ptr, nullptr, std::numeric_limits::max() / 2, 0); + EXPECT( + (ptr == nullptr) && (ret == OUR_ALLOCM_ERR_OOM), + "Expected massive allocation to fail with out of memory ({}), received " + "allocation {}, return code {}", + OUR_ALLOCM_ERR_OOM, + ptr, + ret); + test_nallocm_size(); + test_rallocm_nomove(); + } +} + +extern "C" +{ + /** + * The jemalloc 3.x experimental APIs are gone from the headers in newer + * versions, but are still present in FreeBSD libc, so declare them here + * for testing. 
+ */ + int allocm(void**, size_t*, size_t, int); + int rallocm(void**, size_t*, size_t, size_t, int); + int sallocm(const void*, size_t*, int); + int dallocm(void*, int); + int nallocm(size_t*, size_t, int); +} + +int main() +{ +#ifdef SNMALLOC_PASS_THROUGH + return 0; +#endif + check_lg_align_macro<63>(); + static_assert( + OUR_MALLOCX_ZERO == MALLOCX_ZERO, "Our MALLOCX_ZERO macro is wrong"); + static_assert( + OUR_MALLOCX_ZERO == ALLOCM_ZERO, "Our ALLOCM_ZERO macro is wrong"); + static_assert( + OUR_ALLOCM_NO_MOVE == ALLOCM_NO_MOVE, "Our ALLOCM_NO_MOVE macro is wrong"); + static_assert( + JEMallocFlags(MALLOCX_ZERO).should_zero(), + "Our MALLOCX_ZERO is not the value that we are using"); + static_assert( + !JEMallocFlags(~MALLOCX_ZERO).should_zero(), + "Our MALLOCX_ZERO is not the value that we are using"); + static_assert( + JEMallocFlags(ALLOCM_NO_MOVE).may_not_move(), + "Our ALLOCM_NO_MOVE is not the value that we are using"); + static_assert( + !JEMallocFlags(~ALLOCM_NO_MOVE).may_not_move(), + "Our ALLOCM_NO_MOVE is not the value that we are using"); + test_size(); + test_zeroing(); + test_xallocx(); + test_legacy_experimental_apis< + our_allocm, + our_rallocm, + our_sallocm, + our_dallocm, + our_nallocm>(); + +#ifndef __PIC__ + void* bootstrap = __je_bootstrap_malloc(42); + if (bootstrap == nullptr) + { + printf("Failed to allocate from bootstrap malloc\n"); + } + __je_bootstrap_free(bootstrap); +#endif + + // These tests are for jemalloc compatibility and so should work with + // jemalloc's implementation of these functions. If TEST_JEMALLOC is + // defined then we try +#ifdef TEST_JEMALLOC_MALLOCX + test_size(); + test_zeroing(); + test_xallocx(); + test_legacy_experimental_apis(); +#endif +} diff --git a/src/test/func/malloc/malloc.cc b/src/test/func/malloc/malloc.cc index cba2beb06..1d4c31da9 100644 --- a/src/test/func/malloc/malloc.cc +++ b/src/test/func/malloc/malloc.cc @@ -1,85 +1,130 @@ #include +#include #include #define SNMALLOC_NAME_MANGLE(a) our_##a -#include "../../../override/malloc.cc" +#undef SNMALLOC_NO_REALLOCARRAY +#undef SNMALLOC_NO_REALLOCARR +#define SNMALLOC_BOOTSTRAP_ALLOCATOR +#include using namespace snmalloc; +constexpr int SUCCESS = 0; + void check_result(size_t size, size_t align, void* p, int err, bool null) { - if (errno != err) - abort(); - + bool failed = false; + EXPECT( + (errno == err) || (err == SUCCESS), + "Expected error: {} but got {}", + err, + errno); if (null) { - if (p != nullptr) - abort(); - - our_free(p); + EXPECT(p == nullptr, "Expected null but got {}", p); return; } + if ((p == nullptr) && (size != 0)) + { + INFO("Unexpected null returned.\n"); + failed = true; + } const auto alloc_size = our_malloc_usable_size(p); - const auto expected_size = round_size(size); + auto expected_size = our_malloc_good_size(size); #ifdef SNMALLOC_PASS_THROUGH // Calling system allocator may allocate a larger block than // snmalloc. Note, we have called the system allocator with // the size snmalloc would allocate, so it won't be smaller. const auto exact_size = false; + // We allocate MIN_ALLOC_SIZE byte for 0-sized allocations (and so round_size + // will tell us that the minimum size is MIN_ALLOC_SIZE), but the system + // allocator may return a 0-sized allocation. 
+ if (size == 0) + { + expected_size = 0; + } #else const auto exact_size = align == 1; #endif - if (exact_size && (alloc_size != expected_size)) +#ifdef __CHERI_PURE_CAPABILITY__ + const auto cheri_size = __builtin_cheri_length_get(p); + if (cheri_size != alloc_size && (size != 0)) { - printf( - "Usable size is %zu, but required to be %zu.\n", - alloc_size, - expected_size); - abort(); + INFO("Cheri size is {}, but required to be {}.", cheri_size, alloc_size); + failed = true; + } +# if defined(CHERI_PERM_SW_VMEM) + const auto cheri_perms = __builtin_cheri_perms_get(p); + if (cheri_perms & CHERI_PERM_SW_VMEM) + { + INFO("Cheri permissions include VMEM authority"); + failed = true; + } +# endif + if (p != nullptr) + { + /* + * Scan the allocation for any tagged capabilities. Since this test doesn't + * use the allocated memory if there is a valid cap it must have leaked from + * the allocator, which is bad. + */ + void** vp = static_cast(p); + for (size_t n = 0; n < alloc_size / sizeof(*vp); vp++, n++) + { + void* c = *vp; + if (__builtin_cheri_tag_get(c)) + { + printf("Found cap tag set in alloc: %#p at %#p\n", c, vp); + failed = true; + } + } + } +#endif + if (exact_size && (alloc_size != expected_size) && (size != 0)) + { + INFO( + "Usable size is {}, but required to be {}.", alloc_size, expected_size); + failed = true; } if ((!exact_size) && (alloc_size < expected_size)) { - printf( - "Usable size is %zu, but required to be at least %zu.\n", + INFO( + "Usable size is {}, but required to be at least {}.", alloc_size, expected_size); - abort(); + failed = true; } - if (static_cast(reinterpret_cast(p) % align) != 0) + if (((address_cast(p) % align) != 0) && (size != 0)) { - printf( - "Address is 0x%zx, but required to be aligned to 0x%zx.\n", - reinterpret_cast(p), - align); - abort(); + INFO("Address is {}, but required to be aligned to {}.\n", p, align); + failed = true; } - if ( - static_cast( - reinterpret_cast(p) % natural_alignment(size)) != 0) + if ((address_cast(p) % natural_alignment(size)) != 0) { - printf( - "Address is 0x%zx, but should have natural alignment to 0x%zx.\n", - reinterpret_cast(p), + INFO( + "Address is {}, but should have natural alignment to {}.\n", + p, natural_alignment(size)); - abort(); + failed = true; } + EXPECT(!failed, "check_result failed! {}", p); our_free(p); } void test_calloc(size_t nmemb, size_t size, int err, bool null) { - fprintf(stderr, "calloc(%zu, %zu)\n", nmemb, size); - errno = 0; + START_TEST("calloc({}, {}) combined size {}\n", nmemb, size, nmemb * size); + errno = SUCCESS; void* p = our_calloc(nmemb, size); - if ((p != nullptr) && (errno == 0)) + if (p != nullptr) { for (size_t i = 0; i < (size * nmemb); i++) { - if (((uint8_t*)p)[i] != 0) - abort(); + EXPECT(((uint8_t*)p)[i] == 0, "non-zero at {}", i); } } check_result(nmemb * size, 1, p, err, null); @@ -91,18 +136,19 @@ void test_realloc(void* p, size_t size, int err, bool null) if (p != nullptr) old_size = our_malloc_usable_size(p); - fprintf(stderr, "realloc(%p(%zu), %zu)\n", p, old_size, size); - errno = 0; + START_TEST("realloc({}({}), {})", p, old_size, size); + errno = SUCCESS; auto new_p = our_realloc(p, size); - // Realloc failure case, deallocate original block + check_result(size, 1, new_p, err, null); + // Realloc failure case, deallocate original block as not + // handled by check_result. 
if (new_p == nullptr && size != 0) our_free(p); - check_result(size, 1, new_p, err, null); } void test_posix_memalign(size_t size, size_t align, int err, bool null) { - fprintf(stderr, "posix_memalign(&p, %zu, %zu)\n", align, size); + START_TEST("posix_memalign(&p, {}, {})", align, size); void* p = nullptr; errno = our_posix_memalign(&p, align, size); check_result(size, align, p, err, null); @@ -110,12 +156,58 @@ void test_posix_memalign(size_t size, size_t align, int err, bool null) void test_memalign(size_t size, size_t align, int err, bool null) { - fprintf(stderr, "memalign(%zu, %zu)\n", align, size); - errno = 0; + START_TEST("memalign({}, {})", align, size); + errno = SUCCESS; void* p = our_memalign(align, size); check_result(size, align, p, err, null); } +void test_reallocarray(void* p, size_t nmemb, size_t size, int err, bool null) +{ + size_t old_size = 0; + size_t tsize = nmemb * size; + if (p != nullptr) + old_size = our_malloc_usable_size(p); + + START_TEST("reallocarray({}({}), {})", p, old_size, tsize); + errno = SUCCESS; + auto new_p = our_reallocarray(p, nmemb, size); + if (new_p == nullptr && tsize != 0) + our_free(p); + check_result(tsize, 1, new_p, err, null); +} + +void test_reallocarr( + size_t size_old, size_t nmemb, size_t size, int err, bool null) +{ + void* p = nullptr; + + if (size_old != (size_t)~0) + p = our_malloc(size_old); + START_TEST("reallocarr({}({}), {})", p, nmemb, size); + errno = SUCCESS; + int r = our_reallocarr(&p, nmemb, size); + EXPECT(r == err, "reallocarr failed! expected {} got {}\n", err, r); + + check_result(nmemb * size, 1, p, err, null); + p = our_malloc(size); + if (!p) + { + return; + } + for (size_t i = 1; i < size; i++) + static_cast(p)[i] = 1; + our_reallocarr(&p, nmemb, size); + if (r != SUCCESS) + our_free(p); + + for (size_t i = 1; i < size; i++) + { + EXPECT(static_cast(p)[i] == 1, "data consistency failed! at {}", i); + } + our_free(p); +} + int main(int argc, char** argv) { UNUSED(argc); @@ -123,26 +215,63 @@ int main(int argc, char** argv) setup(); - constexpr int SUCCESS = 0; + // Smoke test the fatal error builder. Check that it can generate strings + // including all of the kinds of things that it expects to be able to format. + // + // Note: We cannot use the check or assert macros here because they depend on + // `MessageBuilder` working. They are safe to use in any other test. + void* fakeptr = unsafe_from_uintptr(static_cast(0x42)); + MessageBuilder<1024> b{ + "testing pointer {} size_t {} message, {} world, null is {}, -123456 is " + "{}, 1234567 is {}", + fakeptr, + size_t(42), + "hello", + nullptr, + -123456, + 1234567}; + if ( + strcmp( + "testing pointer 0x42 size_t 0x2a message, hello world, null is 0x0, " + "-123456 is -123456, 1234567 is 1234567", + b.get_message()) != 0) + { + printf("Incorrect rendering of fatal error message: %s\n", b.get_message()); + abort(); + } + + our_free(nullptr); - test_realloc(our_malloc(64), 4194304, SUCCESS, false); + /* A very large allocation size that we expect to fail. 
*/ + const size_t too_big_size = ((size_t)-1) / 2; + check_result(too_big_size, 1, our_malloc(too_big_size), ENOMEM, true); + errno = SUCCESS; - for (sizeclass_t sc = 0; sc < (SUPERSLAB_BITS + 4); sc++) + for (smallsizeclass_t sc = 0; sc < (MAX_SMALL_SIZECLASS_BITS + 4); sc++) { const size_t size = bits::one_at_bit(sc); - printf("malloc: %zu\n", size); + START_TEST("malloc: {}", size); + errno = SUCCESS; check_result(size, 1, our_malloc(size), SUCCESS, false); + errno = SUCCESS; check_result(size + 1, 1, our_malloc(size + 1), SUCCESS, false); } test_calloc(0, 0, SUCCESS, false); - for (sizeclass_t sc = 0; sc < NUM_SIZECLASSES; sc++) + our_free(nullptr); + + test_calloc(1, too_big_size, ENOMEM, true); + errno = SUCCESS; + + for (smallsizeclass_t sc = 0; sc < NUM_SMALL_SIZECLASSES; sc++) { const size_t size = sizeclass_to_size(sc); bool overflow = false; - for (size_t n = 1; bits::umul(size, n, overflow) <= SUPERSLAB_SIZE; n *= 5) + for (size_t n = 1; + bits::umul(size, n, overflow) <= MAX_SMALL_SIZECLASS_SIZE; + n *= 5) { if (overflow) break; @@ -153,14 +282,13 @@ int main(int argc, char** argv) test_calloc(0, size, SUCCESS, false); } - for (sizeclass_t sc = 0; sc < NUM_SIZECLASSES; sc++) + for (smallsizeclass_t sc = 0; sc < NUM_SMALL_SIZECLASSES; sc++) { const size_t size = sizeclass_to_size(sc); test_realloc(our_malloc(size), size, SUCCESS, false); - test_realloc(our_malloc(size), 0, SUCCESS, true); test_realloc(nullptr, size, SUCCESS, false); - test_realloc(our_malloc(size), (size_t)-1, ENOMEM, true); - for (sizeclass_t sc2 = 0; sc2 < NUM_SIZECLASSES; sc2++) + test_realloc(our_malloc(size), too_big_size, ENOMEM, true); + for (smallsizeclass_t sc2 = 0; sc2 < NUM_SMALL_SIZECLASSES; sc2++) { const size_t size2 = sizeclass_to_size(sc2); test_realloc(our_malloc(size), size2, SUCCESS, false); @@ -168,30 +296,31 @@ int main(int argc, char** argv) } } - for (sizeclass_t sc = 0; sc < (SUPERSLAB_BITS + 4); sc++) + for (smallsizeclass_t sc = 0; sc < (MAX_SMALL_SIZECLASS_BITS + 4); sc++) { const size_t size = bits::one_at_bit(sc); test_realloc(our_malloc(size), size, SUCCESS, false); - test_realloc(our_malloc(size), 0, SUCCESS, true); test_realloc(nullptr, size, SUCCESS, false); - test_realloc(our_malloc(size), (size_t)-1, ENOMEM, true); - for (sizeclass_t sc2 = 0; sc2 < (SUPERSLAB_BITS + 4); sc2++) + test_realloc(our_malloc(size), too_big_size, ENOMEM, true); + for (smallsizeclass_t sc2 = 0; sc2 < (MAX_SMALL_SIZECLASS_BITS + 4); sc2++) { const size_t size2 = bits::one_at_bit(sc2); - printf("size1: %zu, size2:%zu\n", size, size2); + INFO("size1: {}, size2:{}\n", size, size2); test_realloc(our_malloc(size), size2, SUCCESS, false); test_realloc(our_malloc(size + 1), size2, SUCCESS, false); } } + test_realloc(our_malloc(64), 4194304, SUCCESS, false); + test_posix_memalign(0, 0, EINVAL, true); - test_posix_memalign((size_t)-1, 0, EINVAL, true); + test_posix_memalign(too_big_size, 0, EINVAL, true); test_posix_memalign(OS_PAGE_SIZE, sizeof(uintptr_t) / 2, EINVAL, true); - for (size_t align = sizeof(uintptr_t); align <= SUPERSLAB_SIZE * 8; + for (size_t align = sizeof(uintptr_t); align < MAX_SMALL_SIZECLASS_SIZE * 8; align <<= 1) { - for (sizeclass_t sc = 0; sc < NUM_SIZECLASSES; sc++) + for (smallsizeclass_t sc = 0; sc < NUM_SMALL_SIZECLASSES - 6; sc++) { const size_t size = sizeclass_to_size(sc); test_posix_memalign(size, align, SUCCESS, false); @@ -199,10 +328,53 @@ int main(int argc, char** argv) test_memalign(size, align, SUCCESS, false); } test_posix_memalign(0, align, SUCCESS, false); - 
test_posix_memalign((size_t)-1, align, ENOMEM, true); + test_posix_memalign(too_big_size, align, ENOMEM, true); test_posix_memalign(0, align + 1, EINVAL, true); } - current_alloc_pool()->debug_check_empty(); + test_reallocarray(nullptr, 1, 0, SUCCESS, false); + for (smallsizeclass_t sc = 0; sc < (MAX_SMALL_SIZECLASS_BITS + 4); sc++) + { + const size_t size = bits::one_at_bit(sc); + test_reallocarray(our_malloc(size), 1, size, SUCCESS, false); + test_reallocarray(our_malloc(size), 1, 0, SUCCESS, false); + test_reallocarray(nullptr, 1, size, SUCCESS, false); + test_reallocarray(our_malloc(size), 1, too_big_size, ENOMEM, true); + for (smallsizeclass_t sc2 = 0; sc2 < (MAX_SMALL_SIZECLASS_BITS + 4); sc2++) + { + const size_t size2 = bits::one_at_bit(sc2); + test_reallocarray(our_malloc(size), 1, size2, SUCCESS, false); + test_reallocarray(our_malloc(size + 1), 1, size2, SUCCESS, false); + } + } + + test_reallocarr((size_t)~0, 1, 0, SUCCESS, false); + test_reallocarr((size_t)~0, 1, 16, SUCCESS, false); + + for (smallsizeclass_t sc = 0; sc < (MAX_SMALL_SIZECLASS_BITS + 4); sc++) + { + const size_t size = bits::one_at_bit(sc); + test_reallocarr(size, 1, size, SUCCESS, false); + test_reallocarr(size, 1, 0, SUCCESS, false); + test_reallocarr(size, 2, size, SUCCESS, false); + void* p = our_malloc(size); + EXPECT(p != nullptr, "realloc alloc failed with {}", size); + int r = our_reallocarr(&p, 1, too_big_size); + EXPECT(r == ENOMEM, "expected failure on allocation\n"); + our_free(p); + + for (smallsizeclass_t sc2 = 0; sc2 < (MAX_SMALL_SIZECLASS_BITS + 4); sc2++) + { + const size_t size2 = bits::one_at_bit(sc2); + START_TEST("size1: {}, size2:{}", size, size2); + test_reallocarr(size, 1, size2, SUCCESS, false); + } + } + + EXPECT( + our_malloc_usable_size(nullptr) == 0, + "malloc_usable_size(nullptr) should be zero"); + + snmalloc::debug_check_empty(); return 0; } diff --git a/src/test/func/memcpy/func-memcpy.cc b/src/test/func/memcpy/func-memcpy.cc new file mode 100644 index 000000000..ff1856fac --- /dev/null +++ b/src/test/func/memcpy/func-memcpy.cc @@ -0,0 +1,174 @@ +// Windows doesn't like changing the linkage spec of abort. +#if defined(_MSC_VER) +int main() +{ + return 0; +} +#else +// QEMU user mode does not support the code that generates backtraces and so we +// also need to skip this test if we are doing a debug build and targeting +// QEMU. +# if defined(SNMALLOC_QEMU_WORKAROUND) && defined(SNMALLOC_BACKTRACE_HEADER) +# undef SNMALLOC_BACKTRACE_HEADER +# endif +# ifdef SNMALLOC_STATIC_LIBRARY_PREFIX +# undef SNMALLOC_STATIC_LIBRARY_PREFIX +# endif +# ifdef SNMALLOC_FAIL_FAST +# undef SNMALLOC_FAIL_FAST +# endif +# define SNMALLOC_FAIL_FAST false +# define SNMALLOC_STATIC_LIBRARY_PREFIX my_ +# ifndef SNMALLOC_PASS_THROUGH +# include "snmalloc/override/malloc.cc" +# else +# define my_malloc(x) malloc(x) +# define my_free(x) free(x) +# endif +# include "snmalloc/override/memcpy.cc" +# include "test/helpers.h" + +# include +# include +# include +# include +# include + +using namespace snmalloc; + +/** + * Jump buffer used to jump out of `abort()` for recoverable errors. + */ +static std::jmp_buf jmp; + +/** + * Flag indicating whether `jmp` is valid. If this is set then calls to + * `abort` will jump to the jump buffer, rather than exiting. + */ +static bool can_longjmp; + +/** + * Replacement for the C standard `abort` that returns to the `setjmp` call for + * recoverable errors. 
+ */ +extern "C" void abort() +{ + if (can_longjmp) + { + longjmp(jmp, 1); + } + exit(-1); +} + +/** + * Check that memcpy works in correct use. This allocates a pair of buffers, + * fills one with a well-known pattern, and then copies subsets of this at + * one-byte increments to a target. This gives us unaligned starts. + */ +void check_size(size_t size) +{ + START_TEST("checking {}-byte memcpy", size); + auto* s = static_cast(my_malloc(size + 1)); + auto* d = static_cast(my_malloc(size + 1)); + d[size] = 0; + s[size] = 255; + for (size_t start = 0; start < size; start++) + { + unsigned char* src = s + start; + unsigned char* dst = d + start; + size_t sz = (size - start); + for (size_t i = 0; i < sz; ++i) + { + src[i] = static_cast(i); + } + for (size_t i = 0; i < sz; ++i) + { + dst[i] = 0; + } + void* ret = my_memcpy(dst, src, sz); + EXPECT(ret == dst, "Return value should be {}, was {}", dst, ret); + for (size_t i = 0; i < sz; ++i) + { + if (dst[i] != static_cast(i)) + { + fprintf( + stderr, + "Testing size %zd %hhx == %hhx\n", + sz, + static_cast(i), + dst[i]); + } + EXPECT( + dst[i] == (unsigned char)i, + "dst[i] == {}, i == {}", + size_t(dst[i]), + i & 0xff); + } + EXPECT(d[size] == 0, "d[size] == {}", d[size]); + } + my_free(s); + my_free(d); +} + +void check_bounds(size_t size, size_t out_of_bounds) +{ + START_TEST( + "memcpy bounds, size {}, {} bytes out of bounds", size, out_of_bounds); + auto* s = static_cast(my_malloc(size)); + auto* d = static_cast(my_malloc(size)); + for (size_t i = 0; i < size; ++i) + { + s[i] = static_cast(i); + } + for (size_t i = 0; i < size; ++i) + { + d[i] = 0; + } + bool bounds_error = false; + can_longjmp = true; + if (setjmp(jmp) == 0) + { + my_memcpy(d, s, size + out_of_bounds); + } + else + { + bounds_error = true; + } + can_longjmp = false; + EXPECT( + bounds_error == (out_of_bounds > 0), + "bounds error: {}, out_of_bounds: {}", + bounds_error, + out_of_bounds); + my_free(s); + my_free(d); +} + +int main() +{ + // Skip the checks that expect bounds checks to fail when we are not the + // malloc implementation. +# if !defined(SNMALLOC_PASS_THROUGH) + // Some sizes to check for out-of-bounds access. As we are only able to + // catch overflows past the end of the sizeclass-padded allocation, make + // sure we don't try to test on smaller allocations. + std::initializer_list sizes = {MIN_ALLOC_SIZE, 1024, 2 * 1024 * 1024}; + static_assert( + MIN_ALLOC_SIZE < 1024, + "Can't detect overflow except at sizeclass boundaries"); + for (auto sz : sizes) + { + // Check in bounds + check_bounds(sz, 0); + // Check one byte out + check_bounds(sz, 1); + // Check one object out of bounds + check_bounds(sz, sz); + } +# endif + for (size_t x = 0; x < 2048; x++) + { + check_size(x); + } +} +#endif diff --git a/src/test/func/memory/memory.cc b/src/test/func/memory/memory.cc index cb8e0e7ea..2a2ada2ee 100644 --- a/src/test/func/memory/memory.cc +++ b/src/test/func/memory/memory.cc @@ -1,12 +1,14 @@ #include -#include +#include #include #include #include #include -#if defined(__linux__) && !defined(SNMALLOC_QEMU_WORKAROUND) +#include +#if ((defined(__linux__) && !defined(__ANDROID__)) || defined(__sun)) && \ + !defined(SNMALLOC_QEMU_WORKAROUND) /* - * We only test allocations with limited AS on linux for now. + * We only test allocations with limited AS on linux and Solaris for now. * It should be a good representative for POSIX systems. * QEMU `setrlimit64` does not behave as the same as native linux, * so we need to exclude it from such tests. 
@@ -19,13 +21,16 @@ # define KiB (1024ull) # define MiB (KiB * KiB) # define GiB (KiB * MiB) +#else +using rlim64_t = size_t; #endif using namespace snmalloc; -#ifdef TEST_LIMITED void test_limited(rlim64_t as_limit, size_t& count) { + UNUSED(as_limit, count); +#if false && defined(TEST_LIMITED) auto pid = fork(); if (!pid) { @@ -54,10 +59,10 @@ void test_limited(rlim64_t as_limit, size_t& count) upper_bound = std::min( upper_bound, static_cast(info.freeram >> 3u)); std::cout << "trying to alloc " << upper_bound / KiB << " KiB" << std::endl; - auto alloc = ThreadAlloc::get(); + auto& alloc = ThreadAlloc::get(); std::cout << "allocator initialised" << std::endl; - auto chunk = alloc->alloc(upper_bound); - alloc->dealloc(chunk); + auto chunk = alloc.alloc(upper_bound); + alloc.dealloc(chunk); std::cout << "success" << std::endl; std::exit(0); } @@ -71,12 +76,12 @@ void test_limited(rlim64_t as_limit, size_t& count) count++; } } -} #endif +} void test_alloc_dealloc_64k() { - auto alloc = ThreadAlloc::get(); + auto& alloc = ThreadAlloc::get(); constexpr size_t count = 1 << 12; constexpr size_t outer_count = 12; @@ -89,26 +94,26 @@ void test_alloc_dealloc_64k() // This will fill the short slab, and then start a new slab. for (size_t i = 0; i < count; i++) { - garbage[i] = alloc->alloc(16); + garbage[i] = alloc.alloc(16); } // Allocate one object on the second slab - keep_alive[j] = alloc->alloc(16); + keep_alive[j] = alloc.alloc(16); for (size_t i = 0; i < count; i++) { - alloc->dealloc(garbage[i]); + alloc.dealloc(garbage[i]); } } for (size_t j = 0; j < outer_count; j++) { - alloc->dealloc(keep_alive[j]); + alloc.dealloc(keep_alive[j]); } } void test_random_allocation() { - auto alloc = ThreadAlloc::get(); + auto& alloc = ThreadAlloc::get(); std::unordered_set allocated; constexpr size_t count = 10000; @@ -130,14 +135,14 @@ void test_random_allocation() auto& cell = objects[index % count]; if (cell != nullptr) { - alloc->dealloc(cell); allocated.erase(cell); + alloc.dealloc(cell); cell = nullptr; alloc_count--; } if (!just_dealloc) { - cell = alloc->alloc(16); + cell = alloc.alloc(16); auto pair = allocated.insert(cell); // Check not already allocated SNMALLOC_CHECK(pair.second); @@ -155,20 +160,20 @@ void test_random_allocation() // Deallocate all the remaining objects for (size_t i = 0; i < count; i++) if (objects[i] != nullptr) - alloc->dealloc(objects[i]); + alloc.dealloc(objects[i]); } void test_calloc() { - auto alloc = ThreadAlloc::get(); + auto& alloc = ThreadAlloc::get(); for (size_t size = 16; size <= (1 << 24); size <<= 1) { - void* p = alloc->alloc(size); + void* p = alloc.alloc(size); memset(p, 0xFF, size); - alloc->dealloc(p, size); + alloc.dealloc(p, size); - p = alloc->alloc(size); + p = alloc.alloc(size); for (size_t i = 0; i < size; i++) { @@ -176,94 +181,111 @@ void test_calloc() abort(); } - alloc->dealloc(p, size); + alloc.dealloc(p, size); } - current_alloc_pool()->debug_check_empty(); + snmalloc::debug_check_empty(); } void test_double_alloc() { - auto* a1 = current_alloc_pool()->acquire(); - auto* a2 = current_alloc_pool()->acquire(); - - const size_t n = (1 << 16) / 32; - - for (size_t k = 0; k < 4; k++) { - std::unordered_set set1; - std::unordered_set set2; + auto a1 = snmalloc::get_scoped_allocator(); + auto a2 = snmalloc::get_scoped_allocator(); - for (size_t i = 0; i < (n * 2); i++) - { - void* p = a1->alloc(20); - SNMALLOC_CHECK(set1.find(p) == set1.end()); - set1.insert(p); - } + const size_t n = (1 << 16) / 32; - for (size_t i = 0; i < (n * 2); i++) + for 
(size_t k = 0; k < 4; k++) { - void* p = a2->alloc(20); - SNMALLOC_CHECK(set2.find(p) == set2.end()); - set2.insert(p); - } + std::unordered_set set1; + std::unordered_set set2; - while (!set1.empty()) - { - auto it = set1.begin(); - a2->dealloc(*it, 20); - set1.erase(it); - } + for (size_t i = 0; i < (n * 2); i++) + { + void* p = a1->alloc(20); + SNMALLOC_CHECK(set1.find(p) == set1.end()); + set1.insert(p); + } - while (!set2.empty()) - { - auto it = set2.begin(); - a1->dealloc(*it, 20); - set2.erase(it); + for (size_t i = 0; i < (n * 2); i++) + { + void* p = a2->alloc(20); + SNMALLOC_CHECK(set2.find(p) == set2.end()); + set2.insert(p); + } + + while (!set1.empty()) + { + auto it = set1.begin(); + a2->dealloc(*it, 20); + set1.erase(it); + } + + while (!set2.empty()) + { + auto it = set2.begin(); + a1->dealloc(*it, 20); + set2.erase(it); + } } } - - current_alloc_pool()->release(a1); - current_alloc_pool()->release(a2); - current_alloc_pool()->debug_check_empty(); + snmalloc::debug_check_empty(); } void test_external_pointer() { // Malloc does not have an external pointer querying mechanism. - auto alloc = ThreadAlloc::get(); + auto& alloc = ThreadAlloc::get(); - for (uint8_t sc = 0; sc < NUM_SIZECLASSES; sc++) + for (uint8_t sc = 0; sc < NUM_SMALL_SIZECLASSES; sc++) { size_t size = sizeclass_to_size(sc); - void* p1 = alloc->alloc(size); + void* p1 = alloc.alloc(size); + + if (size != alloc.alloc_size(p1)) + { + std::cout << "Requested size: " << size + << " alloc_size: " << alloc.alloc_size(p1) << std::endl; + abort(); + } for (size_t offset = 0; offset < size; offset += 17) { void* p2 = pointer_offset(p1, offset); - void* p3 = alloc->external_pointer(p2); - void* p4 = alloc->external_pointer(p2); + void* p3 = alloc.external_pointer(p2); + void* p4 = alloc.external_pointer(p2); if (p1 != p3) { - std::cout << "size: " << size << " offset: " << offset << " p1: " << p1 - << " p3: " << p3 << std::endl; + std::cout << "size: " << size << " alloc_size: " << alloc.alloc_size(p1) + << " offset: " << offset << " p1: " << p1 << " p3: " << p3 + << std::endl; } SNMALLOC_CHECK(p1 == p3); + if ((size_t)p4 != (size_t)p1 + size - 1) + { + std::cout << "size: " << size << " end(p4): " << p4 << " p1: " << p1 + << " p1+size-1: " << pointer_offset(p1, size - 1) + << std::endl; + } SNMALLOC_CHECK((size_t)p4 == (size_t)p1 + size - 1); } - alloc->dealloc(p1, size); + alloc.dealloc(p1, size); } - current_alloc_pool()->debug_check_empty(); + snmalloc::debug_check_empty(); }; void check_offset(void* base, void* interior) { - auto alloc = ThreadAlloc::get(); - void* calced_base = alloc->external_pointer((void*)interior); + auto& alloc = ThreadAlloc::get(); + void* calced_base = alloc.external_pointer((void*)interior); if (calced_base != (void*)base) + { + std::cout << "Calced base: " << calced_base << " actual base: " << base + << " for interior: " << interior << std::endl; abort(); + } } void check_external_pointer_large(size_t* base) @@ -281,9 +303,9 @@ void test_external_pointer_large() { xoroshiro::p128r64 r; - auto alloc = ThreadAlloc::get(); + auto& alloc = ThreadAlloc::get(); - constexpr size_t count_log = snmalloc::bits::is64() ? 5 : 3; + constexpr size_t count_log = DefaultPal::address_bits > 32 ? 
5 : 3; constexpr size_t count = 1 << count_log; // Pre allocate all the objects size_t* objects[count]; @@ -292,14 +314,14 @@ void test_external_pointer_large() for (size_t i = 0; i < count; i++) { - size_t b = SUPERSLAB_BITS + 3; + size_t b = MAX_SMALL_SIZECLASS_BITS + 3; size_t rand = r.next() & ((1 << b) - 1); size_t size = (1 << 24) + rand; total_size += size; // store object - objects[i] = (size_t*)alloc->alloc(size); + objects[i] = (size_t*)alloc.alloc(size); // Store allocators size for this object - *objects[i] = alloc->alloc_size(objects[i]); + *objects[i] = alloc.alloc_size(objects[i]); check_external_pointer_large(objects[i]); if (i > 0) @@ -317,87 +339,111 @@ void test_external_pointer_large() // Deallocate everything for (size_t i = 0; i < count; i++) { - alloc->dealloc(objects[i]); + alloc.dealloc(objects[i]); } } void test_external_pointer_dealloc_bug() { - auto alloc = ThreadAlloc::get(); - constexpr size_t count = (SUPERSLAB_SIZE / SLAB_SIZE) * 2; + std::cout << "Testing external pointer dealloc bug" << std::endl; + auto& alloc = ThreadAlloc::get(); + constexpr size_t count = MIN_CHUNK_SIZE; void* allocs[count]; for (size_t i = 0; i < count; i++) { - allocs[i] = alloc->alloc(SLAB_SIZE / 2); + allocs[i] = alloc.alloc(MIN_CHUNK_BITS / 2); } for (size_t i = 1; i < count; i++) { - alloc->dealloc(allocs[i]); + alloc.dealloc(allocs[i]); } for (size_t i = 0; i < count; i++) { - alloc->external_pointer(allocs[i]); + alloc.external_pointer(allocs[i]); + } + + alloc.dealloc(allocs[0]); + std::cout << "Testing external pointer dealloc bug - done" << std::endl; +} + +void test_external_pointer_stack() +{ + std::cout << "Testing external pointer stack" << std::endl; + + std::array stack; + + auto& alloc = ThreadAlloc::get(); + + for (size_t i = 0; i < stack.size(); i++) + { + if (alloc.external_pointer(&stack[i]) > &stack[i]) + { + std::cout << "Stack pointer: " << &stack[i] + << " external pointer: " << alloc.external_pointer(&stack[i]) + << std::endl; + abort(); + } } - alloc->dealloc(allocs[0]); + std::cout << "Testing external pointer stack - done" << std::endl; } void test_alloc_16M() { - auto alloc = ThreadAlloc::get(); + auto& alloc = ThreadAlloc::get(); // sizes >= 16M use large_alloc const size_t size = 16'000'000; - void* p1 = alloc->alloc(size); - SNMALLOC_CHECK(alloc->alloc_size(alloc->external_pointer(p1)) >= size); - alloc->dealloc(p1); + void* p1 = alloc.alloc(size); + SNMALLOC_CHECK(alloc.alloc_size(alloc.external_pointer(p1)) >= size); + alloc.dealloc(p1); } void test_calloc_16M() { - auto alloc = ThreadAlloc::get(); + auto& alloc = ThreadAlloc::get(); // sizes >= 16M use large_alloc const size_t size = 16'000'000; - void* p1 = alloc->alloc(size); - SNMALLOC_CHECK(alloc->alloc_size(alloc->external_pointer(p1)) >= size); - alloc->dealloc(p1); + void* p1 = alloc.alloc(size); + SNMALLOC_CHECK(alloc.alloc_size(alloc.external_pointer(p1)) >= size); + alloc.dealloc(p1); } void test_calloc_large_bug() { - auto alloc = ThreadAlloc::get(); + auto& alloc = ThreadAlloc::get(); // Perform large calloc, to check for correct zeroing from PAL. // Some PALS have special paths for PAGE aligned zeroing of large // allocations. This is a large allocation that is intentionally // not a multiple of page size. 
- const size_t size = (SUPERSLAB_SIZE << 3) - 7; + const size_t size = (MAX_SMALL_SIZECLASS_SIZE << 3) - 7; - void* p1 = alloc->alloc(size); - SNMALLOC_CHECK(alloc->alloc_size(alloc->external_pointer(p1)) >= size); - alloc->dealloc(p1); + void* p1 = alloc.alloc(size); + SNMALLOC_CHECK(alloc.alloc_size(alloc.external_pointer(p1)) >= size); + alloc.dealloc(p1); } template void test_static_sized_alloc() { - auto alloc = ThreadAlloc::get(); - auto p = alloc->alloc(); + auto& alloc = ThreadAlloc::get(); + auto p = alloc.alloc(); static_assert((dealloc >= 0) && (dealloc <= 2), "bad dealloc flavor"); switch (dealloc) { case 0: - alloc->dealloc(p); + alloc.dealloc(p); break; case 1: - alloc->dealloc(p, asz); + alloc.dealloc(p, asz); break; case 2: - alloc->dealloc(p); + alloc.dealloc(p); break; } } @@ -406,18 +452,67 @@ void test_static_sized_allocs() { // For each small, medium, and large class, do each kind dealloc. This is // mostly to ensure that all of these forms compile. + for (size_t sc = 0; sc < NUM_SMALL_SIZECLASSES; sc++) + { + // test_static_sized_alloc(); + // test_static_sized_alloc(); + // test_static_sized_alloc(); + } + // test_static_sized_alloc(); + // test_static_sized_alloc(); + // test_static_sized_alloc(); - test_static_sized_alloc(); - test_static_sized_alloc(); - test_static_sized_alloc(); + // test_static_sized_alloc(); + // test_static_sized_alloc(); + // test_static_sized_alloc(); +} - test_static_sized_alloc(); - test_static_sized_alloc(); - test_static_sized_alloc(); +void test_remaining_bytes() +{ + auto& alloc = ThreadAlloc::get(); + for (size_t sc = 0; sc < NUM_SMALL_SIZECLASSES; sc++) + { + auto size = sizeclass_to_size(sc); + char* p = (char*)alloc.alloc(size); + for (size_t offset = 0; offset < size; offset++) + { + auto rem = alloc.remaining_bytes(address_cast(pointer_offset(p, offset))); + if (rem != (size - offset)) + { + printf( + "Allocation size: %zu, Offset: %zu, Remaining bytes: %zu, " + "Expected: %zu\n", + size, + offset, + rem, + size - offset); + abort(); + } + } + alloc.dealloc(p); + } +} + +void test_consolidaton_bug() +{ + /** + * Check for consolidation of various sizes, but allocating and deallocating, + * then requesting larger sizes. 
See issue #506 + */ + auto& alloc = ThreadAlloc::get(); - test_static_sized_alloc(); - test_static_sized_alloc(); - test_static_sized_alloc(); + for (size_t i = 0; i < 27; i++) + { + std::vector allocs; + for (size_t j = 0; j < 4; j++) + { + allocs.push_back(alloc.alloc(bits::one_at_bit(i))); + } + for (auto a : allocs) + { + alloc.dealloc(a); + } + } } int main(int argc, char** argv) @@ -442,22 +537,23 @@ int main(int argc, char** argv) size_t seed = opt.is("--seed", 0); Virtual::systematic_bump_ptr() += seed << 17; #else - UNUSED(argc); - UNUSED(argv); + UNUSED(argc, argv); #endif - test_alloc_dealloc_64k(); test_random_allocation(); test_calloc(); test_double_alloc(); #ifndef SNMALLOC_PASS_THROUGH // Depends on snmalloc specific features + test_remaining_bytes(); test_static_sized_allocs(); test_calloc_large_bug(); + test_external_pointer_stack(); test_external_pointer_dealloc_bug(); test_external_pointer_large(); test_external_pointer(); test_alloc_16M(); test_calloc_16M(); #endif + test_consolidaton_bug(); return 0; } diff --git a/src/test/func/memory_usage/memory_usage.cc b/src/test/func/memory_usage/memory_usage.cc index 47e4ce9c8..6d7dc40f6 100644 --- a/src/test/func/memory_usage/memory_usage.cc +++ b/src/test/func/memory_usage/memory_usage.cc @@ -2,14 +2,13 @@ * Memory usage test * Query memory usage repeatedly */ - #include #include #include #define SNMALLOC_NAME_MANGLE(a) our_##a -#include "../../../override/malloc-extensions.cc" -#include "../../../override/malloc.cc" +#include "../../../snmalloc/override/malloc-extensions.cc" +#include "../../../snmalloc/override/malloc.cc" using namespace snmalloc; @@ -35,7 +34,7 @@ bool print_memory_usage() return false; } -std::vector allocs; +std::vector allocs{}; /** * Add allocs until the statistics have changed n times. @@ -44,7 +43,8 @@ void add_n_allocs(size_t n) { while (true) { - allocs.push_back(our_malloc(1024)); + auto p = our_malloc(1024); + allocs.push_back(p); if (print_memory_usage()) { n--; @@ -61,7 +61,10 @@ void remove_n_allocs(size_t n) { while (true) { - our_free(allocs.back()); + if (allocs.empty()) + return; + auto p = allocs.back(); + our_free(p); allocs.pop_back(); if (print_memory_usage()) { diff --git a/src/test/func/pagemap/pagemap.cc b/src/test/func/pagemap/pagemap.cc index 887e92739..e6318dfbc 100644 --- a/src/test/func/pagemap/pagemap.cc +++ b/src/test/func/pagemap/pagemap.cc @@ -6,51 +6,126 @@ * but no examples were using multiple levels of pagemap. */ -#include #include -#include +#include #include using namespace snmalloc; -using T = size_t; -static constexpr size_t GRANULARITY_BITS = 9; -static constexpr T PRIME = 251; -Pagemap pagemap_test; - -int main(int argc, char** argv) +static constexpr size_t GRANULARITY_BITS = 20; +struct T { - UNUSED(argc); - UNUSED(argv); + size_t v = 99; + T(size_t v) : v(v) {} + T() {} +}; - setup(); +FlatPagemap pagemap_test_unbound; + +FlatPagemap pagemap_test_bound; + +size_t failure_count = 0; - constexpr int bits_to_test = bits::is64() ? 
36 : 31; +void check_get( + bool bounded, address_t address, T expected, const char* file, size_t lineno) +{ T value = 0; - for (uintptr_t ptr = 0; ptr < bits::one_at_bit(bits_to_test); + if (bounded) + value = pagemap_test_bound.get(address); + else + value = pagemap_test_unbound.get(address); + + if (value.v != expected.v) + { + std::cout << "Location: " << std::hex << address << " Read: " << value.v + << " Expected: " << expected.v << " on " << file << ":" << lineno + << std::endl; + failure_count++; + } +} + +void set(bool bounded, address_t address, T new_value) +{ + if (bounded) + pagemap_test_bound.set(address, new_value); + else + pagemap_test_unbound.set(address, new_value); +} + +#define CHECK_GET(b, a, e) check_get(b, a, e, __FILE__, __LINE__) + +void test_pagemap(bool bounded) +{ + address_t low = bits::one_at_bit(23); + address_t high = bits::one_at_bit(30); + + // Nullptr needs to work before initialisation + CHECK_GET(bounded, 0, T()); + + // Initialise the pagemap + if (bounded) + { + auto size = bits::one_at_bit(30); + auto base = DefaultPal::reserve(size); + DefaultPal::notify_using(base, size); + std::cout << "Fixed base: " << base << " (" << size << ") " + << " end: " << pointer_offset(base, size) << std::endl; + auto [heap_base, heap_size] = pagemap_test_bound.init(base, size); + std::cout << "Heap base: " << heap_base << " (" << heap_size << ") " + << " end: " << pointer_offset(heap_base, heap_size) << std::endl; + low = address_cast(heap_base); + high = low + heap_size; + } + else + { + pagemap_test_unbound.init(); + pagemap_test_unbound.register_range(low, high - low); + } + + // Nullptr should still work after init. + CHECK_GET(bounded, 0, T()); + + // Store a pattern into page map + T value = 1; + for (address_t ptr = low; ptr < high; ptr += bits::one_at_bit(GRANULARITY_BITS + 3)) { - pagemap_test.set(ptr, value); - value++; - if (value == PRIME) + set(bounded, ptr, value); + value.v++; + if (value.v == T().v) value = 0; - if ((ptr % (1ULL << 32)) == 0) + if (((ptr - low) % (1ULL << 26)) == 0) std::cout << "." << std::flush; } + // Check pattern is correctly stored std::cout << std::endl; - value = 0; - for (uintptr_t ptr = 0; ptr < bits::one_at_bit(bits_to_test); + value = 1; + for (address_t ptr = low; ptr < high; ptr += bits::one_at_bit(GRANULARITY_BITS + 3)) { - T result = pagemap_test.get(ptr); - if (value != result) - Pal::error("Pagemap corrupt!"); - value++; - if (value == PRIME) + CHECK_GET(bounded, ptr, value); + value.v++; + if (value.v == T().v) value = 0; - if ((ptr % (1ULL << 32)) == 0) + if (((ptr - low) % (1ULL << 26)) == 0) std::cout << "." << std::flush; } std::cout << std::endl; } + +int main(int argc, char** argv) +{ + UNUSED(argc, argv); + + setup(); + + test_pagemap(false); + test_pagemap(true); + + if (failure_count != 0) + { + std::cout << "Failure count: " << failure_count << std::endl; + abort(); + } +} diff --git a/src/test/func/pool/pool.cc b/src/test/func/pool/pool.cc new file mode 100644 index 000000000..7eaca6bcd --- /dev/null +++ b/src/test/func/pool/pool.cc @@ -0,0 +1,137 @@ +#include +#include +#include +#include + +using namespace snmalloc; + +struct PoolAEntry : Pooled +{ + int field; + + PoolAEntry() : field(1){}; +}; + +using PoolA = Pool; + +struct PoolBEntry : Pooled +{ + int field; + + PoolBEntry() : field(0){}; + PoolBEntry(int f) : field(f){}; +}; + +using PoolB = Pool; + +void test_alloc() +{ + auto ptr = PoolA::acquire(); + SNMALLOC_CHECK(ptr != nullptr); + // Pool allocations should not be visible to debug_check_empty. 
+  snmalloc::debug_check_empty();
+  PoolA::release(ptr);
+}
+
+void test_constructor()
+{
+  auto ptr1 = PoolA::acquire();
+  SNMALLOC_CHECK(ptr1 != nullptr);
+  SNMALLOC_CHECK(ptr1->field == 1);
+
+  auto ptr2 = PoolB::acquire();
+  SNMALLOC_CHECK(ptr2 != nullptr);
+  SNMALLOC_CHECK(ptr2->field == 0);
+
+  auto ptr3 = PoolB::acquire(1);
+  SNMALLOC_CHECK(ptr3 != nullptr);
+  SNMALLOC_CHECK(ptr3->field == 1);
+
+  PoolA::release(ptr1);
+  PoolB::release(ptr2);
+  PoolB::release(ptr3);
+}
+
+void test_alloc_many()
+{
+  constexpr size_t count = 16'000'000 / MIN_CHUNK_SIZE;
+
+  std::unordered_set<PoolAEntry*> allocated;
+
+  for (size_t i = 0; i < count; ++i)
+  {
+    auto ptr = PoolA::acquire();
+    SNMALLOC_CHECK(ptr != nullptr);
+    allocated.insert(ptr);
+  }
+
+  for (auto ptr : allocated)
+  {
+    PoolA::release(ptr);
+  }
+}
+
+void test_double_alloc()
+{
+  auto ptr1 = PoolA::acquire();
+  SNMALLOC_CHECK(ptr1 != nullptr);
+  auto ptr2 = PoolA::acquire();
+  SNMALLOC_CHECK(ptr2 != nullptr);
+  SNMALLOC_CHECK(ptr1 != ptr2);
+  PoolA::release(ptr2);
+  auto ptr3 = PoolA::acquire();
+  SNMALLOC_CHECK(ptr2 == ptr3);
+  PoolA::release(ptr1);
+  PoolA::release(ptr3);
+}
+
+void test_different_alloc()
+{
+  auto ptr1 = PoolA::acquire();
+  SNMALLOC_CHECK(ptr1 != nullptr);
+  PoolA::release(ptr1);
+  auto ptr2 = PoolB::acquire();
+  SNMALLOC_CHECK(ptr2 != nullptr);
+  SNMALLOC_CHECK(static_cast<void*>(ptr1) != static_cast<void*>(ptr2));
+  PoolB::release(ptr2);
+}
+
+void test_iterator()
+{
+  PoolAEntry* before_iteration_ptr = PoolA::acquire();
+
+  PoolAEntry* ptr = nullptr;
+  while ((ptr = PoolA::iterate(ptr)) != nullptr)
+  {
+    ptr->field = 2;
+  }
+
+  SNMALLOC_CHECK(before_iteration_ptr->field == 2);
+
+  PoolAEntry* after_iteration_ptr = PoolA::acquire();
+
+  SNMALLOC_CHECK(after_iteration_ptr->field == 2);
+
+  PoolA::release(before_iteration_ptr);
+  PoolA::release(after_iteration_ptr);
+}
+
+int main(int argc, char** argv)
+{
+  setup();
+#ifdef USE_SYSTEMATIC_TESTING
+  opt::Opt opt(argc, argv);
+  size_t seed = opt.is("--seed", 0);
+  Virtual::systematic_bump_ptr() += seed << 17;
+#else
+  UNUSED(argc, argv);
+#endif
+
+  test_alloc();
+  test_constructor();
+  test_alloc_many();
+  test_double_alloc();
+  test_different_alloc();
+  test_iterator();
+  return 0;
+}
diff --git a/src/test/func/redblack/redblack.cc b/src/test/func/redblack/redblack.cc
new file mode 100644
index 000000000..f13c72ebb
--- /dev/null
+++ b/src/test/func/redblack/redblack.cc
@@ -0,0 +1,230 @@
+#include "test/opt.h"
+#include "test/setup.h"
+#include "test/usage.h"
+#include "test/xoroshiro.h"
+
+#include
+#include
+#include
+#include
+
+#ifndef SNMALLOC_TRACING
+# define SNMALLOC_TRACING
+#endif
+// Redblack tree needs some libraries with trace enabled.
+#include "snmalloc/snmalloc.h"
+
+struct NodeRef
+{
+  // The redblack tree is going to be used inside the pagemap,
+  // and the redblack tree cannot use all the bits. Applying an offset
+  // to the stored value ensures that we have some abstraction over
+  // the representation.
+ static constexpr size_t offset = 10000; + + size_t* ptr; + constexpr NodeRef(size_t* p) : ptr(p) {} + constexpr NodeRef() : ptr(nullptr) {} + constexpr NodeRef(const NodeRef& other) : ptr(other.ptr) {} + constexpr NodeRef(NodeRef&& other) : ptr(other.ptr) {} + + bool operator!=(const NodeRef& other) const + { + return ptr != other.ptr; + } + NodeRef& operator=(const NodeRef& other) + { + ptr = other.ptr; + return *this; + } + void set(uint16_t val) + { + *ptr = ((size_t(val) + offset) << 1) + (*ptr & 1); + } + explicit operator uint16_t() + { + return uint16_t((*ptr >> 1) - offset); + } + explicit operator size_t*() + { + return ptr; + } +}; + +// Simple representation that is like the pagemap. +// Bottom bit of left is used to store the colour. +// We shift the fields up to make room for the colour. +struct node +{ + size_t left; + size_t right; +}; + +inline static node array[2048]; + +class Rep +{ +public: + using key = uint16_t; + + static constexpr key null = 0; + static constexpr size_t root{NodeRef::offset << 1}; + + using Handle = NodeRef; + using Contents = uint16_t; + + static void set(Handle ptr, Contents r) + { + ptr.set(r); + } + + static Contents get(Handle ptr) + { + return static_cast(ptr); + } + + static Handle ref(bool direction, key k) + { + if (direction) + return {&array[k].left}; + else + return {&array[k].right}; + } + + static bool is_red(key k) + { + return (array[k].left & 1) == 1; + } + + static void set_red(key k, bool new_is_red) + { + if (new_is_red != is_red(k)) + array[k].left ^= 1; + } + + static bool compare(key k1, key k2) + { + return k1 > k2; + } + + static bool equal(key k1, key k2) + { + return k1 == k2; + } + + static size_t printable(key k) + { + return k; + } + + static size_t* printable(NodeRef k) + { + return static_cast(k); + } + + static const char* name() + { + return "TestRep"; + } +}; + +template +void test(size_t size, unsigned int seed) +{ + /// Perform a pseudo-random series of + /// additions and removals from the tree. 
+ + xoroshiro::p64r32 rand(seed); + snmalloc::RBTree tree; + std::vector entries; + + bool first = true; + std::cout << "size: " << size << " seed: " << seed << std::endl; + for (size_t i = 0; i < 20 * size; i++) + { + auto batch = 1 + rand.next() % (3 + (size / 2)); + auto op = rand.next() % 4; + if (op < 2 || first) + { + first = false; + for (auto j = batch; j > 0; j--) + { + auto index = 1 + rand.next() % size; + if (tree.insert_elem(Rep::key(index))) + { + entries.push_back(Rep::key(index)); + } + } + } + else if (op == 3) + { + for (auto j = batch; j > 0; j--) + { + if (entries.size() == 0) + continue; + auto index = rand.next() % entries.size(); + auto elem = entries[index]; + if (!tree.remove_elem(elem)) + { + std::cout << "Failed to remove element: " << elem << std::endl; + abort(); + } + entries.erase(entries.begin() + static_cast(index)); + } + } + else + { + for (auto j = batch; j > 0; j--) + { + // print(); + auto min = tree.remove_min(); + auto s = entries.size(); + if (min == 0) + break; + + entries.erase( + std::remove(entries.begin(), entries.end(), min), entries.end()); + if (s != entries.size() + 1) + { + std::cout << "Failed to remove min: " << min << std::endl; + abort(); + } + } + } + if (entries.size() == 0) + { + break; + } + } +} + +int main(int argc, char** argv) +{ + setup(); + + opt::Opt opt(argc, argv); + + auto seed = opt.is("--seed", 0); + auto size = opt.is("--size", 0); + + if (seed == 0 && size == 0) + { + for (size = 1; size <= 300; size = size + 1 + (size >> 3)) + for (seed = 1; seed < 5 + (8 * size); seed++) + { + test(size, seed); + } + + return 0; + } + + if (seed == 0 || size == 0) + { + std::cout << "Set both --seed and --size" << std::endl; + return 1; + } + + // Trace particular example + test(size, seed); + return 0; +} diff --git a/src/test/func/release-rounding/rounding.cc b/src/test/func/release-rounding/rounding.cc index b0c391cb7..f9541331c 100644 --- a/src/test/func/release-rounding/rounding.cc +++ b/src/test/func/release-rounding/rounding.cc @@ -1,6 +1,6 @@ -#include +#include +#include #include - using namespace snmalloc; // Check for all sizeclass that we correctly round every offset within @@ -15,32 +15,39 @@ int main(int argc, char** argv) UNUSED(argc); UNUSED(argv); - for (size_t size_class = 0; size_class < NUM_SIZECLASSES; size_class++) + bool failed = false; + + for (size_t size_class = 0; size_class < NUM_SMALL_SIZECLASSES; size_class++) { size_t rsize = sizeclass_to_size((uint8_t)size_class); - size_t max_offset = - size_class < NUM_SMALL_CLASSES ? 
SLAB_SIZE : SUPERSLAB_SIZE; + size_t max_offset = sizeclass_to_slab_size(size_class); + sizeclass_t sc = sizeclass_t::from_small_class(size_class); for (size_t offset = 0; offset < max_offset; offset++) { - size_t rounded = (offset / rsize) * rsize; + size_t mod = offset % rsize; bool mod_0 = (offset % rsize) == 0; - size_t opt_rounded = round_by_sizeclass(size_class, offset); - if (rounded != opt_rounded) + size_t opt_mod = index_in_object(sc, offset); + if (mod != opt_mod) { std::cout << "rsize " << rsize << " offset " << offset << " opt " - << opt_rounded << std::endl; - abort(); + << opt_mod << " correct " << mod << std::endl + << std::flush; + failed = true; } - bool opt_mod_0 = is_multiple_of_sizeclass(size_class, offset); + bool opt_mod_0 = is_start_of_object(sc, offset); if (opt_mod_0 != mod_0) { - std::cout << "rsize " << rsize << " offset " << offset << " opt_mod " - << opt_mod_0 << std::endl; - abort(); + std::cout << "rsize " << rsize << " offset " << offset + << " opt_mod0 " << opt_mod_0 << " correct " << mod_0 + << std::endl + << std::flush; + failed = true; } } + if (failed) + abort(); } return 0; } diff --git a/src/test/func/sandbox/sandbox.cc b/src/test/func/sandbox/sandbox.cc index 1b4ffa352..519488070 100644 --- a/src/test/func/sandbox/sandbox.cc +++ b/src/test/func/sandbox/sandbox.cc @@ -1,4 +1,4 @@ -#ifdef SNMALLOC_PASS_THROUGH +#if defined(SNMALLOC_PASS_THROUGH) || true /* * This test does not make sense with malloc pass-through, skip it. */ @@ -16,25 +16,14 @@ using namespace snmalloc; namespace { - /** - * Helper for Alloc that is never used as a thread-local allocator and so is - * always initialised. - * - * CapPtr-vs-MSVC triggering; xref CapPtr's constructor - */ - bool never_init(void*) - { - return false; - } /** * Helper for Alloc that never needs lazy initialisation. * * CapPtr-vs-MSVC triggering; xref CapPtr's constructor */ - void* no_op_init(function_ref) + void no_op_register_clean_up() { SNMALLOC_CHECK(0 && "Should never be called!"); - return nullptr; } /** * Sandbox class. Allocates a memory region and an allocator that can @@ -71,7 +60,7 @@ namespace * outside the sandbox proper: no memory allocation operations and * amplification confined to sandbox memory. */ - using NoOpMemoryProvider = MemoryProviderStateMixin; + using NoOpMemoryProvider = ChunkAllocator; /** * Type for the allocator that lives outside of the sandbox and allocates @@ -81,12 +70,12 @@ namespace * memory. It (insecurely) routes messages to in-sandbox snmallocs, * though, so it can free any sandbox-backed snmalloc allocation. */ - using ExternalAlloc = Allocator< - never_init, - no_op_init, - NoOpMemoryProvider, - SNMALLOC_DEFAULT_CHUNKMAP, - false>; + using ExternalCoreAlloc = + Allocator; + + using ExternalAlloc = + LocalAllocator; + /** * Proxy class that forwards requests for large allocations to the real * memory provider. @@ -158,8 +147,9 @@ namespace * Note that a real version of this would not have access to the shared * pagemap and would not be used outside of the sandbox. */ + using InternalCoreAlloc = Allocator; using InternalAlloc = - Allocator; + LocalAllocator; /** * The start of the sandbox memory region. 
@@ -253,7 +243,7 @@ namespace // Use the outside-sandbox snmalloc to allocate memory, rather than using // the PAL directly, so that our out-of-sandbox can amplify sandbox // pointers - return ThreadAlloc::get_noncachable()->alloc(sb_size); + return ThreadAlloc::get().alloc(sb_size); } }; } @@ -269,7 +259,7 @@ int main() auto check = [](Sandbox& sb, auto& alloc, size_t sz) { void* ptr = alloc.alloc(sz); SNMALLOC_CHECK(sb.is_in_sandbox_heap(ptr, sz)); - ThreadAlloc::get_noncachable()->dealloc(ptr); + ThreadAlloc::get().dealloc(ptr); }; auto check_with_sb = [&](Sandbox& sb) { // Check with a range of sizes diff --git a/src/test/func/sizeclass/sizeclass.cc b/src/test/func/sizeclass/sizeclass.cc index 696d0140b..d42794e44 100644 --- a/src/test/func/sizeclass/sizeclass.cc +++ b/src/test/func/sizeclass/sizeclass.cc @@ -1,9 +1,9 @@ #include -#include +#include #include NOINLINE -snmalloc::sizeclass_t size_to_sizeclass(size_t size) +snmalloc::smallsizeclass_t size_to_sizeclass(size_t size) { return snmalloc::size_to_sizeclass(size); } @@ -15,7 +15,7 @@ void test_align_size() SNMALLOC_CHECK(snmalloc::aligned_size(128, 160) == 256); for (size_t size = 1; - size < snmalloc::sizeclass_to_size(snmalloc::NUM_SIZECLASSES - 1); + size < snmalloc::sizeclass_to_size(snmalloc::NUM_SMALL_SIZECLASSES - 1); size++) { size_t rsize = snmalloc::round_size(size); @@ -38,7 +38,8 @@ void test_align_size() failed |= true; } - for (size_t alignment_bits = 0; alignment_bits < snmalloc::SUPERSLAB_BITS; + for (size_t alignment_bits = 0; + alignment_bits < snmalloc::MAX_SMALL_SIZECLASS_BITS; alignment_bits++) { auto alignment = (size_t)1 << alignment_bits; @@ -76,11 +77,17 @@ int main(int, char**) std::cout << "sizeclass |-> [size_low, size_high] " << std::endl; - for (snmalloc::sizeclass_t sz = 0; sz < snmalloc::NUM_SIZECLASSES; sz++) + size_t slab_size = 0; + for (snmalloc::smallsizeclass_t sz = 0; sz < snmalloc::NUM_SMALL_SIZECLASSES; + sz++) { - // Separate printing for small and medium sizeclasses - if (sz == snmalloc::NUM_SMALL_CLASSES) + if ( + sz < snmalloc::NUM_SMALL_SIZECLASSES && + slab_size != snmalloc::sizeclass_to_slab_size(sz)) + { + slab_size = snmalloc::sizeclass_to_slab_size(sz); std::cout << std::endl; + } size_t size = snmalloc::sizeclass_to_size(sz); std::cout << (size_t)sz << " |-> " @@ -111,4 +118,4 @@ int main(int, char**) abort(); test_align_size(); -} \ No newline at end of file +} diff --git a/src/test/func/statistics/stats.cc b/src/test/func/statistics/stats.cc index bd9dfec86..2de3e2d9b 100644 --- a/src/test/func/statistics/stats.cc +++ b/src/test/func/statistics/stats.cc @@ -1,41 +1,41 @@ -#include +#include int main() { #ifndef SNMALLOC_PASS_THROUGH // This test depends on snmalloc internals - snmalloc::Alloc* a = snmalloc::ThreadAlloc::get(); + snmalloc::Alloc& a = snmalloc::ThreadAlloc::get(); bool result; - auto r = a->alloc(16); + auto r = a.alloc(16); - snmalloc::current_alloc_pool()->debug_check_empty(&result); + snmalloc::debug_check_empty(&result); if (result != false) { abort(); } - a->dealloc(r); + a.dealloc(r); - snmalloc::current_alloc_pool()->debug_check_empty(&result); + snmalloc::debug_check_empty(&result); if (result != true) { abort(); } - r = a->alloc(16); + r = a.alloc(16); - snmalloc::current_alloc_pool()->debug_check_empty(&result); + snmalloc::debug_check_empty(&result); if (result != false) { abort(); } - a->dealloc(r); + a.dealloc(r); - snmalloc::current_alloc_pool()->debug_check_empty(&result); + snmalloc::debug_check_empty(&result); if (result != true) { abort(); } 
#endif -} \ No newline at end of file +} diff --git a/src/test/func/teardown/teardown.cc b/src/test/func/teardown/teardown.cc new file mode 100644 index 000000000..f68ed4d03 --- /dev/null +++ b/src/test/func/teardown/teardown.cc @@ -0,0 +1,209 @@ +/** + * After a thread has started teardown a different path is taken for + * allocation and deallocation. This tests causes the state to be torn + * down early, and then use the teardown path for multiple allocations + * and deallocation. + */ + +#include "test/setup.h" + +#include +#include +#include + +void trigger_teardown() +{ + auto& a = snmalloc::ThreadAlloc::get(); + // Trigger init + void* r = a.alloc(16); + a.dealloc(r); + // Force teardown + a.teardown(); +} + +void alloc1(size_t size) +{ + trigger_teardown(); + void* r = snmalloc::ThreadAlloc::get().alloc(size); + snmalloc::ThreadAlloc::get().dealloc(r); +} + +void alloc2(size_t size) +{ + trigger_teardown(); + auto& a = snmalloc::ThreadAlloc::get(); + void* r = a.alloc(size); + a.dealloc(r); +} + +void alloc3(size_t size) +{ + trigger_teardown(); + auto& a = snmalloc::ThreadAlloc::get(); + void* r = a.alloc(size); + a.dealloc(r, size); +} + +void alloc4(size_t size) +{ + trigger_teardown(); + auto& a = snmalloc::ThreadAlloc::get(); + void* r = a.alloc(size); + a.dealloc(r); +} + +void check_calloc(void* p, size_t size) +{ + if (p != nullptr) + { + for (size_t i = 0; i < size; i++) + { + if (((uint8_t*)p)[i] != 0) + { + std::cout << "Calloc contents:" << std::endl; + for (size_t j = 0; j < size; j++) + { + std::cout << std::hex << (size_t)((uint8_t*)p)[j] << " "; + if (j % 32 == 0) + std::cout << std::endl; + } + abort(); + } + // ((uint8_t*)p)[i] = 0x5a; + } + } +} + +void calloc1(size_t size) +{ + trigger_teardown(); + void* r = + snmalloc::ThreadAlloc::get().alloc(size); + check_calloc(r, size); + snmalloc::ThreadAlloc::get().dealloc(r); +} + +void calloc2(size_t size) +{ + trigger_teardown(); + auto& a = snmalloc::ThreadAlloc::get(); + void* r = a.alloc(size); + check_calloc(r, size); + a.dealloc(r); +} + +void calloc3(size_t size) +{ + trigger_teardown(); + auto& a = snmalloc::ThreadAlloc::get(); + void* r = a.alloc(size); + check_calloc(r, size); + a.dealloc(r, size); +} + +void calloc4(size_t size) +{ + trigger_teardown(); + auto& a = snmalloc::ThreadAlloc::get(); + void* r = a.alloc(size); + check_calloc(r, size); + a.dealloc(r); +} + +void dealloc1(void* p, size_t) +{ + trigger_teardown(); + snmalloc::ThreadAlloc::get().dealloc(p); +} + +void dealloc2(void* p, size_t size) +{ + trigger_teardown(); + snmalloc::ThreadAlloc::get().dealloc(p, size); +} + +void dealloc3(void* p, size_t) +{ + trigger_teardown(); + snmalloc::ThreadAlloc::get().dealloc(p); +} + +void dealloc4(void* p, size_t size) +{ + trigger_teardown(); + snmalloc::ThreadAlloc::get().dealloc(p, size); +} + +void f(size_t size) +{ + auto t1 = std::thread(alloc1, size); + auto t2 = std::thread(alloc2, size); + auto t3 = std::thread(alloc3, size); + auto t4 = std::thread(alloc4, size); + + auto t5 = std::thread(calloc1, size); + auto t6 = std::thread(calloc2, size); + auto t7 = std::thread(calloc3, size); + auto t8 = std::thread(calloc4, size); + + { + auto a = snmalloc::get_scoped_allocator(); + auto p1 = a->alloc(size); + auto p2 = a->alloc(size); + auto p3 = a->alloc(size); + auto p4 = a->alloc(size); + + auto t9 = std::thread(dealloc1, p1, size); + auto t10 = std::thread(dealloc2, p2, size); + auto t11 = std::thread(dealloc3, p3, size); + auto t12 = std::thread(dealloc4, p4, size); + + t1.join(); + t2.join(); + 
t3.join(); + t4.join(); + t5.join(); + t6.join(); + t7.join(); + t8.join(); + t9.join(); + t10.join(); + t11.join(); + t12.join(); + } // Drops a. + // snmalloc::current_alloc_pool()->debug_in_use(0); + printf("."); + fflush(stdout); +} + +int main(int, char**) +{ + setup(); + printf("."); + fflush(stdout); + + f(0); + f(1); + f(3); + f(5); + f(7); + printf("\n"); + for (size_t exp = 1; exp < snmalloc::MAX_SMALL_SIZECLASS_BITS; exp++) + { + auto shifted = [exp](size_t v) { return v << exp; }; + + f(shifted(1)); + f(shifted(3)); + f(shifted(5)); + f(shifted(7)); + f(shifted(1) + 1); + f(shifted(3) + 1); + f(shifted(5) + 1); + f(shifted(7) + 1); + f(shifted(1) - 1); + f(shifted(3) - 1); + f(shifted(5) - 1); + f(shifted(7) - 1); + printf("\n"); + } +} diff --git a/src/test/func/thread_alloc_external/thread_alloc_external.cc b/src/test/func/thread_alloc_external/thread_alloc_external.cc index f5427595b..2b10ed8cb 100644 --- a/src/test/func/thread_alloc_external/thread_alloc_external.cc +++ b/src/test/func/thread_alloc_external/thread_alloc_external.cc @@ -1,37 +1,89 @@ +#ifdef SNMALLOC_USE_PTHREAD_DESTRUCTORS +# undef SNMALLOC_USE_PTHREAD_DESTRUCTORS +#endif + +#include #include +// Specify using own #define SNMALLOC_EXTERNAL_THREAD_ALLOC -#include + +#include + +namespace snmalloc +{ + using Alloc = snmalloc::LocalAllocator; +} + using namespace snmalloc; -class ThreadAllocUntyped +class ThreadAllocExternal { public: - static void* get() + static Alloc*& get_inner() { - static thread_local void* alloc = nullptr; - if (alloc != nullptr) - { - return alloc; - } - - alloc = current_alloc_pool()->acquire(); + static thread_local Alloc* alloc; return alloc; } + + static Alloc& get() + { + return *get_inner(); + } }; -#include +#include + +void allocator_thread_init(void) +{ + void* aptr; + { + // Create bootstrap allocator + auto a = snmalloc::ScopedAllocator(); + // Create storage for the thread-local allocator + aptr = a->alloc(sizeof(snmalloc::Alloc)); + } + // Initialize the thread-local allocator + ThreadAllocExternal::get_inner() = new (aptr) snmalloc::Alloc(); + ThreadAllocExternal::get().init(); +} + +void allocator_thread_cleanup(void) +{ + // Teardown the thread-local allocator + ThreadAllocExternal::get().teardown(); + // Need a bootstrap allocator to deallocate the thread-local allocator + auto a = snmalloc::ScopedAllocator(); + // Deallocate the storage for the thread local allocator + a->dealloc(ThreadAllocExternal::get_inner()); +} int main() { setup(); + allocator_thread_init(); + + auto& a = ThreadAlloc::get(); - auto a = ThreadAlloc::get(); + for (size_t i = 0; i < 1000; i++) + { + auto r1 = a.alloc(i); + + a.dealloc(r1); + } + + ThreadAlloc::get().teardown(); + // This checks that the scoped allocator does not call + // register clean up, as this configuration will fault + // if that occurs. 
+ auto a2 = ScopedAllocator(); for (size_t i = 0; i < 1000; i++) { - auto r1 = a->alloc(i); + auto r1 = a2->alloc(i); - a->dealloc(r1); + a2->dealloc(r1); } + + allocator_thread_cleanup(); } diff --git a/src/test/func/two_alloc_types/alloc1.cc b/src/test/func/two_alloc_types/alloc1.cc index 11a3b230d..74996b517 100644 --- a/src/test/func/two_alloc_types/alloc1.cc +++ b/src/test/func/two_alloc_types/alloc1.cc @@ -1,15 +1,26 @@ -#undef SNMALLOC_USE_LARGE_CHUNKS -#define OPEN_ENCLAVE -#define OE_OK 0 -#define OPEN_ENCLAVE_SIMULATION -#define NO_BOOTSTRAP_ALLOCATOR -#define SNMALLOC_EXPOSE_PAGEMAP -#define SNMALLOC_NAME_MANGLE(a) enclave_##a +#ifndef SNMALLOC_TRACING +# define SNMALLOC_TRACING +#endif + // Redefine the namespace, so we can have two versions. #define snmalloc snmalloc_enclave -#include "../../../override/malloc.cc" + +#include +#include + +// Specify type of allocator +#define SNMALLOC_PROVIDE_OWN_CONFIG +namespace snmalloc +{ + using CustomGlobals = FixedRangeConfig>; + using Alloc = LocalAllocator; +} + +#define SNMALLOC_NAME_MANGLE(a) enclave_##a +#include extern "C" void oe_allocator_init(void* base, void* end) { - snmalloc_enclave::PALOpenEnclave::setup_initial_range(base, end); + snmalloc::CustomGlobals::init( + nullptr, base, address_cast(end) - address_cast(base)); } diff --git a/src/test/func/two_alloc_types/alloc2.cc b/src/test/func/two_alloc_types/alloc2.cc index 21316ebd9..f25b5dcf0 100644 --- a/src/test/func/two_alloc_types/alloc2.cc +++ b/src/test/func/two_alloc_types/alloc2.cc @@ -1,10 +1,8 @@ -// Remove parameters feed from test harness -#undef SNMALLOC_USE_LARGE_CHUNKS -#undef SNMALLOC_USE_SMALL_CHUNKS +#ifndef SNMALLOC_TRACING +# define SNMALLOC_TRACING +#endif #define SNMALLOC_NAME_MANGLE(a) host_##a -#define NO_BOOTSTRAP_ALLOCATOR -#define SNMALLOC_EXPOSE_PAGEMAP // Redefine the namespace, so we can have two versions. #define snmalloc snmalloc_host -#include "../../../override/malloc.cc" +#include diff --git a/src/test/func/two_alloc_types/main.cc b/src/test/func/two_alloc_types/main.cc index 2daf44026..b7f6ded9e 100644 --- a/src/test/func/two_alloc_types/main.cc +++ b/src/test/func/two_alloc_types/main.cc @@ -1,20 +1,18 @@ -#include "../../../snmalloc.h" - #include +#include #include #include #include extern "C" void* oe_memset_s(void* p, size_t p_size, int c, size_t size) { - UNUSED(p_size); + snmalloc::UNUSED(p_size); return memset(p, c, size); } extern "C" int oe_random(void* p, size_t p_size) { - UNUSED(p_size); - UNUSED(p); + snmalloc::UNUSED(p_size, p); // Stub for random data. return 0; } @@ -31,35 +29,20 @@ extern "C" void host_free(void*); extern "C" void* enclave_malloc(size_t); extern "C" void enclave_free(void*); -extern "C" void* -enclave_snmalloc_chunkmap_global_get(snmalloc::PagemapConfig const**); -extern "C" void* -host_snmalloc_chunkmap_global_get(snmalloc::PagemapConfig const**); - using namespace snmalloc; int main() { setup(); - MemoryProviderStateMixin< - DefaultPal, - DefaultArenaMap> - mp; - // 26 is large enough to produce a nested allocator. - // It is also large enough for the example to run in. - // For 1MiB superslabs, SUPERSLAB_BITS + 2 is not big enough for the example. - size_t large_class = 26 - SUPERSLAB_BITS; - size_t size = bits::one_at_bit(SUPERSLAB_BITS + large_class); - void* oe_base = mp.reserve(large_class).unsafe_capptr; - void* oe_end = (uint8_t*)oe_base + size; + // many other sizes would work. 
+ size_t length = bits::one_at_bit(26); + auto oe_base = host_malloc(length); + + auto oe_end = pointer_offset(oe_base, length); oe_allocator_init(oe_base, oe_end); - std::cout << "Allocated region " << oe_base << " - " << oe_end << std::endl; - // Call these functions to trigger asserts if the cast-to-self doesn't work. - const PagemapConfig* c; - enclave_snmalloc_chunkmap_global_get(&c); - host_snmalloc_chunkmap_global_get(&c); + std::cout << "Allocated region " << oe_base << " - " << oe_end << std::endl; auto a = host_malloc(128); auto b = enclave_malloc(128); @@ -68,5 +51,7 @@ int main() std::cout << "Enclave alloc " << b << std::endl; host_free(a); + std::cout << "Host freed!" << std::endl; enclave_free(b); + std::cout << "Enclace freed!" << std::endl; } diff --git a/src/test/helpers.h b/src/test/helpers.h new file mode 100644 index 000000000..30f6e4655 --- /dev/null +++ b/src/test/helpers.h @@ -0,0 +1,39 @@ +#pragma once +#ifdef _MSC_VER +# define __PRETTY_FUNCTION__ __FUNCSIG__ +#endif + +namespace snmalloc +{ + /** + * The name of the function under test. This is set in the START_TEST macro + * and used for error reporting in EXPECT. + */ + const char* current_test = ""; + + /** + * Log that the test started. + */ +#define START_TEST(msg, ...) \ + do \ + { \ + current_test = __PRETTY_FUNCTION__; \ + MessageBuilder<1024> mb{"Starting test: " msg "\n", ##__VA_ARGS__}; \ + DefaultPal::message(mb.get_message()); \ + } while (0) + + /** + * An assertion that fires even in debug builds. Uses the value set by + * START_TEST. + */ +#define EXPECT(x, msg, ...) \ + SNMALLOC_CHECK_MSG(x, " in test {} " msg "\n", current_test, ##__VA_ARGS__) + +#define INFO(msg, ...) \ + do \ + { \ + MessageBuilder<1024> mb{msg "\n", ##__VA_ARGS__}; \ + DefaultPal::message(mb.get_message()); \ + } while (0) + +} diff --git a/src/test/measuretime.h b/src/test/measuretime.h index a67a65e10..903961cc4 100644 --- a/src/test/measuretime.h +++ b/src/test/measuretime.h @@ -5,17 +5,40 @@ #include #include -class MeasureTime : public std::stringstream +class MeasureTime { + std::stringstream ss; std::chrono::time_point start = std::chrono::high_resolution_clock::now(); + bool quiet = false; + public: ~MeasureTime() { auto finish = std::chrono::high_resolution_clock::now(); auto diff = finish - start; - std::cout << str() << ": " << std::setw(12) << diff.count() << " ns" - << std::endl; + if (!quiet) + { + std::cout << ss.str() << ": " << std::setw(12) << diff.count() << " ns" + << std::endl; + } + } + + MeasureTime(bool quiet = false) : quiet(quiet) {} + + template + MeasureTime& operator<<(const T& s) + { + ss << s; + start = std::chrono::high_resolution_clock::now(); + return *this; + } + + std::chrono::nanoseconds get_time() + { + auto finish = std::chrono::high_resolution_clock::now(); + auto diff = finish - start; + return diff; } }; \ No newline at end of file diff --git a/src/test/opt.h b/src/test/opt.h index 1d48e7abb..356b10c04 100644 --- a/src/test/opt.h +++ b/src/test/opt.h @@ -11,10 +11,10 @@ namespace opt { private: int argc; - char** argv; + const char* const* argv; public: - Opt(int argc, char** argv) : argc(argc), argv(argv) {} + Opt(int argc, const char* const* argv) : argc(argc), argv(argv) {} bool has(const char* opt) { diff --git a/src/test/perf/contention/contention.cc b/src/test/perf/contention/contention.cc index bcc66d310..e266f0491 100644 --- a/src/test/perf/contention/contention.cc +++ b/src/test/perf/contention/contention.cc @@ -5,7 +5,7 @@ #include #include -#include +#include #include 
#include @@ -75,15 +75,24 @@ size_t swapcount; void test_tasks_f(size_t id) { - Alloc* a = ThreadAlloc::get(); + auto& a = ThreadAlloc::get(); xoroshiro::p128r32 r(id + 5000); for (size_t n = 0; n < swapcount; n++) { size_t size = 16 + (r.next() % 1024); - size_t* res = (size_t*)(use_malloc ? malloc(size) : a->alloc(size)); + size_t* res = (size_t*)(use_malloc ? malloc(size) : a.alloc(size)); + + if (res != nullptr) + { + *res = size; + } + else + { + std::cout << "Failed to allocate " << size << " bytes" << std::endl; + // Continue as this is not an important failure. + } - *res = size; size_t* out = contention[n % swapsize].exchange(res, std::memory_order_acq_rel); @@ -93,14 +102,16 @@ void test_tasks_f(size_t id) if (use_malloc) free(out); else - a->dealloc(out, size); + a.dealloc(out, size); } } }; void test_tasks(size_t num_tasks, size_t count, size_t size) { - Alloc* a = ThreadAlloc::get(); + std::cout << "Sequential setup" << std::endl; + + auto& a = ThreadAlloc::get(); contention = new std::atomic[size]; xoroshiro::p128r32 r; @@ -109,7 +120,7 @@ void test_tasks(size_t num_tasks, size_t count, size_t size) { size_t alloc_size = 16 + (r.next() % 1024); size_t* res = - (size_t*)(use_malloc ? malloc(alloc_size) : a->alloc(alloc_size)); + (size_t*)(use_malloc ? malloc(alloc_size) : a.alloc(alloc_size)); *res = alloc_size; contention[n] = res; } @@ -120,6 +131,7 @@ void test_tasks(size_t num_tasks, size_t count, size_t size) Stats s0; current_alloc_pool()->aggregate_stats(s0); #endif + std::cout << "Begin parallel test:" << std::endl; { ParallelTest test(num_tasks); @@ -134,7 +146,7 @@ void test_tasks(size_t num_tasks, size_t count, size_t size) if (use_malloc) free(contention[n]); else - a->dealloc(contention[n], *contention[n]); + a.dealloc(contention[n], *contention[n]); } } @@ -142,7 +154,7 @@ void test_tasks(size_t num_tasks, size_t count, size_t size) } #ifndef NDEBUG - current_alloc_pool()->debug_check_empty(); + snmalloc::debug_check_empty(); #endif }; diff --git a/src/test/perf/external_pointer/externalpointer.cc b/src/test/perf/external_pointer/externalpointer.cc index 0a88ea5d9..be3306cba 100644 --- a/src/test/perf/external_pointer/externalpointer.cc +++ b/src/test/perf/external_pointer/externalpointer.cc @@ -1,4 +1,4 @@ -#include +#include #include #include #include @@ -13,13 +13,13 @@ namespace test // Pre allocate all the objects size_t* objects[count]; - NOINLINE void setup(xoroshiro::p128r64& r, Alloc* alloc) + NOINLINE void setup(xoroshiro::p128r64& r, Alloc& alloc) { for (size_t i = 0; i < count; i++) { size_t rand = (size_t)r.next(); size_t offset = bits::clz(rand); - if constexpr (bits::is64()) + if constexpr (DefaultPal::address_bits > 32) { if (offset > 30) offset = 30; @@ -31,27 +31,31 @@ namespace test if (size < 16) size = 16; // store object - objects[i] = (size_t*)alloc->alloc(size); + objects[i] = (size_t*)alloc.alloc(size); + if (objects[i] == nullptr) + abort(); // Store allocators size for this object - *objects[i] = alloc->alloc_size(objects[i]); + *objects[i] = alloc.alloc_size(objects[i]); } } - NOINLINE void teardown(Alloc* alloc) + NOINLINE void teardown(Alloc& alloc) { // Deallocate everything for (size_t i = 0; i < count; i++) { - alloc->dealloc(objects[i]); + alloc.dealloc(objects[i]); } - current_alloc_pool()->debug_check_empty(); + snmalloc::debug_check_empty(); } void test_external_pointer(xoroshiro::p128r64& r) { - auto alloc = ThreadAlloc::get(); -#ifdef NDEBUG + auto& alloc = ThreadAlloc::get(); + // This is very slow on Windows at the moment. 
diff --git a/src/test/perf/external_pointer/externalpointer.cc b/src/test/perf/external_pointer/externalpointer.cc
index 0a88ea5d9..be3306cba 100644
--- a/src/test/perf/external_pointer/externalpointer.cc
+++ b/src/test/perf/external_pointer/externalpointer.cc
@@ -1,4 +1,4 @@
-#include
+#include
 #include
 #include
 #include
@@ -13,13 +13,13 @@ namespace test
   // Pre allocate all the objects
   size_t* objects[count];
 
-  NOINLINE void setup(xoroshiro::p128r64& r, Alloc* alloc)
+  NOINLINE void setup(xoroshiro::p128r64& r, Alloc& alloc)
   {
     for (size_t i = 0; i < count; i++)
     {
       size_t rand = (size_t)r.next();
       size_t offset = bits::clz(rand);
-      if constexpr (bits::is64())
+      if constexpr (DefaultPal::address_bits > 32)
       {
         if (offset > 30)
           offset = 30;
@@ -31,27 +31,31 @@ namespace test
       if (size < 16)
         size = 16;
       // store object
-      objects[i] = (size_t*)alloc->alloc(size);
+      objects[i] = (size_t*)alloc.alloc(size);
+      if (objects[i] == nullptr)
+        abort();
       // Store allocators size for this object
-      *objects[i] = alloc->alloc_size(objects[i]);
+      *objects[i] = alloc.alloc_size(objects[i]);
     }
   }
 
-  NOINLINE void teardown(Alloc* alloc)
+  NOINLINE void teardown(Alloc& alloc)
   {
     // Deallocate everything
     for (size_t i = 0; i < count; i++)
     {
-      alloc->dealloc(objects[i]);
+      alloc.dealloc(objects[i]);
     }
 
-    current_alloc_pool()->debug_check_empty();
+    snmalloc::debug_check_empty();
   }
 
   void test_external_pointer(xoroshiro::p128r64& r)
   {
-    auto alloc = ThreadAlloc::get();
-#ifdef NDEBUG
+    auto& alloc = ThreadAlloc::get();
+    // This is very slow on Windows at the moment. Until this is fixed, help
+    // CI terminate.
+#if defined(NDEBUG) && !defined(_MSC_VER)
     static constexpr size_t iterations = 10000000;
 #else
 # ifdef _MSC_VER
@@ -75,7 +79,7 @@ namespace test
       size_t size = *external_ptr;
       size_t offset = (size >> 4) * (rand & 15);
       void* interior_ptr = pointer_offset(external_ptr, offset);
-      void* calced_external = alloc->external_pointer(interior_ptr);
+      void* calced_external = alloc.external_pointer(interior_ptr);
       if (calced_external != external_ptr)
         abort();
     }
@@ -91,11 +95,8 @@ int main(int, char**)
   setup();
 
   xoroshiro::p128r64 r;
-# ifdef NDEBUG
-  size_t nn = 30;
-# else
-  size_t nn = 3;
-# endif
+
+  size_t nn = snmalloc::DEBUG ? 30 : 3;
   for (size_t n = 0; n < nn; n++)
     test::test_external_pointer(r);
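
The test above checks one contract: for any interior pointer into a live
object, alloc.external_pointer(interior) returns the start of that object. The
sketch below is a conceptual stand-in for that contract only; it tracks
allocation bases in a std::map instead of using snmalloc's metadata, and the
function names are made up for illustration.

// Stand-in for the interior-pointer-to-base lookup exercised above.
#include <cassert>
#include <cstdlib>
#include <map>

std::map<char*, size_t> live; // allocation base -> size

void* tracked_alloc(size_t size)
{
  auto* p = static_cast<char*>(std::malloc(size));
  if (p != nullptr)
    live[p] = size;
  return p;
}

// Return the base of the allocation containing `interior`, or nullptr.
void* external_pointer(void* interior)
{
  auto* q = static_cast<char*>(interior);
  auto it = live.upper_bound(q); // first base strictly greater than q
  if (it == live.begin())
    return nullptr;
  --it; // candidate allocation that could contain q
  return (q < it->first + it->second) ? it->first : nullptr;
}

int main()
{
  auto* obj = static_cast<char*>(tracked_alloc(128));
  assert(external_pointer(obj + 100) == obj); // interior pointer maps to base
  std::free(obj);
}
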
diff --git a/src/test/perf/low_memory/low-memory.cc b/src/test/perf/low_memory/low-memory.cc
index 22ba0ba04..fa2997fdf 100644
--- a/src/test/perf/low_memory/low-memory.cc
+++ b/src/test/perf/low_memory/low-memory.cc
@@ -1,5 +1,5 @@
 #include
-#include
+#include
 #include
 #include
 #include
@@ -19,7 +19,7 @@ class Queue
   Node* new_node(size_t size)
   {
-    auto result = (Node*)ThreadAlloc::get()->alloc(size);
+    auto result = (Node*)ThreadAlloc::get().alloc(size);
     result->next = nullptr;
     return result;
   }
 
@@ -43,7 +43,7 @@ class Queue
       return false;
 
     Node* next = head->next;
-    ThreadAlloc::get()->dealloc(head);
+    ThreadAlloc::get().dealloc(head);
     head = next;
     return true;
   }
@@ -107,58 +107,61 @@ int main(int argc, char** argv)
 {
   opt::Opt opt(argc, argv);
 
-  if constexpr (pal_supports)
-  {
-    register_for_pal_notifications();
-  }
-  else
-  {
-    std::cout << "Pal does not support low-memory notification! Test not run"
-              << std::endl;
-    return 0;
-  }
-
-#ifdef NDEBUG
-# if defined(WIN32) && !defined(SNMALLOC_VA_BITS_64)
-  std::cout << "32-bit windows not supported for this test." << std::endl;
-# else
-
-  bool interactive = opt.has("--interactive");
-
-  Queue allocations;
-
-  std::cout
-    << "Expected use:" << std::endl
-    << " run first instances with --interactive. Wait for first to print "
-    << std::endl
-    << " 'No allocations left. Press any key to terminate'" << std::endl
-    << "watch working set, and start second instance working set of first "
-    << "should drop to almost zero," << std::endl
-    << "and second should climb to physical ram." << std::endl
-    << std::endl;
-
-  setup();
-
-  for (size_t i = 0; i < 10; i++)
-  {
-    reach_pressure(allocations);
-    std::cout << "Pressure " << i << std::endl;
-
-    reduce_pressure(allocations);
-  }
-
-  // Deallocate everything
-  while (allocations.try_remove())
-    ;
-
-  if (interactive)
-  {
-    std::cout << "No allocations left. Press any key to terminate" << std::endl;
-    getchar();
-  }
-# endif
-#else
-  std::cout << "Release test only." << std::endl;
-#endif
+  // TODO reinstate
+
+  // if constexpr (pal_supports)
+  // {
+  //   register_for_pal_notifications();
+  // }
+  // else
+  // {
+  //   std::cout << "Pal does not support low-memory notification! Test not
+  //   run"
+  //             << std::endl;
+  //   return 0;
+  // }
+
+  // #ifdef NDEBUG
+  // # if defined(WIN32) && !defined(SNMALLOC_VA_BITS_64)
+  //   std::cout << "32-bit windows not supported for this test." << std::endl;
+  // # else
+
+  //   bool interactive = opt.has("--interactive");
+
+  //   Queue allocations;
+
+  //   std::cout
+  //     << "Expected use:" << std::endl
+  //     << " run first instances with --interactive. Wait for first to print "
+  //     << std::endl
+  //     << " 'No allocations left. Press any key to terminate'" << std::endl
+  //     << "watch working set, and start second instance working set of first "
+  //     << "should drop to almost zero," << std::endl
+  //     << "and second should climb to physical ram." << std::endl
+  //     << std::endl;
+
+  //   setup();
+
+  //   for (size_t i = 0; i < 10; i++)
+  //   {
+  //     reach_pressure(allocations);
+  //     std::cout << "Pressure " << i << std::endl;
+
+  //     reduce_pressure(allocations);
+  //   }
+
+  //   // Deallocate everything
+  //   while (allocations.try_remove())
+  //     ;
+
+  //   if (interactive)
+  //   {
+  //     std::cout << "No allocations left. Press any key to terminate" <<
+  //     std::endl; getchar();
+  //   }
+  // # endif
+  // #else
+  //   std::cout << "Release test only." << std::endl;
+  // #endif
 
   return 0;
-}
\ No newline at end of file
+}
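
The low-memory test is disabled above ("TODO reinstate"), but the shape of
what it exercised is a registration-and-callback scheme: code registers
interest in low-memory events and the platform layer invokes the handlers when
it observes pressure. The sketch below illustrates that shape with a plain
observer list; the class and function names are invented for illustration and
are not snmalloc's notification API.

// Minimal observer-list sketch of a low-memory notification mechanism.
#include <functional>
#include <mutex>
#include <vector>

class LowMemoryNotifier
{
  std::mutex lock;
  std::vector<std::function<void()>> handlers;

public:
  void register_handler(std::function<void()> h)
  {
    std::lock_guard<std::mutex> g(lock);
    handlers.push_back(std::move(h));
  }

  // Called by the platform layer when it detects memory pressure.
  void notify()
  {
    std::lock_guard<std::mutex> g(lock);
    for (auto& h : handlers)
      h();
  }
};

int main()
{
  LowMemoryNotifier notifier;
  notifier.register_handler([] { /* e.g. release cached memory to the OS */ });
  notifier.notify(); // simulate a pressure event
}
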
diff --git a/src/test/perf/memcpy/memcpy.cc b/src/test/perf/memcpy/memcpy.cc
new file mode 100644
index 000000000..64efad8cc
--- /dev/null
+++ b/src/test/perf/memcpy/memcpy.cc
@@ -0,0 +1,177 @@
+#include "snmalloc/global/memcpy.h"
+
+#include
+#include
+#include
+
+using namespace snmalloc;
+
+struct Shape
+{
+  void* object;
+  void* dst;
+};
+
+size_t my_random()
+{
+  return (size_t)rand();
+}
+
+std::vector allocs;
+
+void shape(size_t size)
+{
+  for (size_t i = 0; i < 1000; i++)
+  {
+    auto rsize = size * 2;
+    auto offset = 0;
+    // Uncomment the next two lines to introduce some randomness to the start of
+    // the memcpys. constexpr size_t alignment = 16; offset = (my_random() %
+    // size / alignment) * alignment;
+    Shape s;
+    s.object = ThreadAlloc::get().alloc(rsize);
+    s.dst = static_cast(s.object) + offset;
+    // Bring into cache the destination of the copy.
+    memset(s.dst, 0xFF, size);
+    allocs.push_back(s);
+  }
+}
+
+void unshape()
+{
+  for (auto& s : allocs)
+  {
+    ThreadAlloc::get().dealloc(s.object);
+  }
+  allocs.clear();
+}
+
+template
+void test_memcpy(size_t size, void* src, Memcpy mc)
+{
+  for (auto& s : allocs)
+  {
+    auto* dst = static_cast(s.dst);
+    mc(dst, src, size);
+  }
+}
+
+template
+void test(
+  size_t size,
+  Memcpy mc,
+  std::vector>& stats)
+{
+  auto src = ThreadAlloc::get().alloc(size);
+  shape(size);
+  for (size_t i = 0; i < 10; i++)
+  {
+    MeasureTime m(true);
+    test_memcpy(size, src, mc);
+    auto time = m.get_time();
+    stats.push_back({size, time});
+  }
+  ThreadAlloc::get().dealloc(src);
+  unshape();
+}
+
+NOINLINE
+void memcpy_checked(void* dst, const void* src, size_t size)
+{
+  memcpy(dst, src, size);
+}
+
+NOINLINE
+void memcpy_unchecked(void* dst, const void* src, size_t size)
+{
+  memcpy(dst, src, size);
+}
+
+NOINLINE
+void memcpy_platform_checked(void* dst, const void* src, size_t size)
+{
+  if (SNMALLOC_UNLIKELY(!check_bounds(dst, size)))
+  {
+    report_fatal_bounds_error(dst, size, "");
+    return;
+  }
+
+  memcpy(dst, src, size);
+}
+
+int main(int argc, char** argv)
+{
+  opt::Opt opt(argc, argv);
+#ifndef SNMALLOC_PASS_THROUGH
+  bool full_test = opt.has("--full_test");
+
+  // size_t size = 0;
+  auto mc1 = [](void* dst, const void* src, size_t len) {
+    memcpy_platform_checked(dst, src, len);
+  };
+  auto mc2 = [](void* dst, const void* src, size_t len) {
+    memcpy_unchecked(dst, src, len);
+  };
+  auto mc3 = [](void* dst, const void* src, size_t len) {
+    memcpy(dst, src, len);
+  };
+
+  std::vector sizes;
+  for (size_t size = 0; size < 64; size++)
+  {
+    sizes.push_back(size);
+  }
+  for (size_t size = 64; size < 256; size += 16)
+  {
+    sizes.push_back(size);
+    sizes.push_back(size + 5);
+  }
+  for (size_t size = 256; size < 1024; size += 64)
+  {
+    sizes.push_back(size);
+    sizes.push_back(size + 5);
+  }
+  for (size_t size = 1024; size < 8192; size += 256)
+  {
+    sizes.push_back(size);
+    sizes.push_back(size + 5);
+  }
+  for (size_t size = 8192; size < bits::one_at_bit(18); size <<= 1)
+  {
+    sizes.push_back(size);
+    sizes.push_back(size + 5);
+  }
+
+  std::vector> stats_checked;
+  std::vector> stats_unchecked;
+  std::vector> stats_platform;
+
+  printf("size, checked, unchecked, platform\n");
+
+  size_t repeats = full_test ? 80 : 1;
+
+  for (auto repeat = repeats; 0 < repeat; repeat--)
+  {
+    for (auto copy_size : sizes)
+    {
+      test(copy_size, mc1, stats_checked);
+      test(copy_size, mc2, stats_unchecked);
+      test(copy_size, mc3, stats_platform);
+    }
+    for (size_t i = 0; i < stats_checked.size(); i++)
+    {
+      auto& s1 = stats_checked[i];
+      auto& s2 = stats_unchecked[i];
+      auto& s3 = stats_platform[i];
+      std::cout << s1.first << ", " << s1.second.count() << ", "
+                << s2.second.count() << ", " << s3.second.count() << std::endl;
+    }
+    stats_checked.clear();
+    stats_unchecked.clear();
+    stats_platform.clear();
+  }
+#else
+  snmalloc::UNUSED(opt);
+#endif
+  return 0;
+}
diff --git a/src/test/perf/singlethread/singlethread.cc b/src/test/perf/singlethread/singlethread.cc
index 61dee2f7b..b93dcd428 100644
--- a/src/test/perf/singlethread/singlethread.cc
+++ b/src/test/perf/singlethread/singlethread.cc
@@ -1,4 +1,4 @@
-#include
+#include
 #include
 #include
 #include
@@ -8,7 +8,7 @@ using namespace snmalloc;
 template
 void test_alloc_dealloc(size_t count, size_t size, bool write)
 {
-  auto* alloc = ThreadAlloc::get();
+  auto& alloc = ThreadAlloc::get();
 
   {
     MeasureTime m;
@@ -20,7 +20,7 @@ void test_alloc_dealloc(size_t count, size_t size, bool write)
     // alloc 1.5x objects
     for (size_t i = 0; i < ((count * 3) / 2); i++)
     {
-      void* p = alloc->alloc(size);
+      void* p = alloc.alloc(size);
       SNMALLOC_CHECK(set.find(p) == set.end());
 
       if (write)
@@ -34,7 +34,7 @@ void test_alloc_dealloc(size_t count, size_t size, bool write)
     {
       auto it = set.begin();
       void* p = *it;
-      alloc->dealloc(p, size);
+      alloc.dealloc(p, size);
       set.erase(it);
       SNMALLOC_CHECK(set.find(p) == set.end());
     }
@@ -42,7 +42,7 @@ void test_alloc_dealloc(size_t count, size_t size, bool write)
     // alloc 1x objects
     for (size_t i = 0; i < count; i++)
    {
-      void* p = alloc->alloc(size);
+      void* p = alloc.alloc(size);
       SNMALLOC_CHECK(set.find(p) == set.end());
 
       if (write)
@@ -55,12 +55,12 @@ void test_alloc_dealloc(size_t count, size_t size, bool write)
     while (!set.empty())
     {
       auto it = set.begin();
-      alloc->dealloc(*it, size);
+      alloc.dealloc(*it, size);
       set.erase(it);
     }
   }
 
-  current_alloc_pool()->debug_check_empty();
+  snmalloc::debug_check_empty();
 }
 
 int main(int, char**)
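
Both perf tests above time a tight loop with an internal MeasureTime helper
and report the elapsed time. The stand-in below shows the same measurement
pattern with std::chrono only; the helper name and the workload are
illustrative, not the tests' actual harness.

// Time a callable and return the elapsed wall-clock time in nanoseconds.
#include <chrono>
#include <cstdio>

template<typename F>
std::chrono::nanoseconds time_it(F&& f)
{
  auto begin = std::chrono::high_resolution_clock::now();
  f();
  auto end = std::chrono::high_resolution_clock::now();
  return std::chrono::duration_cast<std::chrono::nanoseconds>(end - begin);
}

int main()
{
  volatile size_t sink = 0;
  auto t = time_it([&] {
    for (size_t i = 0; i < 1000000; i++)
      sink = sink + i;
  });
  printf("loop took %lld ns\n", static_cast<long long>(t.count()));
}
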
diff --git a/src/test/setup.h b/src/test/setup.h
index 20936cca7..61f9a991c 100644
--- a/src/test/setup.h
+++ b/src/test/setup.h
@@ -1,12 +1,13 @@
-#if defined(WIN32) && defined(SNMALLOC_CI_BUILD)
-#  include
-#  include
-#  include
-#  include
-#  include
+#if defined(SNMALLOC_CI_BUILD)
+#  include
+#  if defined(WIN32)
+#    include
+#    include
+#    include
+#    include
 // Has to come after the PAL.
-#  include
-#  pragma comment(lib, "dbghelp.lib")
+#    include
+#    pragma comment(lib, "dbghelp.lib")
 
 void print_stack_trace()
 {
@@ -62,8 +63,8 @@ void print_stack_trace()
 
 void _cdecl error(int signal)
 {
-  UNUSED(signal);
-  puts("*****ABORT******");
+  snmalloc::UNUSED(signal);
+  snmalloc::DefaultPal::message("*****ABORT******");
 
   print_stack_trace();
 
@@ -72,9 +73,9 @@ void _cdecl error(int signal)
 
 LONG WINAPI VectoredHandler(struct _EXCEPTION_POINTERS* ExceptionInfo)
 {
-  UNUSED(ExceptionInfo);
+  snmalloc::UNUSED(ExceptionInfo);
 
-  puts("*****UNHANDLED EXCEPTION******");
+  snmalloc::DefaultPal::message("*****UNHANDLED EXCEPTION******");
 
   print_stack_trace();
 
@@ -94,6 +95,19 @@ void setup()
   // Disable OS level dialog boxes during CI.
   SetErrorMode(SEM_FAILCRITICALERRORS | SEM_NOGPFAULTERRORBOX);
 }
+#  else
+#    include
+void error_handle(int signal)
+{
+  snmalloc::UNUSED(signal);
+  snmalloc::error("Seg Fault");
+  _exit(1);
+}
+void setup()
+{
+  signal(SIGSEGV, error_handle);
+}
+#  endif
 #else
 void setup() {}
 #endif
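
The non-Windows branch added to setup.h installs a SIGSEGV handler so that a
crashing CI test still prints a diagnostic before exiting. The sketch below
mirrors that structure using only POSIX and the C standard library (write and
_Exit rather than snmalloc::error and DefaultPal::message), and raises the
signal deliberately to show the effect; treat it as an illustration, not the
test harness itself.

// Install a SIGSEGV handler that prints a message and exits with status 1.
#include <csignal>
#include <cstdlib>
#include <unistd.h>

extern "C" void error_handle(int)
{
  // Stick to async-signal-safe calls in the handler: write(2) and _Exit.
  const char msg[] = "Seg Fault\n";
  (void)write(STDERR_FILENO, msg, sizeof(msg) - 1);
  _Exit(1);
}

void setup()
{
  signal(SIGSEGV, error_handle);
}

int main()
{
  setup();
  raise(SIGSEGV); // demonstrate: the handler reports the fault and exits
}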