diff --git a/.github/workflows/coverity.yml b/.github/workflows/coverity.yml index 531a463c7..ebae6086a 100644 --- a/.github/workflows/coverity.yml +++ b/.github/workflows/coverity.yml @@ -49,7 +49,6 @@ jobs: -DUMF_FORMAT_CODE_STYLE=OFF -DUMF_DEVELOPER_MODE=OFF -DUMF_TESTS_FAIL_ON_SKIP=ON - -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON - name: Build diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index 7a6335ed6..44f2ba2ca 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -76,7 +76,6 @@ jobs: -DCMAKE_BUILD_TYPE=Debug -DUMF_FORMAT_CODE_STYLE=OFF -DUMF_DEVELOPER_MODE=ON - -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON -DUMF_BUILD_LEVEL_ZERO_PROVIDER=OFF -DUMF_BUILD_CUDA_PROVIDER=OFF @@ -138,7 +137,6 @@ jobs: -DUMF_LINK_HWLOC_STATICALLY=${{matrix.static_hwloc}} -DUMF_FORMAT_CODE_STYLE=OFF -DUMF_DEVELOPER_MODE=ON - -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON -DUMF_BUILD_LEVEL_ZERO_PROVIDER=ON -DUMF_BUILD_CUDA_PROVIDER=ON @@ -219,7 +217,6 @@ jobs: -DUMF_BUILD_SHARED_LIBRARY=${{matrix.shared_library}} ^ -DUMF_FORMAT_CODE_STYLE=OFF ^ -DUMF_DEVELOPER_MODE=ON ^ - -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON ^ -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON ^ -DUMF_BUILD_LEVEL_ZERO_PROVIDER=ON ^ -DUMF_BUILD_CUDA_PROVIDER=ON ^ diff --git a/.github/workflows/reusable_basic.yml b/.github/workflows/reusable_basic.yml index d23e646dd..7170ec418 100644 --- a/.github/workflows/reusable_basic.yml +++ b/.github/workflows/reusable_basic.yml @@ -165,7 +165,6 @@ jobs: -DUMF_FORMAT_CODE_STYLE=OFF -DUMF_DEVELOPER_MODE=ON -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON - -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON -DUMF_TESTS_FAIL_ON_SKIP=ON -DUMF_DISABLE_HWLOC=${{matrix.disable_hwloc}} -DUMF_LINK_HWLOC_STATICALLY=${{matrix.link_hwloc_statically}} @@ -208,7 +207,6 @@ jobs: --build-dir ${{env.BUILD_DIR}} --install-dir ${{env.INSTL_DIR}} --build-type ${{matrix.build_type}} - --disjoint-pool ${{ matrix.install_tbb == 'ON' && matrix.disable_hwloc != 'ON' && matrix.shared_library == 'ON' && '--proxy' || '' }} --umf-version ${{env.UMF_VERSION}} ${{ matrix.shared_library == 'ON' && '--shared-library' || '' }} @@ -284,7 +282,6 @@ jobs: -DUMF_BUILD_SHARED_LIBRARY=${{matrix.shared_library}} -DUMF_FORMAT_CODE_STYLE=OFF -DUMF_DEVELOPER_MODE=ON - -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON -DUMF_BUILD_LEVEL_ZERO_PROVIDER=${{matrix.level_zero_provider}} -DUMF_BUILD_CUDA_PROVIDER=${{matrix.cuda_provider}} @@ -304,7 +301,6 @@ jobs: --build-dir ${{env.BUILD_DIR}} --install-dir ${{env.INSTL_DIR}} --build-type ${{matrix.build_type}} - --disjoint-pool ${{matrix.shared_library == 'ON' && '--proxy' || '' }} --umf-version ${{env.UMF_VERSION}} ${{ matrix.shared_library == 'ON' && '--shared-library' || ''}} @@ -342,7 +338,6 @@ jobs: -DUMF_BUILD_EXAMPLES=ON -DUMF_FORMAT_CODE_STYLE=OFF -DUMF_DEVELOPER_MODE=ON - -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=OFF -DUMF_BUILD_LEVEL_ZERO_PROVIDER=ON -DUMF_BUILD_CUDA_PROVIDER=ON @@ -385,7 +380,6 @@ jobs: -DUMF_BUILD_EXAMPLES=ON -DUMF_FORMAT_CODE_STYLE=OFF -DUMF_DEVELOPER_MODE=ON - -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=OFF -DUMF_BUILD_LEVEL_ZERO_PROVIDER=ON -DUMF_BUILD_CUDA_PROVIDER=ON @@ -496,7 +490,6 @@ jobs: -DUMF_FORMAT_CODE_STYLE=OFF -DUMF_DEVELOPER_MODE=ON -DUMF_BUILD_LEVEL_ZERO_PROVIDER=OFF - -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON -DUMF_BUILD_SHARED_LIBRARY=ON 
-DUMF_TESTS_FAIL_ON_SKIP=ON @@ -511,7 +504,6 @@ jobs: --build-dir ${{env.BUILD_DIR}} --install-dir ${{env.INSTL_DIR}} --build-type ${{env.BUILD_TYPE}} - --disjoint-pool --proxy --umf-version ${{env.UMF_VERSION}} --shared-library diff --git a/.github/workflows/reusable_benchmarks.yml b/.github/workflows/reusable_benchmarks.yml index b33fdb25e..b41c99f3a 100644 --- a/.github/workflows/reusable_benchmarks.yml +++ b/.github/workflows/reusable_benchmarks.yml @@ -93,7 +93,6 @@ jobs: -DUMF_FORMAT_CODE_STYLE=OFF -DUMF_BUILD_LEVEL_ZERO_PROVIDER=ON -DUMF_BUILD_CUDA_PROVIDER=ON - -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON - name: Build UMF diff --git a/.github/workflows/reusable_dax.yml b/.github/workflows/reusable_dax.yml index f7f4fbe50..1a41b11c7 100644 --- a/.github/workflows/reusable_dax.yml +++ b/.github/workflows/reusable_dax.yml @@ -84,7 +84,6 @@ jobs: -DUMF_BUILD_GPU_EXAMPLES=OFF -DUMF_FORMAT_CODE_STYLE=OFF -DUMF_DEVELOPER_MODE=ON - -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON -DUMF_BUILD_LEVEL_ZERO_PROVIDER=OFF -DUMF_TESTS_FAIL_ON_SKIP=ON diff --git a/.github/workflows/reusable_fast.yml b/.github/workflows/reusable_fast.yml index 58a172a74..5166f2b96 100644 --- a/.github/workflows/reusable_fast.yml +++ b/.github/workflows/reusable_fast.yml @@ -19,24 +19,20 @@ jobs: matrix: include: - os: windows-latest - disjoint: 'OFF' build_tests: 'ON' simple_cmake: 'OFF' # pure C build (Windows) - os: windows-latest - disjoint: 'OFF' # Tests' building is off for a pure C build build_tests: 'OFF' simple_cmake: 'OFF' - os: ubuntu-latest - disjoint: 'ON' build_tests: 'ON' # Windows doesn't recognize 'CMAKE_BUILD_TYPE', it uses '--config' param in build command extra_build_options: '-DCMAKE_BUILD_TYPE=Release -DUMF_BUILD_BENCHMARKS=ON -DUMF_BUILD_BENCHMARKS_MT=ON' simple_cmake: 'OFF' # pure C build (Linux) - os: ubuntu-latest - disjoint: 'OFF' # Windows doesn't recognize 'CMAKE_BUILD_TYPE', it uses '--config' param in build command # Tests' building is off for a pure C build build_tests: 'OFF' @@ -44,13 +40,11 @@ jobs: simple_cmake: 'OFF' # simplest CMake on ubuntu-latest - os: ubuntu-latest - disjoint: 'OFF' build_tests: 'ON' extra_build_options: '-DCMAKE_BUILD_TYPE=Release' simple_cmake: 'ON' # simplest CMake ubuntu-20.04 - os: ubuntu-20.04 - disjoint: 'OFF' build_tests: 'ON' extra_build_options: '-DCMAKE_BUILD_TYPE=Release' simple_cmake: 'ON' @@ -97,7 +91,6 @@ jobs: -DCMAKE_PREFIX_PATH="${{env.VCPKG_PATH}}" -DUMF_FORMAT_CODE_STYLE=OFF -DUMF_DEVELOPER_MODE=ON - -DUMF_BUILD_LIBUMF_POOL_DISJOINT=${{matrix.disjoint}} -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON -DUMF_BUILD_TESTS=${{matrix.build_tests}} -DUMF_BUILD_EXAMPLES=ON diff --git a/.github/workflows/reusable_gpu.yml b/.github/workflows/reusable_gpu.yml index 47f48f6a8..87a7cfd30 100644 --- a/.github/workflows/reusable_gpu.yml +++ b/.github/workflows/reusable_gpu.yml @@ -93,7 +93,6 @@ jobs: -DUMF_BUILD_GPU_TESTS=ON -DUMF_BUILD_GPU_EXAMPLES=ON -DUMF_DEVELOPER_MODE=ON - -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON -DUMF_BUILD_CUDA_PROVIDER=OFF -DUMF_BUILD_LEVEL_ZERO_PROVIDER=OFF diff --git a/.github/workflows/reusable_multi_numa.yml b/.github/workflows/reusable_multi_numa.yml index f546b0545..7c7750551 100644 --- a/.github/workflows/reusable_multi_numa.yml +++ b/.github/workflows/reusable_multi_numa.yml @@ -45,7 +45,6 @@ jobs: -DUMF_BUILD_BENCHMARKS=OFF -DUMF_BUILD_TESTS=ON -DUMF_DEVELOPER_MODE=ON - -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=${{ matrix.os 
== 'rhel-9.1' && 'OFF' || 'ON' }} -DUMF_TESTS_FAIL_ON_SKIP=ON ${{ matrix.build_type == 'Debug' && matrix.os == 'ubuntu-22.04' && '-DUMF_USE_COVERAGE=ON' || '' }} @@ -61,11 +60,12 @@ jobs: # On RHEL, hwloc version is just a little too low. # Skip some tests until we upgrade hwloc and update CMake to properly handle local hwloc installation. # TODO: fix issue #560 + # TODO: add issue for -E umf-init_teardown - it is not clear why it fails - name: Run tests (on RHEL) if: matrix.os == 'rhel-9.1' working-directory: ${{github.workspace}}/build run: | - ctest --output-on-failure --test-dir test -E "umf-provider_os_memory_multiple_numa_nodes" + ctest --output-on-failure --test-dir test -E "umf-provider_os_memory_multiple_numa_nodes|umf-init_teardown" ./test/umf_test-provider_os_memory_multiple_numa_nodes \ --gtest_filter="-*checkModeLocal/*:*checkModePreferredEmptyNodeset/*:testNuma.checkModeInterleave" diff --git a/.github/workflows/reusable_proxy_lib.yml b/.github/workflows/reusable_proxy_lib.yml index a1f5975fa..bb4a3278e 100644 --- a/.github/workflows/reusable_proxy_lib.yml +++ b/.github/workflows/reusable_proxy_lib.yml @@ -48,7 +48,6 @@ jobs: -DUMF_FORMAT_CODE_STYLE=OFF -DUMF_DEVELOPER_MODE=ON -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON - -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON -DUMF_TESTS_FAIL_ON_SKIP=ON -DUMF_PROXY_LIB_BASED_ON_POOL=${{matrix.proxy_lib_pool}} ${{ matrix.build_type == 'Debug' && '-DUMF_USE_COVERAGE=ON' || '' }} diff --git a/.github/workflows/reusable_sanitizers.yml b/.github/workflows/reusable_sanitizers.yml index 25458da51..1a044308e 100644 --- a/.github/workflows/reusable_sanitizers.yml +++ b/.github/workflows/reusable_sanitizers.yml @@ -55,7 +55,6 @@ jobs: -DUMF_FORMAT_CODE_STYLE=OFF -DUMF_DEVELOPER_MODE=ON -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON - -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON -DUMF_USE_ASAN=${{matrix.sanitizers.asan}} -DUMF_USE_UBSAN=${{matrix.sanitizers.ubsan}} -DUMF_USE_TSAN=${{matrix.sanitizers.tsan}} @@ -127,7 +126,6 @@ jobs: -DUMF_BUILD_SHARED_LIBRARY=OFF -DUMF_FORMAT_CODE_STYLE=OFF -DUMF_DEVELOPER_MODE=ON - -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON -DUMF_USE_ASAN=${{matrix.sanitizers.asan}} -DUMF_BUILD_EXAMPLES=ON -DUMF_BUILD_LEVEL_ZERO_PROVIDER=OFF diff --git a/.github/workflows/reusable_valgrind.yml b/.github/workflows/reusable_valgrind.yml index aba0e3260..5999297d6 100644 --- a/.github/workflows/reusable_valgrind.yml +++ b/.github/workflows/reusable_valgrind.yml @@ -29,7 +29,6 @@ jobs: -DCMAKE_BUILD_TYPE=Debug -DUMF_FORMAT_CODE_STYLE=OFF -DUMF_DEVELOPER_MODE=ON - -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON -DUMF_BUILD_LEVEL_ZERO_PROVIDER=OFF -DUMF_BUILD_CUDA_PROVIDER=OFF diff --git a/CMakeLists.txt b/CMakeLists.txt index f8c393609..396a27c1e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -60,8 +60,6 @@ endmacro() umf_option(UMF_BUILD_SHARED_LIBRARY "Build UMF as shared library" OFF) umf_option(UMF_BUILD_LEVEL_ZERO_PROVIDER "Build Level Zero memory provider" ON) umf_option(UMF_BUILD_CUDA_PROVIDER "Build CUDA memory provider" ON) -umf_option(UMF_BUILD_LIBUMF_POOL_DISJOINT - "Build the libumf_pool_disjoint static library" OFF) umf_option(UMF_BUILD_LIBUMF_POOL_JEMALLOC "Build the libumf_pool_jemalloc static library" OFF) umf_option(UMF_BUILD_TESTS "Build UMF tests" ON) @@ -497,8 +495,8 @@ endif() # For using the options listed in the OPTIONS_REQUIRING_CXX variable a C++17 # compiler is required. Moreover, if these options are not set, CMake will set # up a strict C build, without C++ support. 
-set(OPTIONS_REQUIRING_CXX "UMF_BUILD_TESTS" "UMF_BUILD_LIBUMF_POOL_DISJOINT" - "UMF_BUILD_BENCHMARKS_MT" "UMF_BUILD_BENCHMARKS") +set(OPTIONS_REQUIRING_CXX "UMF_BUILD_TESTS" "UMF_BUILD_BENCHMARKS_MT" + "UMF_BUILD_BENCHMARKS") foreach(option_name ${OPTIONS_REQUIRING_CXX}) if(${option_name}) enable_language(CXX) diff --git a/README.md b/README.md index 5bd0b9b2f..00d6136df 100644 --- a/README.md +++ b/README.md @@ -39,7 +39,7 @@ For development and contributions: - cmake-format-0.6 (can be installed with `python -m pip install cmake-format==0.6.13`) - black (can be installed with `python -m pip install black==24.3.0`) -For building tests, multithreaded benchmarks and Disjoint Pool: +For building tests and multithreaded benchmarks: - C++ compiler with C++17 support @@ -106,7 +106,6 @@ List of options provided by CMake: | UMF_BUILD_SHARED_LIBRARY | Build UMF as shared library | ON/OFF | OFF | | UMF_BUILD_LEVEL_ZERO_PROVIDER | Build Level Zero memory provider | ON/OFF | ON | | UMF_BUILD_CUDA_PROVIDER | Build CUDA memory provider | ON/OFF | ON | -| UMF_BUILD_LIBUMF_POOL_DISJOINT | Build the libumf_pool_disjoint static library | ON/OFF | OFF | | UMF_BUILD_LIBUMF_POOL_JEMALLOC | Build the libumf_pool_jemalloc static library | ON/OFF | OFF | | UMF_BUILD_TESTS | Build UMF tests | ON/OFF | ON | | UMF_BUILD_GPU_TESTS | Build UMF GPU tests | ON/OFF | OFF | @@ -267,13 +266,11 @@ This memory pool is distributed as part of libumf. It forwards all requests to t memory provider. Currently umfPoolRealloc, umfPoolCalloc and umfPoolMallocUsableSize functions are not supported by the proxy pool. -#### Disjoint pool +#### Disjoint pool (part of libumf) -TODO: Add a description - -##### Requirements - -To enable this feature, the `UMF_BUILD_LIBUMF_POOL_DISJOINT` option needs to be turned `ON`. +The Disjoint pool is designed to keep internal metadata separate from user data. +This separation is particularly useful when user data needs to be placed in memory with relatively high latency, +such as GPU memory or disk storage. 
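+
+A minimal sketch of using the Disjoint pool through the public API is shown below
+(the OS memory provider is used here only as an example backing provider and
+error handling is omitted):
+
+```c
+#include <umf/memory_pool.h>
+#include <umf/memory_provider.h>
+#include <umf/pools/pool_disjoint.h>
+#include <umf/providers/provider_os_memory.h>
+
+int main(void) {
+    // create a provider that will back the pool
+    umf_os_memory_provider_params_handle_t os_params = NULL;
+    umfOsMemoryProviderParamsCreate(&os_params);
+    umf_memory_provider_handle_t provider = NULL;
+    umfMemoryProviderCreate(umfOsMemoryProviderOps(), os_params, &provider);
+
+    // create the Disjoint pool on top of that provider
+    umf_disjoint_pool_params_handle_t params = NULL;
+    umfDisjointPoolParamsCreate(&params);
+    umf_memory_pool_handle_t pool = NULL;
+    umfPoolCreate(umfDisjointPoolOps(), provider, params, 0, &pool);
+
+    // allocate and free through the pool
+    void *ptr = umfPoolMalloc(pool, 1024);
+    umfPoolFree(pool, ptr);
+
+    // tear everything down
+    umfPoolDestroy(pool);
+    umfDisjointPoolParamsDestroy(params);
+    umfMemoryProviderDestroy(provider);
+    umfOsMemoryProviderParamsDestroy(os_params);
+    return 0;
+}
+```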
#### Jemalloc pool diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt index 73b9b257a..80c8ba5ec 100644 --- a/benchmark/CMakeLists.txt +++ b/benchmark/CMakeLists.txt @@ -97,10 +97,6 @@ function(add_umf_benchmark) ) endif() - if(UMF_BUILD_LIBUMF_POOL_DISJOINT) - target_compile_definitions(${BENCH_NAME} - PRIVATE UMF_POOL_DISJOINT_ENABLED=1) - endif() if(UMF_POOL_JEMALLOC_ENABLED) target_compile_definitions(${BENCH_NAME} PRIVATE UMF_POOL_JEMALLOC_ENABLED=1) @@ -131,9 +127,6 @@ endfunction() set(LIB_DIRS ${LIBHWLOC_LIBRARY_DIRS}) # optional libraries -if(UMF_BUILD_LIBUMF_POOL_DISJOINT) - set(LIBS_OPTIONAL ${LIBS_OPTIONAL} disjoint_pool) -endif() if(LINUX) set(LIBS_OPTIONAL ${LIBS_OPTIONAL} m) endif() diff --git a/benchmark/benchmark.cpp b/benchmark/benchmark.cpp index 6c8175e1d..401b06d26 100644 --- a/benchmark/benchmark.cpp +++ b/benchmark/benchmark.cpp @@ -66,7 +66,6 @@ UMF_BENCHMARK_TEMPLATE_DEFINE(alloc_benchmark, proxy_pool, fixed_alloc_size, UMF_BENCHMARK_REGISTER_F(alloc_benchmark, proxy_pool) ->Apply(&default_alloc_fix_size); -#ifdef UMF_POOL_DISJOINT_ENABLED UMF_BENCHMARK_TEMPLATE_DEFINE(alloc_benchmark, disjoint_pool_fix, fixed_alloc_size, pool_allocator>); @@ -80,7 +79,6 @@ UMF_BENCHMARK_REGISTER_F(alloc_benchmark, disjoint_pool_fix) UMF_BENCHMARK_REGISTER_F(alloc_benchmark, disjoint_pool_uniform) ->Apply(&default_alloc_uniform_size); */ -#endif #ifdef UMF_POOL_JEMALLOC_ENABLED UMF_BENCHMARK_TEMPLATE_DEFINE(alloc_benchmark, jemalloc_pool_fix, @@ -150,21 +148,17 @@ UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, os_provider, UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, os_provider) ->Apply(&default_multiple_alloc_fix_size); -#ifdef UMF_POOL_DISJOINT_ENABLED UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, disjoint_pool_fix, fixed_alloc_size, pool_allocator>); UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, disjoint_pool_fix) ->Apply(&default_multiple_alloc_fix_size); -// TODO: debug why this crashes -/*UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, +UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, disjoint_pool_uniform, uniform_alloc_size, pool_allocator>); UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, disjoint_pool_uniform) ->Apply(&default_multiple_alloc_uniform_size); -*/ -#endif #ifdef UMF_POOL_JEMALLOC_ENABLED UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, jemalloc_pool_fix, diff --git a/benchmark/benchmark_umf.hpp b/benchmark/benchmark_umf.hpp index 389c224ed..86cba4877 100644 --- a/benchmark/benchmark_umf.hpp +++ b/benchmark/benchmark_umf.hpp @@ -13,16 +13,14 @@ #include #include +#include #include + #ifdef UMF_POOL_SCALABLE_ENABLED #include #endif #include -#ifdef UMF_POOL_DISJOINT_ENABLED -#include -#endif - #ifdef UMF_POOL_JEMALLOC_ENABLED #include #endif @@ -167,7 +165,6 @@ struct proxy_pool : public pool_interface { static std::string name() { return "proxy_pool<" + Provider::name() + ">"; } }; -#ifdef UMF_POOL_DISJOINT_ENABLED template struct disjoint_pool : public pool_interface { umf_memory_pool_ops_t * @@ -221,7 +218,6 @@ struct disjoint_pool : public pool_interface { return "disjoint_pool<" + Provider::name() + ">"; } }; -#endif #ifdef UMF_POOL_JEMALLOC_ENABLED template diff --git a/benchmark/multithread.cpp b/benchmark/multithread.cpp index ecc238529..d00ffba90 100644 --- a/benchmark/multithread.cpp +++ b/benchmark/multithread.cpp @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2024 Intel Corporation + * Copyright (C) 2024-2025 Intel Corporation * * Under the 
Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -121,7 +121,7 @@ int main() { std::cout << "skipping jemalloc_pool mt_alloc_free" << std::endl; #endif -#if defined(UMF_POOL_DISJOINT_ENABLED) + // NOTE: disjoint pool is always enabled umf_disjoint_pool_params_handle_t hDisjointParams = nullptr; umf_result_t ret = umfDisjointPoolParamsCreate(&hDisjointParams); if (ret != UMF_RESULT_SUCCESS) { @@ -132,20 +132,15 @@ int main() { std::cout << "disjoint_pool mt_alloc_free: "; mt_alloc_free(poolCreateExtParams{umfDisjointPoolOps(), hDisjointParams, umfOsMemoryProviderOps(), osParams}); -#else - std::cout << "skipping disjoint_pool mt_alloc_free" << std::endl; -#endif // ctest looks for "PASSED" in the output std::cout << "PASSED" << std::endl; -#if defined(UMF_POOL_DISJOINT_ENABLED) ret = umfDisjointPoolParamsDestroy(hDisjointParams); if (ret != UMF_RESULT_SUCCESS) { std::cerr << "disjoint pool params destroy failed" << std::endl; return -1; } -#endif return 0; } diff --git a/benchmark/ubench.c b/benchmark/ubench.c index 3892740e8..5beaa62be 100644 --- a/benchmark/ubench.c +++ b/benchmark/ubench.c @@ -15,23 +15,19 @@ #include #include +#include #include #include #include #include -#ifdef UMF_POOL_DISJOINT_ENABLED -#include -#endif - #ifdef UMF_POOL_JEMALLOC_ENABLED #include #endif #include "utils_common.h" -#if (defined UMF_POOL_DISJOINT_ENABLED && \ - defined UMF_PROVIDER_LEVEL_ZERO_ENABLED && defined UMF_BUILD_GPU_TESTS) +#if (defined UMF_PROVIDER_LEVEL_ZERO_ENABLED && defined UMF_BUILD_GPU_TESTS) #include "utils_level_zero.h" #endif @@ -244,7 +240,6 @@ UBENCH_EX(simple, proxy_pool_with_os_memory_provider) { free(array); } -#if (defined UMF_POOL_DISJOINT_ENABLED) ////////////////// DISJOINT POOL WITH OS MEMORY PROVIDER UBENCH_EX(simple, disjoint_pool_with_os_memory_provider) { @@ -327,7 +322,6 @@ UBENCH_EX(simple, disjoint_pool_with_os_memory_provider) { umfMemoryProviderDestroy(os_memory_provider); free(array); } -#endif /* (defined UMF_POOL_DISJOINT_ENABLED) */ #if (defined UMF_POOL_JEMALLOC_ENABLED) ////////////////// JEMALLOC POOL WITH OS MEMORY PROVIDER @@ -421,8 +415,7 @@ UBENCH_EX(simple, scalable_pool_with_os_memory_provider) { } #endif /* (defined UMF_POOL_SCALABLE_ENABLED) */ -#if (defined UMF_POOL_DISJOINT_ENABLED && \ - defined UMF_PROVIDER_LEVEL_ZERO_ENABLED && defined UMF_BUILD_GPU_TESTS) +#if (defined UMF_PROVIDER_LEVEL_ZERO_ENABLED && defined UMF_BUILD_GPU_TESTS) static void do_ipc_get_put_benchmark(alloc_t *allocs, size_t num_allocs, size_t repeats, umf_ipc_handle_t *ipc_handles) { @@ -630,7 +623,7 @@ UBENCH_EX(ipc, disjoint_pool_with_level_zero_provider) { err_destroy_context: utils_ze_destroy_context(context); } -#endif /* (defined UMF_POLL_DISJOINT_ENABLED && defined UMF_BUILD_LEVEL_ZERO_PROVIDER && defined UMF_BUILD_GPU_TESTS) */ +#endif /* (defined UMF_BUILD_LEVEL_ZERO_PROVIDER && defined UMF_BUILD_GPU_TESTS) */ // TODO add IPC benchmark for CUDA diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 89f80ee2d..a26b8915e 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -41,16 +41,14 @@ if(UMF_POOL_SCALABLE_ENABLED) endif() endif() -if(UMF_BUILD_GPU_EXAMPLES - AND UMF_BUILD_LIBUMF_POOL_DISJOINT - AND UMF_BUILD_LEVEL_ZERO_PROVIDER) +if(UMF_BUILD_GPU_EXAMPLES AND UMF_BUILD_LEVEL_ZERO_PROVIDER) set(EXAMPLE_NAME umf_example_level_zero_shared_memory) add_umf_executable( NAME ${EXAMPLE_NAME} SRCS level_zero_shared_memory/level_zero_shared_memory.c 
common/examples_level_zero_helpers.c - LIBS disjoint_pool ze_loader umf) + LIBS ze_loader umf) target_include_directories( ${EXAMPLE_NAME} @@ -84,12 +82,11 @@ if(UMF_BUILD_GPU_EXAMPLES endif() else() message(STATUS "GPU Level Zero shared memory example requires " - "UMF_BUILD_GPU_EXAMPLES, UMF_BUILD_LEVEL_ZERO_PROVIDER and " - "UMF_BUILD_LIBUMF_POOL_DISJOINT to be turned ON - skipping") + "UMF_BUILD_GPU_EXAMPLES and UMF_BUILD_LEVEL_ZERO_PROVIDER " + "to be turned ON - skipping") endif() if(UMF_BUILD_GPU_EXAMPLES - AND UMF_BUILD_LIBUMF_POOL_DISJOINT AND UMF_BUILD_CUDA_PROVIDER AND UMF_CUDA_ENABLED) set(EXAMPLE_NAME umf_example_cuda_shared_memory) @@ -97,7 +94,7 @@ if(UMF_BUILD_GPU_EXAMPLES add_umf_executable( NAME ${EXAMPLE_NAME} SRCS cuda_shared_memory/cuda_shared_memory.c - LIBS disjoint_pool cuda umf) + LIBS cuda umf) target_include_directories( ${EXAMPLE_NAME} @@ -123,14 +120,13 @@ if(UMF_BUILD_GPU_EXAMPLES else() message( STATUS - "GPU CUDA shared memory example requires UMF_BUILD_GPU_EXAMPLES, UMF_BUILD_CUDA_PROVIDER, UMF_BUILD_LIBUMF_POOL_DISJOINT to be turned ON and installed CUDA libraries - skipping" + "GPU CUDA shared memory example requires UMF_BUILD_GPU_EXAMPLES and UMF_BUILD_CUDA_PROVIDER to be turned ON and installed CUDA libraries - skipping" ) endif() # TODO: it looks like there is some problem with IPC implementation in Level # Zero on windows if(UMF_BUILD_GPU_EXAMPLES - AND UMF_BUILD_LIBUMF_POOL_DISJOINT AND UMF_BUILD_LEVEL_ZERO_PROVIDER AND LINUX) set(EXAMPLE_NAME umf_example_ipc_level_zero) @@ -139,7 +135,7 @@ if(UMF_BUILD_GPU_EXAMPLES NAME ${EXAMPLE_NAME} SRCS ipc_level_zero/ipc_level_zero.c common/examples_level_zero_helpers.c - LIBS disjoint_pool ze_loader umf) + LIBS ze_loader umf) target_include_directories( ${EXAMPLE_NAME} @@ -174,7 +170,7 @@ if(UMF_BUILD_GPU_EXAMPLES else() message( STATUS - "IPC Level Zero example requires UMF_BUILD_GPU_EXAMPLES, UMF_BUILD_LEVEL_ZERO_PROVIDER and UMF_BUILD_LIBUMF_POOL_DISJOINT to be turned ON - skipping" + "IPC Level Zero example requires UMF_BUILD_GPU_EXAMPLES and UMF_BUILD_LEVEL_ZERO_PROVIDER to be turned ON - skipping" ) endif() diff --git a/examples/README.md b/examples/README.md index e7823347e..70d114a63 100644 --- a/examples/README.md +++ b/examples/README.md @@ -24,7 +24,7 @@ cleans up and exits with an error status. ### Requirements * Level Zero headers and libraries * compatible GPU with installed driver -* set UMF_BUILD_GPU_EXAMPLES, UMF_BUILD_LIBUMF_POOL_DISJOINT and UMF_BUILD_LEVEL_ZERO_PROVIDER CMake configuration flags to ON +* set UMF_BUILD_GPU_EXAMPLES and UMF_BUILD_LEVEL_ZERO_PROVIDER CMake configuration flags to ON ## IPC example with Level Zero memory provider This example demonstrates how to use UMF IPC API. The example creates two @@ -35,7 +35,7 @@ and build this example Level Zero development package should be installed. ### Requirements * Level Zero headers and libraries * compatible GPU with installed driver -* set UMF_BUILD_GPU_EXAMPLES, UMF_BUILD_LIBUMF_POOL_DISJOINT and UMF_BUILD_LEVEL_ZERO_PROVIDER CMake configuration flags to ON +* set UMF_BUILD_GPU_EXAMPLES and UMF_BUILD_LEVEL_ZERO_PROVIDER CMake configuration flags to ON ## IPC example with shared memory This example also demonstrates how to use UMF IPC API. 
The example creates two diff --git a/examples/cuda_shared_memory/CMakeLists.txt b/examples/cuda_shared_memory/CMakeLists.txt index dd8567c14..0e57ec607 100644 --- a/examples/cuda_shared_memory/CMakeLists.txt +++ b/examples/cuda_shared_memory/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright (C) 2024 Intel Corporation +# Copyright (C) 2024-2025 Intel Corporation # Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -55,9 +55,8 @@ target_link_directories( ${LIBHWLOC_LIBRARY_DIRS} ${CUDA_LIBRARY_DIRS}) target_link_options(${EXAMPLE_NAME} PRIVATE "-Wl,--start-group") -target_link_libraries( - ${EXAMPLE_NAME} PRIVATE stdc++ libdisjoint_pool.a ${CUDA_LIBRARIES} - ${LIBUMF_LIBRARIES}) +target_link_libraries(${EXAMPLE_NAME} PRIVATE stdc++ ${CUDA_LIBRARIES} + ${LIBUMF_LIBRARIES}) # an optional part - adds a test of this example add_test( diff --git a/examples/ipc_level_zero/CMakeLists.txt b/examples/ipc_level_zero/CMakeLists.txt index 273a88bb0..d672d3e92 100644 --- a/examples/ipc_level_zero/CMakeLists.txt +++ b/examples/ipc_level_zero/CMakeLists.txt @@ -53,8 +53,8 @@ target_include_directories(${EXAMPLE_NAME} PRIVATE ${LIBUMF_INCLUDE_DIRS} target_link_directories(${EXAMPLE_NAME} PRIVATE ${LIBUMF_LIBRARY_DIRS} ${LIBHWLOC_LIBRARY_DIRS}) target_link_options(${EXAMPLE_NAME} PRIVATE "-Wl,--start-group") -target_link_libraries(${EXAMPLE_NAME} PRIVATE stdc++ libdisjoint_pool.a - ze_loader ${LIBUMF_LIBRARIES}) +target_link_libraries(${EXAMPLE_NAME} PRIVATE stdc++ ze_loader + ${LIBUMF_LIBRARIES}) # an optional part - adds a test of this example add_test( diff --git a/examples/level_zero_shared_memory/CMakeLists.txt b/examples/level_zero_shared_memory/CMakeLists.txt index d05072ca2..f4aaf09e9 100644 --- a/examples/level_zero_shared_memory/CMakeLists.txt +++ b/examples/level_zero_shared_memory/CMakeLists.txt @@ -53,8 +53,8 @@ target_include_directories(${EXAMPLE_NAME} PRIVATE ${LIBUMF_INCLUDE_DIRS} target_link_directories(${EXAMPLE_NAME} PRIVATE ${LIBUMF_LIBRARY_DIRS} ${LIBHWLOC_LIBRARY_DIRS}) target_link_options(${EXAMPLE_NAME} PRIVATE "-Wl,--start-group") -target_link_libraries(${EXAMPLE_NAME} PRIVATE stdc++ libdisjoint_pool.a - ze_loader ${LIBUMF_LIBRARIES}) +target_link_libraries(${EXAMPLE_NAME} PRIVATE stdc++ ze_loader + ${LIBUMF_LIBRARIES}) # an optional part - adds a test of this example add_test( diff --git a/scripts/qemu/run-build.sh b/scripts/qemu/run-build.sh index c6314153c..724e6d7ff 100755 --- a/scripts/qemu/run-build.sh +++ b/scripts/qemu/run-build.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright (C) 2024 Intel Corporation +# Copyright (C) 2024-2025 Intel Corporation # Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -26,7 +26,6 @@ cmake .. 
\ -DUMF_BUILD_CUDA_PROVIDER=ON \ -DUMF_FORMAT_CODE_STYLE=OFF \ -DUMF_DEVELOPER_MODE=ON \ - -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON \ -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON \ -DUMF_BUILD_EXAMPLES=ON \ -DUMF_USE_COVERAGE=${COVERAGE} \ diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index c0072be7e..49fa2c5d8 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -85,8 +85,9 @@ set(UMF_SOURCES provider/provider_tracking.c critnib/critnib.c ravl/ravl.c - pool/pool_proxy.c + pool/pool_disjoint.c pool/pool_jemalloc.c + pool/pool_proxy.c pool/pool_scalable.c) if(UMF_POOL_JEMALLOC_ENABLED) diff --git a/src/base_alloc/base_alloc_global.c b/src/base_alloc/base_alloc_global.c index 2aca5d29c..f3b61566a 100644 --- a/src/base_alloc/base_alloc_global.c +++ b/src/base_alloc/base_alloc_global.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2024 Intel Corporation + * Copyright (C) 2024-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -23,6 +23,7 @@ // global base allocator used by all providers and pools static UTIL_ONCE_FLAG ba_is_initialized = UTIL_ONCE_FLAG_INIT; +static bool ba_is_destroyed = false; #define ALLOC_METADATA_SIZE (sizeof(size_t)) @@ -40,6 +41,8 @@ struct base_alloc_t { static struct base_alloc_t BASE_ALLOC = {.ac_sizes = ALLOCATION_CLASSES}; void umf_ba_destroy_global(void) { + ba_is_destroyed = true; + for (int i = 0; i < NUM_ALLOCATION_CLASSES; i++) { if (BASE_ALLOC.ac[i]) { umf_ba_destroy(BASE_ALLOC.ac[i]); @@ -48,10 +51,12 @@ void umf_ba_destroy_global(void) { } // portable version of "ba_is_initialized = UTIL_ONCE_FLAG_INIT;" - static UTIL_ONCE_FLAG is_initialized = UTIL_ONCE_FLAG_INIT; - memcpy(&ba_is_initialized, &is_initialized, sizeof(ba_is_initialized)); + static UTIL_ONCE_FLAG set_once = UTIL_ONCE_FLAG_INIT; + memcpy(&ba_is_initialized, &set_once, sizeof(ba_is_initialized)); } +bool umf_ba_global_is_destroyed(void) { return ba_is_destroyed; } + static void umf_ba_create_global(void) { for (int i = 0; i < NUM_ALLOCATION_CLASSES; i++) { // allocation classes need to be powers of 2 @@ -202,6 +207,12 @@ void umf_ba_global_free(void *ptr) { return; } + if (ba_is_destroyed) { + LOG_WARN( + "base_alloc: calling free() after the base allocator is destroyed"); + return; + } + size_t total_size; ptr = get_original_alloc(ptr, &total_size, NULL); diff --git a/src/base_alloc/base_alloc_global.h b/src/base_alloc/base_alloc_global.h index ad7f12ce5..bd55d352f 100644 --- a/src/base_alloc/base_alloc_global.h +++ b/src/base_alloc/base_alloc_global.h @@ -8,6 +8,8 @@ #ifndef UMF_BASE_ALLOC_GLOBAL_H #define UMF_BASE_ALLOC_GLOBAL_H 1 +#include + #include "base_alloc.h" #ifdef __cplusplus @@ -17,6 +19,7 @@ extern "C" { void *umf_ba_global_alloc(size_t size); void umf_ba_global_free(void *ptr); void umf_ba_destroy_global(void); +bool umf_ba_global_is_destroyed(void); size_t umf_ba_global_malloc_usable_size(void *ptr); void *umf_ba_global_aligned_alloc(size_t size, size_t alignment); diff --git a/src/libumf.def b/src/libumf.def index 98226dace..ce8820a8f 100644 --- a/src/libumf.def +++ b/src/libumf.def @@ -119,6 +119,18 @@ EXPORTS umfScalablePoolParamsSetKeepAllMemory ; Added in UMF_0.11 umfCUDAMemoryProviderParamsSetAllocFlags + umfDisjointPoolOps + umfDisjointPoolParamsCreate + umfDisjointPoolParamsDestroy + umfDisjointPoolParamsSetCapacity + umfDisjointPoolParamsSetMaxPoolableSize + umfDisjointPoolParamsSetMinBucketSize + umfDisjointPoolParamsSetName + umfDisjointPoolParamsSetSharedLimits + 
umfDisjointPoolParamsSetSlabMinSize + umfDisjointPoolParamsSetTrace + umfDisjointPoolSharedLimitsCreate + umfDisjointPoolSharedLimitsDestroy umfFixedMemoryProviderOps umfFixedMemoryProviderParamsCreate umfFixedMemoryProviderParamsDestroy diff --git a/src/libumf.map b/src/libumf.map index bbf664dcf..6582fd0f8 100644 --- a/src/libumf.map +++ b/src/libumf.map @@ -117,6 +117,18 @@ UMF_0.10 { UMF_0.11 { umfCUDAMemoryProviderParamsSetAllocFlags; + umfDisjointPoolOps; + umfDisjointPoolParamsCreate; + umfDisjointPoolParamsDestroy; + umfDisjointPoolParamsSetCapacity; + umfDisjointPoolParamsSetMaxPoolableSize; + umfDisjointPoolParamsSetMinBucketSize; + umfDisjointPoolParamsSetName; + umfDisjointPoolParamsSetSharedLimits; + umfDisjointPoolParamsSetSlabMinSize; + umfDisjointPoolParamsSetTrace; + umfDisjointPoolSharedLimitsCreate; + umfDisjointPoolSharedLimitsDestroy; umfFixedMemoryProviderOps; umfFixedMemoryProviderParamsCreate; umfFixedMemoryProviderParamsDestroy; diff --git a/src/pool/CMakeLists.txt b/src/pool/CMakeLists.txt index f54e70185..22aeab783 100644 --- a/src/pool/CMakeLists.txt +++ b/src/pool/CMakeLists.txt @@ -8,33 +8,3 @@ if(UMF_BUILD_SHARED_LIBRARY) endif() set(POOL_COMPILE_DEFINITIONS ${UMF_COMMON_COMPILE_DEFINITIONS}) - -# libumf_pool_disjoint -if(UMF_BUILD_LIBUMF_POOL_DISJOINT) - add_umf_library( - NAME disjoint_pool - TYPE STATIC - SRCS pool_disjoint.cpp ${POOL_EXTRA_SRCS} - LIBS ${POOL_EXTRA_LIBS}) - - target_compile_definitions(disjoint_pool - PRIVATE ${POOL_COMPILE_DEFINITIONS}) - - if(WINDOWS) - target_compile_options(disjoint_pool PRIVATE /DWIN32_LEAN_AND_MEAN - /DNOMINMAX) - endif() - - add_library(${PROJECT_NAME}::disjoint_pool ALIAS disjoint_pool) - - add_dependencies(disjoint_pool umf) - - target_link_libraries(disjoint_pool PRIVATE umf) - - target_include_directories( - disjoint_pool - PUBLIC $ - $) - - install(TARGETS disjoint_pool EXPORT ${PROJECT_NAME}-targets) -endif() diff --git a/src/pool/pool_disjoint.c b/src/pool/pool_disjoint.c new file mode 100644 index 000000000..ef7b3875d --- /dev/null +++ b/src/pool/pool_disjoint.c @@ -0,0 +1,1029 @@ +/* + * Copyright (C) 2022-2025 Intel Corporation + * + * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +*/ + +#include "pool_disjoint_internal.h" + +// Temporary solution for disabling memory poisoning. This is needed because +// AddressSanitizer does not support memory poisoning for GPU allocations. +// More info: https://github.com/oneapi-src/unified-memory-framework/issues/634 +#ifndef POISON_MEMORY +#undef __SANITIZE_ADDRESS__ +#endif +#include "utils_sanitizers.h" + +// Forward declarations +static void bucket_update_stats(bucket_t *bucket, int in_use, int in_pool); +static bool bucket_can_pool(bucket_t *bucket); +static void bucket_decrement_pool(bucket_t *bucket); +static slab_list_item_t *bucket_get_avail_slab(bucket_t *bucket, + bool *from_pool); + +static __TLS umf_result_t TLS_last_allocation_error; + +// Allocations are a minimum of 4KB/64KB/2MB even when a smaller size is +// requested. The implementation distinguishes between allocations of size +// ChunkCutOff = (minimum-alloc-size / 2) and those that are larger. +// Allocation requests smaller than ChunkCutoff use chunks taken from a single +// coarse-grain allocation. Thus, for example, for a 64KB minimum allocation +// size, and 8-byte allocations, only 1 in ~8000 requests results in a new +// coarse-grain allocation. 
Freeing results only in a chunk of a larger +// allocation to be marked as available and no real return to the system. An +// allocation is returned to the system only when all chunks in the larger +// allocation are freed by the program. Allocations larger than ChunkCutOff use +// a separate coarse-grain allocation for each request. These are subject to +// "pooling". That is, when such an allocation is freed by the program it is +// retained in a pool. The pool is available for future allocations, which means +// there are fewer actual coarse-grain allocations/deallocations. + +// The largest size which is allocated via the allocator. +// Allocations with size > CutOff bypass the pool and +// go directly to the provider. +static size_t CutOff = (size_t)1 << 31; // 2GB + +static size_t bucket_slab_min_size(bucket_t *bucket) { + return bucket->pool->params.slab_min_size; +} + +static size_t bucket_slab_alloc_size(bucket_t *bucket) { + return utils_max(bucket->size, bucket_slab_min_size(bucket)); +} + +static slab_t *create_slab(bucket_t *bucket) { + assert(bucket); + + umf_result_t res = UMF_RESULT_SUCCESS; + umf_memory_provider_handle_t provider = bucket->pool->provider; + + slab_t *slab = umf_ba_global_alloc(sizeof(*slab)); + if (slab == NULL) { + LOG_ERR("allocation of new slab failed!"); + return NULL; + } + + slab->num_chunks_allocated = 0; + slab->first_free_chunk_idx = 0; + slab->bucket = bucket; + + slab->iter.val = slab; + slab->iter.prev = slab->iter.next = NULL; + + slab->num_chunks_total = + utils_max(bucket_slab_min_size(bucket) / bucket->size, 1); + slab->chunks = + umf_ba_global_alloc(sizeof(*slab->chunks) * slab->num_chunks_total); + if (slab->chunks == NULL) { + LOG_ERR("allocation of slab chunks failed!"); + goto free_slab; + } + memset(slab->chunks, 0, sizeof(*slab->chunks) * slab->num_chunks_total); + + // if slab_min_size is not a multiple of bucket size, we would have some + // padding at the end of the slab + slab->slab_size = bucket_slab_alloc_size(bucket); + + // TODO not true + // NOTE: originally slabs memory were allocated without alignment + // with this registering a slab is simpler and doesn't require multimap + res = umfMemoryProviderAlloc(provider, slab->slab_size, 0, &slab->mem_ptr); + if (res != UMF_RESULT_SUCCESS) { + LOG_ERR("allocation of slab data failed!"); + goto free_slab_chunks; + } + + // TODO + // ASSERT_IS_ALIGNED((uintptr_t)slab->mem_ptr, bucket->size); + + // raw allocation is not available for user so mark it as inaccessible + utils_annotate_memory_inaccessible(slab->mem_ptr, slab->slab_size); + + LOG_DEBUG("bucket: %p, slab_size: %zu", (void *)bucket, slab->slab_size); + return slab; + +free_slab_chunks: + umf_ba_global_free(slab->chunks); + +free_slab: + umf_ba_global_free(slab); + return NULL; +} + +static void destroy_slab(slab_t *slab) { + LOG_DEBUG("bucket: %p, slab_size: %zu", (void *)slab->bucket, + slab->slab_size); + + umf_memory_provider_handle_t provider = slab->bucket->pool->provider; + umf_result_t res = + umfMemoryProviderFree(provider, slab->mem_ptr, slab->slab_size); + if (res != UMF_RESULT_SUCCESS) { + LOG_ERR("deallocation of slab data failed!"); + } + + umf_ba_global_free(slab->chunks); + umf_ba_global_free(slab); +} + +// return the index of the first available chunk, SIZE_MAX otherwise +static size_t slab_find_first_available_chunk_idx(const slab_t *slab) { + // use the first free chunk index as a hint for the search + for (bool *chunk = slab->chunks + slab->first_free_chunk_idx; + chunk != slab->chunks + 
slab->num_chunks_total; chunk++) { + + // false means not used + if (*chunk == false) { + size_t idx = chunk - slab->chunks; + LOG_DEBUG("idx: %zu", idx); + return idx; + } + } + + LOG_DEBUG("idx: SIZE_MAX"); + return SIZE_MAX; +} + +static void *slab_get_chunk(slab_t *slab) { + // free chunk must exist, otherwise we would have allocated another slab + const size_t chunk_idx = slab_find_first_available_chunk_idx(slab); + assert(chunk_idx != SIZE_MAX); + + void *free_chunk = + (void *)((uintptr_t)slab->mem_ptr + chunk_idx * slab->bucket->size); + + // mark chunk as used + slab->chunks[chunk_idx] = true; + slab->num_chunks_allocated += 1; + + // use the found index as the next hint + slab->first_free_chunk_idx = chunk_idx + 1; + + return free_chunk; +} + +static void *slab_get(const slab_t *slab) { return slab->mem_ptr; } +static void *slab_get_end(const slab_t *slab) { + return (void *)((uintptr_t)slab->mem_ptr + + bucket_slab_min_size(slab->bucket)); +} + +static void slab_free_chunk(slab_t *slab, void *ptr) { + // This method should be called through bucket (since we might remove the + // slab as a result), therefore all locks are done on bucket level. + + // Make sure that we're in the right slab + assert(ptr >= slab_get(slab) && ptr < slab_get_end(slab)); + + // Even if the pointer p was previously aligned, it's still inside the + // corresponding chunk, so we get the correct index here. + size_t chunk_idx = + ((uintptr_t)ptr - (uintptr_t)slab->mem_ptr) / slab->bucket->size; + + // Make sure that the chunk was allocated + assert(slab->chunks[chunk_idx] && "double free detected"); + slab->chunks[chunk_idx] = false; + slab->num_chunks_allocated -= 1; + + if (chunk_idx < slab->first_free_chunk_idx) { + slab->first_free_chunk_idx = chunk_idx; + } + + LOG_DEBUG("chunk_idx: %zu, num_chunks_allocated: %zu, " + "first_free_chunk_idx: %zu", + chunk_idx, slab->num_chunks_allocated, + slab->first_free_chunk_idx); +} + +static bool slab_has_avail(const slab_t *slab) { + return slab->num_chunks_allocated < slab->num_chunks_total; +} + +static umf_result_t pool_register_slab(disjoint_pool_t *pool, slab_t *slab) { + critnib *slabs = pool->known_slabs; + + // NOTE: changed vs original DisjointPool implementation - currently slab + // is already aligned to bucket size. 
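+ // The slab's start address is the critnib key; any pointer that falls
+ // inside the slab can later be mapped back to it with critnib_find_le()
+ // (this is how disjoint_pool_free() finds the owning slab).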
+ void *slab_addr = slab_get(slab); + // TODO ASSERT_IS_ALIGNED((uintptr_t)slab_addr, bucket->size); + LOG_DEBUG("slab: %p, start: %p", (void *)slab, slab_addr); + + // NOTE: we don't need to lock the slabs map as the critnib already has a + // lock inside it + int ret = critnib_insert(slabs, (uintptr_t)slab_addr, slab, 0); + umf_result_t res = UMF_RESULT_SUCCESS; + if (ret == ENOMEM) { + LOG_ERR("register failed because of out of memory!"); + res = UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } else if (ret == EEXIST) { + LOG_ERR("register failed because the address is already registered!"); + res = UMF_RESULT_ERROR_UNKNOWN; + } + + return res; +} + +static umf_result_t pool_unregister_slab(disjoint_pool_t *pool, slab_t *slab) { + critnib *slabs = pool->known_slabs; + + void *slab_addr = slab_get(slab); + // TODO ASSERT_IS_ALIGNED((uintptr_t)slab_addr, bucket->size); + LOG_DEBUG("slab: %p, start: %p", (void *)slab, slab_addr); + + critnib_remove(slabs, (uintptr_t)slab_addr); + + return UMF_RESULT_SUCCESS; +} + +static bucket_t * +create_bucket(size_t sz, disjoint_pool_t *pool, + umf_disjoint_pool_shared_limits_handle_t shared_limits) { + + bucket_t *bucket = umf_ba_global_alloc(sizeof(*bucket)); + if (bucket == NULL) { + LOG_ERR("allocation of new bucket failed!"); + return NULL; + } + + memset(bucket, 0, sizeof(*bucket)); + bucket->size = sz; + bucket->pool = pool; + bucket->shared_limits = shared_limits; + + utils_mutex_init(&bucket->bucket_lock); + return bucket; +} + +static void destroy_bucket(bucket_t *bucket) { + // use an extra tmp to store the next iterator before destroying the slab + slab_list_item_t *it = NULL, *tmp = NULL; + LL_FOREACH_SAFE(bucket->available_slabs, it, tmp) { + LL_DELETE(bucket->available_slabs, it); + destroy_slab(it->val); + } + + LL_FOREACH_SAFE(bucket->unavailable_slabs, it, tmp) { + LL_DELETE(bucket->unavailable_slabs, it); + destroy_slab(it->val); + } + + utils_mutex_destroy_not_free(&bucket->bucket_lock); + umf_ba_global_free(bucket); +} + +static size_t slab_get_num_free_chunks(const slab_t *slab) { + return slab->num_chunks_total - slab->num_chunks_allocated; +} + +// NOTE: this function must be called under bucket->bucket_lock +static void bucket_free_chunk(bucket_t *bucket, void *ptr, slab_t *slab, + bool *to_pool) { + slab_free_chunk(slab, ptr); + + // in case if the slab was previously full and now has single available + // chunk, it should be moved to the list of available slabs + if (slab_get_num_free_chunks(slab) == 1) { + slab_list_item_t *slab_it = &slab->iter; + assert(slab_it->val != NULL); + DL_DELETE(bucket->unavailable_slabs, slab_it); + DL_PREPEND(bucket->available_slabs, slab_it); + bucket->available_slabs_num++; + } + + // check if slab is empty, and pool it if we can + if (slab->num_chunks_allocated == 0) { + // The slab is now empty. + // If the pool has capacity then put the slab in the pool. + // The to_pool parameter indicates whether the slab will be put in the + // pool or freed. 
+ *to_pool = bucket_can_pool(bucket); + if (*to_pool == false) { + // remove slab + slab_list_item_t *slab_it = &slab->iter; + assert(slab_it->val != NULL); + pool_unregister_slab(bucket->pool, slab_it->val); + DL_DELETE(bucket->available_slabs, slab_it); + bucket->available_slabs_num--; + destroy_slab(slab_it->val); + } + } else { + // return this chunk to the pool + *to_pool = true; + } +} + +// NOTE: this function must be called under bucket->bucket_lock +static void *bucket_get_free_chunk(bucket_t *bucket, bool *from_pool) { + slab_list_item_t *slab_it = bucket_get_avail_slab(bucket, from_pool); + if (slab_it == NULL) { + return NULL; + } + + void *free_chunk = slab_get_chunk(slab_it->val); + + // if we allocated last free chunk from the slab and now it is full, move + // it to unavailable slabs and update its iterator + if (!(slab_has_avail(slab_it->val))) { + DL_DELETE(bucket->available_slabs, slab_it); + bucket->available_slabs_num--; + slab_it->prev = NULL; + DL_PREPEND(bucket->unavailable_slabs, slab_it); + } + + return free_chunk; +} + +static size_t bucket_chunk_cut_off(bucket_t *bucket) { + return bucket_slab_min_size(bucket) / 2; +} + +static slab_t *bucket_create_slab(bucket_t *bucket) { + slab_t *slab = create_slab(bucket); + if (slab == NULL) { + LOG_ERR("create_slab failed!") + return NULL; + } + + umf_result_t res = pool_register_slab(bucket->pool, slab); + if (res != UMF_RESULT_SUCCESS) { + LOG_ERR("slab_reg failed!") + destroy_slab(slab); + return NULL; + } + + DL_PREPEND(bucket->available_slabs, &slab->iter); + bucket->available_slabs_num++; + bucket_update_stats(bucket, 1, 0); + + return slab; +} + +static slab_list_item_t *bucket_get_avail_slab(bucket_t *bucket, + bool *from_pool) { + if (bucket->available_slabs == NULL) { + bucket_create_slab(bucket); + *from_pool = false; + } else { + slab_t *slab = bucket->available_slabs->val; + // Allocation from existing slab is treated as from pool for statistics. + *from_pool = true; + if (slab->num_chunks_allocated == 0) { + // If this was an empty slab, it was in the pool. + // Now it is no longer in the pool, so update count. + --bucket->chunked_slabs_in_pool; + bucket_decrement_pool(bucket); + } + } + + return bucket->available_slabs; +} + +static size_t bucket_max_pooled_slabs(bucket_t *bucket) { + // For small buckets where slabs are split to chunks, just one pooled slab is sufficient. + // For larger buckets, the capacity could be more and is adjustable. + if (bucket->size <= bucket_chunk_cut_off(bucket)) { + return 1; + } else { + return bucket->pool->params.capacity; + } +} + +static void bucket_update_stats(bucket_t *bucket, int in_use, int in_pool) { + if (bucket->pool->params.pool_trace == 0) { + return; + } + + bucket->curr_slabs_in_use += in_use; + bucket->max_slabs_in_use = + utils_max(bucket->curr_slabs_in_use, bucket->max_slabs_in_use); + + bucket->curr_slabs_in_pool += in_pool; + bucket->max_slabs_in_pool = + utils_max(bucket->curr_slabs_in_pool, bucket->max_slabs_in_pool); + + // Increment or decrement current pool sizes based on whether + // slab was added to or removed from pool. 
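+ // in_pool is +1 when a slab is moved into the pool, -1 when it is taken
+ // out, and 0 when only the in-use counter changes, so cur_pool_size is
+ // adjusted by at most one slab allocation size per call.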
+ bucket->pool->params.cur_pool_size += + in_pool * bucket_slab_alloc_size(bucket); +} + +static void bucket_decrement_pool(bucket_t *bucket) { + bucket_update_stats(bucket, 1, -1); + utils_fetch_and_add64(&bucket->shared_limits->total_size, + -(long long)bucket_slab_alloc_size(bucket)); +} + +static bool bucket_can_pool(bucket_t *bucket) { + size_t new_free_slabs_in_bucket; + + new_free_slabs_in_bucket = bucket->chunked_slabs_in_pool + 1; + + // we keep at most params.capacity slabs in the pool + if (bucket_max_pooled_slabs(bucket) >= new_free_slabs_in_bucket) { + size_t pool_size = 0; + utils_atomic_load_acquire(&bucket->shared_limits->total_size, + &pool_size); + while (true) { + size_t new_pool_size = pool_size + bucket_slab_alloc_size(bucket); + + if (bucket->shared_limits->max_size < new_pool_size) { + break; + } + + if (utils_compare_exchange(&bucket->shared_limits->total_size, + &pool_size, &new_pool_size)) { + ++bucket->chunked_slabs_in_pool; + + bucket_update_stats(bucket, -1, 1); + return true; + } + } + } + + bucket_update_stats(bucket, -1, 0); + return false; +} + +static size_t size_to_idx(disjoint_pool_t *pool, size_t size) { + assert(size <= CutOff && "Unexpected size"); + assert(size > 0 && "Unexpected size"); + + size_t min_bucket_size = (size_t)1 << pool->min_bucket_size_exp; + if (size < min_bucket_size) { + return 0; + } + + // get the position of the leftmost set bit + size_t position = getLeftmostSetBitPos(size); + + bool is_power_of_2 = 0 == (size & (size - 1)); + bool larger_than_halfway_between_powers_of_2 = + !is_power_of_2 && + (bool)((size - 1) & ((uint64_t)(1) << (position - 1))); + size_t index = (position - pool->min_bucket_size_exp) * 2 + + (int)(!is_power_of_2) + + (int)larger_than_halfway_between_powers_of_2; + + return index; +} + +static umf_disjoint_pool_shared_limits_t * +disjoint_pool_get_limits(disjoint_pool_t *pool) { + if (pool->params.shared_limits) { + return pool->params.shared_limits; + } else { + return pool->default_shared_limits; + } +} + +static bucket_t *disjoint_pool_find_bucket(disjoint_pool_t *pool, size_t size) { + size_t calculated_idx = size_to_idx(pool, size); + return pool->buckets[calculated_idx]; +} + +static void disjoint_pool_print_stats(disjoint_pool_t *pool) { + size_t high_bucket_size = 0; + size_t high_peak_slabs_in_use = 0; + const char *name = pool->params.name; + + LOG_DEBUG("\"%s\" pool memory statistics", name); + LOG_DEBUG("%14s %12s %12s %18s %20s %21s", "Bucket Size", "Allocs", "Frees", + "Allocs from Pool", "Peak Slabs in Use", "Peak Slabs in Pool"); + + for (size_t i = 0; i < pool->buckets_num; i++) { + bucket_t *bucket = pool->buckets[i]; + if (bucket->alloc_count) { + LOG_DEBUG("%14zu %12zu %12zu %18zu %20zu %21zu", bucket->size, + bucket->alloc_count, bucket->free_count, + bucket->alloc_pool_count, bucket->max_slabs_in_use, + bucket->max_slabs_in_pool); + high_bucket_size = + utils_max(bucket_slab_alloc_size(bucket), high_bucket_size); + } + high_peak_slabs_in_use = + utils_max(bucket->max_slabs_in_use, high_peak_slabs_in_use); + } + + LOG_DEBUG("current pool size: %zu", + disjoint_pool_get_limits(pool)->total_size); + LOG_DEBUG("suggested setting=;%c%s:%zu,%zu,64K", (char)tolower(name[0]), + (name + 1), high_bucket_size, high_peak_slabs_in_use); +} + +static void *disjoint_pool_allocate(disjoint_pool_t *pool, size_t size) { + if (size == 0) { + return NULL; + } + + void *ptr = NULL; + + if (size > pool->params.max_poolable_size) { + umf_result_t ret = + umfMemoryProviderAlloc(pool->provider, size, 0, &ptr); + 
if (ret != UMF_RESULT_SUCCESS) { + TLS_last_allocation_error = ret; + LOG_ERR("allocation from the memory provider failed"); + return NULL; + } + + utils_annotate_memory_undefined(ptr, size); + return ptr; + } + + bucket_t *bucket = disjoint_pool_find_bucket(pool, size); + + utils_mutex_lock(&bucket->bucket_lock); + + bool from_pool = false; + ptr = bucket_get_free_chunk(bucket, &from_pool); + + if (ptr == NULL) { + TLS_last_allocation_error = UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + utils_mutex_unlock(&bucket->bucket_lock); + return NULL; + } + + if (pool->params.pool_trace > 1) { + // update stats + ++bucket->alloc_count; + if (from_pool) { + ++bucket->alloc_pool_count; + } + } + + utils_mutex_unlock(&bucket->bucket_lock); + + if (pool->params.pool_trace > 2) { + LOG_DEBUG("Allocated %8zu %s bytes from %s -> %p", size, + pool->params.name, (from_pool ? "pool" : "provider"), ptr); + } + + VALGRIND_DO_MEMPOOL_ALLOC(pool, ptr, size); + utils_annotate_memory_undefined(ptr, bucket->size); + return ptr; +} + +umf_result_t disjoint_pool_initialize(umf_memory_provider_handle_t provider, + void *params, void **ppPool) { + // TODO set defaults when user pass the NULL as params + if (!provider || !params || !ppPool) { + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + disjoint_pool_t *disjoint_pool = + umf_ba_global_alloc(sizeof(*disjoint_pool)); + if (!disjoint_pool) { + return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + umf_disjoint_pool_params_t *dp_params = + (umf_disjoint_pool_params_t *)params; + + // min_bucket_size parameter must be a power of 2 for bucket sizes + // to generate correctly. + if (!dp_params->min_bucket_size || + !IS_POWER_OF_2(dp_params->min_bucket_size)) { + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + VALGRIND_DO_CREATE_MEMPOOL(disjoint_pool, 0, 0); + + disjoint_pool->provider = provider; + disjoint_pool->params = *dp_params; + + disjoint_pool->known_slabs = critnib_new(); + + // Generate buckets sized such as: 64, 96, 128, 192, ..., CutOff. + // Powers of 2 and the value halfway between the powers of 2. + size_t Size1 = disjoint_pool->params.min_bucket_size; + + // min_bucket_size cannot be larger than CutOff. + Size1 = utils_min(Size1, CutOff); + + // Buckets sized smaller than the bucket default size- 8 aren't needed. + Size1 = utils_max(Size1, UMF_DISJOINT_POOL_MIN_BUCKET_DEFAULT_SIZE); + + // Calculate the exponent for min_bucket_size used for finding buckets. 
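+ // e.g. with the default min_bucket_size of 8 the exponent is 3; size_to_idx()
+ // subtracts it so that the smallest bucket maps to index 0.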
+ disjoint_pool->min_bucket_size_exp = (size_t)log2Utils(Size1); + disjoint_pool->default_shared_limits = + umfDisjointPoolSharedLimitsCreate(SIZE_MAX); + + // count number of buckets, start from 1 + disjoint_pool->buckets_num = 1; + size_t Size2 = Size1 + Size1 / 2; + size_t ts2 = Size2, ts1 = Size1; + for (; Size2 < CutOff; Size1 *= 2, Size2 *= 2) { + disjoint_pool->buckets_num += 2; + } + disjoint_pool->buckets = umf_ba_global_alloc( + sizeof(*disjoint_pool->buckets) * disjoint_pool->buckets_num); + + int i = 0; + Size1 = ts1; + Size2 = ts2; + for (; Size2 < CutOff; Size1 *= 2, Size2 *= 2, i += 2) { + disjoint_pool->buckets[i] = create_bucket( + Size1, disjoint_pool, disjoint_pool_get_limits(disjoint_pool)); + disjoint_pool->buckets[i + 1] = create_bucket( + Size2, disjoint_pool, disjoint_pool_get_limits(disjoint_pool)); + } + disjoint_pool->buckets[i] = create_bucket( + CutOff, disjoint_pool, disjoint_pool_get_limits(disjoint_pool)); + + umf_result_t ret = umfMemoryProviderGetMinPageSize( + provider, NULL, &disjoint_pool->provider_min_page_size); + if (ret != UMF_RESULT_SUCCESS) { + disjoint_pool->provider_min_page_size = 0; + } + + *ppPool = (void *)disjoint_pool; + + return UMF_RESULT_SUCCESS; +} + +void *disjoint_pool_malloc(void *pool, size_t size) { + disjoint_pool_t *hPool = (disjoint_pool_t *)pool; + void *ptr = disjoint_pool_allocate(hPool, size); + + return ptr; +} + +void *disjoint_pool_calloc(void *pool, size_t num, size_t size) { + (void)pool; + (void)num; + (void)size; + + // Not supported + TLS_last_allocation_error = UMF_RESULT_ERROR_NOT_SUPPORTED; + return NULL; +} + +void *disjoint_pool_realloc(void *pool, void *ptr, size_t size) { + (void)pool; + (void)ptr; + (void)size; + + // Not supported + TLS_last_allocation_error = UMF_RESULT_ERROR_NOT_SUPPORTED; + return NULL; +} + +void *disjoint_pool_aligned_malloc(void *pool, size_t size, size_t alignment) { + disjoint_pool_t *disjoint_pool = (disjoint_pool_t *)pool; + + void *ptr = NULL; + + if (size == 0) { + return NULL; + } + + if (alignment <= 1) { + return disjoint_pool_allocate(pool, size); + } + + size_t aligned_size; + if (alignment <= disjoint_pool->provider_min_page_size) { + // This allocation will be served from a Bucket which size is multiple + // of Alignment and Slab address is aligned to provider_min_page_size + // so the address will be properly aligned. + aligned_size = (size > 1) ? ALIGN_UP_SAFE(size, alignment) : alignment; + } else { + // Slabs are only aligned to provider_min_page_size, we need to compensate + // for that in case the allocation is within pooling limit. + // TODO: consider creating properly-aligned Slabs on demand + aligned_size = size + alignment - 1; + } + + // Check if requested allocation size is within pooling limit. + // If not, just request aligned pointer from the system. 
+ if (aligned_size > disjoint_pool->params.max_poolable_size) { + + umf_result_t ret = umfMemoryProviderAlloc(disjoint_pool->provider, size, + alignment, &ptr); + if (ret != UMF_RESULT_SUCCESS) { + TLS_last_allocation_error = ret; + LOG_ERR("allocation from the memory provider failed"); + return NULL; + } + + assert(ptr); + utils_annotate_memory_undefined(ptr, size); + return ptr; + } + + bool from_pool = false; + bucket_t *bucket = disjoint_pool_find_bucket(pool, aligned_size); + + utils_mutex_lock(&bucket->bucket_lock); + + ptr = bucket_get_free_chunk(bucket, &from_pool); + + if (ptr == NULL) { + TLS_last_allocation_error = UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + utils_mutex_unlock(&bucket->bucket_lock); + return NULL; + } + + if (disjoint_pool->params.pool_trace > 1) { + // update stats + ++bucket->alloc_count; + if (from_pool) { + ++bucket->alloc_pool_count; + } + } + + utils_mutex_unlock(&bucket->bucket_lock); + + if (disjoint_pool->params.pool_trace > 2) { + LOG_DEBUG("Allocated %8zu %s bytes aligned at %zu from %s -> %p", size, + disjoint_pool->params.name, alignment, + (from_pool ? "pool" : "provider"), ptr); + } + + void *aligned_ptr = (void *)ALIGN_UP_SAFE((size_t)ptr, alignment); + VALGRIND_DO_MEMPOOL_ALLOC(disjoint_pool, aligned_ptr, size); + utils_annotate_memory_undefined(aligned_ptr, size); + return aligned_ptr; +} + +size_t disjoint_pool_malloc_usable_size(void *pool, void *ptr) { + (void)pool; + (void)ptr; + + // Not supported + return 0; +} + +umf_result_t disjoint_pool_free(void *pool, void *ptr) { + disjoint_pool_t *disjoint_pool = (disjoint_pool_t *)pool; + if (ptr == NULL) { + return UMF_RESULT_SUCCESS; + } + + // check if given pointer is allocated inside any Disjoint Pool slab + slab_t *slab = + (slab_t *)critnib_find_le(disjoint_pool->known_slabs, (uintptr_t)ptr); + + if (slab == NULL || ptr >= slab_get_end(slab)) { + + // regular free + umf_alloc_info_t allocInfo = {NULL, 0, NULL}; + umf_result_t ret = umfMemoryTrackerGetAllocInfo(ptr, &allocInfo); + if (ret != UMF_RESULT_SUCCESS) { + TLS_last_allocation_error = ret; + LOG_ERR("failed to get allocation info from the memory tracker"); + return ret; + } + + size_t size = allocInfo.baseSize; + umf_memory_provider_handle_t provider = disjoint_pool->provider; + ret = umfMemoryProviderFree(provider, ptr, size); + if (ret != UMF_RESULT_SUCCESS) { + TLS_last_allocation_error = ret; + LOG_ERR("deallocation from the memory provider failed"); + } + + return ret; + } + + bool to_pool = false; + + if (ptr < slab_get(slab) || ptr >= slab_get_end(slab)) { + assert(0); + return UMF_RESULT_ERROR_UNKNOWN; + } + + // The slab object won't be deleted until it's removed from the map which is + // protected by the lock, so it's safe to access it here. + + bucket_t *bucket = slab->bucket; + + VALGRIND_DO_MEMPOOL_FREE(pool, ptr); + utils_mutex_lock(&bucket->bucket_lock); + + utils_annotate_memory_inaccessible(ptr, bucket->size); + bucket_free_chunk(bucket, ptr, slab, &to_pool); + + if (disjoint_pool->params.pool_trace > 1) { + bucket->free_count++; + } + + utils_mutex_unlock(&bucket->bucket_lock); + + if (disjoint_pool->params.pool_trace > 2) { + const char *name = disjoint_pool->params.name; + LOG_DEBUG("freed %s %p to %s, current total pool size: %zu, current " + "pool size for %s: %zu", + name, ptr, (to_pool ? 
"pool" : "provider"), + disjoint_pool_get_limits(disjoint_pool)->total_size, name, + disjoint_pool->params.cur_pool_size); + } + + return UMF_RESULT_SUCCESS; +} + +umf_result_t disjoint_pool_get_last_allocation_error(void *pool) { + (void)pool; + + return TLS_last_allocation_error; +} + +// Define destructor for use with unique_ptr +void disjoint_pool_finalize(void *pool) { + + disjoint_pool_t *hPool = (disjoint_pool_t *)pool; + + if (hPool->params.pool_trace > 1) { + disjoint_pool_print_stats(hPool); + } + + for (size_t i = 0; i < hPool->buckets_num; i++) { + destroy_bucket(hPool->buckets[i]); + } + + VALGRIND_DO_DESTROY_MEMPOOL(hPool); + + umfDisjointPoolSharedLimitsDestroy(hPool->default_shared_limits); + critnib_delete(hPool->known_slabs); + + umf_ba_global_free(hPool); +} + +static umf_memory_pool_ops_t UMF_DISJOINT_POOL_OPS = { + .version = UMF_VERSION_CURRENT, + .initialize = disjoint_pool_initialize, + .finalize = disjoint_pool_finalize, + .malloc = disjoint_pool_malloc, + .calloc = disjoint_pool_calloc, + .realloc = disjoint_pool_realloc, + .aligned_malloc = disjoint_pool_aligned_malloc, + .malloc_usable_size = disjoint_pool_malloc_usable_size, + .free = disjoint_pool_free, + .get_last_allocation_error = disjoint_pool_get_last_allocation_error, +}; + +umf_memory_pool_ops_t *umfDisjointPoolOps(void) { + return &UMF_DISJOINT_POOL_OPS; +} + +umf_disjoint_pool_shared_limits_t * +umfDisjointPoolSharedLimitsCreate(size_t max_size) { + umf_disjoint_pool_shared_limits_t *ptr = umf_ba_global_alloc(sizeof(*ptr)); + if (ptr == NULL) { + LOG_ERR("cannot allocate memory for disjoint pool shared limits"); + return NULL; + } + ptr->max_size = max_size; + ptr->total_size = 0; + return ptr; +} + +void umfDisjointPoolSharedLimitsDestroy( + umf_disjoint_pool_shared_limits_t *limits) { + umf_ba_global_free(limits); +} + +umf_result_t +umfDisjointPoolParamsCreate(umf_disjoint_pool_params_handle_t *hParams) { + static const char *DEFAULT_NAME = "disjoint_pool"; + + if (!hParams) { + LOG_ERR("disjoint pool params handle is NULL"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + umf_disjoint_pool_params_handle_t params = + umf_ba_global_alloc(sizeof(*params)); + if (params == NULL) { + LOG_ERR("cannot allocate memory for disjoint pool params"); + return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + params->slab_min_size = 0; + params->max_poolable_size = 0; + params->capacity = 0; + params->min_bucket_size = UMF_DISJOINT_POOL_MIN_BUCKET_DEFAULT_SIZE; + params->cur_pool_size = 0; + params->pool_trace = 0; + params->shared_limits = NULL; + params->name = NULL; + + umf_result_t ret = umfDisjointPoolParamsSetName(params, DEFAULT_NAME); + if (ret != UMF_RESULT_SUCCESS) { + umf_ba_global_free(params); + return ret; + } + + *hParams = params; + + return UMF_RESULT_SUCCESS; +} + +umf_result_t +umfDisjointPoolParamsDestroy(umf_disjoint_pool_params_handle_t hParams) { + // NOTE: dereferencing hParams when BA is already destroyed leads to crash + if (hParams && !umf_ba_global_is_destroyed()) { + umf_ba_global_free(hParams->name); + umf_ba_global_free(hParams); + } + + return UMF_RESULT_SUCCESS; +} + +umf_result_t +umfDisjointPoolParamsSetSlabMinSize(umf_disjoint_pool_params_handle_t hParams, + size_t slabMinSize) { + if (!hParams) { + LOG_ERR("disjoint pool params handle is NULL"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + hParams->slab_min_size = slabMinSize; + return UMF_RESULT_SUCCESS; +} + +umf_result_t umfDisjointPoolParamsSetMaxPoolableSize( + umf_disjoint_pool_params_handle_t hParams, size_t 
maxPoolableSize) { + if (!hParams) { + LOG_ERR("disjoint pool params handle is NULL"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + hParams->max_poolable_size = maxPoolableSize; + return UMF_RESULT_SUCCESS; +} + +umf_result_t +umfDisjointPoolParamsSetCapacity(umf_disjoint_pool_params_handle_t hParams, + size_t maxCapacity) { + if (!hParams) { + LOG_ERR("disjoint pool params handle is NULL"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + hParams->capacity = maxCapacity; + return UMF_RESULT_SUCCESS; +} + +umf_result_t +umfDisjointPoolParamsSetMinBucketSize(umf_disjoint_pool_params_handle_t hParams, + size_t minBucketSize) { + if (!hParams) { + LOG_ERR("disjoint pool params handle is NULL"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + // minBucketSize parameter must be a power of 2 and greater than 0. + if (minBucketSize == 0 || (minBucketSize & (minBucketSize - 1))) { + LOG_ERR("minBucketSize must be a power of 2 and greater than 0"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + hParams->min_bucket_size = minBucketSize; + return UMF_RESULT_SUCCESS; +} + +umf_result_t +umfDisjointPoolParamsSetTrace(umf_disjoint_pool_params_handle_t hParams, + int poolTrace) { + if (!hParams) { + LOG_ERR("disjoint pool params handle is NULL"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + hParams->pool_trace = poolTrace; + return UMF_RESULT_SUCCESS; +} + +umf_result_t umfDisjointPoolParamsSetSharedLimits( + umf_disjoint_pool_params_handle_t hParams, + umf_disjoint_pool_shared_limits_handle_t hSharedLimits) { + if (!hParams) { + LOG_ERR("disjoint pool params handle is NULL"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + hParams->shared_limits = hSharedLimits; + return UMF_RESULT_SUCCESS; +} + +umf_result_t +umfDisjointPoolParamsSetName(umf_disjoint_pool_params_handle_t hParams, + const char *name) { + if (!hParams) { + LOG_ERR("disjoint pool params handle is NULL"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + char *newName = umf_ba_global_alloc(sizeof(*newName) * (strlen(name) + 1)); + if (newName == NULL) { + LOG_ERR("cannot allocate memory for disjoint pool name"); + return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + umf_ba_global_free(hParams->name); + hParams->name = newName; + strcpy(hParams->name, name); + + return UMF_RESULT_SUCCESS; +} diff --git a/src/pool/pool_disjoint.cpp b/src/pool/pool_disjoint.cpp deleted file mode 100644 index 0390f5375..000000000 --- a/src/pool/pool_disjoint.cpp +++ /dev/null @@ -1,1313 +0,0 @@ -// Copyright (C) 2023-2025 Intel Corporation -// Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -// TODO: replace with logger? -#include - -#include "provider/provider_tracking.h" - -#include "../cpp_helpers.hpp" -#include "pool_disjoint.h" -#include "umf.h" -#include "utils_log.h" -#include "utils_math.h" -#include "utils_sanitizers.h" - -// Temporary solution for disabling memory poisoning. This is needed because -// AddressSanitizer does not support memory poisoning for GPU allocations. 
-// More info: https://github.com/oneapi-src/unified-memory-framework/issues/634 -#ifndef POISON_MEMORY -#define POISON_MEMORY 0 -#endif - -static inline void annotate_memory_inaccessible([[maybe_unused]] void *ptr, - [[maybe_unused]] size_t size) { -#if (POISON_MEMORY != 0) - utils_annotate_memory_inaccessible(ptr, size); -#endif -} - -static inline void annotate_memory_undefined([[maybe_unused]] void *ptr, - [[maybe_unused]] size_t size) { -#if (POISON_MEMORY != 0) - utils_annotate_memory_undefined(ptr, size); -#endif -} - -typedef struct umf_disjoint_pool_shared_limits_t { - size_t MaxSize; - std::atomic TotalSize; -} umf_disjoint_pool_shared_limits_t; - -// Configuration of Disjoint Pool -typedef struct umf_disjoint_pool_params_t { - // Minimum allocation size that will be requested from the memory provider. - size_t SlabMinSize; - - // Allocations up to this limit will be subject to chunking/pooling - size_t MaxPoolableSize; - - // When pooling, each bucket will hold a max of 'Capacity' unfreed slabs - size_t Capacity; - - // Holds the minimum bucket size valid for allocation of a memory type. - // This value must be a power of 2. - size_t MinBucketSize; - - // Holds size of the pool managed by the allocator. - size_t CurPoolSize; - - // Whether to print pool usage statistics - int PoolTrace; - - // Memory limits that can be shared between multitple pool instances, - // i.e. if multiple pools use the same SharedLimits sum of those pools' - // sizes cannot exceed MaxSize. - umf_disjoint_pool_shared_limits_handle_t SharedLimits; - - // Name used in traces - char *Name; -} umf_disjoint_pool_params_t; - -class DisjointPool { - public: - class AllocImpl; - using Config = umf_disjoint_pool_params_t; - - umf_result_t initialize(umf_memory_provider_handle_t provider, - umf_disjoint_pool_params_handle_t parameters); - void *malloc(size_t size); - void *calloc(size_t, size_t); - void *realloc(void *, size_t); - void *aligned_malloc(size_t size, size_t alignment); - size_t malloc_usable_size(void *); - umf_result_t free(void *ptr); - umf_result_t get_last_allocation_error(); - - DisjointPool(); - ~DisjointPool(); - - private: - std::unique_ptr impl; -}; - -umf_disjoint_pool_shared_limits_t * -umfDisjointPoolSharedLimitsCreate(size_t MaxSize) { - return new umf_disjoint_pool_shared_limits_t{MaxSize, 0}; -} - -void umfDisjointPoolSharedLimitsDestroy( - umf_disjoint_pool_shared_limits_handle_t hSharedLimits) { - delete hSharedLimits; -} - -umf_result_t -umfDisjointPoolParamsCreate(umf_disjoint_pool_params_handle_t *hParams) { - static const char *DEFAULT_NAME = "disjoint_pool"; - - if (!hParams) { - LOG_ERR("disjoint pool params handle is NULL"); - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - - umf_disjoint_pool_params_handle_t params = new umf_disjoint_pool_params_t{}; - if (params == nullptr) { - LOG_ERR("cannot allocate memory for disjoint pool params"); - return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - params->SlabMinSize = 0; - params->MaxPoolableSize = 0; - params->Capacity = 0; - params->MinBucketSize = UMF_DISJOINT_POOL_MIN_BUCKET_DEFAULT_SIZE; - params->CurPoolSize = 0; - params->PoolTrace = 0; - params->SharedLimits = nullptr; - params->Name = nullptr; - - umf_result_t ret = umfDisjointPoolParamsSetName(params, DEFAULT_NAME); - if (ret != UMF_RESULT_SUCCESS) { - delete params; - return ret; - } - - *hParams = params; - - return UMF_RESULT_SUCCESS; -} - -umf_result_t -umfDisjointPoolParamsDestroy(umf_disjoint_pool_params_handle_t hParams) { - if (hParams) { - delete[] hParams->Name; 
- delete hParams; - } - - return UMF_RESULT_SUCCESS; -} - -umf_result_t -umfDisjointPoolParamsSetSlabMinSize(umf_disjoint_pool_params_handle_t hParams, - size_t slabMinSize) { - if (!hParams) { - LOG_ERR("disjoint pool params handle is NULL"); - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - - hParams->SlabMinSize = slabMinSize; - return UMF_RESULT_SUCCESS; -} - -umf_result_t umfDisjointPoolParamsSetMaxPoolableSize( - umf_disjoint_pool_params_handle_t hParams, size_t maxPoolableSize) { - if (!hParams) { - LOG_ERR("disjoint pool params handle is NULL"); - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - - hParams->MaxPoolableSize = maxPoolableSize; - return UMF_RESULT_SUCCESS; -} - -umf_result_t -umfDisjointPoolParamsSetCapacity(umf_disjoint_pool_params_handle_t hParams, - size_t maxCapacity) { - if (!hParams) { - LOG_ERR("disjoint pool params handle is NULL"); - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - - hParams->Capacity = maxCapacity; - return UMF_RESULT_SUCCESS; -} - -umf_result_t -umfDisjointPoolParamsSetMinBucketSize(umf_disjoint_pool_params_handle_t hParams, - size_t minBucketSize) { - if (!hParams) { - LOG_ERR("disjoint pool params handle is NULL"); - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - - // minBucketSize parameter must be a power of 2 and greater than 0. - if (minBucketSize == 0 || (minBucketSize & (minBucketSize - 1))) { - LOG_ERR("minBucketSize must be a power of 2 and greater than 0"); - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - - hParams->MinBucketSize = minBucketSize; - return UMF_RESULT_SUCCESS; -} - -umf_result_t -umfDisjointPoolParamsSetTrace(umf_disjoint_pool_params_handle_t hParams, - int poolTrace) { - if (!hParams) { - LOG_ERR("disjoint pool params handle is NULL"); - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - - hParams->PoolTrace = poolTrace; - return UMF_RESULT_SUCCESS; -} - -umf_result_t umfDisjointPoolParamsSetSharedLimits( - umf_disjoint_pool_params_handle_t hParams, - umf_disjoint_pool_shared_limits_handle_t hSharedLimits) { - if (!hParams) { - LOG_ERR("disjoint pool params handle is NULL"); - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - - hParams->SharedLimits = hSharedLimits; - return UMF_RESULT_SUCCESS; -} - -umf_result_t -umfDisjointPoolParamsSetName(umf_disjoint_pool_params_handle_t hParams, - const char *name) { - if (!hParams) { - LOG_ERR("disjoint pool params handle is NULL"); - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - - char *newName = new char[std::strlen(name) + 1]; - if (newName == nullptr) { - LOG_ERR("cannot allocate memory for disjoint pool name"); - return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - delete[] hParams->Name; - hParams->Name = newName; - std::strcpy(hParams->Name, name); - - return UMF_RESULT_SUCCESS; -} - -// Allocations are a minimum of 4KB/64KB/2MB even when a smaller size is -// requested. The implementation distinguishes between allocations of size -// ChunkCutOff = (minimum-alloc-size / 2) and those that are larger. -// Allocation requests smaller than ChunkCutoff use chunks taken from a single -// coarse-grain allocation. Thus, for example, for a 64KB minimum allocation -// size, and 8-byte allocations, only 1 in ~8000 requests results in a new -// coarse-grain allocation. Freeing results only in a chunk of a larger -// allocation to be marked as available and no real return to the system. An -// allocation is returned to the system only when all chunks in the larger -// allocation are freed by the program. 
Allocations larger than ChunkCutOff use -// a separate coarse-grain allocation for each request. These are subject to -// "pooling". That is, when such an allocation is freed by the program it is -// retained in a pool. The pool is available for future allocations, which means -// there are fewer actual coarse-grain allocations/deallocations. - -// The largest size which is allocated via the allocator. -// Allocations with size > CutOff bypass the pool and -// go directly to the provider. -static constexpr size_t CutOff = (size_t)1 << 31; // 2GB - -// Aligns the pointer down to the specified alignment -// (e.g. returns 8 for Size = 13, Alignment = 8) -static void *AlignPtrDown(void *Ptr, const size_t Alignment) { - return reinterpret_cast((reinterpret_cast(Ptr)) & - (~(Alignment - 1))); -} - -// Aligns the pointer up to the specified alignment -// (e.g. returns 16 for Size = 13, Alignment = 8) -static void *AlignPtrUp(void *Ptr, const size_t Alignment) { - void *AlignedPtr = AlignPtrDown(Ptr, Alignment); - // Special case when the pointer is already aligned - if (Ptr == AlignedPtr) { - return Ptr; - } - return static_cast(AlignedPtr) + Alignment; -} - -// Aligns the value up to the specified alignment -// (e.g. returns 16 for Size = 13, Alignment = 8) -static size_t AlignUp(size_t Val, size_t Alignment) { - assert(Alignment > 0); - return (Val + Alignment - 1) & (~(Alignment - 1)); -} - -typedef struct MemoryProviderError { - umf_result_t code; -} MemoryProviderError_t; - -class Bucket; - -// Represents the allocated memory block of size 'SlabMinSize' -// Internally, it splits the memory block into chunks. The number of -// chunks depends of the size of a Bucket which created the Slab. -// Note: Bucket's methods are responsible for thread safety of Slab access, -// so no locking happens here. -class Slab { - - // Pointer to the allocated memory of SlabMinSize bytes - void *MemPtr; - - // Represents the current state of each chunk: - // if the bit is set then the chunk is allocated - // the chunk is free for allocation otherwise - std::vector Chunks; - - // Total number of allocated chunks at the moment. - size_t NumAllocated = 0; - - // The bucket which the slab belongs to - Bucket &bucket; - - using ListIter = std::list>::iterator; - - // Store iterator to the corresponding node in avail/unavail list - // to achieve O(1) removal - ListIter SlabListIter; - - // Hints where to start search for free chunk in a slab - size_t FirstFreeChunkIdx = 0; - - // Return the index of the first available chunk, SIZE_MAX otherwise - size_t FindFirstAvailableChunkIdx() const; - - // Register/Unregister the slab in the global slab address map. - void regSlab(Slab &); - void unregSlab(Slab &); - static void regSlabByAddr(void *, Slab &); - static void unregSlabByAddr(void *, Slab &); - - public: - Slab(Bucket &); - ~Slab(); - - void setIterator(ListIter It) { SlabListIter = It; } - ListIter getIterator() const { return SlabListIter; } - - size_t getNumAllocated() const { return NumAllocated; } - - // Get pointer to allocation that is one piece of this slab. - void *getChunk(); - - // Get pointer to allocation that is this entire slab. 
- void *getSlab(); - - void *getPtr() const { return MemPtr; } - void *getEnd() const; - - size_t getChunkSize() const; - size_t getNumChunks() const { return Chunks.size(); } - - bool hasAvail(); - - Bucket &getBucket(); - const Bucket &getBucket() const; - - void freeChunk(void *Ptr); -}; - -class Bucket { - const size_t Size; - - // List of slabs which have at least 1 available chunk. - std::list> AvailableSlabs; - - // List of slabs with 0 available chunk. - std::list> UnavailableSlabs; - - // Protects the bucket and all the corresponding slabs - std::mutex BucketLock; - - // Reference to the allocator context, used access memory allocation - // routines, slab map and etc. - DisjointPool::AllocImpl &OwnAllocCtx; - - // For buckets used in chunked mode, a counter of slabs in the pool. - // For allocations that use an entire slab each, the entries in the Available - // list are entries in the pool.Each slab is available for a new - // allocation.The size of the Available list is the size of the pool. - // For allocations that use slabs in chunked mode, slabs will be in the - // Available list if any one or more of their chunks is free.The entire slab - // is not necessarily free, just some chunks in the slab are free. To - // implement pooling we will allow one slab in the Available list to be - // entirely empty. Normally such a slab would have been freed. But - // now we don't, and treat this slab as "in the pool". - // When a slab becomes entirely free we have to decide whether to return it - // to the provider or keep it allocated. A simple check for size of the - // Available list is not sufficient to check whether any slab has been - // pooled yet. We would have to traverse the entire Available list and check - // if any of them is entirely free. Instead we keep a counter of entirely - // empty slabs within the Available list to speed up the process of checking - // if a slab in this bucket is already pooled. - size_t chunkedSlabsInPool; - - // Statistics - size_t allocPoolCount; - size_t freeCount; - size_t currSlabsInUse; - size_t currSlabsInPool; - size_t maxSlabsInPool; - - public: - // Statistics - size_t allocCount; - size_t maxSlabsInUse; - - Bucket(size_t Sz, DisjointPool::AllocImpl &AllocCtx) - : Size{Sz}, OwnAllocCtx{AllocCtx}, chunkedSlabsInPool(0), - allocPoolCount(0), freeCount(0), currSlabsInUse(0), - currSlabsInPool(0), maxSlabsInPool(0), allocCount(0), - maxSlabsInUse(0) {} - - // Get pointer to allocation that is one piece of an available slab in this - // bucket. - void *getChunk(bool &FromPool); - - // Get pointer to allocation that is a full slab in this bucket. - void *getSlab(bool &FromPool); - - // Return the allocation size of this bucket. - size_t getSize() const { return Size; } - - // Free an allocation that is one piece of a slab in this bucket. - void freeChunk(void *Ptr, Slab &Slab, bool &ToPool); - - // Free an allocation that is a full slab in this bucket. - void freeSlab(Slab &Slab, bool &ToPool); - - umf_memory_provider_handle_t getMemHandle(); - - DisjointPool::AllocImpl &getAllocCtx() { return OwnAllocCtx; } - - // Check whether an allocation to be freed can be placed in the pool. - bool CanPool(bool &ToPool); - - // The minimum allocation size for any slab. - size_t SlabMinSize(); - - // The allocation size for a slab in this bucket. - size_t SlabAllocSize(); - - // The minimum size of a chunk from this bucket's slabs. - size_t ChunkCutOff(); - - // The number of slabs in this bucket that can be in the pool. 
- size_t Capacity(); - - // The maximum allocation size subject to pooling. - size_t MaxPoolableSize(); - - // Update allocation count - void countAlloc(bool FromPool); - - // Update free count - void countFree(); - - // Update statistics of Available/Unavailable - void updateStats(int InUse, int InPool); - - // Print bucket statistics - void printStats(bool &TitlePrinted, const std::string &Label); - - private: - void onFreeChunk(Slab &, bool &ToPool); - - // Update statistics of pool usage, and indicate that an allocation was made - // from the pool. - void decrementPool(bool &FromPool); - - // Get a slab to be used for chunked allocations. - decltype(AvailableSlabs.begin()) getAvailSlab(bool &FromPool); - - // Get a slab that will be used as a whole for a single allocation. - decltype(AvailableSlabs.begin()) getAvailFullSlab(bool &FromPool); -}; - -class DisjointPool::AllocImpl { - // It's important for the map to be destroyed last after buckets and their - // slabs This is because slab's destructor removes the object from the map. - std::unordered_multimap KnownSlabs; - std::shared_timed_mutex KnownSlabsMapLock; - - // Handle to the memory provider - umf_memory_provider_handle_t MemHandle; - - // Store as unique_ptrs since Bucket is not Movable(because of std::mutex) - std::vector> Buckets; - - // Configuration for this instance - umf_disjoint_pool_params_t params; - - umf_disjoint_pool_shared_limits_t DefaultSharedLimits = { - (std::numeric_limits::max)(), 0}; - - // Used in algorithm for finding buckets - std::size_t MinBucketSizeExp; - - // Coarse-grain allocation min alignment - size_t ProviderMinPageSize; - - public: - AllocImpl(umf_memory_provider_handle_t hProvider, - umf_disjoint_pool_params_handle_t params) - : MemHandle{hProvider}, params(*params) { - - VALGRIND_DO_CREATE_MEMPOOL(this, 0, 0); - - // deep copy of the Name - this->params.Name = new char[std::strlen(params->Name) + 1]; - std::strcpy(this->params.Name, params->Name); - - // Generate buckets sized such as: 64, 96, 128, 192, ..., CutOff. - // Powers of 2 and the value halfway between the powers of 2. - auto Size1 = this->params.MinBucketSize; - // MinBucketSize cannot be larger than CutOff. - Size1 = std::min(Size1, CutOff); - // Buckets sized smaller than the bucket default size- 8 aren't needed. - Size1 = std::max(Size1, UMF_DISJOINT_POOL_MIN_BUCKET_DEFAULT_SIZE); - // Calculate the exponent for MinBucketSize used for finding buckets. 
- MinBucketSizeExp = (size_t)log2Utils(Size1); - auto Size2 = Size1 + Size1 / 2; - for (; Size2 < CutOff; Size1 *= 2, Size2 *= 2) { - Buckets.push_back(std::make_unique(Size1, *this)); - Buckets.push_back(std::make_unique(Size2, *this)); - } - Buckets.push_back(std::make_unique(CutOff, *this)); - - auto ret = umfMemoryProviderGetMinPageSize(hProvider, nullptr, - &ProviderMinPageSize); - if (ret != UMF_RESULT_SUCCESS) { - ProviderMinPageSize = 0; - } - } - - ~AllocImpl() { - VALGRIND_DO_DESTROY_MEMPOOL(this); - delete[] this->params.Name; - } - - void *allocate(size_t Size, size_t Alignment, bool &FromPool); - void *allocate(size_t Size, bool &FromPool); - void deallocate(void *Ptr, bool &ToPool); - - umf_memory_provider_handle_t getMemHandle() { return MemHandle; } - - std::shared_timed_mutex &getKnownSlabsMapLock() { - return KnownSlabsMapLock; - } - std::unordered_multimap &getKnownSlabs() { - return KnownSlabs; - } - - size_t SlabMinSize() { return params.SlabMinSize; }; - - umf_disjoint_pool_params_t &getParams() { return params; } - - umf_disjoint_pool_shared_limits_t *getLimits() { - if (params.SharedLimits) { - return params.SharedLimits; - } else { - return &DefaultSharedLimits; - } - }; - - void printStats(bool &TitlePrinted, size_t &HighBucketSize, - size_t &HighPeakSlabsInUse, const std::string &Label); - - private: - Bucket &findBucket(size_t Size); - std::size_t sizeToIdx(size_t Size); -}; - -static void *memoryProviderAlloc(umf_memory_provider_handle_t hProvider, - size_t size, size_t alignment = 0) { - void *ptr; - auto ret = umfMemoryProviderAlloc(hProvider, size, alignment, &ptr); - if (ret != UMF_RESULT_SUCCESS) { - throw MemoryProviderError{ret}; - } - annotate_memory_inaccessible(ptr, size); - return ptr; -} - -static void memoryProviderFree(umf_memory_provider_handle_t hProvider, - void *ptr) { - size_t size = 0; - - if (ptr) { - umf_alloc_info_t allocInfo = {NULL, 0, NULL}; - umf_result_t umf_result = umfMemoryTrackerGetAllocInfo(ptr, &allocInfo); - if (umf_result == UMF_RESULT_SUCCESS) { - size = allocInfo.baseSize; - } - } - - auto ret = umfMemoryProviderFree(hProvider, ptr, size); - if (ret != UMF_RESULT_SUCCESS) { - throw MemoryProviderError{ret}; - } -} - -bool operator==(const Slab &Lhs, const Slab &Rhs) { - return Lhs.getPtr() == Rhs.getPtr(); -} - -std::ostream &operator<<(std::ostream &Os, const Slab &Slab) { - Os << "Slab<" << Slab.getPtr() << ", " << Slab.getEnd() << ", " - << Slab.getBucket().getSize() << ">"; - return Os; -} - -Slab::Slab(Bucket &Bkt) - : // In case bucket size is not a multiple of SlabMinSize, we would have - // some padding at the end of the slab. - Chunks(Bkt.SlabMinSize() / Bkt.getSize()), NumAllocated{0}, - bucket(Bkt), SlabListIter{}, FirstFreeChunkIdx{0} { - auto SlabSize = Bkt.SlabAllocSize(); - MemPtr = memoryProviderAlloc(Bkt.getMemHandle(), SlabSize); - regSlab(*this); -} - -Slab::~Slab() { - try { - unregSlab(*this); - } catch (std::exception &e) { - LOG_ERR("DisjointPool: unexpected error: %s", e.what()); - } - - try { - memoryProviderFree(bucket.getMemHandle(), MemPtr); - } catch (MemoryProviderError &e) { - LOG_ERR("DisjointPool: error from memory provider: %d", e.code); - - if (e.code == UMF_RESULT_ERROR_MEMORY_PROVIDER_SPECIFIC) { - const char *message = ""; - int error = 0; - - try { - umfMemoryProviderGetLastNativeError( - umfGetLastFailedMemoryProvider(), &message, &error); - LOG_ERR("Native error msg: %s, native error code: %d", message, - error); - } catch (...) 
{ - // ignore any additional errors from logger - } - } - } -} - -// Return the index of the first available chunk, SIZE_MAX otherwise -size_t Slab::FindFirstAvailableChunkIdx() const { - // Use the first free chunk index as a hint for the search. - auto It = std::find_if(Chunks.begin() + FirstFreeChunkIdx, Chunks.end(), - [](auto x) { return !x; }); - if (It != Chunks.end()) { - return It - Chunks.begin(); - } - - return std::numeric_limits::max(); -} - -void *Slab::getChunk() { - // assert(NumAllocated != Chunks.size()); - - const size_t ChunkIdx = FindFirstAvailableChunkIdx(); - // Free chunk must exist, otherwise we would have allocated another slab - assert(ChunkIdx != (std::numeric_limits::max())); - - void *const FreeChunk = - (static_cast(getPtr())) + ChunkIdx * getChunkSize(); - Chunks[ChunkIdx] = true; - NumAllocated += 1; - - // Use the found index as the next hint - FirstFreeChunkIdx = ChunkIdx; - - return FreeChunk; -} - -void *Slab::getSlab() { return getPtr(); } - -Bucket &Slab::getBucket() { return bucket; } -const Bucket &Slab::getBucket() const { return bucket; } - -size_t Slab::getChunkSize() const { return bucket.getSize(); } - -void Slab::regSlabByAddr(void *Addr, Slab &Slab) { - auto &Lock = Slab.getBucket().getAllocCtx().getKnownSlabsMapLock(); - auto &Map = Slab.getBucket().getAllocCtx().getKnownSlabs(); - - std::lock_guard Lg(Lock); - Map.insert({Addr, Slab}); -} - -void Slab::unregSlabByAddr(void *Addr, Slab &Slab) { - auto &Lock = Slab.getBucket().getAllocCtx().getKnownSlabsMapLock(); - auto &Map = Slab.getBucket().getAllocCtx().getKnownSlabs(); - - std::lock_guard Lg(Lock); - - auto Slabs = Map.equal_range(Addr); - // At least the must get the current slab from the map. - assert(Slabs.first != Slabs.second && "Slab is not found"); - - for (auto It = Slabs.first; It != Slabs.second; ++It) { - if (It->second == Slab) { - Map.erase(It); - return; - } - } - - assert(false && "Slab is not found"); -} - -void Slab::regSlab(Slab &Slab) { - void *StartAddr = AlignPtrDown(Slab.getPtr(), bucket.SlabMinSize()); - void *EndAddr = static_cast(StartAddr) + bucket.SlabMinSize(); - - regSlabByAddr(StartAddr, Slab); - regSlabByAddr(EndAddr, Slab); -} - -void Slab::unregSlab(Slab &Slab) { - void *StartAddr = AlignPtrDown(Slab.getPtr(), bucket.SlabMinSize()); - void *EndAddr = static_cast(StartAddr) + bucket.SlabMinSize(); - - unregSlabByAddr(StartAddr, Slab); - unregSlabByAddr(EndAddr, Slab); -} - -void Slab::freeChunk(void *Ptr) { - // This method should be called through bucket(since we might remove the slab - // as a result), therefore all locks are done on that level. - - // Make sure that we're in the right slab - assert(Ptr >= getPtr() && Ptr < getEnd()); - - // Even if the pointer p was previously aligned, it's still inside the - // corresponding chunk, so we get the correct index here. - auto ChunkIdx = (static_cast(Ptr) - static_cast(MemPtr)) / - getChunkSize(); - - // Make sure that the chunk was allocated - assert(Chunks[ChunkIdx] && "double free detected"); - - Chunks[ChunkIdx] = false; - NumAllocated -= 1; - - if (ChunkIdx < FirstFreeChunkIdx) { - FirstFreeChunkIdx = ChunkIdx; - } -} - -void *Slab::getEnd() const { - return static_cast(getPtr()) + bucket.SlabMinSize(); -} - -bool Slab::hasAvail() { return NumAllocated != getNumChunks(); } - -// If a slab was available in the pool then note that the current pooled -// size has reduced by the size of a slab in this bucket. 
-void Bucket::decrementPool(bool &FromPool) { - FromPool = true; - updateStats(1, -1); - OwnAllocCtx.getLimits()->TotalSize -= SlabAllocSize(); -} - -auto Bucket::getAvailFullSlab(bool &FromPool) - -> decltype(AvailableSlabs.begin()) { - // Return a slab that will be used for a single allocation. - if (AvailableSlabs.size() == 0) { - auto It = AvailableSlabs.insert(AvailableSlabs.begin(), - std::make_unique(*this)); - (*It)->setIterator(It); - FromPool = false; - updateStats(1, 0); - } else { - decrementPool(FromPool); - } - - return AvailableSlabs.begin(); -} - -void *Bucket::getSlab(bool &FromPool) { - std::lock_guard Lg(BucketLock); - - auto SlabIt = getAvailFullSlab(FromPool); - auto *FreeSlab = (*SlabIt)->getSlab(); - auto It = - UnavailableSlabs.insert(UnavailableSlabs.begin(), std::move(*SlabIt)); - AvailableSlabs.erase(SlabIt); - (*It)->setIterator(It); - return FreeSlab; -} - -void Bucket::freeSlab(Slab &Slab, bool &ToPool) { - std::lock_guard Lg(BucketLock); - auto SlabIter = Slab.getIterator(); - assert(SlabIter != UnavailableSlabs.end()); - if (CanPool(ToPool)) { - auto It = - AvailableSlabs.insert(AvailableSlabs.begin(), std::move(*SlabIter)); - UnavailableSlabs.erase(SlabIter); - (*It)->setIterator(It); - } else { - UnavailableSlabs.erase(SlabIter); - } -} - -auto Bucket::getAvailSlab(bool &FromPool) -> decltype(AvailableSlabs.begin()) { - - if (AvailableSlabs.size() == 0) { - auto It = AvailableSlabs.insert(AvailableSlabs.begin(), - std::make_unique(*this)); - (*It)->setIterator(It); - - updateStats(1, 0); - FromPool = false; - } else { - if ((*(AvailableSlabs.begin()))->getNumAllocated() == 0) { - // If this was an empty slab, it was in the pool. - // Now it is no longer in the pool, so update count. - --chunkedSlabsInPool; - decrementPool(FromPool); - } else { - // Allocation from existing slab is treated as from pool for statistics. - FromPool = true; - } - } - - return AvailableSlabs.begin(); -} - -void *Bucket::getChunk(bool &FromPool) { - std::lock_guard Lg(BucketLock); - - auto SlabIt = getAvailSlab(FromPool); - auto *FreeChunk = (*SlabIt)->getChunk(); - - // If the slab is full, move it to unavailable slabs and update its iterator - if (!((*SlabIt)->hasAvail())) { - auto It = UnavailableSlabs.insert(UnavailableSlabs.begin(), - std::move(*SlabIt)); - AvailableSlabs.erase(SlabIt); - (*It)->setIterator(It); - } - - return FreeChunk; -} - -void Bucket::freeChunk(void *Ptr, Slab &Slab, bool &ToPool) { - std::lock_guard Lg(BucketLock); - - Slab.freeChunk(Ptr); - - onFreeChunk(Slab, ToPool); -} - -// The lock must be acquired before calling this method -void Bucket::onFreeChunk(Slab &Slab, bool &ToPool) { - ToPool = true; - - // In case if the slab was previously full and now has 1 available - // chunk, it should be moved to the list of available slabs - if (Slab.getNumAllocated() == (Slab.getNumChunks() - 1)) { - auto SlabIter = Slab.getIterator(); - assert(SlabIter != UnavailableSlabs.end()); - - auto It = - AvailableSlabs.insert(AvailableSlabs.begin(), std::move(*SlabIter)); - UnavailableSlabs.erase(SlabIter); - - (*It)->setIterator(It); - } - - // Check if slab is empty, and pool it if we can. - if (Slab.getNumAllocated() == 0) { - // The slab is now empty. - // If pool has capacity then put the slab in the pool. - // The ToPool parameter indicates whether the Slab will be put in the - // pool or freed. - if (!CanPool(ToPool)) { - // Note: since the slab is stored as unique_ptr, just remove it from - // the list to destroy the object. 
- auto It = Slab.getIterator(); - assert(It != AvailableSlabs.end()); - AvailableSlabs.erase(It); - } - } -} - -bool Bucket::CanPool(bool &ToPool) { - size_t NewFreeSlabsInBucket; - // Check if this bucket is used in chunked form or as full slabs. - bool chunkedBucket = getSize() <= ChunkCutOff(); - if (chunkedBucket) { - NewFreeSlabsInBucket = chunkedSlabsInPool + 1; - } else { - NewFreeSlabsInBucket = AvailableSlabs.size() + 1; - } - if (Capacity() >= NewFreeSlabsInBucket) { - size_t PoolSize = OwnAllocCtx.getLimits()->TotalSize; - while (true) { - size_t NewPoolSize = PoolSize + SlabAllocSize(); - - if (OwnAllocCtx.getLimits()->MaxSize < NewPoolSize) { - break; - } - - if (OwnAllocCtx.getLimits()->TotalSize.compare_exchange_strong( - PoolSize, NewPoolSize)) { - if (chunkedBucket) { - ++chunkedSlabsInPool; - } - - updateStats(-1, 1); - ToPool = true; - return true; - } - } - } - - updateStats(-1, 0); - ToPool = false; - return false; -} - -umf_memory_provider_handle_t Bucket::getMemHandle() { - return OwnAllocCtx.getMemHandle(); -} - -size_t Bucket::SlabMinSize() { return OwnAllocCtx.getParams().SlabMinSize; } - -size_t Bucket::SlabAllocSize() { return std::max(getSize(), SlabMinSize()); } - -size_t Bucket::Capacity() { - // For buckets used in chunked mode, just one slab in pool is sufficient. - // For larger buckets, the capacity could be more and is adjustable. - if (getSize() <= ChunkCutOff()) { - return 1; - } else { - return OwnAllocCtx.getParams().Capacity; - } -} - -size_t Bucket::MaxPoolableSize() { - return OwnAllocCtx.getParams().MaxPoolableSize; -} - -size_t Bucket::ChunkCutOff() { return SlabMinSize() / 2; } - -void Bucket::countAlloc(bool FromPool) { - ++allocCount; - if (FromPool) { - ++allocPoolCount; - } -} - -void Bucket::countFree() { ++freeCount; } - -void Bucket::updateStats(int InUse, int InPool) { - if (OwnAllocCtx.getParams().PoolTrace == 0) { - return; - } - currSlabsInUse += InUse; - maxSlabsInUse = std::max(currSlabsInUse, maxSlabsInUse); - currSlabsInPool += InPool; - maxSlabsInPool = std::max(currSlabsInPool, maxSlabsInPool); - // Increment or decrement current pool sizes based on whether - // slab was added to or removed from pool. 
- OwnAllocCtx.getParams().CurPoolSize += InPool * SlabAllocSize(); -} - -void Bucket::printStats(bool &TitlePrinted, const std::string &Label) { - if (allocCount) { - if (!TitlePrinted) { - std::cout << Label << " memory statistics\n"; - std::cout << std::setw(14) << "Bucket Size" << std::setw(12) - << "Allocs" << std::setw(12) << "Frees" << std::setw(18) - << "Allocs from Pool" << std::setw(20) - << "Peak Slabs in Use" << std::setw(21) - << "Peak Slabs in Pool" << std::endl; - TitlePrinted = true; - } - std::cout << std::setw(14) << getSize() << std::setw(12) << allocCount - << std::setw(12) << freeCount << std::setw(18) - << allocPoolCount << std::setw(20) << maxSlabsInUse - << std::setw(21) << maxSlabsInPool << std::endl; - } -} - -void *DisjointPool::AllocImpl::allocate(size_t Size, bool &FromPool) try { - void *Ptr; - - if (Size == 0) { - return nullptr; - } - - FromPool = false; - if (Size > getParams().MaxPoolableSize) { - Ptr = memoryProviderAlloc(getMemHandle(), Size); - annotate_memory_undefined(Ptr, Size); - return Ptr; - } - - auto &Bucket = findBucket(Size); - - if (Size > Bucket.ChunkCutOff()) { - Ptr = Bucket.getSlab(FromPool); - } else { - Ptr = Bucket.getChunk(FromPool); - } - - if (getParams().PoolTrace > 1) { - Bucket.countAlloc(FromPool); - } - - VALGRIND_DO_MEMPOOL_ALLOC(this, Ptr, Size); - annotate_memory_undefined(Ptr, Bucket.getSize()); - - return Ptr; -} catch (MemoryProviderError &e) { - umf::getPoolLastStatusRef() = e.code; - return nullptr; -} - -void *DisjointPool::AllocImpl::allocate(size_t Size, size_t Alignment, - bool &FromPool) try { - void *Ptr; - - if (Size == 0) { - return nullptr; - } - - if (Alignment <= 1) { - return allocate(Size, FromPool); - } - - size_t AlignedSize; - if (Alignment <= ProviderMinPageSize) { - // This allocation will be served from a Bucket which size is multiple - // of Alignment and Slab address is aligned to ProviderMinPageSize - // so the address will be properly aligned. - AlignedSize = (Size > 1) ? AlignUp(Size, Alignment) : Alignment; - } else { - // Slabs are only aligned to ProviderMinPageSize, we need to compensate - // for that in case the allocation is within pooling limit. - // TODO: consider creating properly-aligned Slabs on demand - AlignedSize = Size + Alignment - 1; - } - - // Check if requested allocation size is within pooling limit. - // If not, just request aligned pointer from the system. - FromPool = false; - if (AlignedSize > getParams().MaxPoolableSize) { - Ptr = memoryProviderAlloc(getMemHandle(), Size, Alignment); - annotate_memory_undefined(Ptr, Size); - return Ptr; - } - - auto &Bucket = findBucket(AlignedSize); - - if (AlignedSize > Bucket.ChunkCutOff()) { - Ptr = Bucket.getSlab(FromPool); - } else { - Ptr = Bucket.getChunk(FromPool); - } - - if (getParams().PoolTrace > 1) { - Bucket.countAlloc(FromPool); - } - - VALGRIND_DO_MEMPOOL_ALLOC(this, AlignPtrUp(Ptr, Alignment), Size); - annotate_memory_undefined(AlignPtrUp(Ptr, Alignment), Size); - return AlignPtrUp(Ptr, Alignment); -} catch (MemoryProviderError &e) { - umf::getPoolLastStatusRef() = e.code; - return nullptr; -} - -std::size_t DisjointPool::AllocImpl::sizeToIdx(size_t Size) { - assert(Size <= CutOff && "Unexpected size"); - assert(Size > 0 && "Unexpected size"); - - size_t MinBucketSize = (size_t)1 << MinBucketSizeExp; - if (Size < MinBucketSize) { - return 0; - } - - // Get the position of the leftmost set bit. 
- size_t position = getLeftmostSetBitPos(Size); - - auto isPowerOf2 = 0 == (Size & (Size - 1)); - auto largerThanHalfwayBetweenPowersOf2 = - !isPowerOf2 && bool((Size - 1) & (uint64_t(1) << (position - 1))); - auto index = (position - MinBucketSizeExp) * 2 + (int)(!isPowerOf2) + - (int)largerThanHalfwayBetweenPowersOf2; - - return index; -} - -Bucket &DisjointPool::AllocImpl::findBucket(size_t Size) { - auto calculatedIdx = sizeToIdx(Size); - assert((*(Buckets[calculatedIdx])).getSize() >= Size); - if (calculatedIdx > 0) { - assert((*(Buckets[calculatedIdx - 1])).getSize() < Size); - } - - return *(Buckets[calculatedIdx]); -} - -void DisjointPool::AllocImpl::deallocate(void *Ptr, bool &ToPool) { - auto *SlabPtr = AlignPtrDown(Ptr, SlabMinSize()); - - // Lock the map on read - std::shared_lock Lk(getKnownSlabsMapLock()); - - ToPool = false; - auto Slabs = getKnownSlabs().equal_range(SlabPtr); - if (Slabs.first == Slabs.second) { - Lk.unlock(); - memoryProviderFree(getMemHandle(), Ptr); - return; - } - - for (auto It = Slabs.first; It != Slabs.second; ++It) { - // The slab object won't be deleted until it's removed from the map which is - // protected by the lock, so it's safe to access it here. - auto &Slab = It->second; - if (Ptr >= Slab.getPtr() && Ptr < Slab.getEnd()) { - // Unlock the map before freeing the chunk, it may be locked on write - // there - Lk.unlock(); - auto &Bucket = Slab.getBucket(); - - if (getParams().PoolTrace > 1) { - Bucket.countFree(); - } - - VALGRIND_DO_MEMPOOL_FREE(this, Ptr); - annotate_memory_inaccessible(Ptr, Bucket.getSize()); - if (Bucket.getSize() <= Bucket.ChunkCutOff()) { - Bucket.freeChunk(Ptr, Slab, ToPool); - } else { - Bucket.freeSlab(Slab, ToPool); - } - - return; - } - } - - Lk.unlock(); - // There is a rare case when we have a pointer from system allocation next - // to some slab with an entry in the map. So we find a slab - // but the range checks fail. - memoryProviderFree(getMemHandle(), Ptr); -} - -void DisjointPool::AllocImpl::printStats(bool &TitlePrinted, - size_t &HighBucketSize, - size_t &HighPeakSlabsInUse, - const std::string &MTName) { - HighBucketSize = 0; - HighPeakSlabsInUse = 0; - for (auto &B : Buckets) { - (*B).printStats(TitlePrinted, MTName); - HighPeakSlabsInUse = std::max((*B).maxSlabsInUse, HighPeakSlabsInUse); - if ((*B).allocCount) { - HighBucketSize = std::max((*B).SlabAllocSize(), HighBucketSize); - } - } -} - -umf_result_t -DisjointPool::initialize(umf_memory_provider_handle_t provider, - umf_disjoint_pool_params_handle_t parameters) { - if (!provider) { - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - // MinBucketSize parameter must be a power of 2 for bucket sizes - // to generate correctly. - if (!parameters->MinBucketSize || - !((parameters->MinBucketSize & (parameters->MinBucketSize - 1)) == 0)) { - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - - impl = std::make_unique(provider, parameters); - return UMF_RESULT_SUCCESS; -} - -void *DisjointPool::malloc(size_t size) { // For full-slab allocations indicates - // whether slab is from Pool. - bool FromPool; - auto Ptr = impl->allocate(size, FromPool); - - if (impl->getParams().PoolTrace > 2) { - auto MT = impl->getParams().Name; - std::cout << "Allocated " << std::setw(8) << size << " " << MT - << " bytes from " << (FromPool ? 
"Pool" : "Provider") << " ->" - << Ptr << std::endl; - } - return Ptr; -} - -void *DisjointPool::calloc(size_t, size_t) { - // Not supported - umf::getPoolLastStatusRef() = UMF_RESULT_ERROR_NOT_SUPPORTED; - return NULL; -} - -void *DisjointPool::realloc(void *, size_t) { - // Not supported - umf::getPoolLastStatusRef() = UMF_RESULT_ERROR_NOT_SUPPORTED; - return NULL; -} - -void *DisjointPool::aligned_malloc(size_t size, size_t alignment) { - bool FromPool; - auto Ptr = impl->allocate(size, alignment, FromPool); - - if (impl->getParams().PoolTrace > 2) { - auto MT = impl->getParams().Name; - std::cout << "Allocated " << std::setw(8) << size << " " << MT - << " bytes aligned at " << alignment << " from " - << (FromPool ? "Pool" : "Provider") << " ->" << Ptr - << std::endl; - } - return Ptr; -} - -size_t DisjointPool::malloc_usable_size(void *) { - // Not supported - return 0; -} - -umf_result_t DisjointPool::free(void *ptr) try { - bool ToPool; - impl->deallocate(ptr, ToPool); - - if (impl->getParams().PoolTrace > 2) { - auto MT = impl->getParams().Name; - std::cout << "Freed " << MT << " " << ptr << " to " - << (ToPool ? "Pool" : "Provider") - << ", Current total pool size " - << impl->getLimits()->TotalSize.load() - << ", Current pool size for " << MT << " " - << impl->getParams().CurPoolSize << "\n"; - } - return UMF_RESULT_SUCCESS; -} catch (MemoryProviderError &e) { - return e.code; -} - -umf_result_t DisjointPool::get_last_allocation_error() { - return umf::getPoolLastStatusRef(); -} - -DisjointPool::DisjointPool() {} - -// Define destructor for use with unique_ptr -DisjointPool::~DisjointPool() { - bool TitlePrinted = false; - size_t HighBucketSize; - size_t HighPeakSlabsInUse; - if (impl->getParams().PoolTrace > 1) { - auto name = impl->getParams().Name; - try { // cannot throw in destructor - impl->printStats(TitlePrinted, HighBucketSize, HighPeakSlabsInUse, - name); - if (TitlePrinted) { - std::cout << "Current Pool Size " - << impl->getLimits()->TotalSize.load() << std::endl; - std::cout << "Suggested Setting=;" - << std::string(1, (char)tolower(name[0])) - << std::string(name + 1) << ":" << HighBucketSize - << "," << HighPeakSlabsInUse << ",64K" << std::endl; - } - } catch (...) { // ignore exceptions - } - } -} - -static umf_memory_pool_ops_t UMF_DISJOINT_POOL_OPS = - umf::poolMakeCOps(); - -umf_memory_pool_ops_t *umfDisjointPoolOps(void) { - return &UMF_DISJOINT_POOL_OPS; -} diff --git a/src/pool/pool_disjoint_internal.h b/src/pool/pool_disjoint_internal.h new file mode 100644 index 000000000..3d656689c --- /dev/null +++ b/src/pool/pool_disjoint_internal.h @@ -0,0 +1,176 @@ +/* + * Copyright (C) 2025 Intel Corporation + * + * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +*/ + +#ifndef UMF_POOL_DISJOINT_INTERNAL_H +#define UMF_POOL_DISJOINT_INTERNAL_H 1 + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "critnib/critnib.h" +#include "uthash/utlist.h" + +#include "base_alloc_global.h" +#include "provider/provider_tracking.h" +#include "utils_common.h" +#include "utils_concurrency.h" +#include "utils_log.h" +#include "utils_math.h" + +typedef struct bucket_t bucket_t; +typedef struct slab_t slab_t; +typedef struct slab_list_item_t slab_list_item_t; +typedef struct disjoint_pool_t disjoint_pool_t; + +typedef struct bucket_t { + size_t size; + + // Linked list of slabs which have at least 1 available chunk. 
+ // We always count available slabs as an optimization. + slab_list_item_t *available_slabs; + size_t available_slabs_num; + + // Linked list of slabs with 0 available chunks + slab_list_item_t *unavailable_slabs; + + // Protects the bucket and all the corresponding slabs + utils_mutex_t bucket_lock; + + // Reference to the allocator context, used to access memory allocation + // routines, slab map and etc. + disjoint_pool_t *pool; + + umf_disjoint_pool_shared_limits_handle_t shared_limits; + + // For buckets used in chunked mode, a counter of slabs in the pool. + // For allocations that use an entire slab each, the entries in the + // "available" list are entries in the pool. Each slab is available for a + // new allocation. The size of the available list is the size of the pool. + // + // For allocations that use slabs in chunked mode, slabs will be in the + // "available" list if any one or more of their chunks are free. The entire + // slab is not necessarily free, just some chunks in the slab are free. To + // implement pooling, we will allow one slab in the "available" list to be + // entirely empty, and treat this slab as "in the pool". + // When a slab becomes entirely free, we must decide whether to return it + // to the provider or keep it allocated. We keep a counter of entirely + // empty slabs within the "available" list to speed up the process of + // checking if a slab in this bucket is already pooled. + size_t chunked_slabs_in_pool; + + // Statistics + size_t alloc_count; + size_t alloc_pool_count; + size_t free_count; + size_t curr_slabs_in_use; + size_t curr_slabs_in_pool; + size_t max_slabs_in_pool; + size_t max_slabs_in_use; +} bucket_t; + +typedef struct slab_list_item_t { + slab_t *val; + struct slab_list_item_t *prev, *next; +} slab_list_item_t; + +// Represents the allocated memory block of size 'slab_min_size' +// Internally, it splits the memory block into chunks. The number of +// chunks depends on the size of a Bucket which created the Slab. +// Note: Bucket's methods are responsible for thread safety of Slab access, +// so no locking happens here. +typedef struct slab_t { + // Pointer to the allocated memory of slab_min_size bytes + void *mem_ptr; + size_t slab_size; + + // Represents the current state of each chunk: if the bit is set, the + // chunk is allocated; otherwise, the chunk is free for allocation + bool *chunks; + size_t num_chunks_total; + + // Total number of allocated chunks at the moment. + size_t num_chunks_allocated; + + // The bucket which the slab belongs to + bucket_t *bucket; + + // Hints where to start search for free chunk in a slab + size_t first_free_chunk_idx; + + // Store iterator to the corresponding node in avail/unavail list + // to achieve O(1) removal + slab_list_item_t iter; +} slab_t; + +typedef struct umf_disjoint_pool_shared_limits_t { + size_t max_size; + size_t total_size; // requires atomic access +} umf_disjoint_pool_shared_limits_t; + +typedef struct umf_disjoint_pool_params_t { + // Minimum allocation size that will be requested from the memory provider. + size_t slab_min_size; + + // Allocations up to this limit will be subject to chunking/pooling + size_t max_poolable_size; + + // When pooling, each bucket will hold a max of 'capacity' unfreed slabs + size_t capacity; + + // Holds the minimum bucket size valid for allocation of a memory type. + // This value must be a power of 2. + size_t min_bucket_size; + + // Holds size of the pool managed by the allocator. 
+ size_t cur_pool_size; + + // Whether to print pool usage statistics + int pool_trace; + + // Memory limits that can be shared between multiple pool instances, + // i.e. if multiple pools use the same shared_limits sum of those pools' + // sizes cannot exceed max_size. + umf_disjoint_pool_shared_limits_handle_t shared_limits; + + // Name used in traces + char *name; +} umf_disjoint_pool_params_t; + +typedef struct disjoint_pool_t { + // Keep the list of known slabs to quickly find required one during the + // free() + critnib *known_slabs; // (void *, slab_t *) + + // Handle to the memory provider + umf_memory_provider_handle_t provider; + + // Array of bucket_t* + bucket_t **buckets; + size_t buckets_num; + + // Configuration for this instance + umf_disjoint_pool_params_t params; + + umf_disjoint_pool_shared_limits_handle_t default_shared_limits; + + // Used in algorithm for finding buckets + size_t min_bucket_size_exp; + + // Coarse-grain allocation min alignment + size_t provider_min_page_size; +} disjoint_pool_t; + +#endif // UMF_POOL_DISJOINT_INTERNAL_H diff --git a/src/utils/utils_common.c b/src/utils/utils_common.c index eaf5420fc..225c02d2c 100644 --- a/src/utils/utils_common.c +++ b/src/utils/utils_common.c @@ -128,3 +128,6 @@ umf_result_t utils_translate_flags(unsigned in_flags, unsigned max, *out_flags = out_f; return UMF_RESULT_SUCCESS; } + +size_t utils_max(size_t a, size_t b) { return a > b ? a : b; } +size_t utils_min(size_t a, size_t b) { return a < b ? a : b; } diff --git a/src/utils/utils_common.h b/src/utils/utils_common.h index 6af5a08d9..7824e74af 100644 --- a/src/utils/utils_common.h +++ b/src/utils/utils_common.h @@ -38,6 +38,8 @@ typedef enum umf_purge_advise_t { expression; \ } while (0) +#define IS_POWER_OF_2(value) ((value) != 0 && ((value) & ((value)-1)) == 0) + #define IS_ALIGNED(value, align) \ ((align == 0 || (((value) & ((align)-1)) == 0))) #define IS_NOT_ALIGNED(value, align) \ @@ -176,6 +178,10 @@ int utils_fallocate(int fd, long offset, long len); long utils_get_size_threshold(char *str_threshold); +size_t utils_max(size_t a, size_t b); + +size_t utils_min(size_t a, size_t b); + #ifdef __cplusplus } #endif diff --git a/src/utils/utils_concurrency.h b/src/utils/utils_concurrency.h index 155184cc4..910c859b0 100644 --- a/src/utils/utils_concurrency.h +++ b/src/utils/utils_concurrency.h @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2023-2024 Intel Corporation + * Copyright (C) 2023-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
@@ -11,6 +11,7 @@ #define UMF_UTILS_CONCURRENCY_H 1 #include
+#include
#ifdef _WIN32 #include
@@ -45,11 +46,27 @@ typedef struct utils_mutex_t { } utils_mutex_t; size_t utils_mutex_get_size(void);
-utils_mutex_t *utils_mutex_init(void *ptr);
+utils_mutex_t *utils_mutex_init(utils_mutex_t *ptr);
void utils_mutex_destroy_not_free(utils_mutex_t *m); int utils_mutex_lock(utils_mutex_t *mutex); int utils_mutex_unlock(utils_mutex_t *mutex);
+typedef struct utils_rwlock_t {
+#ifdef _WIN32
+ // Slim Reader/Writer lock
+ SRWLOCK lock;
+#else
+ pthread_rwlock_t rwlock;
+#endif
+} utils_rwlock_t;
+
+utils_rwlock_t *utils_rwlock_init(utils_rwlock_t *ptr);
+void utils_rwlock_destroy_not_free(utils_rwlock_t *rwlock);
+int utils_read_lock(utils_rwlock_t *rwlock);
+int utils_write_lock(utils_rwlock_t *rwlock);
+int utils_read_unlock(utils_rwlock_t *rwlock);
+int utils_write_unlock(utils_rwlock_t *rwlock);
+
#if defined(_WIN32) #define UTIL_ONCE_FLAG INIT_ONCE #define UTIL_ONCE_FLAG_INIT INIT_ONCE_STATIC_INIT
@@ -61,11 +78,13 @@ int utils_mutex_unlock(utils_mutex_t *mutex); void utils_init_once(UTIL_ONCE_FLAG *flag, void (*onceCb)(void)); #if defined(_WIN32)
+
static __inline unsigned char utils_lssb_index(long long value) { unsigned long ret; _BitScanForward64(&ret, value); return (unsigned char)ret; }
+
static __inline unsigned char utils_mssb_index(long long value) { unsigned long ret; _BitScanReverse64(&ret, value);
@@ -81,15 +100,25 @@ static __inline unsigned char utils_mssb_index(long long value) { #define utils_atomic_store_release(object, desired) \ InterlockedExchange64((LONG64 volatile *)object, (LONG64)desired)
+
#define utils_atomic_increment(object) \ InterlockedIncrement64((LONG64 volatile *)object)
+
#define utils_atomic_decrement(object) \ InterlockedDecrement64((LONG64 volatile *)object)
+
#define utils_fetch_and_add64(ptr, value) \ InterlockedExchangeAdd64((LONG64 *)(ptr), value)
-#else
+
+// NOTE: the Windows version takes its arguments in a different order
+#define utils_compare_exchange(object, desired, expected) \
+ InterlockedCompareExchange64((LONG64 volatile *)object, *expected, *desired)
+
+#else // !defined(_WIN32)
+
#define utils_lssb_index(x) ((unsigned char)__builtin_ctzll(x)) #define utils_mssb_index(x) ((unsigned char)(63 - __builtin_clzll(x)))
+
#define utils_atomic_load_acquire(object, dest) \ do { \ utils_annotate_acquire((void *)object); \
@@ -103,12 +132,19 @@ static __inline unsigned char utils_mssb_index(long long value) { } while (0) #define utils_atomic_increment(object) \
- __atomic_add_fetch(object, 1, __ATOMIC_ACQ_REL)
+ __atomic_add_fetch(object, 1, memory_order_acq_rel)
+
#define utils_atomic_decrement(object) \
- __atomic_sub_fetch(object, 1, __ATOMIC_ACQ_REL)
-#define utils_fetch_and_add64 __sync_fetch_and_add
+ __atomic_sub_fetch(object, 1, memory_order_acq_rel)
-#endif
+#define utils_fetch_and_add64(object, value) \
+ __atomic_fetch_add(object, value, memory_order_acq_rel)
+
+#define utils_compare_exchange(object, expected, desired) \
+ __atomic_compare_exchange(object, expected, desired, 0 /* strong */, \
+ memory_order_acq_rel, memory_order_relaxed)
+
+#endif // !defined(_WIN32)
#ifdef __cplusplus }
diff --git a/src/utils/utils_posix_concurrency.c b/src/utils/utils_posix_concurrency.c
index 531e09c10..44a317361 100644
--- a/src/utils/utils_posix_concurrency.c
+++ b/src/utils/utils_posix_concurrency.c
@@ -11,10 +11,11 @@ #include #include "utils_concurrency.h"
+#include "utils_log.h"
size_t utils_mutex_get_size(void) { return sizeof(pthread_mutex_t); }
-utils_mutex_t *utils_mutex_init(void *ptr) {
+utils_mutex_t *utils_mutex_init(utils_mutex_t *ptr) {
pthread_mutex_t *mutex = (pthread_mutex_t *)ptr; int ret = pthread_mutex_init(mutex, NULL); return ret == 0 ? ((utils_mutex_t *)mutex) : NULL;
@@ -23,7 +24,9 @@ utils_mutex_t *utils_mutex_init(void *ptr) { void utils_mutex_destroy_not_free(utils_mutex_t *m) { pthread_mutex_t *mutex = (pthread_mutex_t *)m; int ret = pthread_mutex_destroy(mutex);
- (void)ret; // TODO: add logging
+ if (ret) {
+ LOG_ERR("pthread_mutex_destroy failed");
+ }
} int utils_mutex_lock(utils_mutex_t *m) {
@@ -37,3 +40,33 @@ int utils_mutex_unlock(utils_mutex_t *m) { void utils_init_once(UTIL_ONCE_FLAG *flag, void (*oneCb)(void)) { pthread_once(flag, oneCb); }
+
+utils_rwlock_t *utils_rwlock_init(utils_rwlock_t *ptr) {
+ pthread_rwlock_t *rwlock = (pthread_rwlock_t *)ptr;
+ int ret = pthread_rwlock_init(rwlock, NULL);
+ return ret == 0 ? ((utils_rwlock_t *)rwlock) : NULL;
+}
+
+void utils_rwlock_destroy_not_free(utils_rwlock_t *ptr) {
+ pthread_rwlock_t *rwlock = (pthread_rwlock_t *)ptr;
+ int ret = pthread_rwlock_destroy(rwlock);
+ if (ret) {
+ LOG_ERR("pthread_rwlock_destroy failed");
+ }
+}
+
+int utils_read_lock(utils_rwlock_t *rwlock) {
+ return pthread_rwlock_rdlock((pthread_rwlock_t *)rwlock);
+}
+
+int utils_write_lock(utils_rwlock_t *rwlock) {
+ return pthread_rwlock_wrlock((pthread_rwlock_t *)rwlock);
+}
+
+int utils_read_unlock(utils_rwlock_t *rwlock) {
+ return pthread_rwlock_unlock((pthread_rwlock_t *)rwlock);
+}
+
+int utils_write_unlock(utils_rwlock_t *rwlock) {
+ return pthread_rwlock_unlock((pthread_rwlock_t *)rwlock);
+}
diff --git a/src/utils/utils_windows_concurrency.c b/src/utils/utils_windows_concurrency.c
index e2cc574a9..faa302be3 100644
--- a/src/utils/utils_windows_concurrency.c
+++ b/src/utils/utils_windows_concurrency.c
@@ -11,35 +11,61 @@ size_t utils_mutex_get_size(void) { return sizeof(utils_mutex_t); }
-utils_mutex_t *utils_mutex_init(void *ptr) {
- utils_mutex_t *mutex_internal = (utils_mutex_t *)ptr;
- InitializeCriticalSection(&mutex_internal->lock);
- return (utils_mutex_t *)mutex_internal;
+utils_mutex_t *utils_mutex_init(utils_mutex_t *mutex) {
+ InitializeCriticalSection(&mutex->lock);
+ return mutex;
} void utils_mutex_destroy_not_free(utils_mutex_t *mutex) {
- utils_mutex_t *mutex_internal = (utils_mutex_t *)mutex;
- DeleteCriticalSection(&mutex_internal->lock);
+ DeleteCriticalSection(&mutex->lock);
} int utils_mutex_lock(utils_mutex_t *mutex) {
- utils_mutex_t *mutex_internal = (utils_mutex_t *)mutex;
- EnterCriticalSection(&mutex_internal->lock);
+ EnterCriticalSection(&mutex->lock);
- if (mutex_internal->lock.RecursionCount > 1) {
- LeaveCriticalSection(&mutex_internal->lock);
+ if (mutex->lock.RecursionCount > 1) {
+ LeaveCriticalSection(&mutex->lock);
/* deadlock detected */
- return -1;
+ abort();
} return 0; } int utils_mutex_unlock(utils_mutex_t *mutex) {
- utils_mutex_t *mutex_internal = (utils_mutex_t *)mutex;
- LeaveCriticalSection(&mutex_internal->lock);
+ LeaveCriticalSection(&mutex->lock);
return 0; }
+utils_rwlock_t *utils_rwlock_init(utils_rwlock_t *rwlock) {
+ InitializeSRWLock(&rwlock->lock);
+ return rwlock; // never fails
+}
+
+void utils_rwlock_destroy_not_free(utils_rwlock_t *rwlock) {
+ // there is no call to destroy an SRW lock
+ (void)rwlock;
+}
+
+int utils_read_lock(utils_rwlock_t *rwlock) {
+ AcquireSRWLockShared(&rwlock->lock);
+ return 0; // never fails
+}
+
+int
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index cdbe2425f..ecdde95e1 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -71,10 +71,6 @@ function(build_umf_test)
         set(CPL_DEFS ${CPL_DEFS} UMF_POOL_SCALABLE_ENABLED=1)
     endif()
-    if(UMF_BUILD_LIBUMF_POOL_DISJOINT)
-        set(CPL_DEFS ${CPL_DEFS} UMF_POOL_DISJOINT_ENABLED=1)
-    endif()
-
     set(TEST_LIBS
         umf_test_common
         ${ARG_LIBS}
@@ -192,10 +188,6 @@ if(UMF_BUILD_SHARED_LIBRARY)
     endif()
 endif()
-if(UMF_BUILD_LIBUMF_POOL_DISJOINT)
-    set(LIB_DISJOINT_POOL disjoint_pool)
-endif()
-
 if(UMF_BUILD_SHARED_LIBRARY)
     # if build as shared library, ba symbols won't be visible in tests
     set(BA_SOURCES_FOR_TEST ${BA_SOURCES})
@@ -237,32 +229,29 @@ add_umf_test(
     SRCS coarse_lib.cpp ${BA_SOURCES_FOR_TEST}
     LIBS ${UMF_UTILS_FOR_TEST} coarse)
-if(UMF_BUILD_LIBUMF_POOL_DISJOINT)
-    add_umf_test(
-        NAME disjointPool
-        SRCS pools/disjoint_pool.cpp malloc_compliance_tests.cpp
-        LIBS disjoint_pool)
+add_umf_test(
+    NAME disjoint_pool
+    SRCS pools/disjoint_pool.cpp malloc_compliance_tests.cpp
+         ${BA_SOURCES_FOR_TEST}
+    LIBS ${UMF_UTILS_FOR_TEST})
+
+add_umf_test(
+    NAME c_api_disjoint_pool
+    SRCS c_api/disjoint_pool.c ${BA_SOURCES_FOR_TEST}
+    LIBS ${UMF_UTILS_FOR_TEST})
+
+if(LINUX AND (NOT UMF_DISABLE_HWLOC))
+    # this test uses the file provider
     add_umf_test(
-        NAME c_api_disjoint_pool
-        SRCS c_api/disjoint_pool.c
-        LIBS disjoint_pool)
-    if(LINUX AND (NOT UMF_DISABLE_HWLOC))
-        # this test uses the file provider
-        add_umf_test(
-            NAME disjointPoolFileProv
-            SRCS disjointPoolFileProv.cpp
-            LIBS disjoint_pool)
-    endif()
+        NAME disjoint_pool_file_prov
+        SRCS disjoint_pool_file_prov.cpp ${BA_SOURCES_FOR_TEST}
+        LIBS ${UMF_UTILS_FOR_TEST})
 endif()
-if(UMF_BUILD_LIBUMF_POOL_DISJOINT
-   AND UMF_POOL_JEMALLOC_ENABLED
+if(UMF_POOL_JEMALLOC_ENABLED
    AND UMF_POOL_SCALABLE_ENABLED
    AND (NOT UMF_DISABLE_HWLOC))
-    add_umf_test(
-        NAME c_api_multi_pool
-        SRCS c_api/multi_pool.c
-        LIBS disjoint_pool)
+    add_umf_test(NAME c_api_multi_pool SRCS c_api/multi_pool.c)
 endif()
 if(UMF_POOL_JEMALLOC_ENABLED AND (NOT UMF_DISABLE_HWLOC))
@@ -293,7 +282,7 @@ if(LINUX AND (NOT UMF_DISABLE_HWLOC)) # OS-specific functions are implemented
     add_umf_test(
         NAME provider_os_memory
         SRCS provider_os_memory.cpp ${BA_SOURCES_FOR_TEST}
-        LIBS ${UMF_UTILS_FOR_TEST} ${LIB_DISJOINT_POOL})
+        LIBS ${UMF_UTILS_FOR_TEST})
     add_umf_test(
         NAME provider_os_memory_multiple_numa_nodes
         SRCS provider_os_memory_multiple_numa_nodes.cpp
@@ -618,37 +607,33 @@ if(LINUX)
     # TODO add IPC tests for CUDA
-    if(UMF_BUILD_GPU_TESTS
-       AND UMF_BUILD_LEVEL_ZERO_PROVIDER
-       AND UMF_BUILD_LIBUMF_POOL_DISJOINT)
+    if(UMF_BUILD_GPU_TESTS AND UMF_BUILD_LEVEL_ZERO_PROVIDER)
         build_umf_test(
             NAME ipc_level_zero_prov_consumer
             SRCS providers/ipc_level_zero_prov_consumer.c common/ipc_common.c
                  providers/ipc_level_zero_prov_common.c
                  ${UMF_UTILS_DIR}/utils_level_zero.cpp
-            LIBS ze_loader disjoint_pool ${UMF_UTILS_FOR_TEST})
+            LIBS ze_loader ${UMF_UTILS_FOR_TEST})
         build_umf_test(
             NAME ipc_level_zero_prov_producer
             SRCS providers/ipc_level_zero_prov_producer.c common/ipc_common.c
                  providers/ipc_level_zero_prov_common.c
                  ${UMF_UTILS_DIR}/utils_level_zero.cpp
-            LIBS ze_loader disjoint_pool ${UMF_UTILS_FOR_TEST})
+            LIBS ze_loader ${UMF_UTILS_FOR_TEST})
         add_umf_ipc_test(TEST ipc_level_zero_prov SRC_DIR providers)
     endif()
-    if(UMF_BUILD_GPU_TESTS
-       AND UMF_BUILD_CUDA_PROVIDER
-       AND UMF_BUILD_LIBUMF_POOL_DISJOINT)
+    if(UMF_BUILD_GPU_TESTS AND UMF_BUILD_CUDA_PROVIDER)
         build_umf_test(
             NAME ipc_cuda_prov_consumer
             SRCS providers/ipc_cuda_prov_consumer.c common/ipc_common.c
                  providers/ipc_cuda_prov_common.c providers/cuda_helpers.cpp
-            LIBS cuda disjoint_pool ${UMF_UTILS_FOR_TEST})
+            LIBS cuda ${UMF_UTILS_FOR_TEST})
         build_umf_test(
             NAME ipc_cuda_prov_producer
             SRCS providers/ipc_cuda_prov_producer.c common/ipc_common.c
                  providers/ipc_cuda_prov_common.c providers/cuda_helpers.cpp
-            LIBS cuda disjoint_pool ${UMF_UTILS_FOR_TEST})
+            LIBS cuda ${UMF_UTILS_FOR_TEST})
         add_umf_ipc_test(TEST ipc_cuda_prov SRC_DIR providers)
     endif()
 else()
@@ -701,41 +686,34 @@ if(LINUX
     )
     endif()
-    if(UMF_BUILD_GPU_EXAMPLES
-       AND UMF_BUILD_LIBUMF_POOL_DISJOINT
-       AND UMF_BUILD_LEVEL_ZERO_PROVIDER)
+    if(UMF_BUILD_GPU_EXAMPLES AND UMF_BUILD_LEVEL_ZERO_PROVIDER)
         set(EXAMPLES ${EXAMPLES} level_zero_shared_memory)
     else()
         message(
             STATUS
-            "GPU level zero shared memory example requires UMF_BUILD_GPU_EXAMPLES, "
-            "UMF_BUILD_LEVEL_ZERO_PROVIDER and UMF_BUILD_LIBUMF_POOL_DISJOINT "
-            "to be turned ON - skipping")
+            "GPU level zero shared memory example requires UMF_BUILD_GPU_EXAMPLES and "
+            "UMF_BUILD_LEVEL_ZERO_PROVIDER to be turned ON - skipping")
     endif()
     if(UMF_BUILD_GPU_EXAMPLES
-       AND UMF_BUILD_LIBUMF_POOL_DISJOINT
        AND UMF_BUILD_CUDA_PROVIDER
        AND UMF_CUDA_ENABLED)
         set(EXAMPLES ${EXAMPLES} cuda_shared_memory)
     else()
         message(
             STATUS
-            "GPU CUDA shared memory example requires UMF_BUILD_GPU_EXAMPLES, "
-            "UMF_BUILD_CUDA_PROVIDER, UMF_BUILD_LIBUMF_POOL_DISJOINT "
-            "to be turned ON and installed CUDA libraries - skipping")
+            "GPU CUDA shared memory example requires UMF_BUILD_GPU_EXAMPLES "
+            "and UMF_BUILD_CUDA_PROVIDER to be turned ON and installed CUDA "
+            "libraries - skipping")
     endif()
     # TODO add IPC examples for CUDA
-    if(UMF_BUILD_GPU_EXAMPLES
-       AND UMF_BUILD_LIBUMF_POOL_DISJOINT
-       AND UMF_BUILD_LEVEL_ZERO_PROVIDER)
+    if(UMF_BUILD_GPU_EXAMPLES AND UMF_BUILD_LEVEL_ZERO_PROVIDER)
         set(EXAMPLES ${EXAMPLES} ipc_level_zero)
     else()
         message(
-            STATUS
-            "IPC Level Zero example requires UMF_BUILD_GPU_EXAMPLES, UMF_BUILD_LEVEL_ZERO_PROVIDER and UMF_BUILD_LIBUMF_POOL_DISJOINT to be turned ON - skipping"
-        )
+            STATUS "IPC Level Zero example requires UMF_BUILD_GPU_EXAMPLES and "
+                   "UMF_BUILD_LEVEL_ZERO_PROVIDER to be turned ON - skipping")
     endif()
     if(UMF_POOL_SCALABLE_ENABLED)
diff --git a/test/c_api/disjoint_pool.c b/test/c_api/disjoint_pool.c
index 4d4634def..b529497c8 100644
--- a/test/c_api/disjoint_pool.c
+++ b/test/c_api/disjoint_pool.c
@@ -1,10 +1,11 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
 // Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 #include
-#include "pool_disjoint.h"
+#include
+
 #include "provider_null.h"
 #include "test_helpers.h"
 #include "test_ut_asserts.h"
diff --git a/test/disjointPoolFileProv.cpp b/test/disjoint_pool_file_prov.cpp
similarity index 99%
rename from test/disjointPoolFileProv.cpp
rename to test/disjoint_pool_file_prov.cpp
index 383487a87..b874d2a49 100644
--- a/test/disjointPoolFileProv.cpp
+++ b/test/disjoint_pool_file_prov.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2023-2024 Intel Corporation
+ * Copyright (C) 2023-2025 Intel Corporation
 *
 * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
diff --git a/test/pools/disjoint_pool.cpp b/test/pools/disjoint_pool.cpp
index 025f546be..dad960187 100644
--- a/test/pools/disjoint_pool.cpp
+++ b/test/pools/disjoint_pool.cpp
@@ -4,9 +4,11 @@
 #include
+#include
+
 #include "pool.hpp"
+#include "pool/pool_disjoint_internal.h"
 #include "poolFixtures.hpp"
-#include "pool_disjoint.h"
 #include "provider.hpp"
 #include "provider_null.h"
 #include "provider_trace.h"
@@ -57,11 +59,130 @@ umf_result_t poolConfigDestroy(void *config) {
 using umf_test::test;
 using namespace umf_test;
+TEST_F(test, internals) {
+    static umf_result_t expectedResult = UMF_RESULT_SUCCESS;
+    struct memory_provider : public umf_test::provider_base_t {
+        umf_result_t alloc(size_t size, size_t alignment, void **ptr) noexcept {
+            *ptr = umf_ba_global_aligned_alloc(size, alignment);
+            return UMF_RESULT_SUCCESS;
+        }
+
+        umf_result_t free(void *ptr, [[maybe_unused]] size_t size) noexcept {
+            // do the actual free only when we expect the success
+            if (expectedResult == UMF_RESULT_SUCCESS) {
+                umf_ba_global_free(ptr);
+            }
+            return expectedResult;
+        }
+
+        umf_result_t
+        get_min_page_size([[maybe_unused]] void *ptr,
+                          [[maybe_unused]] size_t *pageSize) noexcept {
+            *pageSize = 1024;
+            return UMF_RESULT_SUCCESS;
+        }
+    };
+    umf_memory_provider_ops_t provider_ops =
+        umf::providerMakeCOps();
+
+    auto providerUnique =
+        wrapProviderUnique(createProviderChecked(&provider_ops, nullptr));
+
+    umf_memory_provider_handle_t provider_handle;
+    provider_handle = providerUnique.get();
+
+    umf_disjoint_pool_params_handle_t params =
+        (umf_disjoint_pool_params_handle_t)defaultPoolConfig();
+    // set to maximum tracing
+    params->pool_trace = 3;
+    params->max_poolable_size = 1024 * 1024;
+
+    // in the "internals" test we use the ops interface to directly manipulate
+    // the pool structure
+    umf_memory_pool_ops_t *ops = umfDisjointPoolOps();
+    EXPECT_NE(ops, nullptr);
+
+    disjoint_pool_t *pool;
+    umf_result_t res = ops->initialize(provider_handle, params, (void **)&pool);
+    EXPECT_EQ(res, UMF_RESULT_SUCCESS);
+    EXPECT_NE(pool, nullptr);
+    EXPECT_EQ(pool->provider_min_page_size, 1024);
+
+    // check buckets sizes
+    size_t expected_size = DEFAULT_DISJOINT_MIN_BUCKET_SIZE;
+    EXPECT_EQ(pool->buckets[0]->size, expected_size);
+    EXPECT_EQ(pool->buckets[pool->buckets_num - 1]->size,
+              (size_t)1 << 31); // 2GB
+    for (size_t i = 0; i < pool->buckets_num; i++) {
+        bucket_t *bucket = pool->buckets[i];
+        EXPECT_NE(bucket, nullptr);
+        EXPECT_EQ(bucket->size, expected_size);
+
+        // assuming DEFAULT_DISJOINT_MIN_BUCKET_SIZE = 64, expected bucket
+        // sizes are: 64, 96, 128, 192, 256, ..., 2GB
+        if (i % 2 == 0) {
+            expected_size += expected_size / 2;
+        } else {
+            expected_size = DEFAULT_DISJOINT_MIN_BUCKET_SIZE << ((i + 1) / 2);
+        }
+    }
+
+    // test small allocations
+    size_t size = 8;
+    void *ptr = ops->malloc(pool, size);
+    EXPECT_NE(ptr, nullptr);
+
+    // get bucket - because of small size this should be the first bucket in
+    // the pool
+    bucket_t *bucket = pool->buckets[0];
+    EXPECT_NE(bucket, nullptr);
+
+    // check bucket stats
+    EXPECT_EQ(bucket->alloc_count, 1);
+
+    // first allocation will always use external memory (newly added to the
+    // pool) and this is counted as allocation from the outside of the pool
+    EXPECT_EQ(bucket->alloc_pool_count, 0);
+    EXPECT_EQ(bucket->curr_slabs_in_use, 1);
+
+    // check slab - there should be only single slab allocated
+    EXPECT_NE(bucket->available_slabs, nullptr);
+    EXPECT_EQ(bucket->available_slabs_num, 1);
+    EXPECT_EQ(bucket->available_slabs->next, nullptr);
+    slab_t *slab = bucket->available_slabs->val;
+
+    // check slab stats
+    EXPECT_GE(slab->slab_size, params->slab_min_size);
+    EXPECT_GE(slab->num_chunks_total, slab->slab_size / bucket->size);
+
+    // check allocation in slab
+    EXPECT_EQ(slab->chunks[0], true);
+    EXPECT_EQ(slab->chunks[1], false);
+    EXPECT_EQ(slab->first_free_chunk_idx, 1);
+
+    // TODO:
+    // * multiple alloc + free from single bucket
+    // * alignments
+    // * full slab alloc
+    // * slab overflow
+    // * chunked slabs
+    // * multiple alloc + free from different buckets
+    // * alloc something outside pool (> MaxPoolableSize)
+    // * test capacity
+    // * check minBucketSize
+    // * test large objects
+    // * check available_slabs_num
+
+    // cleanup
+    ops->finalize(pool);
+    umfDisjointPoolParamsDestroy(params);
+}
+
 TEST_F(test, freeErrorPropagation) {
     static umf_result_t expectedResult = UMF_RESULT_SUCCESS;
     struct memory_provider : public umf_test::provider_base_t {
-        umf_result_t alloc(size_t size, size_t, void **ptr) noexcept {
-            *ptr = umf_ba_global_alloc(size);
+        umf_result_t alloc(size_t size, size_t alignment, void **ptr) noexcept {
+            *ptr = umf_ba_global_aligned_alloc(size, alignment);
             return UMF_RESULT_SUCCESS;
         }
@@ -117,8 +238,8 @@ TEST_F(test, sharedLimits) {
     static size_t numFrees = 0;
     struct memory_provider : public umf_test::provider_base_t {
-        umf_result_t alloc(size_t size, size_t, void **ptr) noexcept {
-            *ptr = umf_ba_global_alloc(size);
+        umf_result_t alloc(size_t size, size_t alignment, void **ptr) noexcept {
+            *ptr = umf_ba_global_aligned_alloc(size, alignment);
             numAllocs++;
             return UMF_RESULT_SUCCESS;
         }
diff --git a/test/provider_os_memory.cpp b/test/provider_os_memory.cpp
index ddc44548e..5b647b642 100644
--- a/test/provider_os_memory.cpp
+++ b/test/provider_os_memory.cpp
@@ -9,10 +9,8 @@
 #include "test_helpers.h"
 #include
-#include
-#ifdef UMF_POOL_DISJOINT_ENABLED
 #include
-#endif
+#include
 #ifdef UMF_POOL_JEMALLOC_ENABLED
 #include
 #endif
@@ -428,8 +426,6 @@ umf_result_t destroyOsMemoryProviderParamsShared(void *params) {
 HostMemoryAccessor hostAccessor;
-#ifdef UMF_POOL_DISJOINT_ENABLED
-
 void *createDisjointPoolParams() {
     umf_disjoint_pool_params_handle_t params = nullptr;
     umf_result_t res = umfDisjointPoolParamsCreate(&params);
@@ -465,14 +461,10 @@ umf_result_t destroyDisjointPoolParams(void *params) {
         static_cast(params));
 }
-#endif
-
 static std::vector ipcTestParamsList = {
-#ifdef UMF_POOL_DISJOINT_ENABLED
     {umfDisjointPoolOps(), createDisjointPoolParams, destroyDisjointPoolParams,
      umfOsMemoryProviderOps(), createOsMemoryProviderParamsShared,
      destroyOsMemoryProviderParamsShared, &hostAccessor},
-#endif
 #ifdef UMF_POOL_JEMALLOC_ENABLED
     {umfJemallocPoolOps(), nullptr, nullptr, umfOsMemoryProviderOps(),
      createOsMemoryProviderParamsShared, destroyOsMemoryProviderParamsShared,
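/*
 * Illustrative sketch, not part of the patch: with the Disjoint Pool now
 * always compiled into libumf, a pool can be created purely through the
 * public API that the tests above exercise. The 'provider' handle is assumed
 * to come from any UMF memory provider created elsewhere, and the helper
 * name create_disjoint_pool() is made up for this example; error handling is
 * kept minimal.
 */
#include <umf/memory_pool.h>
#include <umf/memory_provider.h>
#include <umf/pools/pool_disjoint.h>

static umf_memory_pool_handle_t
create_disjoint_pool(umf_memory_provider_handle_t provider) {
    umf_disjoint_pool_params_handle_t params = NULL;
    umf_memory_pool_handle_t pool = NULL;

    if (umfDisjointPoolParamsCreate(&params) != UMF_RESULT_SUCCESS) {
        return NULL;
    }

    // defaults are used here; the params handle can be tuned before the call
    if (umfPoolCreate(umfDisjointPoolOps(), provider, params, 0, &pool) !=
        UMF_RESULT_SUCCESS) {
        pool = NULL;
    }

    umfDisjointPoolParamsDestroy(params);
    return pool;
}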
diff --git a/test/supp/drd-umf_test-disjoint_pool.supp b/test/supp/drd-umf_test-disjoint_pool.supp
new file mode 100644
index 000000000..24a44b93d
--- /dev/null
+++ b/test/supp/drd-umf_test-disjoint_pool.supp
@@ -0,0 +1,7 @@
+{
+   False-positive ConflictingAccess in critnib_insert
+   drd:ConflictingAccess
+   fun:store
+   fun:critnib_insert
+   ...
+}
diff --git a/test/supp/helgrind-umf_test-disjointCoarseMallocPool.supp b/test/supp/helgrind-umf_test-disjointCoarseMallocPool.supp
deleted file mode 100644
index 2f669eb31..000000000
--- a/test/supp/helgrind-umf_test-disjointCoarseMallocPool.supp
+++ /dev/null
@@ -1,24 +0,0 @@
-{
-   Incompatibility with helgrind's implementation (pthread_mutex_lock with a pthread_rwlock_t* argument)
-   Helgrind:Misc
-   obj:*vgpreload_helgrind-amd64-linux.so
-   fun:_ZL20__gthread_mutex_lockP15pthread_mutex_t
-   ...
-}
-
-{
-   Incompatibility with helgrind's implementation (pthread_mutex_unlock with a pthread_rwlock_t* argument)
-   Helgrind:Misc
-   obj:*vgpreload_helgrind-amd64-linux.so
-   fun:_ZL22__gthread_mutex_unlockP15pthread_mutex_t
-   ...
-}
-
-{
-   Incompatibility with helgrind's implementation (lock order "0xA before 0xB" violated)
-   Helgrind:LockOrder
-   obj:*vgpreload_helgrind-amd64-linux.so
-   fun:_ZStL23__glibcxx_rwlock_wrlockP16pthread_rwlock_t
-   fun:_ZNSt22__shared_mutex_pthread4lockEv
-   ...
-}
diff --git a/test/supp/helgrind-umf_test-disjointPool.supp b/test/supp/helgrind-umf_test-disjoint_pool.supp
similarity index 53%
rename from test/supp/helgrind-umf_test-disjointPool.supp
rename to test/supp/helgrind-umf_test-disjoint_pool.supp
index 3ada32736..929674e8e 100644
--- a/test/supp/helgrind-umf_test-disjointPool.supp
+++ b/test/supp/helgrind-umf_test-disjoint_pool.supp
@@ -29,25 +29,9 @@
 }
 {
-   Incompatibility with helgrind's implementation ("pthread_rwlock_{rd,rw}lock with a pthread_mutex_t* argument")
-   Helgrind:Misc
-   obj:*vgpreload_helgrind-amd64-linux.so
-   fun:*glibcxx_rwlock_wrlock*pthread_rwlock_t
-   ...
-}
-
-{
-   Incompatibility with helgrind's implementation ("pthread_rwlock_unlock with a pthread_mutex_t* argument")
-   Helgrind:Misc
-   obj:*vgpreload_helgrind-amd64-linux.so
-   fun:*glibcxx_rwlock_unlock*pthread_rwlock_t
-   ...
-}
-
-{
-   Incompatibility with helgrind's implementation ("pthread_rwlock_{rd,rw}lock with a pthread_mutex_t* argument")
-   Helgrind:Misc
-   obj:*vgpreload_helgrind-amd64-linux.so
-   fun:*glibcxx_rwlock_rdlock*pthread_rwlock_t*
+   False-positive Race in critnib_insert
+   Helgrind:Race
+   fun:store
+   fun:critnib_insert
    ...
 }
diff --git a/test/test_installation.py b/test/test_installation.py
index b5dd676dc..ef30ac759 100644
--- a/test/test_installation.py
+++ b/test/test_installation.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2024 Intel Corporation
+# Copyright (C) 2024-2025 Intel Corporation
 #
 # Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT.
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
@@ -278,11 +278,6 @@ def parse_arguments(self) -> argparse.Namespace:
             action="store_true",
             help="Add this argument if the proxy library should be built together with the UMF library",
         )
-        self.parser.add_argument(
-            "--disjoint-pool",
-            action="store_true",
-            help="Add this argument if the UMF was built with Disjoint Pool enabled",
-        )
         self.parser.add_argument(
             "--umf-version",
             action="store",
@@ -299,8 +294,6 @@ def run(self) -> None:
         build_dir = Path(workspace_dir, self.args.build_dir)
         install_dir = Path(workspace_dir, self.args.install_dir)
         pools = []
-        if self.args.disjoint_pool:
-            pools.append("disjoint_pool")
         umf_version = Version(self.args.umf_version)