From df1de3ae377e6b435f19653e2b902d489e001f79 Mon Sep 17 00:00:00 2001 From: Rafal Rudnicki Date: Tue, 3 Dec 2024 16:05:42 +0100 Subject: [PATCH 01/12] do nothing in ba_global_free if ba is destroyed --- src/base_alloc/base_alloc_global.c | 14 ++++++++++++-- src/base_alloc/base_alloc_global.h | 3 +++ 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/src/base_alloc/base_alloc_global.c b/src/base_alloc/base_alloc_global.c index 2aca5d29c..f709eab9d 100644 --- a/src/base_alloc/base_alloc_global.c +++ b/src/base_alloc/base_alloc_global.c @@ -23,6 +23,7 @@ // global base allocator used by all providers and pools static UTIL_ONCE_FLAG ba_is_initialized = UTIL_ONCE_FLAG_INIT; +static bool ba_is_destroyed = false; #define ALLOC_METADATA_SIZE (sizeof(size_t)) @@ -40,6 +41,8 @@ struct base_alloc_t { static struct base_alloc_t BASE_ALLOC = {.ac_sizes = ALLOCATION_CLASSES}; void umf_ba_destroy_global(void) { + ba_is_destroyed = true; + for (int i = 0; i < NUM_ALLOCATION_CLASSES; i++) { if (BASE_ALLOC.ac[i]) { umf_ba_destroy(BASE_ALLOC.ac[i]); @@ -48,10 +51,12 @@ void umf_ba_destroy_global(void) { } // portable version of "ba_is_initialized = UTIL_ONCE_FLAG_INIT;" - static UTIL_ONCE_FLAG is_initialized = UTIL_ONCE_FLAG_INIT; - memcpy(&ba_is_initialized, &is_initialized, sizeof(ba_is_initialized)); + static UTIL_ONCE_FLAG set_once = UTIL_ONCE_FLAG_INIT; + memcpy(&ba_is_initialized, &set_once, sizeof(ba_is_initialized)); } +bool umf_ba_global_is_destroyed(void) { return ba_is_destroyed; } + static void umf_ba_create_global(void) { for (int i = 0; i < NUM_ALLOCATION_CLASSES; i++) { // allocation classes need to be powers of 2 @@ -202,6 +207,11 @@ void umf_ba_global_free(void *ptr) { return; } + if (ba_is_destroyed) { + LOG_WARN("base_alloc: calling free after the base alloc is destroyed"); + return; + } + size_t total_size; ptr = get_original_alloc(ptr, &total_size, NULL); diff --git a/src/base_alloc/base_alloc_global.h b/src/base_alloc/base_alloc_global.h index ad7f12ce5..bd55d352f 100644 --- a/src/base_alloc/base_alloc_global.h +++ b/src/base_alloc/base_alloc_global.h @@ -8,6 +8,8 @@ #ifndef UMF_BASE_ALLOC_GLOBAL_H #define UMF_BASE_ALLOC_GLOBAL_H 1 +#include + #include "base_alloc.h" #ifdef __cplusplus @@ -17,6 +19,7 @@ extern "C" { void *umf_ba_global_alloc(size_t size); void umf_ba_global_free(void *ptr); void umf_ba_destroy_global(void); +bool umf_ba_global_is_destroyed(void); size_t umf_ba_global_malloc_usable_size(void *ptr); void *umf_ba_global_aligned_alloc(size_t size, size_t alignment); From 1ee248c7a211f1fb54f36cf70360baed4b2884b8 Mon Sep 17 00:00:00 2001 From: Rafal Rudnicki Date: Tue, 3 Dec 2024 16:08:15 +0100 Subject: [PATCH 02/12] add utils min/max functions --- src/utils/utils_common.c | 3 +++ src/utils/utils_common.h | 4 ++++ 2 files changed, 7 insertions(+) diff --git a/src/utils/utils_common.c b/src/utils/utils_common.c index eaf5420fc..225c02d2c 100644 --- a/src/utils/utils_common.c +++ b/src/utils/utils_common.c @@ -128,3 +128,6 @@ umf_result_t utils_translate_flags(unsigned in_flags, unsigned max, *out_flags = out_f; return UMF_RESULT_SUCCESS; } + +size_t utils_max(size_t a, size_t b) { return a > b ? a : b; } +size_t utils_min(size_t a, size_t b) { return a < b ? 
a : b; } diff --git a/src/utils/utils_common.h b/src/utils/utils_common.h index 6af5a08d9..d8ea9bf6a 100644 --- a/src/utils/utils_common.h +++ b/src/utils/utils_common.h @@ -176,6 +176,10 @@ int utils_fallocate(int fd, long offset, long len); long utils_get_size_threshold(char *str_threshold); +size_t utils_max(size_t a, size_t b); + +size_t utils_min(size_t a, size_t b); + #ifdef __cplusplus } #endif From cd6efbbd9f9bf8ae272a11951ce2fd4612618ffe Mon Sep 17 00:00:00 2001 From: Rafal Rudnicki Date: Sat, 7 Dec 2024 18:46:04 +0100 Subject: [PATCH 03/12] add utils_compare_exchange function --- src/utils/utils_concurrency.h | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/src/utils/utils_concurrency.h b/src/utils/utils_concurrency.h index 155184cc4..287f5d12a 100644 --- a/src/utils/utils_concurrency.h +++ b/src/utils/utils_concurrency.h @@ -61,11 +61,13 @@ int utils_mutex_unlock(utils_mutex_t *mutex); void utils_init_once(UTIL_ONCE_FLAG *flag, void (*onceCb)(void)); #if defined(_WIN32) + static __inline unsigned char utils_lssb_index(long long value) { unsigned long ret; _BitScanForward64(&ret, value); return (unsigned char)ret; } + static __inline unsigned char utils_mssb_index(long long value) { unsigned long ret; _BitScanReverse64(&ret, value); @@ -81,15 +83,25 @@ static __inline unsigned char utils_mssb_index(long long value) { #define utils_atomic_store_release(object, desired) \ InterlockedExchange64((LONG64 volatile *)object, (LONG64)desired) + #define utils_atomic_increment(object) \ InterlockedIncrement64((LONG64 volatile *)object) + #define utils_atomic_decrement(object) \ InterlockedDecrement64((LONG64 volatile *)object) + #define utils_fetch_and_add64(ptr, value) \ InterlockedExchangeAdd64((LONG64 *)(ptr), value) -#else + +// NOTE: windows version have different order of args +#define utils_compare_exchange(object, desired, expected) \ + InterlockedCompareExchange64((LONG64 volatile *)object, *expected, *desired) + +#else // !defined(_WIN32) + #define utils_lssb_index(x) ((unsigned char)__builtin_ctzll(x)) #define utils_mssb_index(x) ((unsigned char)(63 - __builtin_clzll(x))) + #define utils_atomic_load_acquire(object, dest) \ do { \ utils_annotate_acquire((void *)object); \ @@ -103,12 +115,19 @@ static __inline unsigned char utils_mssb_index(long long value) { } while (0) #define utils_atomic_increment(object) \ - __atomic_add_fetch(object, 1, __ATOMIC_ACQ_REL) + __atomic_add_fetch(object, 1, memory_order_acq_rel) + #define utils_atomic_decrement(object) \ - __atomic_sub_fetch(object, 1, __ATOMIC_ACQ_REL) -#define utils_fetch_and_add64 __sync_fetch_and_add + __atomic_sub_fetch(object, 1, memory_order_acq_rel) -#endif +#define utils_fetch_and_add64(object, value) \ + __atomic_fetch_add(object, value, memory_order_acq_rel) + +#define utils_compare_exchange(object, expected, desired) \ + __atomic_compare_exchange(object, expected, desired, 0 /* strong */, \ + memory_order_acq_rel, memory_order_relaxed) + +#endif // !defined(_WIN32) #ifdef __cplusplus } From 76b4b5c6d82d9f619ed74a3ef1de9187ece20816 Mon Sep 17 00:00:00 2001 From: Rafal Rudnicki Date: Fri, 24 Jan 2025 15:35:01 +0100 Subject: [PATCH 04/12] make disjoint pool a C structure --- .github/workflows/coverity.yml | 1 - .github/workflows/nightly.yml | 3 - .github/workflows/reusable_basic.yml | 8 - .github/workflows/reusable_benchmarks.yml | 1 - .github/workflows/reusable_dax.yml | 1 - .github/workflows/reusable_fast.yml | 7 - .github/workflows/reusable_gpu.yml | 1 - 
.github/workflows/reusable_multi_numa.yml | 4 +- .github/workflows/reusable_proxy_lib.yml | 1 - .github/workflows/reusable_sanitizers.yml | 2 - .github/workflows/reusable_valgrind.yml | 1 - CMakeLists.txt | 6 +- README.md | 13 +- benchmark/CMakeLists.txt | 7 - benchmark/benchmark.cpp | 4 - benchmark/benchmark_umf.hpp | 8 +- benchmark/multithread.cpp | 9 +- benchmark/ubench.c | 15 +- examples/CMakeLists.txt | 20 +- examples/README.md | 4 +- examples/cuda_shared_memory/CMakeLists.txt | 7 +- examples/ipc_level_zero/CMakeLists.txt | 4 +- .../level_zero_shared_memory/CMakeLists.txt | 4 +- scripts/qemu/run-build.sh | 3 +- src/CMakeLists.txt | 3 +- src/base_alloc/base_alloc_global.c | 5 +- src/libumf.def | 12 + src/libumf.map | 12 + src/pool/CMakeLists.txt | 30 - src/pool/pool_disjoint.c | 1123 ++++++++++++++ src/pool/pool_disjoint.cpp | 1313 ----------------- src/pool/pool_disjoint_internal.h | 176 +++ src/utils/utils_common.h | 2 + src/utils/utils_concurrency.h | 21 +- src/utils/utils_posix_concurrency.c | 37 +- src/utils/utils_windows_concurrency.c | 52 +- test/CMakeLists.txt | 90 +- test/c_api/disjoint_pool.c | 5 +- ...leProv.cpp => disjoint_pool_file_prov.cpp} | 2 +- test/pools/disjoint_pool.cpp | 131 +- test/provider_os_memory.cpp | 10 +- test/supp/drd-umf_test-disjoint_pool.supp | 7 + ...ind-umf_test-disjointCoarseMallocPool.supp | 24 - ...p => helgrind-umf_test-disjoint_pool.supp} | 24 +- test/test_installation.py | 9 +- 45 files changed, 1635 insertions(+), 1587 deletions(-) create mode 100644 src/pool/pool_disjoint.c delete mode 100644 src/pool/pool_disjoint.cpp create mode 100644 src/pool/pool_disjoint_internal.h rename test/{disjointPoolFileProv.cpp => disjoint_pool_file_prov.cpp} (99%) create mode 100644 test/supp/drd-umf_test-disjoint_pool.supp delete mode 100644 test/supp/helgrind-umf_test-disjointCoarseMallocPool.supp rename test/supp/{helgrind-umf_test-disjointPool.supp => helgrind-umf_test-disjoint_pool.supp} (53%) diff --git a/.github/workflows/coverity.yml b/.github/workflows/coverity.yml index 531a463c7..ebae6086a 100644 --- a/.github/workflows/coverity.yml +++ b/.github/workflows/coverity.yml @@ -49,7 +49,6 @@ jobs: -DUMF_FORMAT_CODE_STYLE=OFF -DUMF_DEVELOPER_MODE=OFF -DUMF_TESTS_FAIL_ON_SKIP=ON - -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON - name: Build diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index 7a6335ed6..44f2ba2ca 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -76,7 +76,6 @@ jobs: -DCMAKE_BUILD_TYPE=Debug -DUMF_FORMAT_CODE_STYLE=OFF -DUMF_DEVELOPER_MODE=ON - -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON -DUMF_BUILD_LEVEL_ZERO_PROVIDER=OFF -DUMF_BUILD_CUDA_PROVIDER=OFF @@ -138,7 +137,6 @@ jobs: -DUMF_LINK_HWLOC_STATICALLY=${{matrix.static_hwloc}} -DUMF_FORMAT_CODE_STYLE=OFF -DUMF_DEVELOPER_MODE=ON - -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON -DUMF_BUILD_LEVEL_ZERO_PROVIDER=ON -DUMF_BUILD_CUDA_PROVIDER=ON @@ -219,7 +217,6 @@ jobs: -DUMF_BUILD_SHARED_LIBRARY=${{matrix.shared_library}} ^ -DUMF_FORMAT_CODE_STYLE=OFF ^ -DUMF_DEVELOPER_MODE=ON ^ - -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON ^ -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON ^ -DUMF_BUILD_LEVEL_ZERO_PROVIDER=ON ^ -DUMF_BUILD_CUDA_PROVIDER=ON ^ diff --git a/.github/workflows/reusable_basic.yml b/.github/workflows/reusable_basic.yml index d23e646dd..7170ec418 100644 --- a/.github/workflows/reusable_basic.yml +++ b/.github/workflows/reusable_basic.yml @@ -165,7 +165,6 @@ jobs: 
-DUMF_FORMAT_CODE_STYLE=OFF -DUMF_DEVELOPER_MODE=ON -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON - -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON -DUMF_TESTS_FAIL_ON_SKIP=ON -DUMF_DISABLE_HWLOC=${{matrix.disable_hwloc}} -DUMF_LINK_HWLOC_STATICALLY=${{matrix.link_hwloc_statically}} @@ -208,7 +207,6 @@ jobs: --build-dir ${{env.BUILD_DIR}} --install-dir ${{env.INSTL_DIR}} --build-type ${{matrix.build_type}} - --disjoint-pool ${{ matrix.install_tbb == 'ON' && matrix.disable_hwloc != 'ON' && matrix.shared_library == 'ON' && '--proxy' || '' }} --umf-version ${{env.UMF_VERSION}} ${{ matrix.shared_library == 'ON' && '--shared-library' || '' }} @@ -284,7 +282,6 @@ jobs: -DUMF_BUILD_SHARED_LIBRARY=${{matrix.shared_library}} -DUMF_FORMAT_CODE_STYLE=OFF -DUMF_DEVELOPER_MODE=ON - -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON -DUMF_BUILD_LEVEL_ZERO_PROVIDER=${{matrix.level_zero_provider}} -DUMF_BUILD_CUDA_PROVIDER=${{matrix.cuda_provider}} @@ -304,7 +301,6 @@ jobs: --build-dir ${{env.BUILD_DIR}} --install-dir ${{env.INSTL_DIR}} --build-type ${{matrix.build_type}} - --disjoint-pool ${{matrix.shared_library == 'ON' && '--proxy' || '' }} --umf-version ${{env.UMF_VERSION}} ${{ matrix.shared_library == 'ON' && '--shared-library' || ''}} @@ -342,7 +338,6 @@ jobs: -DUMF_BUILD_EXAMPLES=ON -DUMF_FORMAT_CODE_STYLE=OFF -DUMF_DEVELOPER_MODE=ON - -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=OFF -DUMF_BUILD_LEVEL_ZERO_PROVIDER=ON -DUMF_BUILD_CUDA_PROVIDER=ON @@ -385,7 +380,6 @@ jobs: -DUMF_BUILD_EXAMPLES=ON -DUMF_FORMAT_CODE_STYLE=OFF -DUMF_DEVELOPER_MODE=ON - -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=OFF -DUMF_BUILD_LEVEL_ZERO_PROVIDER=ON -DUMF_BUILD_CUDA_PROVIDER=ON @@ -496,7 +490,6 @@ jobs: -DUMF_FORMAT_CODE_STYLE=OFF -DUMF_DEVELOPER_MODE=ON -DUMF_BUILD_LEVEL_ZERO_PROVIDER=OFF - -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON -DUMF_BUILD_SHARED_LIBRARY=ON -DUMF_TESTS_FAIL_ON_SKIP=ON @@ -511,7 +504,6 @@ jobs: --build-dir ${{env.BUILD_DIR}} --install-dir ${{env.INSTL_DIR}} --build-type ${{env.BUILD_TYPE}} - --disjoint-pool --proxy --umf-version ${{env.UMF_VERSION}} --shared-library diff --git a/.github/workflows/reusable_benchmarks.yml b/.github/workflows/reusable_benchmarks.yml index b33fdb25e..b41c99f3a 100644 --- a/.github/workflows/reusable_benchmarks.yml +++ b/.github/workflows/reusable_benchmarks.yml @@ -93,7 +93,6 @@ jobs: -DUMF_FORMAT_CODE_STYLE=OFF -DUMF_BUILD_LEVEL_ZERO_PROVIDER=ON -DUMF_BUILD_CUDA_PROVIDER=ON - -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON - name: Build UMF diff --git a/.github/workflows/reusable_dax.yml b/.github/workflows/reusable_dax.yml index f7f4fbe50..1a41b11c7 100644 --- a/.github/workflows/reusable_dax.yml +++ b/.github/workflows/reusable_dax.yml @@ -84,7 +84,6 @@ jobs: -DUMF_BUILD_GPU_EXAMPLES=OFF -DUMF_FORMAT_CODE_STYLE=OFF -DUMF_DEVELOPER_MODE=ON - -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON -DUMF_BUILD_LEVEL_ZERO_PROVIDER=OFF -DUMF_TESTS_FAIL_ON_SKIP=ON diff --git a/.github/workflows/reusable_fast.yml b/.github/workflows/reusable_fast.yml index 58a172a74..5166f2b96 100644 --- a/.github/workflows/reusable_fast.yml +++ b/.github/workflows/reusable_fast.yml @@ -19,24 +19,20 @@ jobs: matrix: include: - os: windows-latest - disjoint: 'OFF' build_tests: 'ON' simple_cmake: 'OFF' # pure C build (Windows) - os: windows-latest - disjoint: 'OFF' # Tests' building is off for a pure C build build_tests: 'OFF' simple_cmake: 'OFF' - os: ubuntu-latest - 
disjoint: 'ON' build_tests: 'ON' # Windows doesn't recognize 'CMAKE_BUILD_TYPE', it uses '--config' param in build command extra_build_options: '-DCMAKE_BUILD_TYPE=Release -DUMF_BUILD_BENCHMARKS=ON -DUMF_BUILD_BENCHMARKS_MT=ON' simple_cmake: 'OFF' # pure C build (Linux) - os: ubuntu-latest - disjoint: 'OFF' # Windows doesn't recognize 'CMAKE_BUILD_TYPE', it uses '--config' param in build command # Tests' building is off for a pure C build build_tests: 'OFF' @@ -44,13 +40,11 @@ jobs: simple_cmake: 'OFF' # simplest CMake on ubuntu-latest - os: ubuntu-latest - disjoint: 'OFF' build_tests: 'ON' extra_build_options: '-DCMAKE_BUILD_TYPE=Release' simple_cmake: 'ON' # simplest CMake ubuntu-20.04 - os: ubuntu-20.04 - disjoint: 'OFF' build_tests: 'ON' extra_build_options: '-DCMAKE_BUILD_TYPE=Release' simple_cmake: 'ON' @@ -97,7 +91,6 @@ jobs: -DCMAKE_PREFIX_PATH="${{env.VCPKG_PATH}}" -DUMF_FORMAT_CODE_STYLE=OFF -DUMF_DEVELOPER_MODE=ON - -DUMF_BUILD_LIBUMF_POOL_DISJOINT=${{matrix.disjoint}} -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON -DUMF_BUILD_TESTS=${{matrix.build_tests}} -DUMF_BUILD_EXAMPLES=ON diff --git a/.github/workflows/reusable_gpu.yml b/.github/workflows/reusable_gpu.yml index 47f48f6a8..87a7cfd30 100644 --- a/.github/workflows/reusable_gpu.yml +++ b/.github/workflows/reusable_gpu.yml @@ -93,7 +93,6 @@ jobs: -DUMF_BUILD_GPU_TESTS=ON -DUMF_BUILD_GPU_EXAMPLES=ON -DUMF_DEVELOPER_MODE=ON - -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON -DUMF_BUILD_CUDA_PROVIDER=OFF -DUMF_BUILD_LEVEL_ZERO_PROVIDER=OFF diff --git a/.github/workflows/reusable_multi_numa.yml b/.github/workflows/reusable_multi_numa.yml index f546b0545..7c7750551 100644 --- a/.github/workflows/reusable_multi_numa.yml +++ b/.github/workflows/reusable_multi_numa.yml @@ -45,7 +45,6 @@ jobs: -DUMF_BUILD_BENCHMARKS=OFF -DUMF_BUILD_TESTS=ON -DUMF_DEVELOPER_MODE=ON - -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=${{ matrix.os == 'rhel-9.1' && 'OFF' || 'ON' }} -DUMF_TESTS_FAIL_ON_SKIP=ON ${{ matrix.build_type == 'Debug' && matrix.os == 'ubuntu-22.04' && '-DUMF_USE_COVERAGE=ON' || '' }} @@ -61,11 +60,12 @@ jobs: # On RHEL, hwloc version is just a little too low. # Skip some tests until we upgrade hwloc and update CMake to properly handle local hwloc installation. 
# TODO: fix issue #560 + # TODO: add issue for -E umf-init_teardown - it is not clear why it fails - name: Run tests (on RHEL) if: matrix.os == 'rhel-9.1' working-directory: ${{github.workspace}}/build run: | - ctest --output-on-failure --test-dir test -E "umf-provider_os_memory_multiple_numa_nodes" + ctest --output-on-failure --test-dir test -E "umf-provider_os_memory_multiple_numa_nodes|umf-init_teardown" ./test/umf_test-provider_os_memory_multiple_numa_nodes \ --gtest_filter="-*checkModeLocal/*:*checkModePreferredEmptyNodeset/*:testNuma.checkModeInterleave" diff --git a/.github/workflows/reusable_proxy_lib.yml b/.github/workflows/reusable_proxy_lib.yml index a1f5975fa..bb4a3278e 100644 --- a/.github/workflows/reusable_proxy_lib.yml +++ b/.github/workflows/reusable_proxy_lib.yml @@ -48,7 +48,6 @@ jobs: -DUMF_FORMAT_CODE_STYLE=OFF -DUMF_DEVELOPER_MODE=ON -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON - -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON -DUMF_TESTS_FAIL_ON_SKIP=ON -DUMF_PROXY_LIB_BASED_ON_POOL=${{matrix.proxy_lib_pool}} ${{ matrix.build_type == 'Debug' && '-DUMF_USE_COVERAGE=ON' || '' }} diff --git a/.github/workflows/reusable_sanitizers.yml b/.github/workflows/reusable_sanitizers.yml index 25458da51..1a044308e 100644 --- a/.github/workflows/reusable_sanitizers.yml +++ b/.github/workflows/reusable_sanitizers.yml @@ -55,7 +55,6 @@ jobs: -DUMF_FORMAT_CODE_STYLE=OFF -DUMF_DEVELOPER_MODE=ON -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON - -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON -DUMF_USE_ASAN=${{matrix.sanitizers.asan}} -DUMF_USE_UBSAN=${{matrix.sanitizers.ubsan}} -DUMF_USE_TSAN=${{matrix.sanitizers.tsan}} @@ -127,7 +126,6 @@ jobs: -DUMF_BUILD_SHARED_LIBRARY=OFF -DUMF_FORMAT_CODE_STYLE=OFF -DUMF_DEVELOPER_MODE=ON - -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON -DUMF_USE_ASAN=${{matrix.sanitizers.asan}} -DUMF_BUILD_EXAMPLES=ON -DUMF_BUILD_LEVEL_ZERO_PROVIDER=OFF diff --git a/.github/workflows/reusable_valgrind.yml b/.github/workflows/reusable_valgrind.yml index aba0e3260..5999297d6 100644 --- a/.github/workflows/reusable_valgrind.yml +++ b/.github/workflows/reusable_valgrind.yml @@ -29,7 +29,6 @@ jobs: -DCMAKE_BUILD_TYPE=Debug -DUMF_FORMAT_CODE_STYLE=OFF -DUMF_DEVELOPER_MODE=ON - -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON -DUMF_BUILD_LEVEL_ZERO_PROVIDER=OFF -DUMF_BUILD_CUDA_PROVIDER=OFF diff --git a/CMakeLists.txt b/CMakeLists.txt index f8c393609..396a27c1e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -60,8 +60,6 @@ endmacro() umf_option(UMF_BUILD_SHARED_LIBRARY "Build UMF as shared library" OFF) umf_option(UMF_BUILD_LEVEL_ZERO_PROVIDER "Build Level Zero memory provider" ON) umf_option(UMF_BUILD_CUDA_PROVIDER "Build CUDA memory provider" ON) -umf_option(UMF_BUILD_LIBUMF_POOL_DISJOINT - "Build the libumf_pool_disjoint static library" OFF) umf_option(UMF_BUILD_LIBUMF_POOL_JEMALLOC "Build the libumf_pool_jemalloc static library" OFF) umf_option(UMF_BUILD_TESTS "Build UMF tests" ON) @@ -497,8 +495,8 @@ endif() # For using the options listed in the OPTIONS_REQUIRING_CXX variable a C++17 # compiler is required. Moreover, if these options are not set, CMake will set # up a strict C build, without C++ support. 
-set(OPTIONS_REQUIRING_CXX "UMF_BUILD_TESTS" "UMF_BUILD_LIBUMF_POOL_DISJOINT" - "UMF_BUILD_BENCHMARKS_MT" "UMF_BUILD_BENCHMARKS") +set(OPTIONS_REQUIRING_CXX "UMF_BUILD_TESTS" "UMF_BUILD_BENCHMARKS_MT" + "UMF_BUILD_BENCHMARKS") foreach(option_name ${OPTIONS_REQUIRING_CXX}) if(${option_name}) enable_language(CXX) diff --git a/README.md b/README.md index 5bd0b9b2f..00d6136df 100644 --- a/README.md +++ b/README.md @@ -39,7 +39,7 @@ For development and contributions: - cmake-format-0.6 (can be installed with `python -m pip install cmake-format==0.6.13`) - black (can be installed with `python -m pip install black==24.3.0`) -For building tests, multithreaded benchmarks and Disjoint Pool: +For building tests and multithreaded benchmarks: - C++ compiler with C++17 support @@ -106,7 +106,6 @@ List of options provided by CMake: | UMF_BUILD_SHARED_LIBRARY | Build UMF as shared library | ON/OFF | OFF | | UMF_BUILD_LEVEL_ZERO_PROVIDER | Build Level Zero memory provider | ON/OFF | ON | | UMF_BUILD_CUDA_PROVIDER | Build CUDA memory provider | ON/OFF | ON | -| UMF_BUILD_LIBUMF_POOL_DISJOINT | Build the libumf_pool_disjoint static library | ON/OFF | OFF | | UMF_BUILD_LIBUMF_POOL_JEMALLOC | Build the libumf_pool_jemalloc static library | ON/OFF | OFF | | UMF_BUILD_TESTS | Build UMF tests | ON/OFF | ON | | UMF_BUILD_GPU_TESTS | Build UMF GPU tests | ON/OFF | OFF | @@ -267,13 +266,11 @@ This memory pool is distributed as part of libumf. It forwards all requests to t memory provider. Currently umfPoolRealloc, umfPoolCalloc and umfPoolMallocUsableSize functions are not supported by the proxy pool. -#### Disjoint pool +#### Disjoint pool (part of libumf) -TODO: Add a description - -##### Requirements - -To enable this feature, the `UMF_BUILD_LIBUMF_POOL_DISJOINT` option needs to be turned `ON`. +The Disjoint pool is designed to keep internal metadata separate from user data. +This separation is particularly useful when user data needs to be placed in memory with relatively high latency, +such as GPU memory or disk storage. 
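+
+A minimal usage sketch (illustrative only; error checking is omitted, `provider`
+stands for any previously created memory provider handle, and the generic
+`umfPoolCreate`/`umfPoolMalloc` entry points are assumed):
+
+```c
+umf_disjoint_pool_params_handle_t params = NULL;
+umfDisjointPoolParamsCreate(&params);
+
+umf_memory_pool_handle_t pool = NULL;
+umfPoolCreate(umfDisjointPoolOps(), provider, params, 0, &pool);
+
+void *ptr = umfPoolMalloc(pool, 64);
+umfPoolFree(pool, ptr);
+
+umfPoolDestroy(pool);
+umfDisjointPoolParamsDestroy(params);
+```
+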
#### Jemalloc pool diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt index 73b9b257a..80c8ba5ec 100644 --- a/benchmark/CMakeLists.txt +++ b/benchmark/CMakeLists.txt @@ -97,10 +97,6 @@ function(add_umf_benchmark) ) endif() - if(UMF_BUILD_LIBUMF_POOL_DISJOINT) - target_compile_definitions(${BENCH_NAME} - PRIVATE UMF_POOL_DISJOINT_ENABLED=1) - endif() if(UMF_POOL_JEMALLOC_ENABLED) target_compile_definitions(${BENCH_NAME} PRIVATE UMF_POOL_JEMALLOC_ENABLED=1) @@ -131,9 +127,6 @@ endfunction() set(LIB_DIRS ${LIBHWLOC_LIBRARY_DIRS}) # optional libraries -if(UMF_BUILD_LIBUMF_POOL_DISJOINT) - set(LIBS_OPTIONAL ${LIBS_OPTIONAL} disjoint_pool) -endif() if(LINUX) set(LIBS_OPTIONAL ${LIBS_OPTIONAL} m) endif() diff --git a/benchmark/benchmark.cpp b/benchmark/benchmark.cpp index 6c8175e1d..ad29e9029 100644 --- a/benchmark/benchmark.cpp +++ b/benchmark/benchmark.cpp @@ -66,7 +66,6 @@ UMF_BENCHMARK_TEMPLATE_DEFINE(alloc_benchmark, proxy_pool, fixed_alloc_size, UMF_BENCHMARK_REGISTER_F(alloc_benchmark, proxy_pool) ->Apply(&default_alloc_fix_size); -#ifdef UMF_POOL_DISJOINT_ENABLED UMF_BENCHMARK_TEMPLATE_DEFINE(alloc_benchmark, disjoint_pool_fix, fixed_alloc_size, pool_allocator>); @@ -80,7 +79,6 @@ UMF_BENCHMARK_REGISTER_F(alloc_benchmark, disjoint_pool_fix) UMF_BENCHMARK_REGISTER_F(alloc_benchmark, disjoint_pool_uniform) ->Apply(&default_alloc_uniform_size); */ -#endif #ifdef UMF_POOL_JEMALLOC_ENABLED UMF_BENCHMARK_TEMPLATE_DEFINE(alloc_benchmark, jemalloc_pool_fix, @@ -150,7 +148,6 @@ UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, os_provider, UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, os_provider) ->Apply(&default_multiple_alloc_fix_size); -#ifdef UMF_POOL_DISJOINT_ENABLED UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, disjoint_pool_fix, fixed_alloc_size, pool_allocator>); @@ -164,7 +161,6 @@ UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, disjoint_pool_fix) UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, disjoint_pool_uniform) ->Apply(&default_multiple_alloc_uniform_size); */ -#endif #ifdef UMF_POOL_JEMALLOC_ENABLED UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, jemalloc_pool_fix, diff --git a/benchmark/benchmark_umf.hpp b/benchmark/benchmark_umf.hpp index 389c224ed..86cba4877 100644 --- a/benchmark/benchmark_umf.hpp +++ b/benchmark/benchmark_umf.hpp @@ -13,16 +13,14 @@ #include #include +#include #include + #ifdef UMF_POOL_SCALABLE_ENABLED #include #endif #include -#ifdef UMF_POOL_DISJOINT_ENABLED -#include -#endif - #ifdef UMF_POOL_JEMALLOC_ENABLED #include #endif @@ -167,7 +165,6 @@ struct proxy_pool : public pool_interface { static std::string name() { return "proxy_pool<" + Provider::name() + ">"; } }; -#ifdef UMF_POOL_DISJOINT_ENABLED template struct disjoint_pool : public pool_interface { umf_memory_pool_ops_t * @@ -221,7 +218,6 @@ struct disjoint_pool : public pool_interface { return "disjoint_pool<" + Provider::name() + ">"; } }; -#endif #ifdef UMF_POOL_JEMALLOC_ENABLED template diff --git a/benchmark/multithread.cpp b/benchmark/multithread.cpp index ecc238529..d00ffba90 100644 --- a/benchmark/multithread.cpp +++ b/benchmark/multithread.cpp @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2024 Intel Corporation + * Copyright (C) 2024-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -121,7 +121,7 @@ int main() { std::cout << "skipping jemalloc_pool mt_alloc_free" << std::endl; #endif -#if defined(UMF_POOL_DISJOINT_ENABLED) + // NOTE: disjoint pool is always enabled umf_disjoint_pool_params_handle_t hDisjointParams = nullptr; umf_result_t ret = umfDisjointPoolParamsCreate(&hDisjointParams); if (ret != UMF_RESULT_SUCCESS) { @@ -132,20 +132,15 @@ int main() { std::cout << "disjoint_pool mt_alloc_free: "; mt_alloc_free(poolCreateExtParams{umfDisjointPoolOps(), hDisjointParams, umfOsMemoryProviderOps(), osParams}); -#else - std::cout << "skipping disjoint_pool mt_alloc_free" << std::endl; -#endif // ctest looks for "PASSED" in the output std::cout << "PASSED" << std::endl; -#if defined(UMF_POOL_DISJOINT_ENABLED) ret = umfDisjointPoolParamsDestroy(hDisjointParams); if (ret != UMF_RESULT_SUCCESS) { std::cerr << "disjoint pool params destroy failed" << std::endl; return -1; } -#endif return 0; } diff --git a/benchmark/ubench.c b/benchmark/ubench.c index 3892740e8..5beaa62be 100644 --- a/benchmark/ubench.c +++ b/benchmark/ubench.c @@ -15,23 +15,19 @@ #include #include +#include #include #include #include #include -#ifdef UMF_POOL_DISJOINT_ENABLED -#include -#endif - #ifdef UMF_POOL_JEMALLOC_ENABLED #include #endif #include "utils_common.h" -#if (defined UMF_POOL_DISJOINT_ENABLED && \ - defined UMF_PROVIDER_LEVEL_ZERO_ENABLED && defined UMF_BUILD_GPU_TESTS) +#if (defined UMF_PROVIDER_LEVEL_ZERO_ENABLED && defined UMF_BUILD_GPU_TESTS) #include "utils_level_zero.h" #endif @@ -244,7 +240,6 @@ UBENCH_EX(simple, proxy_pool_with_os_memory_provider) { free(array); } -#if (defined UMF_POOL_DISJOINT_ENABLED) ////////////////// DISJOINT POOL WITH OS MEMORY PROVIDER UBENCH_EX(simple, disjoint_pool_with_os_memory_provider) { @@ -327,7 +322,6 @@ UBENCH_EX(simple, disjoint_pool_with_os_memory_provider) { umfMemoryProviderDestroy(os_memory_provider); free(array); } -#endif /* (defined UMF_POOL_DISJOINT_ENABLED) */ #if (defined UMF_POOL_JEMALLOC_ENABLED) ////////////////// JEMALLOC POOL WITH OS MEMORY PROVIDER @@ -421,8 +415,7 @@ UBENCH_EX(simple, scalable_pool_with_os_memory_provider) { } #endif /* (defined UMF_POOL_SCALABLE_ENABLED) */ -#if (defined UMF_POOL_DISJOINT_ENABLED && \ - defined UMF_PROVIDER_LEVEL_ZERO_ENABLED && defined UMF_BUILD_GPU_TESTS) +#if (defined UMF_PROVIDER_LEVEL_ZERO_ENABLED && defined UMF_BUILD_GPU_TESTS) static void do_ipc_get_put_benchmark(alloc_t *allocs, size_t num_allocs, size_t repeats, umf_ipc_handle_t *ipc_handles) { @@ -630,7 +623,7 @@ UBENCH_EX(ipc, disjoint_pool_with_level_zero_provider) { err_destroy_context: utils_ze_destroy_context(context); } -#endif /* (defined UMF_POLL_DISJOINT_ENABLED && defined UMF_BUILD_LEVEL_ZERO_PROVIDER && defined UMF_BUILD_GPU_TESTS) */ +#endif /* (defined UMF_BUILD_LEVEL_ZERO_PROVIDER && defined UMF_BUILD_GPU_TESTS) */ // TODO add IPC benchmark for CUDA diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 89f80ee2d..a26b8915e 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -41,16 +41,14 @@ if(UMF_POOL_SCALABLE_ENABLED) endif() endif() -if(UMF_BUILD_GPU_EXAMPLES - AND UMF_BUILD_LIBUMF_POOL_DISJOINT - AND UMF_BUILD_LEVEL_ZERO_PROVIDER) +if(UMF_BUILD_GPU_EXAMPLES AND UMF_BUILD_LEVEL_ZERO_PROVIDER) set(EXAMPLE_NAME umf_example_level_zero_shared_memory) add_umf_executable( NAME ${EXAMPLE_NAME} SRCS level_zero_shared_memory/level_zero_shared_memory.c common/examples_level_zero_helpers.c - LIBS disjoint_pool ze_loader umf) + LIBS 
ze_loader umf) target_include_directories( ${EXAMPLE_NAME} @@ -84,12 +82,11 @@ if(UMF_BUILD_GPU_EXAMPLES endif() else() message(STATUS "GPU Level Zero shared memory example requires " - "UMF_BUILD_GPU_EXAMPLES, UMF_BUILD_LEVEL_ZERO_PROVIDER and " - "UMF_BUILD_LIBUMF_POOL_DISJOINT to be turned ON - skipping") + "UMF_BUILD_GPU_EXAMPLES and UMF_BUILD_LEVEL_ZERO_PROVIDER " + "to be turned ON - skipping") endif() if(UMF_BUILD_GPU_EXAMPLES - AND UMF_BUILD_LIBUMF_POOL_DISJOINT AND UMF_BUILD_CUDA_PROVIDER AND UMF_CUDA_ENABLED) set(EXAMPLE_NAME umf_example_cuda_shared_memory) @@ -97,7 +94,7 @@ if(UMF_BUILD_GPU_EXAMPLES add_umf_executable( NAME ${EXAMPLE_NAME} SRCS cuda_shared_memory/cuda_shared_memory.c - LIBS disjoint_pool cuda umf) + LIBS cuda umf) target_include_directories( ${EXAMPLE_NAME} @@ -123,14 +120,13 @@ if(UMF_BUILD_GPU_EXAMPLES else() message( STATUS - "GPU CUDA shared memory example requires UMF_BUILD_GPU_EXAMPLES, UMF_BUILD_CUDA_PROVIDER, UMF_BUILD_LIBUMF_POOL_DISJOINT to be turned ON and installed CUDA libraries - skipping" + "GPU CUDA shared memory example requires UMF_BUILD_GPU_EXAMPLES and UMF_BUILD_CUDA_PROVIDER to be turned ON and installed CUDA libraries - skipping" ) endif() # TODO: it looks like there is some problem with IPC implementation in Level # Zero on windows if(UMF_BUILD_GPU_EXAMPLES - AND UMF_BUILD_LIBUMF_POOL_DISJOINT AND UMF_BUILD_LEVEL_ZERO_PROVIDER AND LINUX) set(EXAMPLE_NAME umf_example_ipc_level_zero) @@ -139,7 +135,7 @@ if(UMF_BUILD_GPU_EXAMPLES NAME ${EXAMPLE_NAME} SRCS ipc_level_zero/ipc_level_zero.c common/examples_level_zero_helpers.c - LIBS disjoint_pool ze_loader umf) + LIBS ze_loader umf) target_include_directories( ${EXAMPLE_NAME} @@ -174,7 +170,7 @@ if(UMF_BUILD_GPU_EXAMPLES else() message( STATUS - "IPC Level Zero example requires UMF_BUILD_GPU_EXAMPLES, UMF_BUILD_LEVEL_ZERO_PROVIDER and UMF_BUILD_LIBUMF_POOL_DISJOINT to be turned ON - skipping" + "IPC Level Zero example requires UMF_BUILD_GPU_EXAMPLES and UMF_BUILD_LEVEL_ZERO_PROVIDER to be turned ON - skipping" ) endif() diff --git a/examples/README.md b/examples/README.md index e7823347e..70d114a63 100644 --- a/examples/README.md +++ b/examples/README.md @@ -24,7 +24,7 @@ cleans up and exits with an error status. ### Requirements * Level Zero headers and libraries * compatible GPU with installed driver -* set UMF_BUILD_GPU_EXAMPLES, UMF_BUILD_LIBUMF_POOL_DISJOINT and UMF_BUILD_LEVEL_ZERO_PROVIDER CMake configuration flags to ON +* set UMF_BUILD_GPU_EXAMPLES and UMF_BUILD_LEVEL_ZERO_PROVIDER CMake configuration flags to ON ## IPC example with Level Zero memory provider This example demonstrates how to use UMF IPC API. The example creates two @@ -35,7 +35,7 @@ and build this example Level Zero development package should be installed. ### Requirements * Level Zero headers and libraries * compatible GPU with installed driver -* set UMF_BUILD_GPU_EXAMPLES, UMF_BUILD_LIBUMF_POOL_DISJOINT and UMF_BUILD_LEVEL_ZERO_PROVIDER CMake configuration flags to ON +* set UMF_BUILD_GPU_EXAMPLES and UMF_BUILD_LEVEL_ZERO_PROVIDER CMake configuration flags to ON ## IPC example with shared memory This example also demonstrates how to use UMF IPC API. 
The example creates two diff --git a/examples/cuda_shared_memory/CMakeLists.txt b/examples/cuda_shared_memory/CMakeLists.txt index dd8567c14..0e57ec607 100644 --- a/examples/cuda_shared_memory/CMakeLists.txt +++ b/examples/cuda_shared_memory/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright (C) 2024 Intel Corporation +# Copyright (C) 2024-2025 Intel Corporation # Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -55,9 +55,8 @@ target_link_directories( ${LIBHWLOC_LIBRARY_DIRS} ${CUDA_LIBRARY_DIRS}) target_link_options(${EXAMPLE_NAME} PRIVATE "-Wl,--start-group") -target_link_libraries( - ${EXAMPLE_NAME} PRIVATE stdc++ libdisjoint_pool.a ${CUDA_LIBRARIES} - ${LIBUMF_LIBRARIES}) +target_link_libraries(${EXAMPLE_NAME} PRIVATE stdc++ ${CUDA_LIBRARIES} + ${LIBUMF_LIBRARIES}) # an optional part - adds a test of this example add_test( diff --git a/examples/ipc_level_zero/CMakeLists.txt b/examples/ipc_level_zero/CMakeLists.txt index 273a88bb0..d672d3e92 100644 --- a/examples/ipc_level_zero/CMakeLists.txt +++ b/examples/ipc_level_zero/CMakeLists.txt @@ -53,8 +53,8 @@ target_include_directories(${EXAMPLE_NAME} PRIVATE ${LIBUMF_INCLUDE_DIRS} target_link_directories(${EXAMPLE_NAME} PRIVATE ${LIBUMF_LIBRARY_DIRS} ${LIBHWLOC_LIBRARY_DIRS}) target_link_options(${EXAMPLE_NAME} PRIVATE "-Wl,--start-group") -target_link_libraries(${EXAMPLE_NAME} PRIVATE stdc++ libdisjoint_pool.a - ze_loader ${LIBUMF_LIBRARIES}) +target_link_libraries(${EXAMPLE_NAME} PRIVATE stdc++ ze_loader + ${LIBUMF_LIBRARIES}) # an optional part - adds a test of this example add_test( diff --git a/examples/level_zero_shared_memory/CMakeLists.txt b/examples/level_zero_shared_memory/CMakeLists.txt index d05072ca2..f4aaf09e9 100644 --- a/examples/level_zero_shared_memory/CMakeLists.txt +++ b/examples/level_zero_shared_memory/CMakeLists.txt @@ -53,8 +53,8 @@ target_include_directories(${EXAMPLE_NAME} PRIVATE ${LIBUMF_INCLUDE_DIRS} target_link_directories(${EXAMPLE_NAME} PRIVATE ${LIBUMF_LIBRARY_DIRS} ${LIBHWLOC_LIBRARY_DIRS}) target_link_options(${EXAMPLE_NAME} PRIVATE "-Wl,--start-group") -target_link_libraries(${EXAMPLE_NAME} PRIVATE stdc++ libdisjoint_pool.a - ze_loader ${LIBUMF_LIBRARIES}) +target_link_libraries(${EXAMPLE_NAME} PRIVATE stdc++ ze_loader + ${LIBUMF_LIBRARIES}) # an optional part - adds a test of this example add_test( diff --git a/scripts/qemu/run-build.sh b/scripts/qemu/run-build.sh index c6314153c..724e6d7ff 100755 --- a/scripts/qemu/run-build.sh +++ b/scripts/qemu/run-build.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright (C) 2024 Intel Corporation +# Copyright (C) 2024-2025 Intel Corporation # Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -26,7 +26,6 @@ cmake .. 
\ -DUMF_BUILD_CUDA_PROVIDER=ON \ -DUMF_FORMAT_CODE_STYLE=OFF \ -DUMF_DEVELOPER_MODE=ON \ - -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON \ -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON \ -DUMF_BUILD_EXAMPLES=ON \ -DUMF_USE_COVERAGE=${COVERAGE} \ diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index c0072be7e..49fa2c5d8 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -85,8 +85,9 @@ set(UMF_SOURCES provider/provider_tracking.c critnib/critnib.c ravl/ravl.c - pool/pool_proxy.c + pool/pool_disjoint.c pool/pool_jemalloc.c + pool/pool_proxy.c pool/pool_scalable.c) if(UMF_POOL_JEMALLOC_ENABLED) diff --git a/src/base_alloc/base_alloc_global.c b/src/base_alloc/base_alloc_global.c index f709eab9d..f3b61566a 100644 --- a/src/base_alloc/base_alloc_global.c +++ b/src/base_alloc/base_alloc_global.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2024 Intel Corporation + * Copyright (C) 2024-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -208,7 +208,8 @@ void umf_ba_global_free(void *ptr) { } if (ba_is_destroyed) { - LOG_WARN("base_alloc: calling free after the base alloc is destroyed"); + LOG_WARN( + "base_alloc: calling free() after the base allocator is destroyed"); return; } diff --git a/src/libumf.def b/src/libumf.def index 98226dace..ce8820a8f 100644 --- a/src/libumf.def +++ b/src/libumf.def @@ -119,6 +119,18 @@ EXPORTS umfScalablePoolParamsSetKeepAllMemory ; Added in UMF_0.11 umfCUDAMemoryProviderParamsSetAllocFlags + umfDisjointPoolOps + umfDisjointPoolParamsCreate + umfDisjointPoolParamsDestroy + umfDisjointPoolParamsSetCapacity + umfDisjointPoolParamsSetMaxPoolableSize + umfDisjointPoolParamsSetMinBucketSize + umfDisjointPoolParamsSetName + umfDisjointPoolParamsSetSharedLimits + umfDisjointPoolParamsSetSlabMinSize + umfDisjointPoolParamsSetTrace + umfDisjointPoolSharedLimitsCreate + umfDisjointPoolSharedLimitsDestroy umfFixedMemoryProviderOps umfFixedMemoryProviderParamsCreate umfFixedMemoryProviderParamsDestroy diff --git a/src/libumf.map b/src/libumf.map index bbf664dcf..6582fd0f8 100644 --- a/src/libumf.map +++ b/src/libumf.map @@ -117,6 +117,18 @@ UMF_0.10 { UMF_0.11 { umfCUDAMemoryProviderParamsSetAllocFlags; + umfDisjointPoolOps; + umfDisjointPoolParamsCreate; + umfDisjointPoolParamsDestroy; + umfDisjointPoolParamsSetCapacity; + umfDisjointPoolParamsSetMaxPoolableSize; + umfDisjointPoolParamsSetMinBucketSize; + umfDisjointPoolParamsSetName; + umfDisjointPoolParamsSetSharedLimits; + umfDisjointPoolParamsSetSlabMinSize; + umfDisjointPoolParamsSetTrace; + umfDisjointPoolSharedLimitsCreate; + umfDisjointPoolSharedLimitsDestroy; umfFixedMemoryProviderOps; umfFixedMemoryProviderParamsCreate; umfFixedMemoryProviderParamsDestroy; diff --git a/src/pool/CMakeLists.txt b/src/pool/CMakeLists.txt index f54e70185..22aeab783 100644 --- a/src/pool/CMakeLists.txt +++ b/src/pool/CMakeLists.txt @@ -8,33 +8,3 @@ if(UMF_BUILD_SHARED_LIBRARY) endif() set(POOL_COMPILE_DEFINITIONS ${UMF_COMMON_COMPILE_DEFINITIONS}) - -# libumf_pool_disjoint -if(UMF_BUILD_LIBUMF_POOL_DISJOINT) - add_umf_library( - NAME disjoint_pool - TYPE STATIC - SRCS pool_disjoint.cpp ${POOL_EXTRA_SRCS} - LIBS ${POOL_EXTRA_LIBS}) - - target_compile_definitions(disjoint_pool - PRIVATE ${POOL_COMPILE_DEFINITIONS}) - - if(WINDOWS) - target_compile_options(disjoint_pool PRIVATE /DWIN32_LEAN_AND_MEAN - /DNOMINMAX) - endif() - - add_library(${PROJECT_NAME}::disjoint_pool ALIAS disjoint_pool) - - add_dependencies(disjoint_pool umf) - - 
target_link_libraries(disjoint_pool PRIVATE umf) - - target_include_directories( - disjoint_pool - PUBLIC $ - $) - - install(TARGETS disjoint_pool EXPORT ${PROJECT_NAME}-targets) -endif() diff --git a/src/pool/pool_disjoint.c b/src/pool/pool_disjoint.c new file mode 100644 index 000000000..e2288e49e --- /dev/null +++ b/src/pool/pool_disjoint.c @@ -0,0 +1,1123 @@ +/* + * Copyright (C) 2022-2025 Intel Corporation + * + * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +*/ + +#include "pool_disjoint_internal.h" + +// Temporary solution for disabling memory poisoning. This is needed because +// AddressSanitizer does not support memory poisoning for GPU allocations. +// More info: https://github.com/oneapi-src/unified-memory-framework/issues/634 +#ifndef POISON_MEMORY +#undef __SANITIZE_ADDRESS__ +#endif +#include "utils_sanitizers.h" + +// Forward declarations +static slab_t *create_slab(bucket_t *bucket, bool full_size); +static void destroy_slab(slab_t *slab); + +static void bucket_update_stats(bucket_t *bucket, int in_use, int in_pool); +static bool bucket_can_pool(bucket_t *bucket); +static void bucket_decrement_pool(bucket_t *bucket, bool *from_pool); +static slab_list_item_t *bucket_get_avail_slab(bucket_t *bucket, + bool *from_pool); +static slab_list_item_t *bucket_get_avail_full_slab(bucket_t *bucket, + bool *from_pool); + +static __TLS umf_result_t TLS_last_allocation_error; + +// Allocations are a minimum of 4KB/64KB/2MB even when a smaller size is +// requested. The implementation distinguishes between allocations of size +// ChunkCutOff = (minimum-alloc-size / 2) and those that are larger. +// Allocation requests smaller than ChunkCutoff use chunks taken from a single +// coarse-grain allocation. Thus, for example, for a 64KB minimum allocation +// size, and 8-byte allocations, only 1 in ~8000 requests results in a new +// coarse-grain allocation. Freeing results only in a chunk of a larger +// allocation to be marked as available and no real return to the system. An +// allocation is returned to the system only when all chunks in the larger +// allocation are freed by the program. Allocations larger than ChunkCutOff use +// a separate coarse-grain allocation for each request. These are subject to +// "pooling". That is, when such an allocation is freed by the program it is +// retained in a pool. The pool is available for future allocations, which means +// there are fewer actual coarse-grain allocations/deallocations. + +// The largest size which is allocated via the allocator. +// Allocations with size > CutOff bypass the pool and +// go directly to the provider. 
+static size_t CutOff = (size_t)1 << 31; // 2GB + +static size_t bucket_slab_min_size(bucket_t *bucket) { + return bucket->pool->params.slab_min_size; +} + +static size_t bucket_slab_alloc_size(bucket_t *bucket) { + return utils_max(bucket->size, bucket_slab_min_size(bucket)); +} + +static slab_t *create_slab(bucket_t *bucket, bool full_size) { + assert(bucket); + + umf_result_t res = UMF_RESULT_SUCCESS; + umf_memory_provider_handle_t provider = bucket->pool->provider; + + slab_t *slab = umf_ba_global_alloc(sizeof(*slab)); + if (slab == NULL) { + LOG_ERR("allocation of new slab failed!"); + return NULL; + } + + slab->num_chunks_allocated = 0; + slab->first_free_chunk_idx = 0; + slab->bucket = bucket; + + slab->iter = umf_ba_global_alloc(sizeof(*slab->iter)); + if (slab->iter == NULL) { + LOG_ERR("allocation of new slab iter failed!"); + goto free_slab; + } + slab->iter->val = slab; + slab->iter->prev = slab->iter->next = NULL; + + if (full_size) { + slab->num_chunks_total = 0; + slab->chunks = NULL; + } else { + slab->num_chunks_total = bucket_slab_min_size(bucket) / bucket->size; + slab->chunks = + umf_ba_global_alloc(sizeof(bool) * slab->num_chunks_total); + if (slab->chunks == NULL) { + LOG_ERR("allocation of slab chunks failed!"); + goto free_slab_iter; + } + memset(slab->chunks, 0, sizeof(bool) * slab->num_chunks_total); + } + // if slab_min_size is not a multiple of bucket size, we would have some + // padding at the end of the slab + slab->slab_size = bucket_slab_alloc_size(bucket); + + // TODO not true + // NOTE: originally slabs memory were allocated without alignment + // with this registering a slab is simpler and doesn't require multimap + res = umfMemoryProviderAlloc(provider, slab->slab_size, 0, &slab->mem_ptr); + if (res != UMF_RESULT_SUCCESS) { + LOG_ERR("allocation of slab data failed!"); + goto free_slab_chunks; + } + + // TODO + // ASSERT_IS_ALIGNED((uintptr_t)slab->mem_ptr, bucket->size); + + // raw allocation is not available for user so mark it as inaccessible + utils_annotate_memory_inaccessible(slab->mem_ptr, slab->slab_size); + + LOG_DEBUG("bucket: %p, slab_size: %zu", (void *)bucket, slab->slab_size); + return slab; + +free_slab_chunks: + umf_ba_global_free(slab->chunks); + +free_slab_iter: + umf_ba_global_free(slab->iter); + +free_slab: + umf_ba_global_free(slab); + return NULL; +} + +static void destroy_slab(slab_t *slab) { + LOG_DEBUG("bucket: %p, slab_size: %zu", (void *)slab->bucket, + slab->slab_size); + + umf_memory_provider_handle_t provider = slab->bucket->pool->provider; + umf_result_t res = + umfMemoryProviderFree(provider, slab->mem_ptr, slab->slab_size); + if (res != UMF_RESULT_SUCCESS) { + LOG_ERR("deallocation of slab data failed!"); + } + + umf_ba_global_free(slab->chunks); + umf_ba_global_free(slab->iter); + umf_ba_global_free(slab); +} + +// return the index of the first available chunk, SIZE_MAX otherwise +static size_t slab_find_first_available_chunk_idx(const slab_t *slab) { + // use the first free chunk index as a hint for the search + for (bool *chunk = slab->chunks + slab->first_free_chunk_idx; + chunk != slab->chunks + slab->num_chunks_total; chunk++) { + + // false means not used + if (*chunk == false) { + size_t idx = chunk - slab->chunks; + LOG_DEBUG("idx: %zu", idx); + return idx; + } + } + + LOG_DEBUG("idx: SIZE_MAX"); + return SIZE_MAX; +} + +static void *slab_get_chunk(slab_t *slab) { + // slab has to be allocated in chunk mode + assert(slab->chunks && slab->num_chunks_total > 0); + + // free chunk must exist, otherwise we would 
have allocated another slab + const size_t chunk_idx = slab_find_first_available_chunk_idx(slab); + assert(chunk_idx != SIZE_MAX); + + void *free_chunk = + (void *)((uintptr_t)slab->mem_ptr + chunk_idx * slab->bucket->size); + + // mark chunk as used + slab->chunks[chunk_idx] = true; + slab->num_chunks_allocated += 1; + + // use the found index as the next hint + slab->first_free_chunk_idx = chunk_idx + 1; + + return free_chunk; +} + +static void *slab_get(const slab_t *slab) { return slab->mem_ptr; } +static void *slab_get_end(const slab_t *slab) { + return (void *)((uintptr_t)slab->mem_ptr + + bucket_slab_min_size(slab->bucket)); +} + +static void slab_free_chunk(slab_t *slab, void *ptr) { + // This method should be called through bucket (since we might remove the + // slab as a result), therefore all locks are done on bucket level. + + // Make sure that we're in the right slab + assert(ptr >= slab_get(slab) && ptr < slab_get_end(slab)); + + // Even if the pointer p was previously aligned, it's still inside the + // corresponding chunk, so we get the correct index here. + size_t chunk_idx = + ((uintptr_t)ptr - (uintptr_t)slab->mem_ptr) / slab->bucket->size; + + // Make sure that the chunk was allocated + assert(slab->chunks[chunk_idx] && "double free detected"); + slab->chunks[chunk_idx] = false; + slab->num_chunks_allocated -= 1; + + if (chunk_idx < slab->first_free_chunk_idx) { + slab->first_free_chunk_idx = chunk_idx; + } + + LOG_DEBUG("chunk_idx: %zu, num_chunks_allocated: %zu, " + "first_free_chunk_idx: %zu", + chunk_idx, slab->num_chunks_allocated, + slab->first_free_chunk_idx); +} + +static bool slab_has_avail(const slab_t *slab) { + return slab->num_chunks_allocated < slab->num_chunks_total; +} + +static umf_result_t slab_reg(slab_t *slab) { + bucket_t *bucket = slab->bucket; + disjoint_pool_t *pool = bucket->pool; + critnib *slabs = pool->known_slabs; + + // NOTE: changed vs original DisjointPool implementation - currently slab + // is already aligned to bucket size. 
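+    // Registering the slab's base address here lets disjoint_pool_free() map
+    // any user pointer back to its slab later with a single critnib_find_le()
+    // lookup.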
+ void *slab_addr = slab_get(slab); + // TODO ASSERT_IS_ALIGNED((uintptr_t)slab_addr, bucket->size); + LOG_DEBUG("slab: %p, start: %p", (void *)slab, slab_addr); + + // NOTE: we don't need to lock the slabs map as the critnib already has a + // lock inside it + int ret = critnib_insert(slabs, (uintptr_t)slab_addr, slab, 0); + umf_result_t res = UMF_RESULT_SUCCESS; + if (ret == ENOMEM) { + LOG_ERR("register failed because of out of memory!"); + res = UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } else if (ret == EEXIST) { + LOG_ERR("register failed because the address is already registered!"); + res = UMF_RESULT_ERROR_UNKNOWN; + } + + return res; +} + +static umf_result_t slab_unreg(slab_t *slab) { + bucket_t *bucket = slab->bucket; + disjoint_pool_t *pool = bucket->pool; + critnib *slabs = pool->known_slabs; + + void *slab_addr = slab_get(slab); + // TODO ASSERT_IS_ALIGNED((uintptr_t)slab_addr, bucket->size); + LOG_DEBUG("slab: %p, start: %p", (void *)slab, slab_addr); + + critnib_remove(slabs, (uintptr_t)slab_addr); + + return UMF_RESULT_SUCCESS; +} + +static bucket_t * +create_bucket(size_t sz, disjoint_pool_t *pool, + umf_disjoint_pool_shared_limits_handle_t shared_limits) { + + bucket_t *bucket = umf_ba_global_alloc(sizeof(*bucket)); + if (bucket == NULL) { + LOG_ERR("allocation of new bucket failed!"); + return NULL; + } + + memset(bucket, 0, sizeof(*bucket)); + bucket->size = sz; + bucket->pool = pool; + bucket->shared_limits = shared_limits; + + utils_mutex_init(&bucket->bucket_lock); + return bucket; +} + +static void destroy_bucket(bucket_t *bucket) { + // use an extra tmp to store the next iterator before destroying the slab + slab_list_item_t *it = NULL, *tmp = NULL; + LL_FOREACH_SAFE(bucket->available_slabs, it, tmp) { + LL_DELETE(bucket->available_slabs, it); + destroy_slab(it->val); + } + + LL_FOREACH_SAFE(bucket->unavailable_slabs, it, tmp) { + LL_DELETE(bucket->unavailable_slabs, it); + destroy_slab(it->val); + } + + utils_mutex_destroy_not_free(&bucket->bucket_lock); + umf_ba_global_free(bucket); +} + +static size_t slab_get_num_free_chunks(const slab_t *slab) { + return slab->num_chunks_total - slab->num_chunks_allocated; +} + +// NOTE: this function must be called under bucket->bucket_lock +static void bucket_free_chunk(bucket_t *bucket, void *ptr, slab_t *slab, + bool *to_pool) { + slab_free_chunk(slab, ptr); + + // in case if the slab was previously full and now has single available + // chunk, it should be moved to the list of available slabs + if (slab_get_num_free_chunks(slab) == 1) { + slab_list_item_t *slab_it = slab->iter; + assert(slab_it->val != NULL); + DL_DELETE(bucket->unavailable_slabs, slab_it); + DL_PREPEND(bucket->available_slabs, slab_it); + bucket->available_slabs_num++; + } + + // check if slab is empty, and pool it if we can + if (slab->num_chunks_allocated == 0) { + // The slab is now empty. + // If the pool has capacity then put the slab in the pool. + // The to_pool parameter indicates whether the slab will be put in the + // pool or freed. 
+ *to_pool = bucket_can_pool(bucket); + if (*to_pool == false) { + // remove slab + slab_list_item_t *slab_it = slab->iter; + assert(slab_it->val != NULL); + slab_unreg(slab_it->val); + DL_DELETE(bucket->available_slabs, slab_it); + bucket->available_slabs_num--; + destroy_slab(slab_it->val); + } + } else { + // return this chunk to the pool + *to_pool = true; + } +} + +// NOTE: this function must be called under bucket->bucket_lock +static void *bucket_get_free_chunk(bucket_t *bucket, bool *from_pool) { + slab_list_item_t *slab_it = bucket_get_avail_slab(bucket, from_pool); + if (slab_it == NULL) { + return NULL; + } + + void *free_chunk = slab_get_chunk(slab_it->val); + + // if we allocated last free chunk from the slab and now it is full, move + // it to unavailable slabs and update its iterator + if (!(slab_has_avail(slab_it->val))) { + DL_DELETE(bucket->available_slabs, slab_it); + bucket->available_slabs_num--; + slab_it->prev = NULL; + DL_PREPEND(bucket->unavailable_slabs, slab_it); + } + + return free_chunk; +} + +static size_t bucket_chunk_cut_off(bucket_t *bucket) { + return bucket_slab_min_size(bucket) / 2; +} + +static slab_t *bucket_create_slab(bucket_t *bucket, bool full_size) { + slab_t *slab = create_slab(bucket, full_size); + if (slab == NULL) { + LOG_ERR("create_slab failed!") + return NULL; + } + + umf_result_t res = slab_reg(slab); + if (res != UMF_RESULT_SUCCESS) { + LOG_ERR("slab_reg failed!") + destroy_slab(slab); + return NULL; + } + + DL_PREPEND(bucket->available_slabs, slab->iter); + bucket->available_slabs_num++; + bucket_update_stats(bucket, 1, 0); + + return slab; +} + +static slab_list_item_t *bucket_get_avail_full_slab(bucket_t *bucket, + bool *from_pool) { + // return a slab that will be used for a single allocation + if (bucket->available_slabs == NULL) { + bucket_create_slab(bucket, true /* full size */); + *from_pool = false; + } else { + bucket_decrement_pool(bucket, from_pool); + } + + return bucket->available_slabs; +} + +// NOTE: this function must be called under bucket->bucket_lock +static void *bucket_get_free_slab(bucket_t *bucket, bool *from_pool) { + slab_list_item_t *slab_it = bucket_get_avail_full_slab(bucket, from_pool); + if (slab_it == NULL) { + return NULL; + } + + slab_t *slab = slab_it->val; + void *ptr = slab_get(slab); + + DL_DELETE(bucket->available_slabs, slab_it); + bucket->available_slabs_num--; + slab_it->prev = NULL; + DL_PREPEND(bucket->unavailable_slabs, slab_it); + + return ptr; +} + +// NOTE: this function must be called under bucket->bucket_lock +static void bucket_free_slab(bucket_t *bucket, slab_t *slab, bool *to_pool) { + slab_list_item_t *slab_it = slab->iter; + assert(slab_it->val != NULL); + *to_pool = bucket_can_pool(bucket); + if (*to_pool) { + DL_DELETE(bucket->unavailable_slabs, slab_it); + slab_it->prev = NULL; + DL_PREPEND(bucket->available_slabs, slab_it); + bucket->available_slabs_num++; + } else { + slab_unreg(slab_it->val); + DL_DELETE(bucket->unavailable_slabs, slab_it); + destroy_slab(slab_it->val); + } +} + +static slab_list_item_t *bucket_get_avail_slab(bucket_t *bucket, + bool *from_pool) { + if (bucket->available_slabs == NULL) { + bucket_create_slab(bucket, false /* chunked */); + *from_pool = false; + } else { + slab_t *slab = bucket->available_slabs->val; + if (slab->num_chunks_allocated == 0) { + // If this was an empty slab, it was in the pool. + // Now it is no longer in the pool, so update count. 
+ --bucket->chunked_slabs_in_pool; + bucket_decrement_pool(bucket, from_pool); + } else { + // Allocation from existing slab is treated as from pool for statistics. + *from_pool = true; + } + } + + return bucket->available_slabs; +} + +static size_t bucket_capacity(bucket_t *bucket) { + // For buckets used in chunked mode, just one slab in pool is sufficient. + // For larger buckets, the capacity could be more and is adjustable. + if (bucket->size <= bucket_chunk_cut_off(bucket)) { + return 1; + } else { + return bucket->pool->params.capacity; + } +} + +static void bucket_update_stats(bucket_t *bucket, int in_use, int in_pool) { + if (bucket->pool->params.pool_trace == 0) { + return; + } + + bucket->curr_slabs_in_use += in_use; + bucket->max_slabs_in_use = + utils_max(bucket->curr_slabs_in_use, bucket->max_slabs_in_use); + + bucket->curr_slabs_in_pool += in_pool; + bucket->max_slabs_in_pool = + utils_max(bucket->curr_slabs_in_pool, bucket->max_slabs_in_pool); + + // Increment or decrement current pool sizes based on whether + // slab was added to or removed from pool. + bucket->pool->params.cur_pool_size += + in_pool * bucket_slab_alloc_size(bucket); +} + +static void bucket_decrement_pool(bucket_t *bucket, bool *from_pool) { + // If a slab was available in the pool then note that the current pooled + // size has reduced by the size of a slab in this bucket. + *from_pool = true; + bucket_update_stats(bucket, 1, -1); + utils_fetch_and_add64(&bucket->shared_limits->total_size, + -(long long)bucket_slab_alloc_size(bucket)); +} + +static bool bucket_can_pool(bucket_t *bucket) { + size_t new_free_slabs_in_bucket; + + // check if this bucket is used in chunked form or as full slabs + bool chunked_bucket = bucket->size <= bucket_chunk_cut_off(bucket); + if (chunked_bucket) { + new_free_slabs_in_bucket = bucket->chunked_slabs_in_pool + 1; + } else { + new_free_slabs_in_bucket = bucket->available_slabs_num + 1; + } + + // we keep at most params.capacity slabs in the pool + if (bucket_capacity(bucket) >= new_free_slabs_in_bucket) { + size_t pool_size = 0; + utils_atomic_load_acquire(&bucket->shared_limits->total_size, + &pool_size); + while (true) { + size_t new_pool_size = pool_size + bucket_slab_alloc_size(bucket); + + if (bucket->shared_limits->max_size < new_pool_size) { + break; + } + + if (utils_compare_exchange(&bucket->shared_limits->total_size, + &pool_size, &new_pool_size)) { + if (chunked_bucket) { + ++bucket->chunked_slabs_in_pool; + } + + bucket_update_stats(bucket, -1, 1); + return true; + } + } + } + + bucket_update_stats(bucket, -1, 0); + return false; +} + +static size_t size_to_idx(disjoint_pool_t *pool, size_t size) { + assert(size <= CutOff && "Unexpected size"); + assert(size > 0 && "Unexpected size"); + + size_t min_bucket_size = (size_t)1 << pool->min_bucket_size_exp; + if (size < min_bucket_size) { + return 0; + } + + // get the position of the leftmost set bit + size_t position = getLeftmostSetBitPos(size); + + bool is_power_of_2 = 0 == (size & (size - 1)); + bool larger_than_halfway_between_powers_of_2 = + !is_power_of_2 && + (bool)((size - 1) & ((uint64_t)(1) << (position - 1))); + size_t index = (position - pool->min_bucket_size_exp) * 2 + + (int)(!is_power_of_2) + + (int)larger_than_halfway_between_powers_of_2; + + return index; +} + +static umf_disjoint_pool_shared_limits_t * +disjoint_pool_get_limits(disjoint_pool_t *pool) { + if (pool->params.shared_limits) { + return pool->params.shared_limits; + } else { + return pool->default_shared_limits; + } +} + +static 
bucket_t *disjoint_pool_find_bucket(disjoint_pool_t *pool, size_t size) { + size_t calculated_idx = size_to_idx(pool, size); + return pool->buckets[calculated_idx]; +} + +static void disjoint_pool_print_stats(disjoint_pool_t *pool) { + size_t high_bucket_size = 0; + size_t high_peak_slabs_in_use = 0; + const char *name = pool->params.name; + + LOG_DEBUG("\"%s\" pool memory statistics", name); + LOG_DEBUG("%14s %12s %12s %18s %20s %21s", "Bucket Size", "Allocs", "Frees", + "Allocs from Pool", "Peak Slabs in Use", "Peak Slabs in Pool"); + + for (size_t i = 0; i < pool->buckets_num; i++) { + bucket_t *bucket = pool->buckets[i]; + if (bucket->alloc_count) { + LOG_DEBUG("%14zu %12zu %12zu %18zu %20zu %21zu", bucket->size, + bucket->alloc_count, bucket->free_count, + bucket->alloc_pool_count, bucket->max_slabs_in_use, + bucket->max_slabs_in_pool); + high_bucket_size = + utils_max(bucket_slab_alloc_size(bucket), high_bucket_size); + } + high_peak_slabs_in_use = + utils_max(bucket->max_slabs_in_use, high_peak_slabs_in_use); + } + + LOG_DEBUG("current pool size: %zu", + disjoint_pool_get_limits(pool)->total_size); + LOG_DEBUG("suggested setting=;%c%s:%zu,%zu,64K", (char)tolower(name[0]), + (name + 1), high_bucket_size, high_peak_slabs_in_use); +} + +static void *disjoint_pool_allocate(disjoint_pool_t *pool, size_t size) { + if (size == 0) { + return NULL; + } + + void *ptr = NULL; + + if (size > pool->params.max_poolable_size) { + umf_result_t ret = + umfMemoryProviderAlloc(pool->provider, size, 0, &ptr); + if (ret != UMF_RESULT_SUCCESS) { + TLS_last_allocation_error = ret; + LOG_ERR("allocation from the memory provider failed"); + return NULL; + } + + utils_annotate_memory_undefined(ptr, size); + return ptr; + } + + bucket_t *bucket = disjoint_pool_find_bucket(pool, size); + + utils_mutex_lock(&bucket->bucket_lock); + + bool from_pool = false; + if (size > bucket_chunk_cut_off(bucket)) { + ptr = bucket_get_free_slab(bucket, &from_pool); + } else { + ptr = bucket_get_free_chunk(bucket, &from_pool); + } + + if (ptr == NULL) { + TLS_last_allocation_error = UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + utils_mutex_unlock(&bucket->bucket_lock); + return NULL; + } + + if (pool->params.pool_trace > 1) { + // update stats + ++bucket->alloc_count; + if (from_pool) { + ++bucket->alloc_pool_count; + } + } + + utils_mutex_unlock(&bucket->bucket_lock); + + if (pool->params.pool_trace > 2) { + LOG_DEBUG("Allocated %8zu %s bytes from %s -> %p", size, + pool->params.name, (from_pool ? "pool" : "provider"), ptr); + } + + VALGRIND_DO_MEMPOOL_ALLOC(pool, ptr, size); + utils_annotate_memory_undefined(ptr, bucket->size); + return ptr; +} + +umf_result_t disjoint_pool_initialize(umf_memory_provider_handle_t provider, + void *params, void **ppPool) { + // TODO set defaults when user pass the NULL as params + if (!provider || !params || !ppPool) { + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + disjoint_pool_t *disjoint_pool = + umf_ba_global_alloc(sizeof(*disjoint_pool)); + if (!disjoint_pool) { + return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + umf_disjoint_pool_params_t *dp_params = + (umf_disjoint_pool_params_t *)params; + + // min_bucket_size parameter must be a power of 2 for bucket sizes + // to generate correctly. 
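+    // Illustrative example: with min_bucket_size == 64 the generated bucket
+    // sizes are 64, 96, 128, 192, 256, ..., so a 96-byte request is served
+    // from the 96-byte bucket while a 100-byte request falls into the
+    // 128-byte bucket.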
+ if (!dp_params->min_bucket_size || + !IS_POWER_OF_2(dp_params->min_bucket_size)) { + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + VALGRIND_DO_CREATE_MEMPOOL(disjoint_pool, 0, 0); + + disjoint_pool->provider = provider; + disjoint_pool->params = *dp_params; + + disjoint_pool->known_slabs = critnib_new(); + + // Generate buckets sized such as: 64, 96, 128, 192, ..., CutOff. + // Powers of 2 and the value halfway between the powers of 2. + size_t Size1 = disjoint_pool->params.min_bucket_size; + + // min_bucket_size cannot be larger than CutOff. + Size1 = utils_min(Size1, CutOff); + + // Buckets sized smaller than the bucket default size- 8 aren't needed. + Size1 = utils_max(Size1, UMF_DISJOINT_POOL_MIN_BUCKET_DEFAULT_SIZE); + + // Calculate the exponent for min_bucket_size used for finding buckets. + disjoint_pool->min_bucket_size_exp = (size_t)log2Utils(Size1); + disjoint_pool->default_shared_limits = + umfDisjointPoolSharedLimitsCreate(SIZE_MAX); + + // count number of buckets, start from 1 + disjoint_pool->buckets_num = 1; + size_t Size2 = Size1 + Size1 / 2; + size_t ts2 = Size2, ts1 = Size1; + for (; Size2 < CutOff; Size1 *= 2, Size2 *= 2) { + disjoint_pool->buckets_num += 2; + } + disjoint_pool->buckets = + umf_ba_global_alloc(sizeof(bucket_t *) * disjoint_pool->buckets_num); + + int i = 0; + Size1 = ts1; + Size2 = ts2; + for (; Size2 < CutOff; Size1 *= 2, Size2 *= 2, i += 2) { + disjoint_pool->buckets[i] = create_bucket( + Size1, disjoint_pool, disjoint_pool_get_limits(disjoint_pool)); + disjoint_pool->buckets[i + 1] = create_bucket( + Size2, disjoint_pool, disjoint_pool_get_limits(disjoint_pool)); + } + disjoint_pool->buckets[i] = create_bucket( + CutOff, disjoint_pool, disjoint_pool_get_limits(disjoint_pool)); + + umf_result_t ret = umfMemoryProviderGetMinPageSize( + provider, NULL, &disjoint_pool->provider_min_page_size); + if (ret != UMF_RESULT_SUCCESS) { + disjoint_pool->provider_min_page_size = 0; + } + + *ppPool = (void *)disjoint_pool; + + return UMF_RESULT_SUCCESS; +} + +void *disjoint_pool_malloc(void *pool, size_t size) { + disjoint_pool_t *hPool = (disjoint_pool_t *)pool; + void *ptr = disjoint_pool_allocate(hPool, size); + + return ptr; +} + +void *disjoint_pool_calloc(void *pool, size_t num, size_t size) { + (void)pool; + (void)num; + (void)size; + + // Not supported + TLS_last_allocation_error = UMF_RESULT_ERROR_NOT_SUPPORTED; + return NULL; +} + +void *disjoint_pool_realloc(void *pool, void *ptr, size_t size) { + (void)pool; + (void)ptr; + (void)size; + + // Not supported + TLS_last_allocation_error = UMF_RESULT_ERROR_NOT_SUPPORTED; + return NULL; +} + +void *disjoint_pool_aligned_malloc(void *pool, size_t size, size_t alignment) { + disjoint_pool_t *disjoint_pool = (disjoint_pool_t *)pool; + + void *ptr = NULL; + + if (size == 0) { + return NULL; + } + + if (alignment <= 1) { + return disjoint_pool_allocate(pool, size); + } + + size_t aligned_size; + if (alignment <= disjoint_pool->provider_min_page_size) { + // This allocation will be served from a Bucket which size is multiple + // of Alignment and Slab address is aligned to provider_min_page_size + // so the address will be properly aligned. + aligned_size = (size > 1) ? ALIGN_UP_SAFE(size, alignment) : alignment; + } else { + // Slabs are only aligned to provider_min_page_size, we need to compensate + // for that in case the allocation is within pooling limit. 
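+        // Over-allocating by (alignment - 1) bytes guarantees that the
+        // ALIGN_UP_SAFE()-adjusted pointer returned below still falls within
+        // the chunk handed out by the bucket.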
+ // TODO: consider creating properly-aligned Slabs on demand + aligned_size = size + alignment - 1; + } + + // Check if requested allocation size is within pooling limit. + // If not, just request aligned pointer from the system. + if (aligned_size > disjoint_pool->params.max_poolable_size) { + + umf_result_t ret = umfMemoryProviderAlloc(disjoint_pool->provider, size, + alignment, &ptr); + if (ret != UMF_RESULT_SUCCESS) { + TLS_last_allocation_error = ret; + LOG_ERR("allocation from the memory provider failed"); + return NULL; + } + + assert(ptr); + utils_annotate_memory_undefined(ptr, size); + return ptr; + } + + bool from_pool = false; + bucket_t *bucket = disjoint_pool_find_bucket(pool, aligned_size); + + utils_mutex_lock(&bucket->bucket_lock); + + if (aligned_size > bucket_chunk_cut_off(bucket)) { + ptr = bucket_get_free_slab(bucket, &from_pool); + } else { + ptr = bucket_get_free_chunk(bucket, &from_pool); + } + + if (ptr == NULL) { + TLS_last_allocation_error = UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + utils_mutex_unlock(&bucket->bucket_lock); + return NULL; + } + + if (disjoint_pool->params.pool_trace > 1) { + // update stats + ++bucket->alloc_count; + if (from_pool) { + ++bucket->alloc_pool_count; + } + } + + utils_mutex_unlock(&bucket->bucket_lock); + + if (disjoint_pool->params.pool_trace > 2) { + LOG_DEBUG("Allocated %8zu %s bytes aligned at %zu from %s -> %p", size, + disjoint_pool->params.name, alignment, + (from_pool ? "pool" : "provider"), ptr); + } + + void *aligned_ptr = (void *)ALIGN_UP_SAFE((size_t)ptr, alignment); + VALGRIND_DO_MEMPOOL_ALLOC(disjoint_pool, aligned_ptr, size); + utils_annotate_memory_undefined(aligned_ptr, size); + return aligned_ptr; +} + +size_t disjoint_pool_malloc_usable_size(void *pool, void *ptr) { + (void)pool; + (void)ptr; + + // Not supported + return 0; +} + +umf_result_t disjoint_pool_free(void *pool, void *ptr) { + disjoint_pool_t *disjoint_pool = (disjoint_pool_t *)pool; + if (ptr == NULL) { + return UMF_RESULT_SUCCESS; + } + + // check if given pointer is allocated inside any Disjoint Pool slab + slab_t *slab = + (slab_t *)critnib_find_le(disjoint_pool->known_slabs, (uintptr_t)ptr); + + if (slab == NULL || ptr >= slab_get_end(slab)) { + + // regular free + umf_alloc_info_t allocInfo = {NULL, 0, NULL}; + umf_result_t ret = umfMemoryTrackerGetAllocInfo(ptr, &allocInfo); + if (ret != UMF_RESULT_SUCCESS) { + TLS_last_allocation_error = ret; + LOG_ERR("failed to get allocation info from the memory tracker"); + return ret; + } + + size_t size = allocInfo.baseSize; + umf_memory_provider_handle_t provider = disjoint_pool->provider; + ret = umfMemoryProviderFree(provider, ptr, size); + if (ret != UMF_RESULT_SUCCESS) { + TLS_last_allocation_error = ret; + LOG_ERR("deallocation from the memory provider failed"); + } + + return ret; + } + + bool to_pool = false; + + if (ptr < slab_get(slab) || ptr >= slab_get_end(slab)) { + assert(0); + return UMF_RESULT_ERROR_UNKNOWN; + } + + // The slab object won't be deleted until it's removed from the map which is + // protected by the lock, so it's safe to access it here. 
+ + bucket_t *bucket = slab->bucket; + + VALGRIND_DO_MEMPOOL_FREE(pool, ptr); + utils_mutex_lock(&bucket->bucket_lock); + + utils_annotate_memory_inaccessible(ptr, bucket->size); + if (bucket->size <= bucket_chunk_cut_off(bucket)) { + bucket_free_chunk(bucket, ptr, slab, &to_pool); + } else { + bucket_free_slab(bucket, slab, &to_pool); + } + + if (disjoint_pool->params.pool_trace > 1) { + bucket->free_count++; + } + + utils_mutex_unlock(&bucket->bucket_lock); + + if (disjoint_pool->params.pool_trace > 2) { + const char *name = disjoint_pool->params.name; + LOG_DEBUG("freed %s %p to %s, current total pool size: %zu, current " + "pool size for %s: %zu", + name, ptr, (to_pool ? "pool" : "provider"), + disjoint_pool_get_limits(disjoint_pool)->total_size, name, + disjoint_pool->params.cur_pool_size); + } + + return UMF_RESULT_SUCCESS; +} + +umf_result_t disjoint_pool_get_last_allocation_error(void *pool) { + (void)pool; + + return TLS_last_allocation_error; +} + +// Define destructor for use with unique_ptr +void disjoint_pool_finalize(void *pool) { + + disjoint_pool_t *hPool = (disjoint_pool_t *)pool; + + if (hPool->params.pool_trace > 1) { + disjoint_pool_print_stats(hPool); + } + + for (size_t i = 0; i < hPool->buckets_num; i++) { + destroy_bucket(hPool->buckets[i]); + } + + VALGRIND_DO_DESTROY_MEMPOOL(hPool); + + umfDisjointPoolSharedLimitsDestroy(hPool->default_shared_limits); + critnib_delete(hPool->known_slabs); + + umf_ba_global_free(hPool); +} + +static umf_memory_pool_ops_t UMF_DISJOINT_POOL_OPS = { + .version = UMF_VERSION_CURRENT, + .initialize = disjoint_pool_initialize, + .finalize = disjoint_pool_finalize, + .malloc = disjoint_pool_malloc, + .calloc = disjoint_pool_calloc, + .realloc = disjoint_pool_realloc, + .aligned_malloc = disjoint_pool_aligned_malloc, + .malloc_usable_size = disjoint_pool_malloc_usable_size, + .free = disjoint_pool_free, + .get_last_allocation_error = disjoint_pool_get_last_allocation_error, +}; + +umf_memory_pool_ops_t *umfDisjointPoolOps(void) { + return &UMF_DISJOINT_POOL_OPS; +} + +umf_disjoint_pool_shared_limits_t * +umfDisjointPoolSharedLimitsCreate(size_t max_size) { + umf_disjoint_pool_shared_limits_t *ptr = + umf_ba_global_alloc(sizeof(umf_disjoint_pool_shared_limits_t)); + //umf_ba_global_alloc(sizeof(*ptr)); + ptr->max_size = max_size; + ptr->total_size = 0; + return ptr; +} + +void umfDisjointPoolSharedLimitsDestroy( + umf_disjoint_pool_shared_limits_t *limits) { + umf_ba_global_free(limits); +} + +umf_result_t +umfDisjointPoolParamsCreate(umf_disjoint_pool_params_handle_t *hParams) { + static const char *DEFAULT_NAME = "disjoint_pool"; + + if (!hParams) { + LOG_ERR("disjoint pool params handle is NULL"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + umf_disjoint_pool_params_handle_t params = + umf_ba_global_alloc(sizeof(*params)); + if (params == NULL) { + LOG_ERR("cannot allocate memory for disjoint pool params"); + return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + params->slab_min_size = 0; + params->max_poolable_size = 0; + params->capacity = 0; + params->min_bucket_size = UMF_DISJOINT_POOL_MIN_BUCKET_DEFAULT_SIZE; + params->cur_pool_size = 0; + params->pool_trace = 0; + params->shared_limits = NULL; + params->name = NULL; + + umf_result_t ret = umfDisjointPoolParamsSetName(params, DEFAULT_NAME); + if (ret != UMF_RESULT_SUCCESS) { + umf_ba_global_free(params); + return ret; + } + + *hParams = params; + + return UMF_RESULT_SUCCESS; +} + +umf_result_t +umfDisjointPoolParamsDestroy(umf_disjoint_pool_params_handle_t hParams) { + // NOTE: 
dereferencing hParams when BA is already destroyed leads to crash + if (hParams && !umf_ba_global_is_destroyed()) { + umf_ba_global_free(hParams->name); + umf_ba_global_free(hParams); + } + + return UMF_RESULT_SUCCESS; +} + +umf_result_t +umfDisjointPoolParamsSetSlabMinSize(umf_disjoint_pool_params_handle_t hParams, + size_t slabMinSize) { + if (!hParams) { + LOG_ERR("disjoint pool params handle is NULL"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + hParams->slab_min_size = slabMinSize; + return UMF_RESULT_SUCCESS; +} + +umf_result_t umfDisjointPoolParamsSetMaxPoolableSize( + umf_disjoint_pool_params_handle_t hParams, size_t maxPoolableSize) { + if (!hParams) { + LOG_ERR("disjoint pool params handle is NULL"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + hParams->max_poolable_size = maxPoolableSize; + return UMF_RESULT_SUCCESS; +} + +umf_result_t +umfDisjointPoolParamsSetCapacity(umf_disjoint_pool_params_handle_t hParams, + size_t maxCapacity) { + if (!hParams) { + LOG_ERR("disjoint pool params handle is NULL"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + hParams->capacity = maxCapacity; + return UMF_RESULT_SUCCESS; +} + +umf_result_t +umfDisjointPoolParamsSetMinBucketSize(umf_disjoint_pool_params_handle_t hParams, + size_t minBucketSize) { + if (!hParams) { + LOG_ERR("disjoint pool params handle is NULL"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + // minBucketSize parameter must be a power of 2 and greater than 0. + if (minBucketSize == 0 || (minBucketSize & (minBucketSize - 1))) { + LOG_ERR("minBucketSize must be a power of 2 and greater than 0"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + hParams->min_bucket_size = minBucketSize; + return UMF_RESULT_SUCCESS; +} + +umf_result_t +umfDisjointPoolParamsSetTrace(umf_disjoint_pool_params_handle_t hParams, + int poolTrace) { + if (!hParams) { + LOG_ERR("disjoint pool params handle is NULL"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + hParams->pool_trace = poolTrace; + return UMF_RESULT_SUCCESS; +} + +umf_result_t umfDisjointPoolParamsSetSharedLimits( + umf_disjoint_pool_params_handle_t hParams, + umf_disjoint_pool_shared_limits_handle_t hSharedLimits) { + if (!hParams) { + LOG_ERR("disjoint pool params handle is NULL"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + hParams->shared_limits = hSharedLimits; + return UMF_RESULT_SUCCESS; +} + +umf_result_t +umfDisjointPoolParamsSetName(umf_disjoint_pool_params_handle_t hParams, + const char *name) { + if (!hParams) { + LOG_ERR("disjoint pool params handle is NULL"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + char *newName = umf_ba_global_alloc(sizeof(char) * (strlen(name) + 1)); + if (newName == NULL) { + LOG_ERR("cannot allocate memory for disjoint pool name"); + return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + umf_ba_global_free(hParams->name); + hParams->name = newName; + strcpy(hParams->name, name); + + return UMF_RESULT_SUCCESS; +} diff --git a/src/pool/pool_disjoint.cpp b/src/pool/pool_disjoint.cpp deleted file mode 100644 index 0390f5375..000000000 --- a/src/pool/pool_disjoint.cpp +++ /dev/null @@ -1,1313 +0,0 @@ -// Copyright (C) 2023-2025 Intel Corporation -// Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -// TODO: replace with logger? 
-#include - -#include "provider/provider_tracking.h" - -#include "../cpp_helpers.hpp" -#include "pool_disjoint.h" -#include "umf.h" -#include "utils_log.h" -#include "utils_math.h" -#include "utils_sanitizers.h" - -// Temporary solution for disabling memory poisoning. This is needed because -// AddressSanitizer does not support memory poisoning for GPU allocations. -// More info: https://github.com/oneapi-src/unified-memory-framework/issues/634 -#ifndef POISON_MEMORY -#define POISON_MEMORY 0 -#endif - -static inline void annotate_memory_inaccessible([[maybe_unused]] void *ptr, - [[maybe_unused]] size_t size) { -#if (POISON_MEMORY != 0) - utils_annotate_memory_inaccessible(ptr, size); -#endif -} - -static inline void annotate_memory_undefined([[maybe_unused]] void *ptr, - [[maybe_unused]] size_t size) { -#if (POISON_MEMORY != 0) - utils_annotate_memory_undefined(ptr, size); -#endif -} - -typedef struct umf_disjoint_pool_shared_limits_t { - size_t MaxSize; - std::atomic TotalSize; -} umf_disjoint_pool_shared_limits_t; - -// Configuration of Disjoint Pool -typedef struct umf_disjoint_pool_params_t { - // Minimum allocation size that will be requested from the memory provider. - size_t SlabMinSize; - - // Allocations up to this limit will be subject to chunking/pooling - size_t MaxPoolableSize; - - // When pooling, each bucket will hold a max of 'Capacity' unfreed slabs - size_t Capacity; - - // Holds the minimum bucket size valid for allocation of a memory type. - // This value must be a power of 2. - size_t MinBucketSize; - - // Holds size of the pool managed by the allocator. - size_t CurPoolSize; - - // Whether to print pool usage statistics - int PoolTrace; - - // Memory limits that can be shared between multitple pool instances, - // i.e. if multiple pools use the same SharedLimits sum of those pools' - // sizes cannot exceed MaxSize. 
- umf_disjoint_pool_shared_limits_handle_t SharedLimits; - - // Name used in traces - char *Name; -} umf_disjoint_pool_params_t; - -class DisjointPool { - public: - class AllocImpl; - using Config = umf_disjoint_pool_params_t; - - umf_result_t initialize(umf_memory_provider_handle_t provider, - umf_disjoint_pool_params_handle_t parameters); - void *malloc(size_t size); - void *calloc(size_t, size_t); - void *realloc(void *, size_t); - void *aligned_malloc(size_t size, size_t alignment); - size_t malloc_usable_size(void *); - umf_result_t free(void *ptr); - umf_result_t get_last_allocation_error(); - - DisjointPool(); - ~DisjointPool(); - - private: - std::unique_ptr impl; -}; - -umf_disjoint_pool_shared_limits_t * -umfDisjointPoolSharedLimitsCreate(size_t MaxSize) { - return new umf_disjoint_pool_shared_limits_t{MaxSize, 0}; -} - -void umfDisjointPoolSharedLimitsDestroy( - umf_disjoint_pool_shared_limits_handle_t hSharedLimits) { - delete hSharedLimits; -} - -umf_result_t -umfDisjointPoolParamsCreate(umf_disjoint_pool_params_handle_t *hParams) { - static const char *DEFAULT_NAME = "disjoint_pool"; - - if (!hParams) { - LOG_ERR("disjoint pool params handle is NULL"); - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - - umf_disjoint_pool_params_handle_t params = new umf_disjoint_pool_params_t{}; - if (params == nullptr) { - LOG_ERR("cannot allocate memory for disjoint pool params"); - return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - params->SlabMinSize = 0; - params->MaxPoolableSize = 0; - params->Capacity = 0; - params->MinBucketSize = UMF_DISJOINT_POOL_MIN_BUCKET_DEFAULT_SIZE; - params->CurPoolSize = 0; - params->PoolTrace = 0; - params->SharedLimits = nullptr; - params->Name = nullptr; - - umf_result_t ret = umfDisjointPoolParamsSetName(params, DEFAULT_NAME); - if (ret != UMF_RESULT_SUCCESS) { - delete params; - return ret; - } - - *hParams = params; - - return UMF_RESULT_SUCCESS; -} - -umf_result_t -umfDisjointPoolParamsDestroy(umf_disjoint_pool_params_handle_t hParams) { - if (hParams) { - delete[] hParams->Name; - delete hParams; - } - - return UMF_RESULT_SUCCESS; -} - -umf_result_t -umfDisjointPoolParamsSetSlabMinSize(umf_disjoint_pool_params_handle_t hParams, - size_t slabMinSize) { - if (!hParams) { - LOG_ERR("disjoint pool params handle is NULL"); - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - - hParams->SlabMinSize = slabMinSize; - return UMF_RESULT_SUCCESS; -} - -umf_result_t umfDisjointPoolParamsSetMaxPoolableSize( - umf_disjoint_pool_params_handle_t hParams, size_t maxPoolableSize) { - if (!hParams) { - LOG_ERR("disjoint pool params handle is NULL"); - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - - hParams->MaxPoolableSize = maxPoolableSize; - return UMF_RESULT_SUCCESS; -} - -umf_result_t -umfDisjointPoolParamsSetCapacity(umf_disjoint_pool_params_handle_t hParams, - size_t maxCapacity) { - if (!hParams) { - LOG_ERR("disjoint pool params handle is NULL"); - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - - hParams->Capacity = maxCapacity; - return UMF_RESULT_SUCCESS; -} - -umf_result_t -umfDisjointPoolParamsSetMinBucketSize(umf_disjoint_pool_params_handle_t hParams, - size_t minBucketSize) { - if (!hParams) { - LOG_ERR("disjoint pool params handle is NULL"); - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - - // minBucketSize parameter must be a power of 2 and greater than 0. 
- if (minBucketSize == 0 || (minBucketSize & (minBucketSize - 1))) { - LOG_ERR("minBucketSize must be a power of 2 and greater than 0"); - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - - hParams->MinBucketSize = minBucketSize; - return UMF_RESULT_SUCCESS; -} - -umf_result_t -umfDisjointPoolParamsSetTrace(umf_disjoint_pool_params_handle_t hParams, - int poolTrace) { - if (!hParams) { - LOG_ERR("disjoint pool params handle is NULL"); - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - - hParams->PoolTrace = poolTrace; - return UMF_RESULT_SUCCESS; -} - -umf_result_t umfDisjointPoolParamsSetSharedLimits( - umf_disjoint_pool_params_handle_t hParams, - umf_disjoint_pool_shared_limits_handle_t hSharedLimits) { - if (!hParams) { - LOG_ERR("disjoint pool params handle is NULL"); - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - - hParams->SharedLimits = hSharedLimits; - return UMF_RESULT_SUCCESS; -} - -umf_result_t -umfDisjointPoolParamsSetName(umf_disjoint_pool_params_handle_t hParams, - const char *name) { - if (!hParams) { - LOG_ERR("disjoint pool params handle is NULL"); - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - - char *newName = new char[std::strlen(name) + 1]; - if (newName == nullptr) { - LOG_ERR("cannot allocate memory for disjoint pool name"); - return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - delete[] hParams->Name; - hParams->Name = newName; - std::strcpy(hParams->Name, name); - - return UMF_RESULT_SUCCESS; -} - -// Allocations are a minimum of 4KB/64KB/2MB even when a smaller size is -// requested. The implementation distinguishes between allocations of size -// ChunkCutOff = (minimum-alloc-size / 2) and those that are larger. -// Allocation requests smaller than ChunkCutoff use chunks taken from a single -// coarse-grain allocation. Thus, for example, for a 64KB minimum allocation -// size, and 8-byte allocations, only 1 in ~8000 requests results in a new -// coarse-grain allocation. Freeing results only in a chunk of a larger -// allocation to be marked as available and no real return to the system. An -// allocation is returned to the system only when all chunks in the larger -// allocation are freed by the program. Allocations larger than ChunkCutOff use -// a separate coarse-grain allocation for each request. These are subject to -// "pooling". That is, when such an allocation is freed by the program it is -// retained in a pool. The pool is available for future allocations, which means -// there are fewer actual coarse-grain allocations/deallocations. - -// The largest size which is allocated via the allocator. -// Allocations with size > CutOff bypass the pool and -// go directly to the provider. -static constexpr size_t CutOff = (size_t)1 << 31; // 2GB - -// Aligns the pointer down to the specified alignment -// (e.g. returns 8 for Size = 13, Alignment = 8) -static void *AlignPtrDown(void *Ptr, const size_t Alignment) { - return reinterpret_cast((reinterpret_cast(Ptr)) & - (~(Alignment - 1))); -} - -// Aligns the pointer up to the specified alignment -// (e.g. returns 16 for Size = 13, Alignment = 8) -static void *AlignPtrUp(void *Ptr, const size_t Alignment) { - void *AlignedPtr = AlignPtrDown(Ptr, Alignment); - // Special case when the pointer is already aligned - if (Ptr == AlignedPtr) { - return Ptr; - } - return static_cast(AlignedPtr) + Alignment; -} - -// Aligns the value up to the specified alignment -// (e.g. 
returns 16 for Size = 13, Alignment = 8) -static size_t AlignUp(size_t Val, size_t Alignment) { - assert(Alignment > 0); - return (Val + Alignment - 1) & (~(Alignment - 1)); -} - -typedef struct MemoryProviderError { - umf_result_t code; -} MemoryProviderError_t; - -class Bucket; - -// Represents the allocated memory block of size 'SlabMinSize' -// Internally, it splits the memory block into chunks. The number of -// chunks depends of the size of a Bucket which created the Slab. -// Note: Bucket's methods are responsible for thread safety of Slab access, -// so no locking happens here. -class Slab { - - // Pointer to the allocated memory of SlabMinSize bytes - void *MemPtr; - - // Represents the current state of each chunk: - // if the bit is set then the chunk is allocated - // the chunk is free for allocation otherwise - std::vector Chunks; - - // Total number of allocated chunks at the moment. - size_t NumAllocated = 0; - - // The bucket which the slab belongs to - Bucket &bucket; - - using ListIter = std::list>::iterator; - - // Store iterator to the corresponding node in avail/unavail list - // to achieve O(1) removal - ListIter SlabListIter; - - // Hints where to start search for free chunk in a slab - size_t FirstFreeChunkIdx = 0; - - // Return the index of the first available chunk, SIZE_MAX otherwise - size_t FindFirstAvailableChunkIdx() const; - - // Register/Unregister the slab in the global slab address map. - void regSlab(Slab &); - void unregSlab(Slab &); - static void regSlabByAddr(void *, Slab &); - static void unregSlabByAddr(void *, Slab &); - - public: - Slab(Bucket &); - ~Slab(); - - void setIterator(ListIter It) { SlabListIter = It; } - ListIter getIterator() const { return SlabListIter; } - - size_t getNumAllocated() const { return NumAllocated; } - - // Get pointer to allocation that is one piece of this slab. - void *getChunk(); - - // Get pointer to allocation that is this entire slab. - void *getSlab(); - - void *getPtr() const { return MemPtr; } - void *getEnd() const; - - size_t getChunkSize() const; - size_t getNumChunks() const { return Chunks.size(); } - - bool hasAvail(); - - Bucket &getBucket(); - const Bucket &getBucket() const; - - void freeChunk(void *Ptr); -}; - -class Bucket { - const size_t Size; - - // List of slabs which have at least 1 available chunk. - std::list> AvailableSlabs; - - // List of slabs with 0 available chunk. - std::list> UnavailableSlabs; - - // Protects the bucket and all the corresponding slabs - std::mutex BucketLock; - - // Reference to the allocator context, used access memory allocation - // routines, slab map and etc. - DisjointPool::AllocImpl &OwnAllocCtx; - - // For buckets used in chunked mode, a counter of slabs in the pool. - // For allocations that use an entire slab each, the entries in the Available - // list are entries in the pool.Each slab is available for a new - // allocation.The size of the Available list is the size of the pool. - // For allocations that use slabs in chunked mode, slabs will be in the - // Available list if any one or more of their chunks is free.The entire slab - // is not necessarily free, just some chunks in the slab are free. To - // implement pooling we will allow one slab in the Available list to be - // entirely empty. Normally such a slab would have been freed. But - // now we don't, and treat this slab as "in the pool". - // When a slab becomes entirely free we have to decide whether to return it - // to the provider or keep it allocated. 
A simple check for size of the - // Available list is not sufficient to check whether any slab has been - // pooled yet. We would have to traverse the entire Available list and check - // if any of them is entirely free. Instead we keep a counter of entirely - // empty slabs within the Available list to speed up the process of checking - // if a slab in this bucket is already pooled. - size_t chunkedSlabsInPool; - - // Statistics - size_t allocPoolCount; - size_t freeCount; - size_t currSlabsInUse; - size_t currSlabsInPool; - size_t maxSlabsInPool; - - public: - // Statistics - size_t allocCount; - size_t maxSlabsInUse; - - Bucket(size_t Sz, DisjointPool::AllocImpl &AllocCtx) - : Size{Sz}, OwnAllocCtx{AllocCtx}, chunkedSlabsInPool(0), - allocPoolCount(0), freeCount(0), currSlabsInUse(0), - currSlabsInPool(0), maxSlabsInPool(0), allocCount(0), - maxSlabsInUse(0) {} - - // Get pointer to allocation that is one piece of an available slab in this - // bucket. - void *getChunk(bool &FromPool); - - // Get pointer to allocation that is a full slab in this bucket. - void *getSlab(bool &FromPool); - - // Return the allocation size of this bucket. - size_t getSize() const { return Size; } - - // Free an allocation that is one piece of a slab in this bucket. - void freeChunk(void *Ptr, Slab &Slab, bool &ToPool); - - // Free an allocation that is a full slab in this bucket. - void freeSlab(Slab &Slab, bool &ToPool); - - umf_memory_provider_handle_t getMemHandle(); - - DisjointPool::AllocImpl &getAllocCtx() { return OwnAllocCtx; } - - // Check whether an allocation to be freed can be placed in the pool. - bool CanPool(bool &ToPool); - - // The minimum allocation size for any slab. - size_t SlabMinSize(); - - // The allocation size for a slab in this bucket. - size_t SlabAllocSize(); - - // The minimum size of a chunk from this bucket's slabs. - size_t ChunkCutOff(); - - // The number of slabs in this bucket that can be in the pool. - size_t Capacity(); - - // The maximum allocation size subject to pooling. - size_t MaxPoolableSize(); - - // Update allocation count - void countAlloc(bool FromPool); - - // Update free count - void countFree(); - - // Update statistics of Available/Unavailable - void updateStats(int InUse, int InPool); - - // Print bucket statistics - void printStats(bool &TitlePrinted, const std::string &Label); - - private: - void onFreeChunk(Slab &, bool &ToPool); - - // Update statistics of pool usage, and indicate that an allocation was made - // from the pool. - void decrementPool(bool &FromPool); - - // Get a slab to be used for chunked allocations. - decltype(AvailableSlabs.begin()) getAvailSlab(bool &FromPool); - - // Get a slab that will be used as a whole for a single allocation. - decltype(AvailableSlabs.begin()) getAvailFullSlab(bool &FromPool); -}; - -class DisjointPool::AllocImpl { - // It's important for the map to be destroyed last after buckets and their - // slabs This is because slab's destructor removes the object from the map. 
- std::unordered_multimap KnownSlabs; - std::shared_timed_mutex KnownSlabsMapLock; - - // Handle to the memory provider - umf_memory_provider_handle_t MemHandle; - - // Store as unique_ptrs since Bucket is not Movable(because of std::mutex) - std::vector> Buckets; - - // Configuration for this instance - umf_disjoint_pool_params_t params; - - umf_disjoint_pool_shared_limits_t DefaultSharedLimits = { - (std::numeric_limits::max)(), 0}; - - // Used in algorithm for finding buckets - std::size_t MinBucketSizeExp; - - // Coarse-grain allocation min alignment - size_t ProviderMinPageSize; - - public: - AllocImpl(umf_memory_provider_handle_t hProvider, - umf_disjoint_pool_params_handle_t params) - : MemHandle{hProvider}, params(*params) { - - VALGRIND_DO_CREATE_MEMPOOL(this, 0, 0); - - // deep copy of the Name - this->params.Name = new char[std::strlen(params->Name) + 1]; - std::strcpy(this->params.Name, params->Name); - - // Generate buckets sized such as: 64, 96, 128, 192, ..., CutOff. - // Powers of 2 and the value halfway between the powers of 2. - auto Size1 = this->params.MinBucketSize; - // MinBucketSize cannot be larger than CutOff. - Size1 = std::min(Size1, CutOff); - // Buckets sized smaller than the bucket default size- 8 aren't needed. - Size1 = std::max(Size1, UMF_DISJOINT_POOL_MIN_BUCKET_DEFAULT_SIZE); - // Calculate the exponent for MinBucketSize used for finding buckets. - MinBucketSizeExp = (size_t)log2Utils(Size1); - auto Size2 = Size1 + Size1 / 2; - for (; Size2 < CutOff; Size1 *= 2, Size2 *= 2) { - Buckets.push_back(std::make_unique(Size1, *this)); - Buckets.push_back(std::make_unique(Size2, *this)); - } - Buckets.push_back(std::make_unique(CutOff, *this)); - - auto ret = umfMemoryProviderGetMinPageSize(hProvider, nullptr, - &ProviderMinPageSize); - if (ret != UMF_RESULT_SUCCESS) { - ProviderMinPageSize = 0; - } - } - - ~AllocImpl() { - VALGRIND_DO_DESTROY_MEMPOOL(this); - delete[] this->params.Name; - } - - void *allocate(size_t Size, size_t Alignment, bool &FromPool); - void *allocate(size_t Size, bool &FromPool); - void deallocate(void *Ptr, bool &ToPool); - - umf_memory_provider_handle_t getMemHandle() { return MemHandle; } - - std::shared_timed_mutex &getKnownSlabsMapLock() { - return KnownSlabsMapLock; - } - std::unordered_multimap &getKnownSlabs() { - return KnownSlabs; - } - - size_t SlabMinSize() { return params.SlabMinSize; }; - - umf_disjoint_pool_params_t &getParams() { return params; } - - umf_disjoint_pool_shared_limits_t *getLimits() { - if (params.SharedLimits) { - return params.SharedLimits; - } else { - return &DefaultSharedLimits; - } - }; - - void printStats(bool &TitlePrinted, size_t &HighBucketSize, - size_t &HighPeakSlabsInUse, const std::string &Label); - - private: - Bucket &findBucket(size_t Size); - std::size_t sizeToIdx(size_t Size); -}; - -static void *memoryProviderAlloc(umf_memory_provider_handle_t hProvider, - size_t size, size_t alignment = 0) { - void *ptr; - auto ret = umfMemoryProviderAlloc(hProvider, size, alignment, &ptr); - if (ret != UMF_RESULT_SUCCESS) { - throw MemoryProviderError{ret}; - } - annotate_memory_inaccessible(ptr, size); - return ptr; -} - -static void memoryProviderFree(umf_memory_provider_handle_t hProvider, - void *ptr) { - size_t size = 0; - - if (ptr) { - umf_alloc_info_t allocInfo = {NULL, 0, NULL}; - umf_result_t umf_result = umfMemoryTrackerGetAllocInfo(ptr, &allocInfo); - if (umf_result == UMF_RESULT_SUCCESS) { - size = allocInfo.baseSize; - } - } - - auto ret = umfMemoryProviderFree(hProvider, ptr, size); - if 
(ret != UMF_RESULT_SUCCESS) { - throw MemoryProviderError{ret}; - } -} - -bool operator==(const Slab &Lhs, const Slab &Rhs) { - return Lhs.getPtr() == Rhs.getPtr(); -} - -std::ostream &operator<<(std::ostream &Os, const Slab &Slab) { - Os << "Slab<" << Slab.getPtr() << ", " << Slab.getEnd() << ", " - << Slab.getBucket().getSize() << ">"; - return Os; -} - -Slab::Slab(Bucket &Bkt) - : // In case bucket size is not a multiple of SlabMinSize, we would have - // some padding at the end of the slab. - Chunks(Bkt.SlabMinSize() / Bkt.getSize()), NumAllocated{0}, - bucket(Bkt), SlabListIter{}, FirstFreeChunkIdx{0} { - auto SlabSize = Bkt.SlabAllocSize(); - MemPtr = memoryProviderAlloc(Bkt.getMemHandle(), SlabSize); - regSlab(*this); -} - -Slab::~Slab() { - try { - unregSlab(*this); - } catch (std::exception &e) { - LOG_ERR("DisjointPool: unexpected error: %s", e.what()); - } - - try { - memoryProviderFree(bucket.getMemHandle(), MemPtr); - } catch (MemoryProviderError &e) { - LOG_ERR("DisjointPool: error from memory provider: %d", e.code); - - if (e.code == UMF_RESULT_ERROR_MEMORY_PROVIDER_SPECIFIC) { - const char *message = ""; - int error = 0; - - try { - umfMemoryProviderGetLastNativeError( - umfGetLastFailedMemoryProvider(), &message, &error); - LOG_ERR("Native error msg: %s, native error code: %d", message, - error); - } catch (...) { - // ignore any additional errors from logger - } - } - } -} - -// Return the index of the first available chunk, SIZE_MAX otherwise -size_t Slab::FindFirstAvailableChunkIdx() const { - // Use the first free chunk index as a hint for the search. - auto It = std::find_if(Chunks.begin() + FirstFreeChunkIdx, Chunks.end(), - [](auto x) { return !x; }); - if (It != Chunks.end()) { - return It - Chunks.begin(); - } - - return std::numeric_limits::max(); -} - -void *Slab::getChunk() { - // assert(NumAllocated != Chunks.size()); - - const size_t ChunkIdx = FindFirstAvailableChunkIdx(); - // Free chunk must exist, otherwise we would have allocated another slab - assert(ChunkIdx != (std::numeric_limits::max())); - - void *const FreeChunk = - (static_cast(getPtr())) + ChunkIdx * getChunkSize(); - Chunks[ChunkIdx] = true; - NumAllocated += 1; - - // Use the found index as the next hint - FirstFreeChunkIdx = ChunkIdx; - - return FreeChunk; -} - -void *Slab::getSlab() { return getPtr(); } - -Bucket &Slab::getBucket() { return bucket; } -const Bucket &Slab::getBucket() const { return bucket; } - -size_t Slab::getChunkSize() const { return bucket.getSize(); } - -void Slab::regSlabByAddr(void *Addr, Slab &Slab) { - auto &Lock = Slab.getBucket().getAllocCtx().getKnownSlabsMapLock(); - auto &Map = Slab.getBucket().getAllocCtx().getKnownSlabs(); - - std::lock_guard Lg(Lock); - Map.insert({Addr, Slab}); -} - -void Slab::unregSlabByAddr(void *Addr, Slab &Slab) { - auto &Lock = Slab.getBucket().getAllocCtx().getKnownSlabsMapLock(); - auto &Map = Slab.getBucket().getAllocCtx().getKnownSlabs(); - - std::lock_guard Lg(Lock); - - auto Slabs = Map.equal_range(Addr); - // At least the must get the current slab from the map. 
- assert(Slabs.first != Slabs.second && "Slab is not found"); - - for (auto It = Slabs.first; It != Slabs.second; ++It) { - if (It->second == Slab) { - Map.erase(It); - return; - } - } - - assert(false && "Slab is not found"); -} - -void Slab::regSlab(Slab &Slab) { - void *StartAddr = AlignPtrDown(Slab.getPtr(), bucket.SlabMinSize()); - void *EndAddr = static_cast(StartAddr) + bucket.SlabMinSize(); - - regSlabByAddr(StartAddr, Slab); - regSlabByAddr(EndAddr, Slab); -} - -void Slab::unregSlab(Slab &Slab) { - void *StartAddr = AlignPtrDown(Slab.getPtr(), bucket.SlabMinSize()); - void *EndAddr = static_cast(StartAddr) + bucket.SlabMinSize(); - - unregSlabByAddr(StartAddr, Slab); - unregSlabByAddr(EndAddr, Slab); -} - -void Slab::freeChunk(void *Ptr) { - // This method should be called through bucket(since we might remove the slab - // as a result), therefore all locks are done on that level. - - // Make sure that we're in the right slab - assert(Ptr >= getPtr() && Ptr < getEnd()); - - // Even if the pointer p was previously aligned, it's still inside the - // corresponding chunk, so we get the correct index here. - auto ChunkIdx = (static_cast(Ptr) - static_cast(MemPtr)) / - getChunkSize(); - - // Make sure that the chunk was allocated - assert(Chunks[ChunkIdx] && "double free detected"); - - Chunks[ChunkIdx] = false; - NumAllocated -= 1; - - if (ChunkIdx < FirstFreeChunkIdx) { - FirstFreeChunkIdx = ChunkIdx; - } -} - -void *Slab::getEnd() const { - return static_cast(getPtr()) + bucket.SlabMinSize(); -} - -bool Slab::hasAvail() { return NumAllocated != getNumChunks(); } - -// If a slab was available in the pool then note that the current pooled -// size has reduced by the size of a slab in this bucket. -void Bucket::decrementPool(bool &FromPool) { - FromPool = true; - updateStats(1, -1); - OwnAllocCtx.getLimits()->TotalSize -= SlabAllocSize(); -} - -auto Bucket::getAvailFullSlab(bool &FromPool) - -> decltype(AvailableSlabs.begin()) { - // Return a slab that will be used for a single allocation. - if (AvailableSlabs.size() == 0) { - auto It = AvailableSlabs.insert(AvailableSlabs.begin(), - std::make_unique(*this)); - (*It)->setIterator(It); - FromPool = false; - updateStats(1, 0); - } else { - decrementPool(FromPool); - } - - return AvailableSlabs.begin(); -} - -void *Bucket::getSlab(bool &FromPool) { - std::lock_guard Lg(BucketLock); - - auto SlabIt = getAvailFullSlab(FromPool); - auto *FreeSlab = (*SlabIt)->getSlab(); - auto It = - UnavailableSlabs.insert(UnavailableSlabs.begin(), std::move(*SlabIt)); - AvailableSlabs.erase(SlabIt); - (*It)->setIterator(It); - return FreeSlab; -} - -void Bucket::freeSlab(Slab &Slab, bool &ToPool) { - std::lock_guard Lg(BucketLock); - auto SlabIter = Slab.getIterator(); - assert(SlabIter != UnavailableSlabs.end()); - if (CanPool(ToPool)) { - auto It = - AvailableSlabs.insert(AvailableSlabs.begin(), std::move(*SlabIter)); - UnavailableSlabs.erase(SlabIter); - (*It)->setIterator(It); - } else { - UnavailableSlabs.erase(SlabIter); - } -} - -auto Bucket::getAvailSlab(bool &FromPool) -> decltype(AvailableSlabs.begin()) { - - if (AvailableSlabs.size() == 0) { - auto It = AvailableSlabs.insert(AvailableSlabs.begin(), - std::make_unique(*this)); - (*It)->setIterator(It); - - updateStats(1, 0); - FromPool = false; - } else { - if ((*(AvailableSlabs.begin()))->getNumAllocated() == 0) { - // If this was an empty slab, it was in the pool. - // Now it is no longer in the pool, so update count. 
- --chunkedSlabsInPool; - decrementPool(FromPool); - } else { - // Allocation from existing slab is treated as from pool for statistics. - FromPool = true; - } - } - - return AvailableSlabs.begin(); -} - -void *Bucket::getChunk(bool &FromPool) { - std::lock_guard Lg(BucketLock); - - auto SlabIt = getAvailSlab(FromPool); - auto *FreeChunk = (*SlabIt)->getChunk(); - - // If the slab is full, move it to unavailable slabs and update its iterator - if (!((*SlabIt)->hasAvail())) { - auto It = UnavailableSlabs.insert(UnavailableSlabs.begin(), - std::move(*SlabIt)); - AvailableSlabs.erase(SlabIt); - (*It)->setIterator(It); - } - - return FreeChunk; -} - -void Bucket::freeChunk(void *Ptr, Slab &Slab, bool &ToPool) { - std::lock_guard Lg(BucketLock); - - Slab.freeChunk(Ptr); - - onFreeChunk(Slab, ToPool); -} - -// The lock must be acquired before calling this method -void Bucket::onFreeChunk(Slab &Slab, bool &ToPool) { - ToPool = true; - - // In case if the slab was previously full and now has 1 available - // chunk, it should be moved to the list of available slabs - if (Slab.getNumAllocated() == (Slab.getNumChunks() - 1)) { - auto SlabIter = Slab.getIterator(); - assert(SlabIter != UnavailableSlabs.end()); - - auto It = - AvailableSlabs.insert(AvailableSlabs.begin(), std::move(*SlabIter)); - UnavailableSlabs.erase(SlabIter); - - (*It)->setIterator(It); - } - - // Check if slab is empty, and pool it if we can. - if (Slab.getNumAllocated() == 0) { - // The slab is now empty. - // If pool has capacity then put the slab in the pool. - // The ToPool parameter indicates whether the Slab will be put in the - // pool or freed. - if (!CanPool(ToPool)) { - // Note: since the slab is stored as unique_ptr, just remove it from - // the list to destroy the object. - auto It = Slab.getIterator(); - assert(It != AvailableSlabs.end()); - AvailableSlabs.erase(It); - } - } -} - -bool Bucket::CanPool(bool &ToPool) { - size_t NewFreeSlabsInBucket; - // Check if this bucket is used in chunked form or as full slabs. - bool chunkedBucket = getSize() <= ChunkCutOff(); - if (chunkedBucket) { - NewFreeSlabsInBucket = chunkedSlabsInPool + 1; - } else { - NewFreeSlabsInBucket = AvailableSlabs.size() + 1; - } - if (Capacity() >= NewFreeSlabsInBucket) { - size_t PoolSize = OwnAllocCtx.getLimits()->TotalSize; - while (true) { - size_t NewPoolSize = PoolSize + SlabAllocSize(); - - if (OwnAllocCtx.getLimits()->MaxSize < NewPoolSize) { - break; - } - - if (OwnAllocCtx.getLimits()->TotalSize.compare_exchange_strong( - PoolSize, NewPoolSize)) { - if (chunkedBucket) { - ++chunkedSlabsInPool; - } - - updateStats(-1, 1); - ToPool = true; - return true; - } - } - } - - updateStats(-1, 0); - ToPool = false; - return false; -} - -umf_memory_provider_handle_t Bucket::getMemHandle() { - return OwnAllocCtx.getMemHandle(); -} - -size_t Bucket::SlabMinSize() { return OwnAllocCtx.getParams().SlabMinSize; } - -size_t Bucket::SlabAllocSize() { return std::max(getSize(), SlabMinSize()); } - -size_t Bucket::Capacity() { - // For buckets used in chunked mode, just one slab in pool is sufficient. - // For larger buckets, the capacity could be more and is adjustable. 
- if (getSize() <= ChunkCutOff()) { - return 1; - } else { - return OwnAllocCtx.getParams().Capacity; - } -} - -size_t Bucket::MaxPoolableSize() { - return OwnAllocCtx.getParams().MaxPoolableSize; -} - -size_t Bucket::ChunkCutOff() { return SlabMinSize() / 2; } - -void Bucket::countAlloc(bool FromPool) { - ++allocCount; - if (FromPool) { - ++allocPoolCount; - } -} - -void Bucket::countFree() { ++freeCount; } - -void Bucket::updateStats(int InUse, int InPool) { - if (OwnAllocCtx.getParams().PoolTrace == 0) { - return; - } - currSlabsInUse += InUse; - maxSlabsInUse = std::max(currSlabsInUse, maxSlabsInUse); - currSlabsInPool += InPool; - maxSlabsInPool = std::max(currSlabsInPool, maxSlabsInPool); - // Increment or decrement current pool sizes based on whether - // slab was added to or removed from pool. - OwnAllocCtx.getParams().CurPoolSize += InPool * SlabAllocSize(); -} - -void Bucket::printStats(bool &TitlePrinted, const std::string &Label) { - if (allocCount) { - if (!TitlePrinted) { - std::cout << Label << " memory statistics\n"; - std::cout << std::setw(14) << "Bucket Size" << std::setw(12) - << "Allocs" << std::setw(12) << "Frees" << std::setw(18) - << "Allocs from Pool" << std::setw(20) - << "Peak Slabs in Use" << std::setw(21) - << "Peak Slabs in Pool" << std::endl; - TitlePrinted = true; - } - std::cout << std::setw(14) << getSize() << std::setw(12) << allocCount - << std::setw(12) << freeCount << std::setw(18) - << allocPoolCount << std::setw(20) << maxSlabsInUse - << std::setw(21) << maxSlabsInPool << std::endl; - } -} - -void *DisjointPool::AllocImpl::allocate(size_t Size, bool &FromPool) try { - void *Ptr; - - if (Size == 0) { - return nullptr; - } - - FromPool = false; - if (Size > getParams().MaxPoolableSize) { - Ptr = memoryProviderAlloc(getMemHandle(), Size); - annotate_memory_undefined(Ptr, Size); - return Ptr; - } - - auto &Bucket = findBucket(Size); - - if (Size > Bucket.ChunkCutOff()) { - Ptr = Bucket.getSlab(FromPool); - } else { - Ptr = Bucket.getChunk(FromPool); - } - - if (getParams().PoolTrace > 1) { - Bucket.countAlloc(FromPool); - } - - VALGRIND_DO_MEMPOOL_ALLOC(this, Ptr, Size); - annotate_memory_undefined(Ptr, Bucket.getSize()); - - return Ptr; -} catch (MemoryProviderError &e) { - umf::getPoolLastStatusRef() = e.code; - return nullptr; -} - -void *DisjointPool::AllocImpl::allocate(size_t Size, size_t Alignment, - bool &FromPool) try { - void *Ptr; - - if (Size == 0) { - return nullptr; - } - - if (Alignment <= 1) { - return allocate(Size, FromPool); - } - - size_t AlignedSize; - if (Alignment <= ProviderMinPageSize) { - // This allocation will be served from a Bucket which size is multiple - // of Alignment and Slab address is aligned to ProviderMinPageSize - // so the address will be properly aligned. - AlignedSize = (Size > 1) ? AlignUp(Size, Alignment) : Alignment; - } else { - // Slabs are only aligned to ProviderMinPageSize, we need to compensate - // for that in case the allocation is within pooling limit. - // TODO: consider creating properly-aligned Slabs on demand - AlignedSize = Size + Alignment - 1; - } - - // Check if requested allocation size is within pooling limit. - // If not, just request aligned pointer from the system. 
- FromPool = false; - if (AlignedSize > getParams().MaxPoolableSize) { - Ptr = memoryProviderAlloc(getMemHandle(), Size, Alignment); - annotate_memory_undefined(Ptr, Size); - return Ptr; - } - - auto &Bucket = findBucket(AlignedSize); - - if (AlignedSize > Bucket.ChunkCutOff()) { - Ptr = Bucket.getSlab(FromPool); - } else { - Ptr = Bucket.getChunk(FromPool); - } - - if (getParams().PoolTrace > 1) { - Bucket.countAlloc(FromPool); - } - - VALGRIND_DO_MEMPOOL_ALLOC(this, AlignPtrUp(Ptr, Alignment), Size); - annotate_memory_undefined(AlignPtrUp(Ptr, Alignment), Size); - return AlignPtrUp(Ptr, Alignment); -} catch (MemoryProviderError &e) { - umf::getPoolLastStatusRef() = e.code; - return nullptr; -} - -std::size_t DisjointPool::AllocImpl::sizeToIdx(size_t Size) { - assert(Size <= CutOff && "Unexpected size"); - assert(Size > 0 && "Unexpected size"); - - size_t MinBucketSize = (size_t)1 << MinBucketSizeExp; - if (Size < MinBucketSize) { - return 0; - } - - // Get the position of the leftmost set bit. - size_t position = getLeftmostSetBitPos(Size); - - auto isPowerOf2 = 0 == (Size & (Size - 1)); - auto largerThanHalfwayBetweenPowersOf2 = - !isPowerOf2 && bool((Size - 1) & (uint64_t(1) << (position - 1))); - auto index = (position - MinBucketSizeExp) * 2 + (int)(!isPowerOf2) + - (int)largerThanHalfwayBetweenPowersOf2; - - return index; -} - -Bucket &DisjointPool::AllocImpl::findBucket(size_t Size) { - auto calculatedIdx = sizeToIdx(Size); - assert((*(Buckets[calculatedIdx])).getSize() >= Size); - if (calculatedIdx > 0) { - assert((*(Buckets[calculatedIdx - 1])).getSize() < Size); - } - - return *(Buckets[calculatedIdx]); -} - -void DisjointPool::AllocImpl::deallocate(void *Ptr, bool &ToPool) { - auto *SlabPtr = AlignPtrDown(Ptr, SlabMinSize()); - - // Lock the map on read - std::shared_lock Lk(getKnownSlabsMapLock()); - - ToPool = false; - auto Slabs = getKnownSlabs().equal_range(SlabPtr); - if (Slabs.first == Slabs.second) { - Lk.unlock(); - memoryProviderFree(getMemHandle(), Ptr); - return; - } - - for (auto It = Slabs.first; It != Slabs.second; ++It) { - // The slab object won't be deleted until it's removed from the map which is - // protected by the lock, so it's safe to access it here. - auto &Slab = It->second; - if (Ptr >= Slab.getPtr() && Ptr < Slab.getEnd()) { - // Unlock the map before freeing the chunk, it may be locked on write - // there - Lk.unlock(); - auto &Bucket = Slab.getBucket(); - - if (getParams().PoolTrace > 1) { - Bucket.countFree(); - } - - VALGRIND_DO_MEMPOOL_FREE(this, Ptr); - annotate_memory_inaccessible(Ptr, Bucket.getSize()); - if (Bucket.getSize() <= Bucket.ChunkCutOff()) { - Bucket.freeChunk(Ptr, Slab, ToPool); - } else { - Bucket.freeSlab(Slab, ToPool); - } - - return; - } - } - - Lk.unlock(); - // There is a rare case when we have a pointer from system allocation next - // to some slab with an entry in the map. So we find a slab - // but the range checks fail. 
- memoryProviderFree(getMemHandle(), Ptr); -} - -void DisjointPool::AllocImpl::printStats(bool &TitlePrinted, - size_t &HighBucketSize, - size_t &HighPeakSlabsInUse, - const std::string &MTName) { - HighBucketSize = 0; - HighPeakSlabsInUse = 0; - for (auto &B : Buckets) { - (*B).printStats(TitlePrinted, MTName); - HighPeakSlabsInUse = std::max((*B).maxSlabsInUse, HighPeakSlabsInUse); - if ((*B).allocCount) { - HighBucketSize = std::max((*B).SlabAllocSize(), HighBucketSize); - } - } -} - -umf_result_t -DisjointPool::initialize(umf_memory_provider_handle_t provider, - umf_disjoint_pool_params_handle_t parameters) { - if (!provider) { - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - // MinBucketSize parameter must be a power of 2 for bucket sizes - // to generate correctly. - if (!parameters->MinBucketSize || - !((parameters->MinBucketSize & (parameters->MinBucketSize - 1)) == 0)) { - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - - impl = std::make_unique(provider, parameters); - return UMF_RESULT_SUCCESS; -} - -void *DisjointPool::malloc(size_t size) { // For full-slab allocations indicates - // whether slab is from Pool. - bool FromPool; - auto Ptr = impl->allocate(size, FromPool); - - if (impl->getParams().PoolTrace > 2) { - auto MT = impl->getParams().Name; - std::cout << "Allocated " << std::setw(8) << size << " " << MT - << " bytes from " << (FromPool ? "Pool" : "Provider") << " ->" - << Ptr << std::endl; - } - return Ptr; -} - -void *DisjointPool::calloc(size_t, size_t) { - // Not supported - umf::getPoolLastStatusRef() = UMF_RESULT_ERROR_NOT_SUPPORTED; - return NULL; -} - -void *DisjointPool::realloc(void *, size_t) { - // Not supported - umf::getPoolLastStatusRef() = UMF_RESULT_ERROR_NOT_SUPPORTED; - return NULL; -} - -void *DisjointPool::aligned_malloc(size_t size, size_t alignment) { - bool FromPool; - auto Ptr = impl->allocate(size, alignment, FromPool); - - if (impl->getParams().PoolTrace > 2) { - auto MT = impl->getParams().Name; - std::cout << "Allocated " << std::setw(8) << size << " " << MT - << " bytes aligned at " << alignment << " from " - << (FromPool ? "Pool" : "Provider") << " ->" << Ptr - << std::endl; - } - return Ptr; -} - -size_t DisjointPool::malloc_usable_size(void *) { - // Not supported - return 0; -} - -umf_result_t DisjointPool::free(void *ptr) try { - bool ToPool; - impl->deallocate(ptr, ToPool); - - if (impl->getParams().PoolTrace > 2) { - auto MT = impl->getParams().Name; - std::cout << "Freed " << MT << " " << ptr << " to " - << (ToPool ? 
"Pool" : "Provider") - << ", Current total pool size " - << impl->getLimits()->TotalSize.load() - << ", Current pool size for " << MT << " " - << impl->getParams().CurPoolSize << "\n"; - } - return UMF_RESULT_SUCCESS; -} catch (MemoryProviderError &e) { - return e.code; -} - -umf_result_t DisjointPool::get_last_allocation_error() { - return umf::getPoolLastStatusRef(); -} - -DisjointPool::DisjointPool() {} - -// Define destructor for use with unique_ptr -DisjointPool::~DisjointPool() { - bool TitlePrinted = false; - size_t HighBucketSize; - size_t HighPeakSlabsInUse; - if (impl->getParams().PoolTrace > 1) { - auto name = impl->getParams().Name; - try { // cannot throw in destructor - impl->printStats(TitlePrinted, HighBucketSize, HighPeakSlabsInUse, - name); - if (TitlePrinted) { - std::cout << "Current Pool Size " - << impl->getLimits()->TotalSize.load() << std::endl; - std::cout << "Suggested Setting=;" - << std::string(1, (char)tolower(name[0])) - << std::string(name + 1) << ":" << HighBucketSize - << "," << HighPeakSlabsInUse << ",64K" << std::endl; - } - } catch (...) { // ignore exceptions - } - } -} - -static umf_memory_pool_ops_t UMF_DISJOINT_POOL_OPS = - umf::poolMakeCOps(); - -umf_memory_pool_ops_t *umfDisjointPoolOps(void) { - return &UMF_DISJOINT_POOL_OPS; -} diff --git a/src/pool/pool_disjoint_internal.h b/src/pool/pool_disjoint_internal.h new file mode 100644 index 000000000..56a25e611 --- /dev/null +++ b/src/pool/pool_disjoint_internal.h @@ -0,0 +1,176 @@ +/* + * Copyright (C) 2025 Intel Corporation + * + * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +*/ + +#ifndef UMF_POOL_DISJOINT_INTERNAL_H +#define UMF_POOL_DISJOINT_INTERNAL_H 1 + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "critnib/critnib.h" +#include "uthash/utlist.h" + +#include "base_alloc_global.h" +#include "provider/provider_tracking.h" +#include "utils_common.h" +#include "utils_concurrency.h" +#include "utils_log.h" +#include "utils_math.h" + +typedef struct bucket_t bucket_t; +typedef struct slab_t slab_t; +typedef struct slab_list_item_t slab_list_item_t; +typedef struct disjoint_pool_t disjoint_pool_t; + +typedef struct bucket_t { + size_t size; + + // Linked list of slabs which have at least 1 available chunk. + // We always count available slabs as an optimization. + slab_list_item_t *available_slabs; + size_t available_slabs_num; + + // Linked list of slabs with 0 available chunks + slab_list_item_t *unavailable_slabs; + + // Protects the bucket and all the corresponding slabs + utils_mutex_t bucket_lock; + + // Reference to the allocator context, used to access memory allocation + // routines, slab map and etc. + disjoint_pool_t *pool; + + umf_disjoint_pool_shared_limits_handle_t shared_limits; + + // For buckets used in chunked mode, a counter of slabs in the pool. + // For allocations that use an entire slab each, the entries in the + // "available" list are entries in the pool. Each slab is available for a + // new allocation. The size of the available list is the size of the pool. + // + // For allocations that use slabs in chunked mode, slabs will be in the + // "available" list if any one or more of their chunks are free. The entire + // slab is not necessarily free, just some chunks in the slab are free. To + // implement pooling, we will allow one slab in the "available" list to be + // entirely empty, and treat this slab as "in the pool". 
+ // When a slab becomes entirely free, we must decide whether to return it + // to the provider or keep it allocated. We keep a counter of entirely + // empty slabs within the "available" list to speed up the process of + // checking if a slab in this bucket is already pooled. + size_t chunked_slabs_in_pool; + + // Statistics + size_t alloc_count; + size_t alloc_pool_count; + size_t free_count; + size_t curr_slabs_in_use; + size_t curr_slabs_in_pool; + size_t max_slabs_in_pool; + size_t max_slabs_in_use; +} bucket_t; + +// Represents the allocated memory block of size 'slab_min_size' +// Internally, it splits the memory block into chunks. The number of +// chunks depends on the size of a Bucket which created the Slab. +// Note: Bucket's methods are responsible for thread safety of Slab access, +// so no locking happens here. +typedef struct slab_t { + // Pointer to the allocated memory of slab_min_size bytes + void *mem_ptr; + size_t slab_size; + + // Represents the current state of each chunk: if the bit is set, the + // chunk is allocated; otherwise, the chunk is free for allocation + bool *chunks; + size_t num_chunks_total; + + // Total number of allocated chunks at the moment. + size_t num_chunks_allocated; + + // The bucket which the slab belongs to + bucket_t *bucket; + + // Hints where to start search for free chunk in a slab + size_t first_free_chunk_idx; + + // Store iterator to the corresponding node in avail/unavail list + // to achieve O(1) removal + slab_list_item_t *iter; +} slab_t; + +typedef struct slab_list_item_t { + slab_t *val; + struct slab_list_item_t *prev, *next; +} slab_list_item_t; + +typedef struct umf_disjoint_pool_shared_limits_t { + size_t max_size; + size_t total_size; // requires atomic access +} umf_disjoint_pool_shared_limits_t; + +typedef struct umf_disjoint_pool_params_t { + // Minimum allocation size that will be requested from the memory provider. + size_t slab_min_size; + + // Allocations up to this limit will be subject to chunking/pooling + size_t max_poolable_size; + + // When pooling, each bucket will hold a max of 'capacity' unfreed slabs + size_t capacity; + + // Holds the minimum bucket size valid for allocation of a memory type. + // This value must be a power of 2. + size_t min_bucket_size; + + // Holds size of the pool managed by the allocator. + size_t cur_pool_size; + + // Whether to print pool usage statistics + int pool_trace; + + // Memory limits that can be shared between multiple pool instances, + // i.e. if multiple pools use the same shared_limits sum of those pools' + // sizes cannot exceed max_size. 
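+    // A NULL value means the pool uses its own default_shared_limits,
+    // created with max_size == SIZE_MAX (effectively no limit).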
+ umf_disjoint_pool_shared_limits_handle_t shared_limits; + + // Name used in traces + char *name; +} umf_disjoint_pool_params_t; + +typedef struct disjoint_pool_t { + // Keep the list of known slabs to quickly find required one during the + // free() + critnib *known_slabs; // (void *, slab_t *) + + // Handle to the memory provider + umf_memory_provider_handle_t provider; + + // Array of bucket_t* + bucket_t **buckets; + size_t buckets_num; + + // Configuration for this instance + umf_disjoint_pool_params_t params; + + umf_disjoint_pool_shared_limits_handle_t default_shared_limits; + + // Used in algorithm for finding buckets + size_t min_bucket_size_exp; + + // Coarse-grain allocation min alignment + size_t provider_min_page_size; +} disjoint_pool_t; + +#endif // UMF_POOL_DISJOINT_INTERNAL_H diff --git a/src/utils/utils_common.h b/src/utils/utils_common.h index d8ea9bf6a..7824e74af 100644 --- a/src/utils/utils_common.h +++ b/src/utils/utils_common.h @@ -38,6 +38,8 @@ typedef enum umf_purge_advise_t { expression; \ } while (0) +#define IS_POWER_OF_2(value) ((value) != 0 && ((value) & ((value)-1)) == 0) + #define IS_ALIGNED(value, align) \ ((align == 0 || (((value) & ((align)-1)) == 0))) #define IS_NOT_ALIGNED(value, align) \ diff --git a/src/utils/utils_concurrency.h b/src/utils/utils_concurrency.h index 287f5d12a..910c859b0 100644 --- a/src/utils/utils_concurrency.h +++ b/src/utils/utils_concurrency.h @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2023-2024 Intel Corporation + * Copyright (C) 2023-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -11,6 +11,7 @@ #define UMF_UTILS_CONCURRENCY_H 1 #include +#include #ifdef _WIN32 #include @@ -45,11 +46,27 @@ typedef struct utils_mutex_t { } utils_mutex_t; size_t utils_mutex_get_size(void); -utils_mutex_t *utils_mutex_init(void *ptr); +utils_mutex_t *utils_mutex_init(utils_mutex_t *ptr); void utils_mutex_destroy_not_free(utils_mutex_t *m); int utils_mutex_lock(utils_mutex_t *mutex); int utils_mutex_unlock(utils_mutex_t *mutex); +typedef struct utils_rwlock_t { +#ifdef _WIN32 + // Slim Read/Wrtiter lock + SRWLOCK lock; +#else + pthread_rwlock_t rwlock; +#endif +} utils_rwlock_t; + +utils_rwlock_t *utils_rwlock_init(utils_rwlock_t *ptr); +void utils_rwlock_destroy_not_free(utils_rwlock_t *rwlock); +int utils_read_lock(utils_rwlock_t *rwlock); +int utils_write_lock(utils_rwlock_t *rwlock); +int utils_read_unlock(utils_rwlock_t *rwlock); +int utils_write_unlock(utils_rwlock_t *rwlock); + #if defined(_WIN32) #define UTIL_ONCE_FLAG INIT_ONCE #define UTIL_ONCE_FLAG_INIT INIT_ONCE_STATIC_INIT diff --git a/src/utils/utils_posix_concurrency.c b/src/utils/utils_posix_concurrency.c index 531e09c10..44a317361 100644 --- a/src/utils/utils_posix_concurrency.c +++ b/src/utils/utils_posix_concurrency.c @@ -11,10 +11,11 @@ #include #include "utils_concurrency.h" +#include "utils_log.h" size_t utils_mutex_get_size(void) { return sizeof(pthread_mutex_t); } -utils_mutex_t *utils_mutex_init(void *ptr) { +utils_mutex_t *utils_mutex_init(utils_mutex_t *ptr) { pthread_mutex_t *mutex = (pthread_mutex_t *)ptr; int ret = pthread_mutex_init(mutex, NULL); return ret == 0 ? 
((utils_mutex_t *)mutex) : NULL;
@@ -23,7 +24,9 @@ utils_mutex_t *utils_mutex_init(void *ptr) {
 
 void utils_mutex_destroy_not_free(utils_mutex_t *m) {
     pthread_mutex_t *mutex = (pthread_mutex_t *)m;
     int ret = pthread_mutex_destroy(mutex);
-    (void)ret; // TODO: add logging
+    if (ret) {
+        LOG_ERR("pthread_mutex_destroy failed");
+    }
 }
 
 int utils_mutex_lock(utils_mutex_t *m) {
@@ -37,3 +40,33 @@ int utils_mutex_unlock(utils_mutex_t *m) {
 void utils_init_once(UTIL_ONCE_FLAG *flag, void (*oneCb)(void)) {
     pthread_once(flag, oneCb);
 }
+
+utils_rwlock_t *utils_rwlock_init(utils_rwlock_t *ptr) {
+    pthread_rwlock_t *rwlock = (pthread_rwlock_t *)ptr;
+    int ret = pthread_rwlock_init(rwlock, NULL);
+    return ret == 0 ? ((utils_rwlock_t *)rwlock) : NULL;
+}
+
+void utils_rwlock_destroy_not_free(utils_rwlock_t *ptr) {
+    pthread_rwlock_t *rwlock = (pthread_rwlock_t *)ptr;
+    int ret = pthread_rwlock_destroy(rwlock);
+    if (ret) {
+        LOG_ERR("pthread_rwlock_destroy failed");
+    }
+}
+
+int utils_read_lock(utils_rwlock_t *rwlock) {
+    return pthread_rwlock_rdlock((pthread_rwlock_t *)rwlock);
+}
+
+int utils_write_lock(utils_rwlock_t *rwlock) {
+    return pthread_rwlock_wrlock((pthread_rwlock_t *)rwlock);
+}
+
+int utils_read_unlock(utils_rwlock_t *rwlock) {
+    return pthread_rwlock_unlock((pthread_rwlock_t *)rwlock);
+}
+
+int utils_write_unlock(utils_rwlock_t *rwlock) {
+    return pthread_rwlock_unlock((pthread_rwlock_t *)rwlock);
+}
diff --git a/src/utils/utils_windows_concurrency.c b/src/utils/utils_windows_concurrency.c
index e2cc574a9..faa302be3 100644
--- a/src/utils/utils_windows_concurrency.c
+++ b/src/utils/utils_windows_concurrency.c
@@ -11,35 +11,61 @@
 
 size_t utils_mutex_get_size(void) { return sizeof(utils_mutex_t); }
 
-utils_mutex_t *utils_mutex_init(void *ptr) {
-    utils_mutex_t *mutex_internal = (utils_mutex_t *)ptr;
-    InitializeCriticalSection(&mutex_internal->lock);
-    return (utils_mutex_t *)mutex_internal;
+utils_mutex_t *utils_mutex_init(utils_mutex_t *mutex) {
+    InitializeCriticalSection(&mutex->lock);
+    return mutex;
 }
 
 void utils_mutex_destroy_not_free(utils_mutex_t *mutex) {
-    utils_mutex_t *mutex_internal = (utils_mutex_t *)mutex;
-    DeleteCriticalSection(&mutex_internal->lock);
+    DeleteCriticalSection(&mutex->lock);
 }
 
 int utils_mutex_lock(utils_mutex_t *mutex) {
-    utils_mutex_t *mutex_internal = (utils_mutex_t *)mutex;
-    EnterCriticalSection(&mutex_internal->lock);
+    EnterCriticalSection(&mutex->lock);
 
-    if (mutex_internal->lock.RecursionCount > 1) {
-        LeaveCriticalSection(&mutex_internal->lock);
+    if (mutex->lock.RecursionCount > 1) {
+        LeaveCriticalSection(&mutex->lock);
         /* deadlock detected */
-        return -1;
+        abort();
     }
     return 0;
 }
 
 int utils_mutex_unlock(utils_mutex_t *mutex) {
-    utils_mutex_t *mutex_internal = (utils_mutex_t *)mutex;
-    LeaveCriticalSection(&mutex_internal->lock);
+    LeaveCriticalSection(&mutex->lock);
     return 0;
 }
 
+utils_rwlock_t *utils_rwlock_init(utils_rwlock_t *rwlock) {
+    InitializeSRWLock(&rwlock->lock);
+    return rwlock; // never fails
+}
+
+void utils_rwlock_destroy_not_free(utils_rwlock_t *rwlock) {
+    // there is no call to destroy an SRW lock
+    (void)rwlock;
+}
+
+int utils_read_lock(utils_rwlock_t *rwlock) {
+    AcquireSRWLockShared(&rwlock->lock);
+    return 0; // never fails
+}
+
+int utils_write_lock(utils_rwlock_t *rwlock) {
+    AcquireSRWLockExclusive(&rwlock->lock);
+    return 0; // never fails
+}
+
+int utils_read_unlock(utils_rwlock_t *rwlock) {
+    ReleaseSRWLockShared(&rwlock->lock);
+    return 0; // never fails
+}
+
+int utils_write_unlock(utils_rwlock_t *rwlock) {
ReleaseSRWLockExclusive(&rwlock->lock); + return 0; // never fails +} + static BOOL CALLBACK initOnceCb(PINIT_ONCE InitOnce, PVOID Parameter, PVOID *lpContext) { (void)InitOnce; // unused diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index cdbe2425f..ecdde95e1 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -71,10 +71,6 @@ function(build_umf_test) set(CPL_DEFS ${CPL_DEFS} UMF_POOL_SCALABLE_ENABLED=1) endif() - if(UMF_BUILD_LIBUMF_POOL_DISJOINT) - set(CPL_DEFS ${CPL_DEFS} UMF_POOL_DISJOINT_ENABLED=1) - endif() - set(TEST_LIBS umf_test_common ${ARG_LIBS} @@ -192,10 +188,6 @@ if(UMF_BUILD_SHARED_LIBRARY) endif() endif() -if(UMF_BUILD_LIBUMF_POOL_DISJOINT) - set(LIB_DISJOINT_POOL disjoint_pool) -endif() - if(UMF_BUILD_SHARED_LIBRARY) # if build as shared library, ba symbols won't be visible in tests set(BA_SOURCES_FOR_TEST ${BA_SOURCES}) @@ -237,32 +229,29 @@ add_umf_test( SRCS coarse_lib.cpp ${BA_SOURCES_FOR_TEST} LIBS ${UMF_UTILS_FOR_TEST} coarse) -if(UMF_BUILD_LIBUMF_POOL_DISJOINT) - add_umf_test( - NAME disjointPool - SRCS pools/disjoint_pool.cpp malloc_compliance_tests.cpp - LIBS disjoint_pool) +add_umf_test( + NAME disjoint_pool + SRCS pools/disjoint_pool.cpp malloc_compliance_tests.cpp + ${BA_SOURCES_FOR_TEST} + LIBS ${UMF_UTILS_FOR_TEST}) + +add_umf_test( + NAME c_api_disjoint_pool + SRCS c_api/disjoint_pool.c ${BA_SOURCES_FOR_TEST} + LIBS ${UMF_UTILS_FOR_TEST}) + +if(LINUX AND (NOT UMF_DISABLE_HWLOC)) + # this test uses the file provider add_umf_test( - NAME c_api_disjoint_pool - SRCS c_api/disjoint_pool.c - LIBS disjoint_pool) - if(LINUX AND (NOT UMF_DISABLE_HWLOC)) - # this test uses the file provider - add_umf_test( - NAME disjointPoolFileProv - SRCS disjointPoolFileProv.cpp - LIBS disjoint_pool) - endif() + NAME disjoint_pool_file_prov + SRCS disjoint_pool_file_prov.cpp ${BA_SOURCES_FOR_TEST} + LIBS ${UMF_UTILS_FOR_TEST}) endif() -if(UMF_BUILD_LIBUMF_POOL_DISJOINT - AND UMF_POOL_JEMALLOC_ENABLED +if(UMF_POOL_JEMALLOC_ENABLED AND UMF_POOL_SCALABLE_ENABLED AND (NOT UMF_DISABLE_HWLOC)) - add_umf_test( - NAME c_api_multi_pool - SRCS c_api/multi_pool.c - LIBS disjoint_pool) + add_umf_test(NAME c_api_multi_pool SRCS c_api/multi_pool.c) endif() if(UMF_POOL_JEMALLOC_ENABLED AND (NOT UMF_DISABLE_HWLOC)) @@ -293,7 +282,7 @@ if(LINUX AND (NOT UMF_DISABLE_HWLOC)) # OS-specific functions are implemented add_umf_test( NAME provider_os_memory SRCS provider_os_memory.cpp ${BA_SOURCES_FOR_TEST} - LIBS ${UMF_UTILS_FOR_TEST} ${LIB_DISJOINT_POOL}) + LIBS ${UMF_UTILS_FOR_TEST}) add_umf_test( NAME provider_os_memory_multiple_numa_nodes SRCS provider_os_memory_multiple_numa_nodes.cpp @@ -618,37 +607,33 @@ if(LINUX) # TODO add IPC tests for CUDA - if(UMF_BUILD_GPU_TESTS - AND UMF_BUILD_LEVEL_ZERO_PROVIDER - AND UMF_BUILD_LIBUMF_POOL_DISJOINT) + if(UMF_BUILD_GPU_TESTS AND UMF_BUILD_LEVEL_ZERO_PROVIDER) build_umf_test( NAME ipc_level_zero_prov_consumer SRCS providers/ipc_level_zero_prov_consumer.c common/ipc_common.c providers/ipc_level_zero_prov_common.c ${UMF_UTILS_DIR}/utils_level_zero.cpp - LIBS ze_loader disjoint_pool ${UMF_UTILS_FOR_TEST}) + LIBS ze_loader ${UMF_UTILS_FOR_TEST}) build_umf_test( NAME ipc_level_zero_prov_producer SRCS providers/ipc_level_zero_prov_producer.c common/ipc_common.c providers/ipc_level_zero_prov_common.c ${UMF_UTILS_DIR}/utils_level_zero.cpp - LIBS ze_loader disjoint_pool ${UMF_UTILS_FOR_TEST}) + LIBS ze_loader ${UMF_UTILS_FOR_TEST}) add_umf_ipc_test(TEST ipc_level_zero_prov SRC_DIR providers) endif() - if(UMF_BUILD_GPU_TESTS - AND 
UMF_BUILD_CUDA_PROVIDER - AND UMF_BUILD_LIBUMF_POOL_DISJOINT) + if(UMF_BUILD_GPU_TESTS AND UMF_BUILD_CUDA_PROVIDER) build_umf_test( NAME ipc_cuda_prov_consumer SRCS providers/ipc_cuda_prov_consumer.c common/ipc_common.c providers/ipc_cuda_prov_common.c providers/cuda_helpers.cpp - LIBS cuda disjoint_pool ${UMF_UTILS_FOR_TEST}) + LIBS cuda ${UMF_UTILS_FOR_TEST}) build_umf_test( NAME ipc_cuda_prov_producer SRCS providers/ipc_cuda_prov_producer.c common/ipc_common.c providers/ipc_cuda_prov_common.c providers/cuda_helpers.cpp - LIBS cuda disjoint_pool ${UMF_UTILS_FOR_TEST}) + LIBS cuda ${UMF_UTILS_FOR_TEST}) add_umf_ipc_test(TEST ipc_cuda_prov SRC_DIR providers) endif() else() @@ -701,41 +686,34 @@ if(LINUX ) endif() - if(UMF_BUILD_GPU_EXAMPLES - AND UMF_BUILD_LIBUMF_POOL_DISJOINT - AND UMF_BUILD_LEVEL_ZERO_PROVIDER) + if(UMF_BUILD_GPU_EXAMPLES AND UMF_BUILD_LEVEL_ZERO_PROVIDER) set(EXAMPLES ${EXAMPLES} level_zero_shared_memory) else() message( STATUS - "GPU level zero shared memory example requires UMF_BUILD_GPU_EXAMPLES, " - "UMF_BUILD_LEVEL_ZERO_PROVIDER and UMF_BUILD_LIBUMF_POOL_DISJOINT " - "to be turned ON - skipping") + "GPU level zero shared memory example requires UMF_BUILD_GPU_EXAMPLES and " + "UMF_BUILD_LEVEL_ZERO_PROVIDER to be turned ON - skipping") endif() if(UMF_BUILD_GPU_EXAMPLES - AND UMF_BUILD_LIBUMF_POOL_DISJOINT AND UMF_BUILD_CUDA_PROVIDER AND UMF_CUDA_ENABLED) set(EXAMPLES ${EXAMPLES} cuda_shared_memory) else() message( STATUS - "GPU CUDA shared memory example requires UMF_BUILD_GPU_EXAMPLES, " - "UMF_BUILD_CUDA_PROVIDER, UMF_BUILD_LIBUMF_POOL_DISJOINT " - "to be turned ON and installed CUDA libraries - skipping") + "GPU CUDA shared memory example requires UMF_BUILD_GPU_EXAMPLES " + "and UMF_BUILD_CUDA_PROVIDER to be turned ON and installed CUDA " + "libraries - skipping") endif() # TODO add IPC examples for CUDA - if(UMF_BUILD_GPU_EXAMPLES - AND UMF_BUILD_LIBUMF_POOL_DISJOINT - AND UMF_BUILD_LEVEL_ZERO_PROVIDER) + if(UMF_BUILD_GPU_EXAMPLES AND UMF_BUILD_LEVEL_ZERO_PROVIDER) set(EXAMPLES ${EXAMPLES} ipc_level_zero) else() message( - STATUS - "IPC Level Zero example requires UMF_BUILD_GPU_EXAMPLES, UMF_BUILD_LEVEL_ZERO_PROVIDER and UMF_BUILD_LIBUMF_POOL_DISJOINT to be turned ON - skipping" - ) + STATUS "IPC Level Zero example requires UMF_BUILD_GPU_EXAMPLES and " + "UMF_BUILD_LEVEL_ZERO_PROVIDER to be turned ON - skipping") endif() if(UMF_POOL_SCALABLE_ENABLED) diff --git a/test/c_api/disjoint_pool.c b/test/c_api/disjoint_pool.c index 4d4634def..b529497c8 100644 --- a/test/c_api/disjoint_pool.c +++ b/test/c_api/disjoint_pool.c @@ -1,10 +1,11 @@ -// Copyright (C) 2023-2024 Intel Corporation +// Copyright (C) 2023-2025 Intel Corporation // Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception #include -#include "pool_disjoint.h" +#include + #include "provider_null.h" #include "test_helpers.h" #include "test_ut_asserts.h" diff --git a/test/disjointPoolFileProv.cpp b/test/disjoint_pool_file_prov.cpp similarity index 99% rename from test/disjointPoolFileProv.cpp rename to test/disjoint_pool_file_prov.cpp index 383487a87..b874d2a49 100644 --- a/test/disjointPoolFileProv.cpp +++ b/test/disjoint_pool_file_prov.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2023-2024 Intel Corporation + * Copyright (C) 2023-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception diff --git a/test/pools/disjoint_pool.cpp b/test/pools/disjoint_pool.cpp index 025f546be..dad960187 100644 --- a/test/pools/disjoint_pool.cpp +++ b/test/pools/disjoint_pool.cpp @@ -4,9 +4,11 @@ #include +#include + #include "pool.hpp" +#include "pool/pool_disjoint_internal.h" #include "poolFixtures.hpp" -#include "pool_disjoint.h" #include "provider.hpp" #include "provider_null.h" #include "provider_trace.h" @@ -57,11 +59,130 @@ umf_result_t poolConfigDestroy(void *config) { using umf_test::test; using namespace umf_test; +TEST_F(test, internals) { + static umf_result_t expectedResult = UMF_RESULT_SUCCESS; + struct memory_provider : public umf_test::provider_base_t { + umf_result_t alloc(size_t size, size_t alignment, void **ptr) noexcept { + *ptr = umf_ba_global_aligned_alloc(size, alignment); + return UMF_RESULT_SUCCESS; + } + + umf_result_t free(void *ptr, [[maybe_unused]] size_t size) noexcept { + // do the actual free only when we expect the success + if (expectedResult == UMF_RESULT_SUCCESS) { + umf_ba_global_free(ptr); + } + return expectedResult; + } + + umf_result_t + get_min_page_size([[maybe_unused]] void *ptr, + [[maybe_unused]] size_t *pageSize) noexcept { + *pageSize = 1024; + return UMF_RESULT_SUCCESS; + } + }; + umf_memory_provider_ops_t provider_ops = + umf::providerMakeCOps(); + + auto providerUnique = + wrapProviderUnique(createProviderChecked(&provider_ops, nullptr)); + + umf_memory_provider_handle_t provider_handle; + provider_handle = providerUnique.get(); + + umf_disjoint_pool_params_handle_t params = + (umf_disjoint_pool_params_handle_t)defaultPoolConfig(); + // set to maximum tracing + params->pool_trace = 3; + params->max_poolable_size = 1024 * 1024; + + // in "internals" test we use ops interface to directly manipulate the pool + // structure + umf_memory_pool_ops_t *ops = umfDisjointPoolOps(); + EXPECT_NE(ops, nullptr); + + disjoint_pool_t *pool; + umf_result_t res = ops->initialize(provider_handle, params, (void **)&pool); + EXPECT_EQ(res, UMF_RESULT_SUCCESS); + EXPECT_NE(pool, nullptr); + EXPECT_EQ(pool->provider_min_page_size, 1024); + + // check buckets sizes + size_t expected_size = DEFAULT_DISJOINT_MIN_BUCKET_SIZE; + EXPECT_EQ(pool->buckets[0]->size, expected_size); + EXPECT_EQ(pool->buckets[pool->buckets_num - 1]->size, + (size_t)1 << 31); // 2GB + for (size_t i = 0; i < pool->buckets_num; i++) { + bucket_t *bucket = pool->buckets[i]; + EXPECT_NE(bucket, nullptr); + EXPECT_EQ(bucket->size, expected_size); + + // assuming DEFAULT_DISJOINT_MIN_BUCKET_SIZE = 64, expected bucket + // sizes are: 64, 96, 128, 192, 256, ..., 2GB + if (i % 2 == 0) { + expected_size += expected_size / 2; + } else { + expected_size = DEFAULT_DISJOINT_MIN_BUCKET_SIZE << ((i + 1) / 2); + } + } + + // test small allocations + size_t size = 8; + void *ptr = ops->malloc(pool, size); + EXPECT_NE(ptr, nullptr); + + // get bucket - because of small size this should be the first bucket in + // the pool + bucket_t *bucket = pool->buckets[0]; + EXPECT_NE(bucket, nullptr); + + // check bucket stats + EXPECT_EQ(bucket->alloc_count, 1); + + // first allocation will always use external memory (newly added to the + // pool) and this is counted as allocation from the outside of the pool + EXPECT_EQ(bucket->alloc_pool_count, 0); + EXPECT_EQ(bucket->curr_slabs_in_use, 1); + + // check slab - there should be only single slab allocated + EXPECT_NE(bucket->available_slabs, nullptr); + EXPECT_EQ(bucket->available_slabs_num, 1); + 
EXPECT_EQ(bucket->available_slabs->next, nullptr); + slab_t *slab = bucket->available_slabs->val; + + // check slab stats + EXPECT_GE(slab->slab_size, params->slab_min_size); + EXPECT_GE(slab->num_chunks_total, slab->slab_size / bucket->size); + + // check allocation in slab + EXPECT_EQ(slab->chunks[0], true); + EXPECT_EQ(slab->chunks[1], false); + EXPECT_EQ(slab->first_free_chunk_idx, 1); + + // TODO: + // * multiple alloc + free from single bucket + // * alignments + // * full slab alloc + // * slab overflow + // * chunked slabs + // * multiple alloc + free from different buckets + // * alloc something outside pool (> MaxPoolableSize) + // * test capacity + // * check minBucketSize + // * test large objects + // * check available_slabs_num + + // cleanup + ops->finalize(pool); + umfDisjointPoolParamsDestroy(params); +} + TEST_F(test, freeErrorPropagation) { static umf_result_t expectedResult = UMF_RESULT_SUCCESS; struct memory_provider : public umf_test::provider_base_t { - umf_result_t alloc(size_t size, size_t, void **ptr) noexcept { - *ptr = umf_ba_global_alloc(size); + umf_result_t alloc(size_t size, size_t alignment, void **ptr) noexcept { + *ptr = umf_ba_global_aligned_alloc(size, alignment); return UMF_RESULT_SUCCESS; } @@ -117,8 +238,8 @@ TEST_F(test, sharedLimits) { static size_t numFrees = 0; struct memory_provider : public umf_test::provider_base_t { - umf_result_t alloc(size_t size, size_t, void **ptr) noexcept { - *ptr = umf_ba_global_alloc(size); + umf_result_t alloc(size_t size, size_t alignment, void **ptr) noexcept { + *ptr = umf_ba_global_aligned_alloc(size, alignment); numAllocs++; return UMF_RESULT_SUCCESS; } diff --git a/test/provider_os_memory.cpp b/test/provider_os_memory.cpp index ddc44548e..5b647b642 100644 --- a/test/provider_os_memory.cpp +++ b/test/provider_os_memory.cpp @@ -9,10 +9,8 @@ #include "test_helpers.h" #include -#include -#ifdef UMF_POOL_DISJOINT_ENABLED #include -#endif +#include #ifdef UMF_POOL_JEMALLOC_ENABLED #include #endif @@ -428,8 +426,6 @@ umf_result_t destroyOsMemoryProviderParamsShared(void *params) { HostMemoryAccessor hostAccessor; -#ifdef UMF_POOL_DISJOINT_ENABLED - void *createDisjointPoolParams() { umf_disjoint_pool_params_handle_t params = nullptr; umf_result_t res = umfDisjointPoolParamsCreate(¶ms); @@ -465,14 +461,10 @@ umf_result_t destroyDisjointPoolParams(void *params) { static_cast(params)); } -#endif - static std::vector ipcTestParamsList = { -#ifdef UMF_POOL_DISJOINT_ENABLED {umfDisjointPoolOps(), createDisjointPoolParams, destroyDisjointPoolParams, umfOsMemoryProviderOps(), createOsMemoryProviderParamsShared, destroyOsMemoryProviderParamsShared, &hostAccessor}, -#endif #ifdef UMF_POOL_JEMALLOC_ENABLED {umfJemallocPoolOps(), nullptr, nullptr, umfOsMemoryProviderOps(), createOsMemoryProviderParamsShared, destroyOsMemoryProviderParamsShared, diff --git a/test/supp/drd-umf_test-disjoint_pool.supp b/test/supp/drd-umf_test-disjoint_pool.supp new file mode 100644 index 000000000..24a44b93d --- /dev/null +++ b/test/supp/drd-umf_test-disjoint_pool.supp @@ -0,0 +1,7 @@ +{ + False-positive ConflictingAccess in critnib_insert + drd:ConflictingAccess + fun:store + fun:critnib_insert + ... 
+} diff --git a/test/supp/helgrind-umf_test-disjointCoarseMallocPool.supp b/test/supp/helgrind-umf_test-disjointCoarseMallocPool.supp deleted file mode 100644 index 2f669eb31..000000000 --- a/test/supp/helgrind-umf_test-disjointCoarseMallocPool.supp +++ /dev/null @@ -1,24 +0,0 @@ -{ - Incompatibility with helgrind's implementation (pthread_mutex_lock with a pthread_rwlock_t* argument) - Helgrind:Misc - obj:*vgpreload_helgrind-amd64-linux.so - fun:_ZL20__gthread_mutex_lockP15pthread_mutex_t - ... -} - -{ - Incompatibility with helgrind's implementation (pthread_mutex_unlock with a pthread_rwlock_t* argument) - Helgrind:Misc - obj:*vgpreload_helgrind-amd64-linux.so - fun:_ZL22__gthread_mutex_unlockP15pthread_mutex_t - ... -} - -{ - Incompatibility with helgrind's implementation (lock order "0xA before 0xB" violated) - Helgrind:LockOrder - obj:*vgpreload_helgrind-amd64-linux.so - fun:_ZStL23__glibcxx_rwlock_wrlockP16pthread_rwlock_t - fun:_ZNSt22__shared_mutex_pthread4lockEv - ... -} diff --git a/test/supp/helgrind-umf_test-disjointPool.supp b/test/supp/helgrind-umf_test-disjoint_pool.supp similarity index 53% rename from test/supp/helgrind-umf_test-disjointPool.supp rename to test/supp/helgrind-umf_test-disjoint_pool.supp index 3ada32736..929674e8e 100644 --- a/test/supp/helgrind-umf_test-disjointPool.supp +++ b/test/supp/helgrind-umf_test-disjoint_pool.supp @@ -29,25 +29,9 @@ } { - Incompatibility with helgrind's implementation ("pthread_rwlock_{rd,rw}lock with a pthread_mutex_t* argument") - Helgrind:Misc - obj:*vgpreload_helgrind-amd64-linux.so - fun:*glibcxx_rwlock_wrlock*pthread_rwlock_t - ... -} - -{ - Incompatibility with helgrind's implementation ("pthread_rwlock_unlock with a pthread_mutex_t* argument") - Helgrind:Misc - obj:*vgpreload_helgrind-amd64-linux.so - fun:*glibcxx_rwlock_unlock*pthread_rwlock_t - ... -} - -{ - Incompatibility with helgrind's implementation ("pthread_rwlock_{rd,rw}lock with a pthread_mutex_t* argument") - Helgrind:Misc - obj:*vgpreload_helgrind-amd64-linux.so - fun:*glibcxx_rwlock_rdlock*pthread_rwlock_t* + False-positive Race in critnib_insert + Helgrind:Race + fun:store + fun:critnib_insert ... } diff --git a/test/test_installation.py b/test/test_installation.py index b5dd676dc..ef30ac759 100644 --- a/test/test_installation.py +++ b/test/test_installation.py @@ -1,4 +1,4 @@ -# Copyright (C) 2024 Intel Corporation +# Copyright (C) 2024-2025 Intel Corporation # # Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -278,11 +278,6 @@ def parse_arguments(self) -> argparse.Namespace: action="store_true", help="Add this argument if the proxy library should be built together with the UMF library", ) - self.parser.add_argument( - "--disjoint-pool", - action="store_true", - help="Add this argument if the UMF was built with Disjoint Pool enabled", - ) self.parser.add_argument( "--umf-version", action="store", @@ -299,8 +294,6 @@ def run(self) -> None: build_dir = Path(workspace_dir, self.args.build_dir) install_dir = Path(workspace_dir, self.args.install_dir) pools = [] - if self.args.disjoint_pool: - pools.append("disjoint_pool") umf_version = Version(self.args.umf_version) From 41429f486e13712af242bc21330a121d20f5d74f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Plewa?= Date: Mon, 10 Feb 2025 14:58:49 +0100 Subject: [PATCH 05/12] reenable disjoint pool benchmark --- benchmark/benchmark.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/benchmark/benchmark.cpp b/benchmark/benchmark.cpp index ad29e9029..401b06d26 100644 --- a/benchmark/benchmark.cpp +++ b/benchmark/benchmark.cpp @@ -154,13 +154,11 @@ UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, disjoint_pool_fix, UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, disjoint_pool_fix) ->Apply(&default_multiple_alloc_fix_size); -// TODO: debug why this crashes -/*UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, +UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, disjoint_pool_uniform, uniform_alloc_size, pool_allocator>); UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, disjoint_pool_uniform) ->Apply(&default_multiple_alloc_uniform_size); -*/ #ifdef UMF_POOL_JEMALLOC_ENABLED UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, jemalloc_pool_fix, From fd56adcd6e2993e99d002916289f5f25b6f2df96 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Plewa?= Date: Fri, 7 Feb 2025 15:42:45 +0100 Subject: [PATCH 06/12] fix all sizeof MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Łukasz Plewa --- src/pool/pool_disjoint.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/pool/pool_disjoint.c b/src/pool/pool_disjoint.c index e2288e49e..267791333 100644 --- a/src/pool/pool_disjoint.c +++ b/src/pool/pool_disjoint.c @@ -87,12 +87,12 @@ static slab_t *create_slab(bucket_t *bucket, bool full_size) { } else { slab->num_chunks_total = bucket_slab_min_size(bucket) / bucket->size; slab->chunks = - umf_ba_global_alloc(sizeof(bool) * slab->num_chunks_total); + umf_ba_global_alloc(sizeof(*slab->chunks) * slab->num_chunks_total); if (slab->chunks == NULL) { LOG_ERR("allocation of slab chunks failed!"); goto free_slab_iter; } - memset(slab->chunks, 0, sizeof(bool) * slab->num_chunks_total); + memset(slab->chunks, 0, sizeof(*slab->chunks) * slab->num_chunks_total); } // if slab_min_size is not a multiple of bucket size, we would have some // padding at the end of the slab @@ -703,8 +703,8 @@ umf_result_t disjoint_pool_initialize(umf_memory_provider_handle_t provider, for (; Size2 < CutOff; Size1 *= 2, Size2 *= 2) { disjoint_pool->buckets_num += 2; } - disjoint_pool->buckets = - umf_ba_global_alloc(sizeof(bucket_t *) * disjoint_pool->buckets_num); + disjoint_pool->buckets = umf_ba_global_alloc( + sizeof(*disjoint_pool->buckets) * disjoint_pool->buckets_num); int i = 0; Size1 = ts1; @@ -964,8 +964,7 @@ umf_memory_pool_ops_t 
*umfDisjointPoolOps(void) {
 
 umf_disjoint_pool_shared_limits_t *
 umfDisjointPoolSharedLimitsCreate(size_t max_size) {
-    umf_disjoint_pool_shared_limits_t *ptr =
-        umf_ba_global_alloc(sizeof(umf_disjoint_pool_shared_limits_t));
+    umf_disjoint_pool_shared_limits_t *ptr = umf_ba_global_alloc(sizeof(*ptr));
+    //umf_ba_global_alloc(sizeof(*ptr));
     ptr->max_size = max_size;
     ptr->total_size = 0;
@@ -1109,7 +1108,7 @@ umfDisjointPoolParamsSetName(umf_disjoint_pool_params_handle_t hParams,
         return UMF_RESULT_ERROR_INVALID_ARGUMENT;
     }
 
-    char *newName = umf_ba_global_alloc(sizeof(char) * (strlen(name) + 1));
+    char *newName = umf_ba_global_alloc(sizeof(*newName) * (strlen(name) + 1));
     if (newName == NULL) {
         LOG_ERR("cannot allocate memory for disjoint pool name");
         return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY;

From a74fcb404a1d421de5548d5b2392c3efc2ab56f6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C5=81ukasz=20Plewa?=
Date: Fri, 7 Feb 2025 15:46:41 +0100
Subject: [PATCH 07/12] add missing error check for allocation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Łukasz Plewa
---
 src/pool/pool_disjoint.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/pool/pool_disjoint.c b/src/pool/pool_disjoint.c
index 267791333..bf1a841a1 100644
--- a/src/pool/pool_disjoint.c
+++ b/src/pool/pool_disjoint.c
@@ -965,7 +965,10 @@ umf_memory_pool_ops_t *umfDisjointPoolOps(void) {
 
 umf_disjoint_pool_shared_limits_t *
 umfDisjointPoolSharedLimitsCreate(size_t max_size) {
     umf_disjoint_pool_shared_limits_t *ptr = umf_ba_global_alloc(sizeof(*ptr));
-    //umf_ba_global_alloc(sizeof(*ptr));
+    if (ptr == NULL) {
+        LOG_ERR("cannot allocate memory for disjoint pool shared limits");
+        return NULL;
+    }
     ptr->max_size = max_size;
     ptr->total_size = 0;
     return ptr;

From 9ec909e01506fdcf5027cf36a411594c96644190 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C5=81ukasz=20Plewa?=
Date: Fri, 7 Feb 2025 15:51:28 +0100
Subject: [PATCH 08/12] remove unneeded forward declarations
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Łukasz Plewa
---
 src/pool/pool_disjoint.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/src/pool/pool_disjoint.c b/src/pool/pool_disjoint.c
index bf1a841a1..b4488ad52 100644
--- a/src/pool/pool_disjoint.c
+++ b/src/pool/pool_disjoint.c
@@ -16,16 +16,11 @@
 #include "utils_sanitizers.h"
 
 // Forward declarations
-static slab_t *create_slab(bucket_t *bucket, bool full_size);
-static void destroy_slab(slab_t *slab);
-
 static void bucket_update_stats(bucket_t *bucket, int in_use, int in_pool);
 static bool bucket_can_pool(bucket_t *bucket);
 static void bucket_decrement_pool(bucket_t *bucket, bool *from_pool);
 static slab_list_item_t *bucket_get_avail_slab(bucket_t *bucket,
                                                bool *from_pool);
-static slab_list_item_t *bucket_get_avail_full_slab(bucket_t *bucket,
-                                                    bool *from_pool);
 
 static __TLS umf_result_t TLS_last_allocation_error;

From 75dea9524d5a3146403319c54ad533687e97f007 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C5=81ukasz=20Plewa?=
Date: Fri, 7 Feb 2025 17:17:07 +0100
Subject: [PATCH 09/12] remove distinction between "chunked slab" and "full slab"
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Instead of having two modes, we can have only chunked slabs; a full slab is
just a chunked slab with a single chunk. This removes extra complexity in
the code.
Should not have performance impact, as we added few extra steps for big allocations, but removed extra branch in the code. Signed-off-by: Łukasz Plewa --- src/pool/pool_disjoint.c | 124 ++++++++------------------------------- 1 file changed, 23 insertions(+), 101 deletions(-) diff --git a/src/pool/pool_disjoint.c b/src/pool/pool_disjoint.c index b4488ad52..9cfae16fe 100644 --- a/src/pool/pool_disjoint.c +++ b/src/pool/pool_disjoint.c @@ -18,7 +18,7 @@ // Forward declarations static void bucket_update_stats(bucket_t *bucket, int in_use, int in_pool); static bool bucket_can_pool(bucket_t *bucket); -static void bucket_decrement_pool(bucket_t *bucket, bool *from_pool); +static void bucket_decrement_pool(bucket_t *bucket); static slab_list_item_t *bucket_get_avail_slab(bucket_t *bucket, bool *from_pool); @@ -52,7 +52,7 @@ static size_t bucket_slab_alloc_size(bucket_t *bucket) { return utils_max(bucket->size, bucket_slab_min_size(bucket)); } -static slab_t *create_slab(bucket_t *bucket, bool full_size) { +static slab_t *create_slab(bucket_t *bucket) { assert(bucket); umf_result_t res = UMF_RESULT_SUCCESS; @@ -76,19 +76,16 @@ static slab_t *create_slab(bucket_t *bucket, bool full_size) { slab->iter->val = slab; slab->iter->prev = slab->iter->next = NULL; - if (full_size) { - slab->num_chunks_total = 0; - slab->chunks = NULL; - } else { - slab->num_chunks_total = bucket_slab_min_size(bucket) / bucket->size; - slab->chunks = - umf_ba_global_alloc(sizeof(*slab->chunks) * slab->num_chunks_total); - if (slab->chunks == NULL) { - LOG_ERR("allocation of slab chunks failed!"); - goto free_slab_iter; - } - memset(slab->chunks, 0, sizeof(*slab->chunks) * slab->num_chunks_total); + slab->num_chunks_total = + utils_max(bucket_slab_min_size(bucket) / bucket->size, 1); + slab->chunks = + umf_ba_global_alloc(sizeof(*slab->chunks) * slab->num_chunks_total); + if (slab->chunks == NULL) { + LOG_ERR("allocation of slab chunks failed!"); + goto free_slab_iter; } + memset(slab->chunks, 0, sizeof(*slab->chunks) * slab->num_chunks_total); + // if slab_min_size is not a multiple of bucket size, we would have some // padding at the end of the slab slab->slab_size = bucket_slab_alloc_size(bucket); @@ -157,9 +154,6 @@ static size_t slab_find_first_available_chunk_idx(const slab_t *slab) { } static void *slab_get_chunk(slab_t *slab) { - // slab has to be allocated in chunk mode - assert(slab->chunks && slab->num_chunks_total > 0); - // free chunk must exist, otherwise we would have allocated another slab const size_t chunk_idx = slab_find_first_available_chunk_idx(slab); assert(chunk_idx != SIZE_MAX); @@ -356,8 +350,8 @@ static size_t bucket_chunk_cut_off(bucket_t *bucket) { return bucket_slab_min_size(bucket) / 2; } -static slab_t *bucket_create_slab(bucket_t *bucket, bool full_size) { - slab_t *slab = create_slab(bucket, full_size); +static slab_t *bucket_create_slab(bucket_t *bucket) { + slab_t *slab = create_slab(bucket); if (slab == NULL) { LOG_ERR("create_slab failed!") return NULL; @@ -377,69 +371,20 @@ static slab_t *bucket_create_slab(bucket_t *bucket, bool full_size) { return slab; } -static slab_list_item_t *bucket_get_avail_full_slab(bucket_t *bucket, - bool *from_pool) { - // return a slab that will be used for a single allocation - if (bucket->available_slabs == NULL) { - bucket_create_slab(bucket, true /* full size */); - *from_pool = false; - } else { - bucket_decrement_pool(bucket, from_pool); - } - - return bucket->available_slabs; -} - -// NOTE: this function must be called under bucket->bucket_lock 
-static void *bucket_get_free_slab(bucket_t *bucket, bool *from_pool) { - slab_list_item_t *slab_it = bucket_get_avail_full_slab(bucket, from_pool); - if (slab_it == NULL) { - return NULL; - } - - slab_t *slab = slab_it->val; - void *ptr = slab_get(slab); - - DL_DELETE(bucket->available_slabs, slab_it); - bucket->available_slabs_num--; - slab_it->prev = NULL; - DL_PREPEND(bucket->unavailable_slabs, slab_it); - - return ptr; -} - -// NOTE: this function must be called under bucket->bucket_lock -static void bucket_free_slab(bucket_t *bucket, slab_t *slab, bool *to_pool) { - slab_list_item_t *slab_it = slab->iter; - assert(slab_it->val != NULL); - *to_pool = bucket_can_pool(bucket); - if (*to_pool) { - DL_DELETE(bucket->unavailable_slabs, slab_it); - slab_it->prev = NULL; - DL_PREPEND(bucket->available_slabs, slab_it); - bucket->available_slabs_num++; - } else { - slab_unreg(slab_it->val); - DL_DELETE(bucket->unavailable_slabs, slab_it); - destroy_slab(slab_it->val); - } -} - static slab_list_item_t *bucket_get_avail_slab(bucket_t *bucket, bool *from_pool) { if (bucket->available_slabs == NULL) { - bucket_create_slab(bucket, false /* chunked */); + bucket_create_slab(bucket); *from_pool = false; } else { slab_t *slab = bucket->available_slabs->val; + // Allocation from existing slab is treated as from pool for statistics. + *from_pool = true; if (slab->num_chunks_allocated == 0) { // If this was an empty slab, it was in the pool. // Now it is no longer in the pool, so update count. --bucket->chunked_slabs_in_pool; - bucket_decrement_pool(bucket, from_pool); - } else { - // Allocation from existing slab is treated as from pool for statistics. - *from_pool = true; + bucket_decrement_pool(bucket); } } @@ -475,10 +420,7 @@ static void bucket_update_stats(bucket_t *bucket, int in_use, int in_pool) { in_pool * bucket_slab_alloc_size(bucket); } -static void bucket_decrement_pool(bucket_t *bucket, bool *from_pool) { - // If a slab was available in the pool then note that the current pooled - // size has reduced by the size of a slab in this bucket. 
- *from_pool = true; +static void bucket_decrement_pool(bucket_t *bucket) { bucket_update_stats(bucket, 1, -1); utils_fetch_and_add64(&bucket->shared_limits->total_size, -(long long)bucket_slab_alloc_size(bucket)); @@ -487,13 +429,7 @@ static void bucket_decrement_pool(bucket_t *bucket, bool *from_pool) { static bool bucket_can_pool(bucket_t *bucket) { size_t new_free_slabs_in_bucket; - // check if this bucket is used in chunked form or as full slabs - bool chunked_bucket = bucket->size <= bucket_chunk_cut_off(bucket); - if (chunked_bucket) { - new_free_slabs_in_bucket = bucket->chunked_slabs_in_pool + 1; - } else { - new_free_slabs_in_bucket = bucket->available_slabs_num + 1; - } + new_free_slabs_in_bucket = bucket->chunked_slabs_in_pool + 1; // we keep at most params.capacity slabs in the pool if (bucket_capacity(bucket) >= new_free_slabs_in_bucket) { @@ -509,9 +445,7 @@ static bool bucket_can_pool(bucket_t *bucket) { if (utils_compare_exchange(&bucket->shared_limits->total_size, &pool_size, &new_pool_size)) { - if (chunked_bucket) { - ++bucket->chunked_slabs_in_pool; - } + ++bucket->chunked_slabs_in_pool; bucket_update_stats(bucket, -1, 1); return true; @@ -614,11 +548,7 @@ static void *disjoint_pool_allocate(disjoint_pool_t *pool, size_t size) { utils_mutex_lock(&bucket->bucket_lock); bool from_pool = false; - if (size > bucket_chunk_cut_off(bucket)) { - ptr = bucket_get_free_slab(bucket, &from_pool); - } else { - ptr = bucket_get_free_chunk(bucket, &from_pool); - } + ptr = bucket_get_free_chunk(bucket, &from_pool); if (ptr == NULL) { TLS_last_allocation_error = UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; @@ -799,11 +729,7 @@ void *disjoint_pool_aligned_malloc(void *pool, size_t size, size_t alignment) { utils_mutex_lock(&bucket->bucket_lock); - if (aligned_size > bucket_chunk_cut_off(bucket)) { - ptr = bucket_get_free_slab(bucket, &from_pool); - } else { - ptr = bucket_get_free_chunk(bucket, &from_pool); - } + ptr = bucket_get_free_chunk(bucket, &from_pool); if (ptr == NULL) { TLS_last_allocation_error = UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; @@ -889,11 +815,7 @@ umf_result_t disjoint_pool_free(void *pool, void *ptr) { utils_mutex_lock(&bucket->bucket_lock); utils_annotate_memory_inaccessible(ptr, bucket->size); - if (bucket->size <= bucket_chunk_cut_off(bucket)) { - bucket_free_chunk(bucket, ptr, slab, &to_pool); - } else { - bucket_free_slab(bucket, slab, &to_pool); - } + bucket_free_chunk(bucket, ptr, slab, &to_pool); if (disjoint_pool->params.pool_trace > 1) { bucket->free_count++; From 6bb22d89ae2cfe4d7500678a5c3fc903aaaea488 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Plewa?= Date: Mon, 10 Feb 2025 17:26:10 +0100 Subject: [PATCH 10/12] do not allocate slab->iter dynamically MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is no reason to do so. This is only extra performance overhead. 
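
A rough sketch of the embedded-node pattern this switches to (hypothetical,
simplified types, not the actual UMF structures): because the list node is a
field of the slab itself, linking a slab into a bucket list allocates nothing
extra, and unlinking stays O(1):

    #include <stddef.h>

    struct node { struct node *prev, *next; };

    struct slab {
        void *mem;        // payload
        struct node iter; // embedded node - created and freed with the slab
    };

    // unlink in O(1); no lookup and no separate free() for the node
    static void list_remove(struct node *n) {
        if (n->prev) { n->prev->next = n->next; }
        if (n->next) { n->next->prev = n->prev; }
        n->prev = n->next = NULL;
    }

The trade-off is that a node can sit on only one list at a time, which matches
how a slab is used here: it is either on the available or the unavailable list.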
Signed-off-by: Łukasz Plewa --- src/pool/pool_disjoint.c | 21 ++++++--------------- src/pool/pool_disjoint_internal.h | 12 ++++++------ 2 files changed, 12 insertions(+), 21 deletions(-) diff --git a/src/pool/pool_disjoint.c b/src/pool/pool_disjoint.c index 9cfae16fe..a1abbc414 100644 --- a/src/pool/pool_disjoint.c +++ b/src/pool/pool_disjoint.c @@ -68,13 +68,8 @@ static slab_t *create_slab(bucket_t *bucket) { slab->first_free_chunk_idx = 0; slab->bucket = bucket; - slab->iter = umf_ba_global_alloc(sizeof(*slab->iter)); - if (slab->iter == NULL) { - LOG_ERR("allocation of new slab iter failed!"); - goto free_slab; - } - slab->iter->val = slab; - slab->iter->prev = slab->iter->next = NULL; + slab->iter.val = slab; + slab->iter.prev = slab->iter.next = NULL; slab->num_chunks_total = utils_max(bucket_slab_min_size(bucket) / bucket->size, 1); @@ -82,7 +77,7 @@ static slab_t *create_slab(bucket_t *bucket) { umf_ba_global_alloc(sizeof(*slab->chunks) * slab->num_chunks_total); if (slab->chunks == NULL) { LOG_ERR("allocation of slab chunks failed!"); - goto free_slab_iter; + goto free_slab; } memset(slab->chunks, 0, sizeof(*slab->chunks) * slab->num_chunks_total); @@ -111,9 +106,6 @@ static slab_t *create_slab(bucket_t *bucket) { free_slab_chunks: umf_ba_global_free(slab->chunks); -free_slab_iter: - umf_ba_global_free(slab->iter); - free_slab: umf_ba_global_free(slab); return NULL; @@ -131,7 +123,6 @@ static void destroy_slab(slab_t *slab) { } umf_ba_global_free(slab->chunks); - umf_ba_global_free(slab->iter); umf_ba_global_free(slab); } @@ -296,7 +287,7 @@ static void bucket_free_chunk(bucket_t *bucket, void *ptr, slab_t *slab, // in case if the slab was previously full and now has single available // chunk, it should be moved to the list of available slabs if (slab_get_num_free_chunks(slab) == 1) { - slab_list_item_t *slab_it = slab->iter; + slab_list_item_t *slab_it = &slab->iter; assert(slab_it->val != NULL); DL_DELETE(bucket->unavailable_slabs, slab_it); DL_PREPEND(bucket->available_slabs, slab_it); @@ -312,7 +303,7 @@ static void bucket_free_chunk(bucket_t *bucket, void *ptr, slab_t *slab, *to_pool = bucket_can_pool(bucket); if (*to_pool == false) { // remove slab - slab_list_item_t *slab_it = slab->iter; + slab_list_item_t *slab_it = &slab->iter; assert(slab_it->val != NULL); slab_unreg(slab_it->val); DL_DELETE(bucket->available_slabs, slab_it); @@ -364,7 +355,7 @@ static slab_t *bucket_create_slab(bucket_t *bucket) { return NULL; } - DL_PREPEND(bucket->available_slabs, slab->iter); + DL_PREPEND(bucket->available_slabs, &slab->iter); bucket->available_slabs_num++; bucket_update_stats(bucket, 1, 0); diff --git a/src/pool/pool_disjoint_internal.h b/src/pool/pool_disjoint_internal.h index 56a25e611..3d656689c 100644 --- a/src/pool/pool_disjoint_internal.h +++ b/src/pool/pool_disjoint_internal.h @@ -81,6 +81,11 @@ typedef struct bucket_t { size_t max_slabs_in_use; } bucket_t; +typedef struct slab_list_item_t { + slab_t *val; + struct slab_list_item_t *prev, *next; +} slab_list_item_t; + // Represents the allocated memory block of size 'slab_min_size' // Internally, it splits the memory block into chunks. The number of // chunks depends on the size of a Bucket which created the Slab. 
@@ -107,14 +112,9 @@ typedef struct slab_t {
 
     // Store iterator to the corresponding node in avail/unavail list
     // to achieve O(1) removal
-    slab_list_item_t *iter;
+    slab_list_item_t iter;
 } slab_t;
 
-typedef struct slab_list_item_t {
-    slab_t *val;
-    struct slab_list_item_t *prev, *next;
-} slab_list_item_t;
-
 typedef struct umf_disjoint_pool_shared_limits_t {
     size_t max_size;
     size_t total_size; // requires atomic access

From b6181b62a90c42322aad61fa883efbfa5b05a6df Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C5=81ukasz=20Plewa?=
Date: Tue, 11 Feb 2025 15:02:21 +0100
Subject: [PATCH 11/12] rename slab_reg to pool_register_slab
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This improves readability and reduces "spaghetti" in the code.

Signed-off-by: Łukasz Plewa
---
 src/pool/pool_disjoint.c | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/src/pool/pool_disjoint.c b/src/pool/pool_disjoint.c
index a1abbc414..090d1dd9e 100644
--- a/src/pool/pool_disjoint.c
+++ b/src/pool/pool_disjoint.c
@@ -199,9 +199,7 @@ static bool slab_has_avail(const slab_t *slab) {
     return slab->num_chunks_allocated < slab->num_chunks_total;
 }
 
-static umf_result_t slab_reg(slab_t *slab) {
-    bucket_t *bucket = slab->bucket;
-    disjoint_pool_t *pool = bucket->pool;
+static umf_result_t pool_register_slab(disjoint_pool_t *pool, slab_t *slab) {
     critnib *slabs = pool->known_slabs;
 
     // NOTE: changed vs original DisjointPool implementation - currently slab
@@ -225,9 +223,7 @@ static umf_result_t slab_reg(slab_t *slab) {
     return res;
 }
 
-static umf_result_t slab_unreg(slab_t *slab) {
-    bucket_t *bucket = slab->bucket;
-    disjoint_pool_t *pool = bucket->pool;
+static umf_result_t pool_unregister_slab(disjoint_pool_t *pool, slab_t *slab) {
     critnib *slabs = pool->known_slabs;
 
     void *slab_addr = slab_get(slab);
@@ -305,7 +301,7 @@ static void bucket_free_chunk(bucket_t *bucket, void *ptr, slab_t *slab,
         // remove slab
         slab_list_item_t *slab_it = &slab->iter;
         assert(slab_it->val != NULL);
-        slab_unreg(slab_it->val);
+        pool_unregister_slab(bucket->pool, slab_it->val);
         DL_DELETE(bucket->available_slabs, slab_it);
         bucket->available_slabs_num--;
         destroy_slab(slab_it->val);
@@ -348,7 +344,7 @@ static slab_t *bucket_create_slab(bucket_t *bucket) {
         return NULL;
     }
 
-    umf_result_t res = slab_reg(slab);
+    umf_result_t res = pool_register_slab(bucket->pool, slab);
     if (res != UMF_RESULT_SUCCESS) {
         LOG_ERR("slab_reg failed!")
         destroy_slab(slab);

From 368ab19cdc0dfd7a6b2e6b4b00c7b18dfca0fcc8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C5=81ukasz=20Plewa?=
Date: Tue, 11 Feb 2025 16:37:37 +0100
Subject: [PATCH 12/12] Rename bucket_capacity to bucket_max_pooled_slabs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This function has nothing to do with capacity. The input parameter to the
disjoint pool should also be renamed, but that is a topic for a different
pull request.

Signed-off-by: Łukasz Plewa
---
 src/pool/pool_disjoint.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/pool/pool_disjoint.c b/src/pool/pool_disjoint.c
index 090d1dd9e..ef7b3875d 100644
--- a/src/pool/pool_disjoint.c
+++ b/src/pool/pool_disjoint.c
@@ -378,8 +378,8 @@ static slab_list_item_t *bucket_get_avail_slab(bucket_t *bucket,
 
     return bucket->available_slabs;
 }
 
-static size_t bucket_capacity(bucket_t *bucket) {
-    // For buckets used in chunked mode, just one slab in pool is sufficient.
+static size_t bucket_max_pooled_slabs(bucket_t *bucket) { + // For small buckets where slabs are split to chunks, just one pooled slab is sufficient. // For larger buckets, the capacity could be more and is adjustable. if (bucket->size <= bucket_chunk_cut_off(bucket)) { return 1; @@ -419,7 +419,7 @@ static bool bucket_can_pool(bucket_t *bucket) { new_free_slabs_in_bucket = bucket->chunked_slabs_in_pool + 1; // we keep at most params.capacity slabs in the pool - if (bucket_capacity(bucket) >= new_free_slabs_in_bucket) { + if (bucket_max_pooled_slabs(bucket) >= new_free_slabs_in_bucket) { size_t pool_size = 0; utils_atomic_load_acquire(&bucket->shared_limits->total_size, &pool_size);
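
For reference, the check this last hunk feeds into boils down to a capped
atomic increment of the shared limit: load the current pooled total, and keep
the slab only if a single compare-exchange can raise that total without
exceeding max_size. A minimal standalone sketch of the pattern, using plain
C11 atomics and hypothetical names instead of the utils_* wrappers and the
per-bucket bookkeeping:

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stddef.h>

    // One CAS attempt, as in the pool: if another thread wins the race,
    // the caller just returns the slab to the provider instead of pooling it.
    static bool try_reserve_pool_space(_Atomic size_t *total_size,
                                       size_t max_size, size_t slab_size) {
        size_t cur = atomic_load_explicit(total_size, memory_order_acquire);
        size_t wanted = cur + slab_size;
        if (wanted > max_size) {
            return false; // limit reached - do not pool this slab
        }
        return atomic_compare_exchange_strong_explicit(
            total_size, &cur, wanted, memory_order_acq_rel,
            memory_order_relaxed);
    }

Losing the compare-exchange simply means the slab is not pooled, which keeps
the shared limit lock-free.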