From 4094ad80790e0b60d5975d47a4bca67cf65ba48c Mon Sep 17 00:00:00 2001 From: Rafal Rudnicki Date: Tue, 4 Mar 2025 15:25:53 +0000 Subject: [PATCH 1/4] Revert "temporary disable DP MT benchmark" --- benchmark/benchmark.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/benchmark/benchmark.cpp b/benchmark/benchmark.cpp index 60636a559f..e5e055a551 100644 --- a/benchmark/benchmark.cpp +++ b/benchmark/benchmark.cpp @@ -96,9 +96,7 @@ UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, pool_allocator>); UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, disjoint_pool_uniform) ->Apply(&default_multiple_alloc_uniform_size) - ->Apply(&singlethreaded); -// TODO: change to multithreaded -//->Apply(&multithreaded); + ->Apply(&multithreaded); #ifdef UMF_POOL_JEMALLOC_ENABLED UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, jemalloc_pool_fix, From fcf0a374716bacc31e8ebd966fb6911aec8d395e Mon Sep 17 00:00:00 2001 From: Rafal Rudnicki Date: Tue, 4 Mar 2025 15:30:57 +0000 Subject: [PATCH 2/4] cleanup atomics --- src/critnib/critnib.c | 74 ++++----- src/ipc_cache.c | 2 +- src/libumf.c | 6 +- src/pool/pool_disjoint.c | 7 +- src/pool/pool_disjoint_internal.h | 2 +- src/provider/provider_os_memory.c | 2 +- src/provider/provider_os_memory_internal.h | 4 +- src/provider/provider_tracking.c | 12 +- src/utils/utils_concurrency.h | 155 +++++++++++++----- test/supp/drd-umf_test-disjoint_pool.supp | 2 +- test/supp/drd-umf_test-ipc.supp | 27 +++ .../drd-umf_test-jemalloc_coarse_devdax.supp | 2 +- .../drd-umf_test-jemalloc_coarse_file.supp | 2 +- ...d-umf_test-provider_devdax_memory_ipc.supp | 1 + ...drd-umf_test-provider_file_memory_ipc.supp | 18 ++ .../supp/drd-umf_test-provider_os_memory.supp | 1 + .../supp/helgrind-umf_test-disjoint_pool.supp | 2 +- test/supp/helgrind-umf_test-ipc.supp | 39 ++++- ...grind-umf_test-jemalloc_coarse_devdax.supp | 2 +- ...elgrind-umf_test-jemalloc_coarse_file.supp | 2 +- ...d-umf_test-provider_devdax_memory_ipc.supp | 1 + ...ind-umf_test-provider_file_memory_ipc.supp | 30 +++- .../helgrind-umf_test-provider_os_memory.supp | 1 + 23 files changed, 288 insertions(+), 106 deletions(-) diff --git a/src/critnib/critnib.c b/src/critnib/critnib.c index 62d14af73d..394a671248 100644 --- a/src/critnib/critnib.c +++ b/src/critnib/critnib.c @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2023-2024 Intel Corporation + * Copyright (C) 2023-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -133,24 +133,6 @@ struct critnib { struct utils_mutex_t mutex; /* writes/removes */ }; -/* - * atomic load - */ -static void load(void *src, void *dst) { - utils_atomic_load_acquire((word *)src, (word *)dst); -} - -static void load64(uint64_t *src, uint64_t *dst) { - utils_atomic_load_acquire(src, dst); -} - -/* - * atomic store - */ -static void store(void *dst, void *src) { - utils_atomic_store_release((word *)dst, (word)src); -} - /* * internal: is_leaf -- check tagged pointer for leafness */ @@ -303,7 +285,7 @@ static void free_leaf(struct critnib *__restrict c, */ static struct critnib_leaf *alloc_leaf(struct critnib *__restrict c) { if (!c->deleted_leaf) { - return umf_ba_global_alloc(sizeof(struct critnib_leaf)); + return umf_ba_global_aligned_alloc(sizeof(struct critnib_leaf), 8); } struct critnib_leaf *k = c->deleted_leaf; @@ -343,10 +325,8 @@ int critnib_insert(struct critnib *c, word key, void *value, int update) { struct critnib_node *n = c->root; if (!n) { - store(&c->root, kn); - + utils_atomic_store_release_ptr((void **)&c->root, kn); utils_mutex_unlock(&c->mutex); - return 0; } @@ -361,7 +341,8 @@ int critnib_insert(struct critnib *c, word key, void *value, int update) { if (!n) { n = prev; - store(&n->child[slice_index(key, n->shift)], kn); + utils_atomic_store_release_ptr( + (void **)&n->child[slice_index(key, n->shift)], kn); utils_mutex_unlock(&c->mutex); @@ -406,7 +387,7 @@ int critnib_insert(struct critnib *c, word key, void *value, int update) { m->child[slice_index(path, sh)] = n; m->shift = sh; m->path = key & path_mask(sh); - store(parent, m); + utils_atomic_store_release_ptr((void **)parent, m); utils_mutex_unlock(&c->mutex); @@ -427,7 +408,8 @@ void *critnib_remove(struct critnib *c, word key) { goto not_found; } - word del = (utils_atomic_increment(&c->remove_count) - 1) % DELETED_LIFE; + word del = + (utils_atomic_increment_u64(&c->remove_count) - 1) % DELETED_LIFE; free_node(c, c->pending_del_nodes[del]); free_leaf(c, c->pending_del_leaves[del]); c->pending_del_nodes[del] = NULL; @@ -436,7 +418,7 @@ void *critnib_remove(struct critnib *c, word key) { if (is_leaf(n)) { k = to_leaf(n); if (k->key == key) { - store(&c->root, NULL); + utils_atomic_store_release_ptr((void **)&c->root, NULL); goto del_leaf; } @@ -466,7 +448,8 @@ void *critnib_remove(struct critnib *c, word key) { goto not_found; } - store(&n->child[slice_index(key, n->shift)], NULL); + utils_atomic_store_release_ptr( + (void **)&n->child[slice_index(key, n->shift)], NULL); /* Remove the node if there's only one remaining child. */ int ochild = -1; @@ -482,7 +465,7 @@ void *critnib_remove(struct critnib *c, word key) { ASSERTne(ochild, -1); - store(n_parent, n->child[ochild]); + utils_atomic_store_release_ptr((void **)n_parent, n->child[ochild]); c->pending_del_nodes[del] = n; del_leaf: @@ -511,8 +494,8 @@ void *critnib_get(struct critnib *c, word key) { do { struct critnib_node *n; - load64(&c->remove_count, &wrs1); - load(&c->root, &n); + utils_atomic_load_acquire_u64(&c->remove_count, &wrs1); + utils_atomic_load_acquire_ptr((void **)&c->root, (void **)&n); /* * critbit algorithm: dive into the tree, looking at nothing but @@ -520,13 +503,14 @@ void *critnib_get(struct critnib *c, word key) { * going wrong way if our path is missing, but that's ok... */ while (n && !is_leaf(n)) { - load(&n->child[slice_index(key, n->shift)], &n); + utils_atomic_load_acquire_ptr( + (void **)&n->child[slice_index(key, n->shift)], (void **)&n); } /* ... as we check it at the end. */ struct critnib_leaf *k = to_leaf(n); res = (n && k->key == key) ? k->value : NULL; - load64(&c->remove_count, &wrs2); + utils_atomic_load_acquire_u64(&c->remove_count, &wrs2); } while (wrs1 + DELETED_LIFE <= wrs2); return res; @@ -597,7 +581,7 @@ static struct critnib_leaf *find_le(struct critnib_node *__restrict n, /* recursive call: follow the path */ { struct critnib_node *m; - load(&n->child[nib], &m); + utils_atomic_load_acquire_ptr((void **)&n->child[nib], (void **)&m); struct critnib_leaf *k = find_le(m, key); if (k) { return k; @@ -611,7 +595,7 @@ static struct critnib_leaf *find_le(struct critnib_node *__restrict n, */ for (; nib > 0; nib--) { struct critnib_node *m; - load(&n->child[nib - 1], &m); + utils_atomic_load_acquire_ptr((void **)&n->child[nib - 1], (void **)&m); if (m) { n = m; if (is_leaf(n)) { @@ -635,12 +619,12 @@ void *critnib_find_le(struct critnib *c, word key) { void *res; do { - load64(&c->remove_count, &wrs1); + utils_atomic_load_acquire_u64(&c->remove_count, &wrs1); struct critnib_node *n; /* avoid a subtle TOCTOU */ - load(&c->root, &n); + utils_atomic_load_acquire_ptr((void **)&c->root, (void **)&n); struct critnib_leaf *k = n ? find_le(n, key) : NULL; res = k ? k->value : NULL; - load64(&c->remove_count, &wrs2); + utils_atomic_load_acquire_u64(&c->remove_count, &wrs2); } while (wrs1 + DELETED_LIFE <= wrs2); return res; @@ -694,7 +678,7 @@ static struct critnib_leaf *find_ge(struct critnib_node *__restrict n, unsigned nib = slice_index(key, n->shift); { struct critnib_node *m; - load(&n->child[nib], &m); + utils_atomic_load_acquire_ptr((void **)&n->child[nib], (void **)&m); struct critnib_leaf *k = find_ge(m, key); if (k) { return k; @@ -703,7 +687,7 @@ static struct critnib_leaf *find_ge(struct critnib_node *__restrict n, for (; nib < NIB; nib++) { struct critnib_node *m; - load(&n->child[nib + 1], &m); + utils_atomic_load_acquire_ptr((void **)&n->child[nib + 1], (void **)&m); if (m) { n = m; if (is_leaf(n)) { @@ -741,9 +725,9 @@ int critnib_find(struct critnib *c, uintptr_t key, enum find_dir_t dir, } do { - load64(&c->remove_count, &wrs1); + utils_atomic_load_acquire_u64(&c->remove_count, &wrs1); struct critnib_node *n; - load(&c->root, &n); + utils_atomic_load_acquire_ptr((void **)&c->root, (void **)&n); if (dir < 0) { k = find_le(n, key); @@ -751,7 +735,9 @@ int critnib_find(struct critnib *c, uintptr_t key, enum find_dir_t dir, k = find_ge(n, key); } else { while (n && !is_leaf(n)) { - load(&n->child[slice_index(key, n->shift)], &n); + utils_atomic_load_acquire_ptr( + (void **)&n->child[slice_index(key, n->shift)], + (void **)&n); } struct critnib_leaf *kk = to_leaf(n); @@ -761,7 +747,7 @@ int critnib_find(struct critnib *c, uintptr_t key, enum find_dir_t dir, _rkey = k->key; _rvalue = k->value; } - load64(&c->remove_count, &wrs2); + utils_atomic_load_acquire_u64(&c->remove_count, &wrs2); } while (wrs1 + DELETED_LIFE <= wrs2); if (k) { diff --git a/src/ipc_cache.c b/src/ipc_cache.c index cab5fc478d..6d5d39e4f2 100644 --- a/src/ipc_cache.c +++ b/src/ipc_cache.c @@ -232,7 +232,7 @@ umf_result_t umfIpcOpenedCacheGet(ipc_opened_cache_handle_t cache, exit: if (ret == UMF_RESULT_SUCCESS) { - utils_atomic_increment(&entry->ref_count); + utils_atomic_increment_u64(&entry->ref_count); *retEntry = &entry->value; } diff --git a/src/libumf.c b/src/libumf.c index f8f6cc61ff..e357b2583a 100644 --- a/src/libumf.c +++ b/src/libumf.c @@ -24,10 +24,10 @@ umf_memory_tracker_handle_t TRACKER = NULL; -static unsigned long long umfRefCount = 0; +static uint64_t umfRefCount = 0; int umfInit(void) { - if (utils_fetch_and_add64(&umfRefCount, 1) == 0) { + if (utils_fetch_and_add_u64(&umfRefCount, 1) == 0) { utils_log_init(); TRACKER = umfMemoryTrackerCreate(); if (!TRACKER) { @@ -54,7 +54,7 @@ int umfInit(void) { } void umfTearDown(void) { - if (utils_fetch_and_add64(&umfRefCount, -1) == 1) { + if (utils_fetch_and_sub_u64(&umfRefCount, 1) == 1) { #if !defined(_WIN32) && !defined(UMF_NO_HWLOC) umfMemspaceHostAllDestroy(); umfMemspaceHighestCapacityDestroy(); diff --git a/src/pool/pool_disjoint.c b/src/pool/pool_disjoint.c index 9adb1a7a4f..7a2f327e46 100644 --- a/src/pool/pool_disjoint.c +++ b/src/pool/pool_disjoint.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -20,6 +21,7 @@ #include "provider/provider_tracking.h" #include "uthash/utlist.h" #include "utils_common.h" +#include "utils_concurrency.h" #include "utils_log.h" #include "utils_math.h" @@ -523,7 +525,7 @@ static void disjoint_pool_print_stats(disjoint_pool_t *pool) { utils_mutex_unlock(&bucket->bucket_lock); } - LOG_DEBUG("current pool size: %zu", + LOG_DEBUG("current pool size: %" PRIu64, disjoint_pool_get_limits(pool)->total_size); LOG_DEBUG("suggested setting=;%c%s:%zu,%zu,64K", (char)tolower(name[0]), (name + 1), high_bucket_size, high_peak_slabs_in_use); @@ -864,7 +866,8 @@ umf_result_t disjoint_pool_free(void *pool, void *ptr) { if (disjoint_pool->params.pool_trace > 2) { const char *name = disjoint_pool->params.name; - LOG_DEBUG("freed %s %p to %s, current total pool size: %zu, current " + LOG_DEBUG("freed %s %p to %s, current total pool size: %" PRIu64 + ", current " "pool size for %s: %zu", name, ptr, (to_pool ? "pool" : "provider"), disjoint_pool_get_limits(disjoint_pool)->total_size, name, diff --git a/src/pool/pool_disjoint_internal.h b/src/pool/pool_disjoint_internal.h index 86460509b4..2b5de64bc3 100644 --- a/src/pool/pool_disjoint_internal.h +++ b/src/pool/pool_disjoint_internal.h @@ -102,7 +102,7 @@ typedef struct slab_t { typedef struct umf_disjoint_pool_shared_limits_t { size_t max_size; - size_t total_size; // requires atomic access + uint64_t total_size; // requires atomic access } umf_disjoint_pool_shared_limits_t; typedef struct umf_disjoint_pool_params_t { diff --git a/src/provider/provider_os_memory.c b/src/provider/provider_os_memory.c index bd5ea9c691..f0cd3abaed 100644 --- a/src/provider/provider_os_memory.c +++ b/src/provider/provider_os_memory.c @@ -934,7 +934,7 @@ static membind_t membindFirst(os_memory_provider_t *provider, void *addr, if (provider->mode == UMF_NUMA_MODE_INTERLEAVE) { assert(provider->part_size != 0); - size_t s = utils_fetch_and_add64(&provider->alloc_sum, size); + size_t s = utils_fetch_and_add_u64(&provider->alloc_sum, size); membind.node = (s / provider->part_size) % provider->nodeset_len; membind.bitmap = provider->nodeset[membind.node]; membind.bind_size = ALIGN_UP(provider->part_size, membind.page_size); diff --git a/src/provider/provider_os_memory_internal.h b/src/provider/provider_os_memory_internal.h index faf0de2473..4a603b1dad 100644 --- a/src/provider/provider_os_memory_internal.h +++ b/src/provider/provider_os_memory_internal.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2023-2024 Intel Corporation + * Copyright (C) 2023-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -58,7 +58,7 @@ typedef struct os_memory_provider_t { int numa_flags; // combination of hwloc flags size_t part_size; - size_t alloc_sum; // sum of all allocations - used for manual interleaving + uint64_t alloc_sum; // sum of all allocations - used for manual interleaving struct { unsigned weight; diff --git a/src/provider/provider_tracking.c b/src/provider/provider_tracking.c index f9a98e87fe..4696bc562d 100644 --- a/src/provider/provider_tracking.c +++ b/src/provider/provider_tracking.c @@ -565,7 +565,7 @@ static umf_result_t trackingGetIpcHandle(void *provider, const void *ptr, return ret; } - cache_value->handle_id = utils_atomic_increment(&IPC_HANDLE_ID); + cache_value->handle_id = utils_atomic_increment_u64(&IPC_HANDLE_ID); cache_value->ipcDataSize = ipcDataSize; int insRes = critnib_insert(p->ipcCache, (uintptr_t)ptr, @@ -703,18 +703,20 @@ static umf_result_t trackingOpenIpcHandle(void *provider, void *providerIpcData, assert(cache_entry != NULL); void *mapped_ptr = NULL; - utils_atomic_load_acquire(&(cache_entry->mapped_base_ptr), &mapped_ptr); + utils_atomic_load_acquire_ptr(&(cache_entry->mapped_base_ptr), + (void **)&mapped_ptr); if (mapped_ptr == NULL) { utils_mutex_lock(&(cache_entry->mmap_lock)); - utils_atomic_load_acquire(&(cache_entry->mapped_base_ptr), &mapped_ptr); + utils_atomic_load_acquire_ptr(&(cache_entry->mapped_base_ptr), + (void **)&mapped_ptr); if (mapped_ptr == NULL) { ret = upstreamOpenIPCHandle(p, providerIpcData, ipcUmfData->baseSize, &mapped_ptr); if (ret == UMF_RESULT_SUCCESS) { // Put to the cache cache_entry->mapped_size = ipcUmfData->baseSize; - utils_atomic_store_release(&(cache_entry->mapped_base_ptr), - mapped_ptr); + utils_atomic_store_release_ptr(&(cache_entry->mapped_base_ptr), + mapped_ptr); } } utils_mutex_unlock(&(cache_entry->mmap_lock)); diff --git a/src/utils/utils_concurrency.h b/src/utils/utils_concurrency.h index 910c859b0a..0104b86468 100644 --- a/src/utils/utils_concurrency.h +++ b/src/utils/utils_concurrency.h @@ -10,6 +10,8 @@ #ifndef UMF_UTILS_CONCURRENCY_H #define UMF_UTILS_CONCURRENCY_H 1 +#include +#include #include #include @@ -19,7 +21,7 @@ #include "utils_windows_intrin.h" #pragma intrinsic(_BitScanForward64) -#else +#else /* !_WIN32 */ #include #ifndef __cplusplus @@ -27,10 +29,18 @@ #else /* __cplusplus */ #include #define _Atomic(X) std::atomic + +// TODO remove cpp code from this file +using std::memory_order_acq_rel; +using std::memory_order_acquire; +using std::memory_order_relaxed; +using std::memory_order_release; + #endif /* __cplusplus */ -#endif /* _WIN32 */ +#endif /* !_WIN32 */ +#include "utils_common.h" #include "utils_sanitizers.h" #ifdef __cplusplus @@ -79,70 +89,137 @@ void utils_init_once(UTIL_ONCE_FLAG *flag, void (*onceCb)(void)); #if defined(_WIN32) -static __inline unsigned char utils_lssb_index(long long value) { +static inline unsigned char utils_lssb_index(long long value) { unsigned long ret; _BitScanForward64(&ret, value); return (unsigned char)ret; } -static __inline unsigned char utils_mssb_index(long long value) { +static inline unsigned char utils_mssb_index(long long value) { unsigned long ret; _BitScanReverse64(&ret, value); return (unsigned char)ret; } // There is no good way to do atomic_load on windows... -#define utils_atomic_load_acquire(object, dest) \ - do { \ - *(LONG64 *)dest = \ - InterlockedOr64Acquire((LONG64 volatile *)object, 0); \ - } while (0) +static inline void utils_atomic_load_acquire_u64(uint64_t *ptr, uint64_t *out) { + // NOTE: Windows cl complains about direct accessing 'ptr' which is next + // accessed using Interlocked* functions (warning 28112 - disabled) + ASSERT_IS_ALIGNED((uintptr_t)ptr, 8); + + // On Windows, there is no equivalent to __atomic_load, so we use cmpxchg + // with 0, 0 here. This will always return the value under the pointer + // without writing anything. + LONG64 ret = InterlockedCompareExchange64((LONG64 volatile *)ptr, 0, 0); + *out = *(uint64_t *)&ret; +} + +static inline void utils_atomic_load_acquire_ptr(void **ptr, void **out) { + ASSERT_IS_ALIGNED((uintptr_t)ptr, 8); + uintptr_t ret = (uintptr_t)InterlockedCompareExchangePointer(ptr, 0, 0); + *(uintptr_t *)out = ret; +} -#define utils_atomic_store_release(object, desired) \ - InterlockedExchange64((LONG64 volatile *)object, (LONG64)desired) +static inline void utils_atomic_store_release_ptr(void **ptr, void *val) { + ASSERT_IS_ALIGNED((uintptr_t)ptr, 8); + InterlockedExchangePointer(ptr, val); +} -#define utils_atomic_increment(object) \ - InterlockedIncrement64((LONG64 volatile *)object) +static inline uint64_t utils_atomic_increment_u64(uint64_t *ptr) { + ASSERT_IS_ALIGNED((uintptr_t)ptr, 8); + // return incremented value + return InterlockedIncrement64((LONG64 volatile *)ptr); +} -#define utils_atomic_decrement(object) \ - InterlockedDecrement64((LONG64 volatile *)object) +static inline uint64_t utils_atomic_decrement_u64(uint64_t *ptr) { + ASSERT_IS_ALIGNED((uintptr_t)ptr, 8); + // return decremented value + return InterlockedDecrement64((LONG64 volatile *)ptr); +} -#define utils_fetch_and_add64(ptr, value) \ - InterlockedExchangeAdd64((LONG64 *)(ptr), value) +static inline uint64_t utils_fetch_and_add_u64(uint64_t *ptr, uint64_t val) { + ASSERT_IS_ALIGNED((uintptr_t)ptr, 8); + // return the value that had previously been in *ptr + return InterlockedExchangeAdd64((LONG64 volatile *)(ptr), val); +} -// NOTE: windows version have different order of args -#define utils_compare_exchange(object, desired, expected) \ - InterlockedCompareExchange64((LONG64 volatile *)object, *expected, *desired) +static inline uint64_t utils_fetch_and_sub_u64(uint64_t *ptr, uint64_t val) { + ASSERT_IS_ALIGNED((uintptr_t)ptr, 8); + // return the value that had previously been in *ptr + // NOTE: on Windows there is no *Sub* version of InterlockedExchange + return InterlockedExchangeAdd64((LONG64 volatile *)(ptr), -(LONG64)val); +} + +static inline bool utils_compare_exchange_u64(uint64_t *ptr, uint64_t *expected, + uint64_t *desired) { + ASSERT_IS_ALIGNED((uintptr_t)ptr, 8); + LONG64 out = InterlockedCompareExchange64( + (LONG64 volatile *)ptr, *(LONG64 *)desired, *(LONG64 *)expected); + if (out == *(LONG64 *)expected) { + return true; + } + + // else + *expected = out; + return false; +} #else // !defined(_WIN32) #define utils_lssb_index(x) ((unsigned char)__builtin_ctzll(x)) #define utils_mssb_index(x) ((unsigned char)(63 - __builtin_clzll(x))) -#define utils_atomic_load_acquire(object, dest) \ - do { \ - utils_annotate_acquire((void *)object); \ - __atomic_load(object, dest, memory_order_acquire); \ - } while (0) +static inline void utils_atomic_load_acquire_u64(uint64_t *ptr, uint64_t *out) { + ASSERT_IS_ALIGNED((uintptr_t)ptr, 8); + ASSERT_IS_ALIGNED((uintptr_t)out, 8); + __atomic_load(ptr, out, memory_order_acquire); + utils_annotate_acquire(ptr); +} -#define utils_atomic_store_release(object, desired) \ - do { \ - __atomic_store_n(object, desired, memory_order_release); \ - utils_annotate_release((void *)object); \ - } while (0) +static inline void utils_atomic_load_acquire_ptr(void **ptr, void **out) { + ASSERT_IS_ALIGNED((uintptr_t)ptr, 8); + ASSERT_IS_ALIGNED((uintptr_t)out, 8); + __atomic_load((uintptr_t *)ptr, (uintptr_t *)out, memory_order_acquire); + utils_annotate_acquire(ptr); +} -#define utils_atomic_increment(object) \ - __atomic_add_fetch(object, 1, memory_order_acq_rel) +static inline void utils_atomic_store_release_ptr(void **ptr, void *val) { + ASSERT_IS_ALIGNED((uintptr_t)ptr, 8); + utils_annotate_release(ptr); + __atomic_store_n((uintptr_t *)ptr, (uintptr_t)val, memory_order_release); +} -#define utils_atomic_decrement(object) \ - __atomic_sub_fetch(object, 1, memory_order_acq_rel) +static inline uint64_t utils_atomic_increment_u64(uint64_t *val) { + ASSERT_IS_ALIGNED((uintptr_t)val, 8); + // return incremented value + return __atomic_add_fetch(val, 1, memory_order_acq_rel); +} -#define utils_fetch_and_add64(object, value) \ - __atomic_fetch_add(object, value, memory_order_acq_rel) +static inline uint64_t utils_atomic_decrement_u64(uint64_t *val) { + ASSERT_IS_ALIGNED((uintptr_t)val, 8); + // return decremented value + return __atomic_sub_fetch(val, 1, memory_order_acq_rel); +} -#define utils_compare_exchange(object, expected, desired) \ - __atomic_compare_exchange(object, expected, desired, 0 /* strong */, \ - memory_order_acq_rel, memory_order_relaxed) +static inline uint64_t utils_fetch_and_add_u64(uint64_t *ptr, uint64_t val) { + ASSERT_IS_ALIGNED((uintptr_t)ptr, 8); + // return the value that had previously been in *ptr + return __atomic_fetch_add(ptr, val, memory_order_acq_rel); +} + +static inline uint64_t utils_fetch_and_sub_u64(uint64_t *ptr, uint64_t val) { + // return the value that had previously been in *ptr + ASSERT_IS_ALIGNED((uintptr_t)ptr, 8); + return __atomic_fetch_sub(ptr, val, memory_order_acq_rel); +} + +static inline bool utils_compare_exchange_u64(uint64_t *ptr, uint64_t *expected, + uint64_t *desired) { + ASSERT_IS_ALIGNED((uintptr_t)ptr, 8); + return __atomic_compare_exchange(ptr, expected, desired, 0 /* strong */, + memory_order_acq_rel, + memory_order_relaxed); +} #endif // !defined(_WIN32) diff --git a/test/supp/drd-umf_test-disjoint_pool.supp b/test/supp/drd-umf_test-disjoint_pool.supp index 24a44b93d4..2a5548d27e 100644 --- a/test/supp/drd-umf_test-disjoint_pool.supp +++ b/test/supp/drd-umf_test-disjoint_pool.supp @@ -1,7 +1,7 @@ { False-positive ConflictingAccess in critnib_insert drd:ConflictingAccess - fun:store + fun:utils_atomic_store_release_ptr fun:critnib_insert ... } diff --git a/test/supp/drd-umf_test-ipc.supp b/test/supp/drd-umf_test-ipc.supp index 76844585d1..fbdbd0183f 100644 --- a/test/supp/drd-umf_test-ipc.supp +++ b/test/supp/drd-umf_test-ipc.supp @@ -5,3 +5,30 @@ fun:pthread_cond_destroy@* ... } + +{ + [false-positive] Double check locking pattern in trackingOpenIpcHandle + drd:ConflictingAccess + fun:utils_atomic_load_acquire_ptr + fun:trackingOpenIpcHandle + fun:umfMemoryProviderOpenIPCHandle + fun:umfOpenIPCHandle + ... +} + +{ + [false-positive] trackingGetIpcHandle + drd:ConflictingAccess + fun:trackingGetIpcHandle + fun:umfMemoryProviderGetIPCHandle + fun:umfGetIPCHandle +} + +{ + [false-positive] trackingGetIpcHandle + drd:ConflictingAccess + fun:memmove + fun:trackingGetIpcHandle + fun:umfMemoryProviderGetIPCHandle + fun:umfGetIPCHandle +} diff --git a/test/supp/drd-umf_test-jemalloc_coarse_devdax.supp b/test/supp/drd-umf_test-jemalloc_coarse_devdax.supp index bc4f2295f4..8d8746861c 100644 --- a/test/supp/drd-umf_test-jemalloc_coarse_devdax.supp +++ b/test/supp/drd-umf_test-jemalloc_coarse_devdax.supp @@ -9,7 +9,7 @@ { False-positive ConflictingAccess in critnib_insert drd:ConflictingAccess - fun:store + fun:utils_atomic_store_release_ptr fun:critnib_insert ... } diff --git a/test/supp/drd-umf_test-jemalloc_coarse_file.supp b/test/supp/drd-umf_test-jemalloc_coarse_file.supp index bc4f2295f4..8d8746861c 100644 --- a/test/supp/drd-umf_test-jemalloc_coarse_file.supp +++ b/test/supp/drd-umf_test-jemalloc_coarse_file.supp @@ -9,7 +9,7 @@ { False-positive ConflictingAccess in critnib_insert drd:ConflictingAccess - fun:store + fun:utils_atomic_store_release_ptr fun:critnib_insert ... } diff --git a/test/supp/drd-umf_test-provider_devdax_memory_ipc.supp b/test/supp/drd-umf_test-provider_devdax_memory_ipc.supp index 025834658f..f6f12aa1ea 100644 --- a/test/supp/drd-umf_test-provider_devdax_memory_ipc.supp +++ b/test/supp/drd-umf_test-provider_devdax_memory_ipc.supp @@ -1,6 +1,7 @@ { [false-positive] Double check locking pattern in trackingOpenIpcHandle drd:ConflictingAccess + fun:utils_atomic_store_release_ptr fun:trackingOpenIpcHandle fun:umfMemoryProviderOpenIPCHandle fun:umfOpenIPCHandle diff --git a/test/supp/drd-umf_test-provider_file_memory_ipc.supp b/test/supp/drd-umf_test-provider_file_memory_ipc.supp index a15d860aa5..72fd6d87cc 100644 --- a/test/supp/drd-umf_test-provider_file_memory_ipc.supp +++ b/test/supp/drd-umf_test-provider_file_memory_ipc.supp @@ -9,12 +9,30 @@ { [false-positive] Double check locking pattern in trackingOpenIpcHandle drd:ConflictingAccess + fun:utils_atomic_store_release_ptr fun:trackingOpenIpcHandle fun:umfMemoryProviderOpenIPCHandle fun:umfOpenIPCHandle ... } +{ + [false-positive] trackingGetIpcHandle + drd:ConflictingAccess + fun:trackingGetIpcHandle + fun:umfMemoryProviderGetIPCHandle + fun:umfGetIPCHandle +} + +{ + [false-positive] trackingGetIpcHandle + drd:ConflictingAccess + fun:memmove + fun:trackingGetIpcHandle + fun:umfMemoryProviderGetIPCHandle + fun:umfGetIPCHandle +} + { False-positive ConflictingAccess in jemalloc drd:ConflictingAccess diff --git a/test/supp/drd-umf_test-provider_os_memory.supp b/test/supp/drd-umf_test-provider_os_memory.supp index 025834658f..f6f12aa1ea 100644 --- a/test/supp/drd-umf_test-provider_os_memory.supp +++ b/test/supp/drd-umf_test-provider_os_memory.supp @@ -1,6 +1,7 @@ { [false-positive] Double check locking pattern in trackingOpenIpcHandle drd:ConflictingAccess + fun:utils_atomic_store_release_ptr fun:trackingOpenIpcHandle fun:umfMemoryProviderOpenIPCHandle fun:umfOpenIPCHandle diff --git a/test/supp/helgrind-umf_test-disjoint_pool.supp b/test/supp/helgrind-umf_test-disjoint_pool.supp index 929674e8e5..65dfdd2c78 100644 --- a/test/supp/helgrind-umf_test-disjoint_pool.supp +++ b/test/supp/helgrind-umf_test-disjoint_pool.supp @@ -31,7 +31,7 @@ { False-positive Race in critnib_insert Helgrind:Race - fun:store + fun:utils_atomic_store_release_ptr fun:critnib_insert ... } diff --git a/test/supp/helgrind-umf_test-ipc.supp b/test/supp/helgrind-umf_test-ipc.supp index e46140c197..04f3a91993 100644 --- a/test/supp/helgrind-umf_test-ipc.supp +++ b/test/supp/helgrind-umf_test-ipc.supp @@ -1,7 +1,7 @@ { False-positive race in critnib_insert (lack of instrumentation) Helgrind:Race - fun:store + fun:utils_atomic_store_release_ptr fun:critnib_insert ... } @@ -14,3 +14,40 @@ fun:critnib_find ... } + +{ + [false-positive] Double check locking pattern in trackingOpenIpcHandle + Helgrind:Race + fun:utils_atomic_store_release_ptr + fun:trackingOpenIpcHandle + fun:umfMemoryProviderOpenIPCHandle + fun:umfOpenIPCHandle + ... +} + +{ + [false-positive] Double check locking pattern in trackingOpenIpcHandle + Helgrind:Race + fun:utils_atomic_load_acquire_ptr + fun:trackingOpenIpcHandle + fun:umfMemoryProviderOpenIPCHandle + fun:umfOpenIPCHandle + ... +} + +{ + [false-positive] umfMemoryProviderGetIPCHandle + Helgrind:Race + fun:trackingGetIpcHandle + fun:umfMemoryProviderGetIPCHandle + fun:umfGetIPCHandle +} + +{ + [false-positive] umfMemoryProviderGetIPCHandle + Helgrind:Race + fun:memmove + fun:trackingGetIpcHandle + fun:umfMemoryProviderGetIPCHandle + fun:umfGetIPCHandle +} diff --git a/test/supp/helgrind-umf_test-jemalloc_coarse_devdax.supp b/test/supp/helgrind-umf_test-jemalloc_coarse_devdax.supp index ac8969c5ae..2f4980f519 100644 --- a/test/supp/helgrind-umf_test-jemalloc_coarse_devdax.supp +++ b/test/supp/helgrind-umf_test-jemalloc_coarse_devdax.supp @@ -9,7 +9,7 @@ { False-positive Race in critnib_insert Helgrind:Race - fun:store + fun:utils_atomic_store_release_ptr fun:critnib_insert ... } diff --git a/test/supp/helgrind-umf_test-jemalloc_coarse_file.supp b/test/supp/helgrind-umf_test-jemalloc_coarse_file.supp index ac8969c5ae..2f4980f519 100644 --- a/test/supp/helgrind-umf_test-jemalloc_coarse_file.supp +++ b/test/supp/helgrind-umf_test-jemalloc_coarse_file.supp @@ -9,7 +9,7 @@ { False-positive Race in critnib_insert Helgrind:Race - fun:store + fun:utils_atomic_store_release_ptr fun:critnib_insert ... } diff --git a/test/supp/helgrind-umf_test-provider_devdax_memory_ipc.supp b/test/supp/helgrind-umf_test-provider_devdax_memory_ipc.supp index d6401e8ee7..4bc776f432 100644 --- a/test/supp/helgrind-umf_test-provider_devdax_memory_ipc.supp +++ b/test/supp/helgrind-umf_test-provider_devdax_memory_ipc.supp @@ -1,6 +1,7 @@ { [false-positive] Double check locking pattern in trackingOpenIpcHandle Helgrind:Race + fun:utils_atomic_store_release_ptr fun:trackingOpenIpcHandle fun:umfMemoryProviderOpenIPCHandle fun:umfOpenIPCHandle diff --git a/test/supp/helgrind-umf_test-provider_file_memory_ipc.supp b/test/supp/helgrind-umf_test-provider_file_memory_ipc.supp index cdc0bd8df4..de22665f51 100644 --- a/test/supp/helgrind-umf_test-provider_file_memory_ipc.supp +++ b/test/supp/helgrind-umf_test-provider_file_memory_ipc.supp @@ -1,6 +1,17 @@ { [false-positive] Double check locking pattern in trackingOpenIpcHandle Helgrind:Race + fun:utils_atomic_store_release_ptr + fun:trackingOpenIpcHandle + fun:umfMemoryProviderOpenIPCHandle + fun:umfOpenIPCHandle + ... +} + +{ + [false-positive] Double check locking pattern in trackingOpenIpcHandle + Helgrind:Race + fun:utils_atomic_load_acquire_ptr fun:trackingOpenIpcHandle fun:umfMemoryProviderOpenIPCHandle fun:umfOpenIPCHandle @@ -10,7 +21,7 @@ { False-positive race in critnib_insert (lack of instrumentation) Helgrind:Race - fun:store + fun:utils_atomic_store_release_ptr fun:critnib_insert ... } @@ -40,3 +51,20 @@ fun:tbb_pool_finalize ... } + +{ + [false-positive] trackingGetIpcHandle + Helgrind:Race + fun:trackingGetIpcHandle + fun:umfMemoryProviderGetIPCHandle + fun:umfGetIPCHandle +} + +{ + [false-positive] trackingGetIpcHandle + Helgrind:Race + fun:memmove + fun:trackingGetIpcHandle + fun:umfMemoryProviderGetIPCHandle + fun:umfGetIPCHandle +} diff --git a/test/supp/helgrind-umf_test-provider_os_memory.supp b/test/supp/helgrind-umf_test-provider_os_memory.supp index d6401e8ee7..4bc776f432 100644 --- a/test/supp/helgrind-umf_test-provider_os_memory.supp +++ b/test/supp/helgrind-umf_test-provider_os_memory.supp @@ -1,6 +1,7 @@ { [false-positive] Double check locking pattern in trackingOpenIpcHandle Helgrind:Race + fun:utils_atomic_store_release_ptr fun:trackingOpenIpcHandle fun:umfMemoryProviderOpenIPCHandle fun:umfOpenIPCHandle From 112a80c963f91d52b347ad6fd6aa5590396992c3 Mon Sep 17 00:00:00 2001 From: Rafal Rudnicki Date: Tue, 4 Mar 2025 15:31:24 +0000 Subject: [PATCH 3/4] prevent from deadlock in DP bucket_can_pool() --- src/pool/pool_disjoint.c | 47 +++++++++++++++++++--------------------- 1 file changed, 22 insertions(+), 25 deletions(-) diff --git a/src/pool/pool_disjoint.c b/src/pool/pool_disjoint.c index 7a2f327e46..0bd88bd246 100644 --- a/src/pool/pool_disjoint.c +++ b/src/pool/pool_disjoint.c @@ -36,7 +36,6 @@ // Forward declarations static void bucket_update_stats(bucket_t *bucket, int in_use, int in_pool); static bool bucket_can_pool(bucket_t *bucket); -static void bucket_decrement_pool(bucket_t *bucket); static slab_list_item_t *bucket_get_avail_slab(bucket_t *bucket, bool *from_pool); @@ -318,6 +317,7 @@ static void bucket_free_chunk(bucket_t *bucket, void *ptr, slab_t *slab, assert(slab_it->val != NULL); pool_unregister_slab(bucket->pool, slab_it->val); DL_DELETE(bucket->available_slabs, slab_it); + assert(bucket->available_slabs_num > 0); bucket->available_slabs_num--; destroy_slab(slab_it->val); } @@ -383,10 +383,16 @@ static slab_list_item_t *bucket_get_avail_slab(bucket_t *bucket, // Allocation from existing slab is treated as from pool for statistics. *from_pool = true; if (slab->num_chunks_allocated == 0) { + assert(bucket->chunked_slabs_in_pool > 0); // If this was an empty slab, it was in the pool. // Now it is no longer in the pool, so update count. --bucket->chunked_slabs_in_pool; - bucket_decrement_pool(bucket); + uint64_t size_to_sub = bucket_slab_alloc_size(bucket); + uint64_t old_size = utils_fetch_and_sub_u64( + &bucket->shared_limits->total_size, size_to_sub); + (void)old_size; + assert(old_size >= size_to_sub); + bucket_update_stats(bucket, 1, -1); } } @@ -422,12 +428,6 @@ static void bucket_update_stats(bucket_t *bucket, int in_use, int in_pool) { in_pool * bucket_slab_alloc_size(bucket); } -static void bucket_decrement_pool(bucket_t *bucket) { - bucket_update_stats(bucket, 1, -1); - utils_fetch_and_add64(&bucket->shared_limits->total_size, - -(long long)bucket_slab_alloc_size(bucket)); -} - static bool bucket_can_pool(bucket_t *bucket) { size_t new_free_slabs_in_bucket; @@ -435,23 +435,20 @@ static bool bucket_can_pool(bucket_t *bucket) { // we keep at most params.capacity slabs in the pool if (bucket_max_pooled_slabs(bucket) >= new_free_slabs_in_bucket) { - size_t pool_size = 0; - utils_atomic_load_acquire(&bucket->shared_limits->total_size, - &pool_size); - while (true) { - size_t new_pool_size = pool_size + bucket_slab_alloc_size(bucket); - - if (bucket->shared_limits->max_size < new_pool_size) { - break; - } - - if (utils_compare_exchange(&bucket->shared_limits->total_size, - &pool_size, &new_pool_size)) { - ++bucket->chunked_slabs_in_pool; - - bucket_update_stats(bucket, -1, 1); - return true; - } + + uint64_t size_to_add = bucket_slab_alloc_size(bucket); + size_t previous_size = utils_fetch_and_add_u64( + &bucket->shared_limits->total_size, size_to_add); + + if (previous_size + size_to_add <= bucket->shared_limits->max_size) { + ++bucket->chunked_slabs_in_pool; + bucket_update_stats(bucket, -1, 1); + return true; + } else { + uint64_t old = utils_fetch_and_sub_u64( + &bucket->shared_limits->total_size, size_to_add); + (void)old; + assert(old >= size_to_add); } } From ab4a76f903981bff79826ef25aa44a0f75d2e4ed Mon Sep 17 00:00:00 2001 From: Rafal Rudnicki Date: Tue, 4 Mar 2025 15:31:32 +0000 Subject: [PATCH 4/4] implement valgrind macros used in critnib --- src/critnib/critnib.c | 12 ++++++------ src/utils/utils_common.h | 3 --- src/utils/utils_sanitizers.h | 20 +++++++++++++++++++- 3 files changed, 25 insertions(+), 10 deletions(-) diff --git a/src/critnib/critnib.c b/src/critnib/critnib.c index 394a671248..c95637f20c 100644 --- a/src/critnib/critnib.c +++ b/src/critnib/critnib.c @@ -174,8 +174,8 @@ struct critnib *critnib_new(void) { goto err_free_critnib; } - VALGRIND_HG_DRD_DISABLE_CHECKING(&c->root, sizeof(c->root)); - VALGRIND_HG_DRD_DISABLE_CHECKING(&c->remove_count, sizeof(c->remove_count)); + utils_annotate_memory_no_check(&c->root, sizeof(c->root)); + utils_annotate_memory_no_check(&c->remove_count, sizeof(c->remove_count)); return c; err_free_critnib: @@ -260,7 +260,7 @@ static struct critnib_node *alloc_node(struct critnib *__restrict c) { struct critnib_node *n = c->deleted_node; c->deleted_node = n->child[0]; - VALGRIND_ANNOTATE_NEW_MEMORY(n, sizeof(*n)); + utils_annotate_memory_new(n, sizeof(*n)); return n; } @@ -291,7 +291,7 @@ static struct critnib_leaf *alloc_leaf(struct critnib *__restrict c) { struct critnib_leaf *k = c->deleted_leaf; c->deleted_leaf = k->value; - VALGRIND_ANNOTATE_NEW_MEMORY(k, sizeof(*k)); + utils_annotate_memory_new(k, sizeof(*k)); return k; } @@ -316,7 +316,7 @@ int critnib_insert(struct critnib *c, word key, void *value, int update) { return ENOMEM; } - VALGRIND_HG_DRD_DISABLE_CHECKING(k, sizeof(struct critnib_leaf)); + utils_annotate_memory_no_check(k, sizeof(struct critnib_leaf)); k->key = key; k->value = value; @@ -377,7 +377,7 @@ int critnib_insert(struct critnib *c, word key, void *value, int update) { return ENOMEM; } - VALGRIND_HG_DRD_DISABLE_CHECKING(m, sizeof(struct critnib_node)); + utils_annotate_memory_no_check(m, sizeof(struct critnib_node)); for (int i = 0; i < SLNODES; i++) { m->child[i] = NULL; diff --git a/src/utils/utils_common.h b/src/utils/utils_common.h index 7824e74af2..fff44f390c 100644 --- a/src/utils/utils_common.h +++ b/src/utils/utils_common.h @@ -53,9 +53,6 @@ typedef enum umf_purge_advise_t { #define ASSERT_IS_ALIGNED(value, align) \ DO_WHILE_EXPRS(assert(IS_ALIGNED(value, align))) -#define VALGRIND_ANNOTATE_NEW_MEMORY(p, s) DO_WHILE_EMPTY -#define VALGRIND_HG_DRD_DISABLE_CHECKING(p, s) DO_WHILE_EMPTY - #ifdef _WIN32 /* Windows */ #define __TLS __declspec(thread) diff --git a/src/utils/utils_sanitizers.h b/src/utils/utils_sanitizers.h index 3498e4b705..f8896d0ae3 100644 --- a/src/utils/utils_sanitizers.h +++ b/src/utils/utils_sanitizers.h @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2024 Intel Corporation + * Copyright (C) 2024-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -168,6 +168,24 @@ static inline void utils_annotate_memory_inaccessible(void *ptr, size_t size) { #endif } +static inline void utils_annotate_memory_new(void *ptr, size_t size) { +#ifdef UMF_VG_DRD_ENABLED + ANNOTATE_NEW_MEMORY(ptr, size); +#else + (void)ptr; + (void)size; +#endif +} + +static inline void utils_annotate_memory_no_check(void *ptr, size_t size) { +#ifdef UMF_VG_HELGRIND_ENABLED + VALGRIND_HG_DISABLE_CHECKING(ptr, size); +#else + (void)ptr; + (void)size; +#endif +} + #ifdef __cplusplus } #endif