From f9346fd6f0bb01da14bde1d0d43fb8bced9ff438 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Plewa?= Date: Wed, 19 Mar 2025 15:06:12 +0100 Subject: [PATCH] add ctl to fixed provider --- benchmark/benchmark_umf.hpp | 3 +- src/ctl/ctl.c | 13 ++- src/provider/provider_ctl_stats_impl.h | 112 +++++++++++++++++++++ src/provider/provider_ctl_stats_type.h | 25 +++++ src/provider/provider_fixed_memory.c | 42 +++++++- src/provider/provider_os_memory.c | 105 ++----------------- src/provider/provider_os_memory_internal.h | 8 +- src/utils/utils_concurrency.h | 28 ++++++ 8 files changed, 228 insertions(+), 108 deletions(-) create mode 100644 src/provider/provider_ctl_stats_impl.h create mode 100644 src/provider/provider_ctl_stats_type.h diff --git a/benchmark/benchmark_umf.hpp b/benchmark/benchmark_umf.hpp index 9553d6fdb5..0bc93a49a7 100644 --- a/benchmark/benchmark_umf.hpp +++ b/benchmark/benchmark_umf.hpp @@ -44,7 +44,8 @@ struct provider_interface { if (state.thread_index() != 0) { return; } - umfCtlExec("umf.provider.by_handle.stats.reset", provider, NULL); + umfCtlExec("umf.provider.by_handle.stats.peak_memory.reset", provider, + NULL); } void postBench([[maybe_unused]] ::benchmark::State &state) { diff --git a/src/ctl/ctl.c b/src/ctl/ctl.c index 99ab2d96e6..da6661d262 100644 --- a/src/ctl/ctl.c +++ b/src/ctl/ctl.c @@ -102,7 +102,7 @@ umf_result_t umfCtlSet(const char *name, void *ctx, void *arg) { } umf_result_t umfCtlExec(const char *name, void *ctx, void *arg) { - if (name == NULL || arg == NULL || ctx == NULL) { + if (name == NULL || ctx == NULL) { return UMF_RESULT_ERROR_INVALID_ARGUMENT; } return ctl_query(NULL, ctx, CTL_QUERY_PROGRAMMATIC, name, @@ -140,6 +140,7 @@ static const umf_ctl_node_t *ctl_find_node(const umf_ctl_node_t *nodes, * in the main ctl tree. 
*/ while (node_name != NULL) { + char *next_node = strtok_r(NULL, CTL_QUERY_NODE_SEPARATOR, &sptr); *name_offset = node_name - parse_str; if (n != NULL && n->type == CTL_NODE_SUBTREE) { // if a subtree occurs, the subtree handler should be called @@ -168,6 +169,14 @@ static const umf_ctl_node_t *ctl_find_node(const umf_ctl_node_t *nodes, if (index_entry && n->type == CTL_NODE_INDEXED) { break; } else if (strcmp(n->name, node_name) == 0) { + if (n->type == CTL_NODE_LEAF && next_node != NULL) { + // this is not the last node in the query, so it couldn't be leaf + continue; + } + if (n->type != CTL_NODE_LEAF && next_node == NULL) { + // this is the last node in the query, so it must be a leaf + continue; + } break; } } @@ -181,7 +190,7 @@ static const umf_ctl_node_t *ctl_find_node(const umf_ctl_node_t *nodes, } nodes = n->children; - node_name = strtok_r(NULL, CTL_QUERY_NODE_SEPARATOR, &sptr); + node_name = next_node; } umf_ba_global_free(parse_str); diff --git a/src/provider/provider_ctl_stats_impl.h b/src/provider/provider_ctl_stats_impl.h new file mode 100644 index 0000000000..a13d312760 --- /dev/null +++ b/src/provider/provider_ctl_stats_impl.h @@ -0,0 +1,112 @@ +/* + * Copyright (C) 2025 Intel Corporation + * + * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + */ + +#ifdef UMF_PROVIDER_CTL_STATS_IMPL_H +#error This file should not be included more than once +#else +#define UMF_PROVIDER_CTL_STATS_IMPL_H 1 + +#ifndef CTL_PROVIDER_TYPE +#error "CTL_PROVIDER_TYPE must be defined" +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +#include "ctl/ctl.h" +#include "utils/utils_assert.h" + +static int CTL_READ_HANDLER(peak_memory)(void *ctx, + umf_ctl_query_source_t source, + void *arg, + umf_ctl_index_utlist_t *indexes, + const char *extra_name, + umf_ctl_query_type_t query_type) { + /* suppress unused-parameter errors */ + (void)source, (void)indexes, (void)extra_name, (void)query_type; + + size_t *arg_out = arg; + CTL_PROVIDER_TYPE *provider = (CTL_PROVIDER_TYPE *)ctx; + utils_atomic_load_acquire_size_t(&provider->stats.peak_memory, arg_out); + return 0; +} + +static int CTL_READ_HANDLER(allocated_memory)(void *ctx, + umf_ctl_query_source_t source, + void *arg, + umf_ctl_index_utlist_t *indexes, + const char *extra_name, + umf_ctl_query_type_t query_type) { + /* suppress unused-parameter errors */ + (void)source, (void)indexes, (void)extra_name, (void)query_type; + + size_t *arg_out = arg; + CTL_PROVIDER_TYPE *provider = (CTL_PROVIDER_TYPE *)ctx; + utils_atomic_load_acquire_size_t(&provider->stats.allocated_memory, + arg_out); + return 0; +} + +static int CTL_RUNNABLE_HANDLER(reset)(void *ctx, umf_ctl_query_source_t source, + void *arg, + umf_ctl_index_utlist_t *indexes, + const char *extra_name, + umf_ctl_query_type_t query_type) { + /* suppress unused-parameter errors */ + (void)source, (void)indexes, (void)arg, (void)extra_name, (void)query_type; + + CTL_PROVIDER_TYPE *provider = (CTL_PROVIDER_TYPE *)ctx; + size_t allocated; + size_t current_peak; + + utils_atomic_load_acquire_size_t(&provider->stats.peak_memory, + &current_peak); + do { + utils_atomic_load_acquire_size_t(&provider->stats.allocated_memory, + &allocated); + } while 
(!utils_compare_exchange_size_t(&provider->stats.peak_memory, + &current_peak, &allocated)); + + return 0; +} + +static const umf_ctl_node_t CTL_NODE(peak_memory)[] = {CTL_LEAF_RUNNABLE(reset), + CTL_NODE_END}; + +static const umf_ctl_node_t CTL_NODE(stats)[] = { + CTL_LEAF_RO(allocated_memory), CTL_LEAF_RO(peak_memory), + CTL_CHILD(peak_memory), CTL_LEAF_RUNNABLE(reset), CTL_NODE_END}; + +static inline void provider_ctl_stats_alloc(CTL_PROVIDER_TYPE *provider, + size_t size) { + size_t allocated = + utils_fetch_and_add_size_t(&provider->stats.allocated_memory, size) + + size; + + size_t peak; + utils_atomic_load_acquire_size_t(&provider->stats.peak_memory, &peak); + + // If the compare-exchange fails, 'peak' is updated to the current value + // of peak_memory. We then re-check whether allocated is still greater than + // the updated peak value. + while (allocated > peak && + !utils_compare_exchange_size_t(&provider->stats.peak_memory, &peak, + &allocated)) { + ; + } +} + +static inline void provider_ctl_stats_free(CTL_PROVIDER_TYPE *provider, + size_t size) { + utils_fetch_and_sub_size_t(&provider->stats.allocated_memory, size); +} + +#ifdef __cplusplus +} +#endif +#endif diff --git a/src/provider/provider_ctl_stats_type.h b/src/provider/provider_ctl_stats_type.h new file mode 100644 index 0000000000..309b253b14 --- /dev/null +++ b/src/provider/provider_ctl_stats_type.h @@ -0,0 +1,25 @@ +/* + * Copyright (C) 2025 Intel Corporation + * + * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +*/ + +#ifndef UMF_PROVIDER_CTL_STATS_TYPE_H +#define UMF_PROVIDER_CTL_STATS_TYPE_H 1 + +#include <stddef.h> + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct ctl_stats_t { + size_t allocated_memory; + size_t peak_memory; +} ctl_stats_t; + +#ifdef __cplusplus +} +#endif +#endif diff --git a/src/provider/provider_fixed_memory.c b/src/provider/provider_fixed_memory.c index eeeb8b7025..034f6e3005 100644 --- a/src/provider/provider_fixed_memory.c +++ b/src/provider/provider_fixed_memory.c @@ -20,6 +20,7 @@ #include "base_alloc_global.h" #include "coarse.h" #include "libumf.h" +#include "provider_ctl_stats_type.h" #include "utils_common.h" #include "utils_concurrency.h" #include "utils_log.h" @@ -30,6 +31,7 @@ typedef struct fixed_memory_provider_t { void *base; // base address of memory size_t size; // size of the memory region coarse_t *coarse; // coarse library handle + ctl_stats_t stats; } fixed_memory_provider_t; // Fixed Memory provider settings struct @@ -52,6 +54,17 @@ static __TLS fixed_last_native_error_t TLS_last_native_error; #define _UMF_FIXED_RESULT_ERROR_PURGE_FORCE_FAILED \ (UMF_FIXED_RESULT_ERROR_PURGE_FORCE_FAILED - UMF_FIXED_RESULT_SUCCESS) +#define CTL_PROVIDER_TYPE fixed_memory_provider_t +#include "provider_ctl_stats_impl.h" + +struct ctl *fixed_memory_ctl_root; +static UTIL_ONCE_FLAG ctl_initialized = UTIL_ONCE_FLAG_INIT; + +static void initialize_fixed_ctl(void) { + fixed_memory_ctl_root = ctl_new(); + CTL_REGISTER_MODULE(fixed_memory_ctl_root, stats); +} + static const char *Native_error_str[] = { [_UMF_FIXED_RESULT_SUCCESS] = "success", [_UMF_FIXED_RESULT_ERROR_PURGE_FORCE_FAILED] = "force purging failed"}; @@ -153,7 +166,14 @@ static umf_result_t fixed_alloc(void *provider, size_t size, size_t alignment, fixed_memory_provider_t *fixed_provider = (fixed_memory_provider_t *)provider; - return coarse_alloc(fixed_provider->coarse, size, alignment, resultPtr); + umf_result_t ret = + 
coarse_alloc(fixed_provider->coarse, size, alignment, resultPtr); + + if (ret == UMF_RESULT_SUCCESS) { + provider_ctl_stats_alloc(fixed_provider, size); + } + + return ret; } static void fixed_get_last_native_error(void *provider, const char **ppMessage, @@ -250,7 +270,22 @@ static umf_result_t fixed_allocation_merge(void *provider, void *lowPtr, static umf_result_t fixed_free(void *provider, void *ptr, size_t size) { fixed_memory_provider_t *fixed_provider = (fixed_memory_provider_t *)provider; - return coarse_free(fixed_provider->coarse, ptr, size); + + umf_result_t ret = coarse_free(fixed_provider->coarse, ptr, size); + + if (ret == UMF_RESULT_SUCCESS) { + provider_ctl_stats_free(fixed_provider, size); + } + + return ret; +} + +static umf_result_t fixed_ctl(void *provider, int operationType, + const char *name, void *arg, + umf_ctl_query_type_t query_type) { + utils_init_once(&ctl_initialized, initialize_fixed_ctl); + return ctl_query(fixed_memory_ctl_root, provider, operationType, name, + query_type, arg); } static umf_memory_provider_ops_t UMF_FIXED_MEMORY_PROVIDER_OPS = { @@ -271,7 +306,8 @@ static umf_memory_provider_ops_t UMF_FIXED_MEMORY_PROVIDER_OPS = { .ipc.get_ipc_handle = NULL, .ipc.put_ipc_handle = NULL, .ipc.open_ipc_handle = NULL, - .ipc.close_ipc_handle = NULL}; + .ipc.close_ipc_handle = NULL, + .ctl = fixed_ctl}; umf_memory_provider_ops_t *umfFixedMemoryProviderOps(void) { return &UMF_FIXED_MEMORY_PROVIDER_OPS; diff --git a/src/provider/provider_os_memory.c b/src/provider/provider_os_memory.c index 1ecb397fe8..638ef7ff5e 100644 --- a/src/provider/provider_os_memory.c +++ b/src/provider/provider_os_memory.c @@ -102,6 +102,9 @@ umf_result_t umfOsMemoryProviderParamsSetPartitions( #include "utils_concurrency.h" #include "utils_log.h" +#define CTL_PROVIDER_TYPE os_memory_provider_t +#include "provider_ctl_stats_impl.h" + #define NODESET_STR_BUF_LEN 1024 #define TLS_MSG_BUF_LEN 1024 @@ -189,70 +192,6 @@ static int CTL_READ_HANDLER(ipc_enabled)(void 
*ctx, return 0; } -static int CTL_READ_HANDLER(peak_memory)(void *ctx, - umf_ctl_query_source_t source, - void *arg, - umf_ctl_index_utlist_t *indexes, - const char *extra_name, - umf_ctl_query_type_t query_type) { - /* suppress unused-parameter errors */ - (void)source, (void)indexes, (void)ctx, (void)extra_name, (void)query_type; - - size_t *arg_out = arg; - os_memory_provider_t *os_provider = (os_memory_provider_t *)ctx; - COMPILE_ERROR_ON(sizeof(os_provider->stats.peak_memory) != - sizeof(uint64_t)); - utils_atomic_load_acquire_u64((uint64_t *)&os_provider->stats.peak_memory, - (uint64_t *)arg_out); - return 0; -} - -static int CTL_READ_HANDLER(allocated_memory)(void *ctx, - umf_ctl_query_source_t source, - void *arg, - umf_ctl_index_utlist_t *indexes, - const char *extra_name, - umf_ctl_query_type_t query_type) { - /* suppress unused-parameter errors */ - (void)source, (void)indexes, (void)ctx, (void)extra_name, (void)query_type; - - size_t *arg_out = arg; - os_memory_provider_t *os_provider = (os_memory_provider_t *)ctx; - COMPILE_ERROR_ON(sizeof(os_provider->stats.allocated_memory) != - sizeof(uint64_t)); - COMPILE_ERROR_ON(sizeof(*arg_out) != sizeof(uint64_t)); - utils_atomic_load_acquire_u64( - (uint64_t *)&os_provider->stats.allocated_memory, (uint64_t *)arg_out); - return 0; -} - -static int CTL_RUNNABLE_HANDLER(reset)(void *ctx, umf_ctl_query_source_t source, - void *arg, - umf_ctl_index_utlist_t *indexes, - const char *extra_name, - umf_ctl_query_type_t query_type) { - /* suppress unused-parameter errors */ - (void)source, (void)indexes, (void)arg, (void)extra_name, (void)query_type; - - os_memory_provider_t *os_provider = (os_memory_provider_t *)ctx; - size_t allocated; - - COMPILE_ERROR_ON(sizeof(os_provider->stats.allocated_memory) != - sizeof(uint64_t)); - COMPILE_ERROR_ON(sizeof(allocated) != sizeof(uint64_t)); - - utils_atomic_load_acquire_u64( - (uint64_t *)&os_provider->stats.allocated_memory, - (uint64_t *)&allocated); - 
utils_atomic_store_release_u64((uint64_t *)&os_provider->stats.peak_memory, - (uint64_t)allocated); - - return 0; -} -static const umf_ctl_node_t CTL_NODE(stats)[] = { - CTL_LEAF_RO(allocated_memory), CTL_LEAF_RO(peak_memory), - CTL_LEAF_RUNNABLE(reset), CTL_NODE_END}; - static const umf_ctl_node_t CTL_NODE(params)[] = {CTL_LEAF_RO(ipc_enabled), CTL_NODE_END}; @@ -1176,29 +1115,7 @@ static umf_result_t os_alloc(void *provider, size_t size, size_t alignment, *resultPtr = addr; - COMPILE_ERROR_ON(sizeof(os_provider->stats.allocated_memory) != - sizeof(uint64_t)); - COMPILE_ERROR_ON(sizeof(os_provider->stats.peak_memory) != - sizeof(uint64_t)); - COMPILE_ERROR_ON(sizeof(size) != sizeof(uint64_t)); - // TODO: Change to memory_order_relaxed when we will have a proper wrapper - size_t allocated = - utils_fetch_and_add_u64( - (uint64_t *)&os_provider->stats.allocated_memory, (uint64_t)size) + - size; - - uint64_t peak; - utils_atomic_load_acquire_u64((uint64_t *)&os_provider->stats.peak_memory, - &peak); - - while (allocated > peak && !utils_compare_exchange_u64( - (uint64_t *)&os_provider->stats.peak_memory, - &peak, (uint64_t *)&allocated)) { - /* If the compare-exchange fails, 'peak' is updated to the current value of peak_memory. - We then re-check whether allocated is still greater than the updated peak value. 
*/ - ; - } - + provider_ctl_stats_alloc(os_provider, size); return UMF_RESULT_SUCCESS; err_unmap: @@ -1226,13 +1143,7 @@ static umf_result_t os_free(void *provider, void *ptr, size_t size) { return UMF_RESULT_ERROR_MEMORY_PROVIDER_SPECIFIC; } - COMPILE_ERROR_ON(sizeof(size) != sizeof(uint64_t)); - COMPILE_ERROR_ON(sizeof(os_provider->stats.allocated_memory) != - sizeof(uint64_t)); - - // TODO: Change it to memory_order_relaxed when we will have a proper wrapper - utils_fetch_and_sub_u64((uint64_t *)&os_provider->stats.allocated_memory, - size); + provider_ctl_stats_free(os_provider, size); return UMF_RESULT_SUCCESS; } @@ -1530,11 +1441,9 @@ static umf_result_t os_close_ipc_handle(void *provider, void *ptr, static umf_result_t os_ctl(void *hProvider, int operationType, const char *name, void *arg, umf_ctl_query_type_t query_type) { - (void)operationType; // unused - os_memory_provider_t *os_provider = (os_memory_provider_t *)hProvider; utils_init_once(&ctl_initialized, initialize_os_ctl); - return ctl_query(os_memory_ctl_root, os_provider, CTL_QUERY_PROGRAMMATIC, - name, query_type, arg); + return ctl_query(os_memory_ctl_root, hProvider, operationType, name, + query_type, arg); } static umf_memory_provider_ops_t UMF_OS_MEMORY_PROVIDER_OPS = { diff --git a/src/provider/provider_os_memory_internal.h b/src/provider/provider_os_memory_internal.h index a3f35cbd3e..4d2e8e2176 100644 --- a/src/provider/provider_os_memory_internal.h +++ b/src/provider/provider_os_memory_internal.h @@ -22,6 +22,8 @@ #include "utils_common.h" #include "utils_concurrency.h" +#include "provider_ctl_stats_type.h" + #ifdef __cplusplus extern "C" { #endif @@ -67,10 +69,8 @@ typedef struct os_memory_provider_t { size_t partitions_weight_sum; hwloc_topology_t topo; - struct { - size_t allocated_memory; - size_t peak_memory; - } stats; + + ctl_stats_t stats; } os_memory_provider_t; #ifdef __cplusplus diff --git a/src/utils/utils_concurrency.h b/src/utils/utils_concurrency.h index 
67df0169f7..266d0983c2 100644 --- a/src/utils/utils_concurrency.h +++ b/src/utils/utils_concurrency.h @@ -40,6 +40,7 @@ using std::memory_order_release; #endif /* !_WIN32 */ +#include "utils_assert.h" #include "utils_common.h" #include "utils_sanitizers.h" @@ -219,6 +220,33 @@ static inline bool utils_compare_exchange_u64(uint64_t *ptr, uint64_t *expected, #endif // !defined(_WIN32) +static inline void utils_atomic_load_acquire_size_t(size_t *ptr, size_t *out) { + COMPILE_ERROR_ON(sizeof(size_t) != sizeof(uint64_t)); + utils_atomic_load_acquire_u64((uint64_t *)ptr, (uint64_t *)out); +} + +static inline void utils_atomic_store_release_size_t(size_t *ptr, size_t val) { + COMPILE_ERROR_ON(sizeof(size_t) != sizeof(uint64_t)); + utils_atomic_store_release_u64((uint64_t *)ptr, (uint64_t)val); +} + +static inline size_t utils_fetch_and_add_size_t(size_t *ptr, size_t val) { + COMPILE_ERROR_ON(sizeof(size_t) != sizeof(uint64_t)); + return utils_fetch_and_add_u64((uint64_t *)ptr, (uint64_t)val); +} + +static inline size_t utils_fetch_and_sub_size_t(size_t *ptr, size_t val) { + COMPILE_ERROR_ON(sizeof(size_t) != sizeof(uint64_t)); + return utils_fetch_and_sub_u64((uint64_t *)ptr, (uint64_t)val); +} + +static inline bool utils_compare_exchange_size_t(size_t *ptr, size_t *expected, + size_t *desired) { + COMPILE_ERROR_ON(sizeof(size_t) != sizeof(uint64_t)); + return utils_compare_exchange_u64((uint64_t *)ptr, (uint64_t *)expected, + (uint64_t *)desired); +} + #ifdef __cplusplus } #endif