From 891f184cfa8d3df0a025853e50cf1a1be498adaf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Plewa?= Date: Wed, 27 Aug 2025 16:31:17 +0200 Subject: [PATCH 1/4] initialize hwloc topology only before first fork, not always --- src/libumf.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/src/libumf.c b/src/libumf.c index 9df7ee29cd..4ccde9bb4f 100644 --- a/src/libumf.c +++ b/src/libumf.c @@ -40,6 +40,13 @@ static umf_ctl_node_t CTL_NODE(umf)[] = {CTL_CHILD(provider), CTL_CHILD(pool), void initialize_global_ctl(void) { CTL_REGISTER_MODULE(NULL, umf); } +// Benchmarks may fork multiple times and topology init is slow. +// Init topology before fork (if not already) so children don't repeat it. +// TODO: This is a hack. A better solution is needed. +#if !defined(_WIN32) && !defined(UMF_NO_HWLOC) +static void atfork_prepare(void) { umfGetTopologyReduced(); } +#endif + umf_result_t umfInit(void) { utils_init_once(&initMutexOnce, initialize_init_mutex); @@ -74,11 +81,11 @@ umf_result_t umfInit(void) { if (TRACKER) { LOG_DEBUG("UMF library initialized"); } -#if !defined(UMF_NO_HWLOC) - // some benchmarks uses multiple forks, and topology initialization is very slow - // so if we initialize topology before the first fork, we can get significant performance gain. 
- umfGetTopologyReduced(); + +#if !defined(_WIN32) && !defined(UMF_NO_HWLOC) + pthread_atfork(atfork_prepare, NULL, NULL); #endif + return UMF_RESULT_SUCCESS; } From 7cf1e78a2fb74565827efbbee50062a0f3a71862 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Stolarczuk?= Date: Wed, 3 Sep 2025 17:24:14 +0200 Subject: [PATCH 2/4] 1.0.2 release --- ChangeLog | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/ChangeLog b/ChangeLog index c55cb871e9..b2c618c3db 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,10 @@ +Wed Sep 03 2025 Łukasz Stolarczuk + + * Version 1.0.2 + + This patch release contains the following change: + - initialize hwloc topology only before first fork, not always (#1509) + Fri Aug 08 2025 Łukasz Stolarczuk * Version 1.0.1 From 532f4c88e9fcdee9a3168efc8da7168252384c61 Mon Sep 17 00:00:00 2001 From: Rafal Rudnicki Date: Fri, 12 Sep 2025 19:50:06 +0200 Subject: [PATCH 3/4] load libcuda.so.1 instead of libcuda.so on linux --- src/provider/provider_cuda.c | 2 +- test/providers/cuda_helpers.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/provider/provider_cuda.c b/src/provider/provider_cuda.c index 983be6b55a..953876fc12 100644 --- a/src/provider/provider_cuda.c +++ b/src/provider/provider_cuda.c @@ -149,7 +149,7 @@ static void init_cu_global_state(void) { #ifdef _WIN32 const char *lib_name = "nvcuda.dll"; #else - const char *lib_name = "libcuda.so"; + const char *lib_name = "libcuda.so.1"; #endif // The CUDA shared library should be already loaded by the user // of the CUDA provider. 
UMF just want to reuse it diff --git a/test/providers/cuda_helpers.cpp b/test/providers/cuda_helpers.cpp index 3e81c184ff..4678ea4acd 100644 --- a/test/providers/cuda_helpers.cpp +++ b/test/providers/cuda_helpers.cpp @@ -110,7 +110,7 @@ int InitCUDAOps() { #ifdef _WIN32 const char *lib_name = "nvcuda.dll"; #else - const char *lib_name = "libcuda.so"; + const char *lib_name = "libcuda.so.1"; #endif // CUDA symbols #if OPEN_CU_LIBRARY_GLOBAL From e7d285269e04d3c0e70917c24945c80a4ea3313a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Stolarczuk?= Date: Tue, 16 Sep 2025 15:04:27 +0200 Subject: [PATCH 4/4] 1.0.3 release --- ChangeLog | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/ChangeLog b/ChangeLog index b2c618c3db..0de579ecf8 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,10 @@ +Tue Sep 16 2025 Łukasz Stolarczuk + + * Version 1.0.3 + + This patch release contains the following change: + - load libcuda.so.1 instead of libcuda.so on linux (#1518) + Wed Sep 03 2025 Łukasz Stolarczuk * Version 1.0.2