diff --git a/.github/workflows/pr_push.yml b/.github/workflows/pr_push.yml index 52bd73756..cef83c9e8 100644 --- a/.github/workflows/pr_push.yml +++ b/.github/workflows/pr_push.yml @@ -99,6 +99,6 @@ jobs: uses: ./.github/workflows/reusable_compatibility.yml strategy: matrix: - tag: ["v1.0.0"] + tag: ["v1.0.1"] with: tag: ${{matrix.tag}} diff --git a/.github/workflows/reusable_compatibility.yml b/.github/workflows/reusable_compatibility.yml index 573812ba4..8f38fdfe7 100644 --- a/.github/workflows/reusable_compatibility.yml +++ b/.github/workflows/reusable_compatibility.yml @@ -9,7 +9,10 @@ on: tag: description: Check backward compatibility with this tag type: string - default: "v1.0.0" + # While we're still compatible with v1.0.0, we implemented a fix in v1.0.1 + # to verify if the split operation is supported (in jemalloc pool). + # Without bumping the tag we'd have to omit some tests. + default: "v1.0.1" permissions: contents: read @@ -99,7 +102,7 @@ jobs: UMF_LOG: level:warning;flush:debug;output:stderr;pid:no LD_LIBRARY_PATH: ${{github.workspace}}/latest_version/build/lib/ run: | - ctest --verbose -E test_memoryProvider + ctest --verbose -E "test_memoryProvider" test/test_memoryProvider --gtest_filter="-*Trace" # Browse all folders in the examples directory, build them using the @@ -226,7 +229,9 @@ jobs: UMF_LOG: level:warning;flush:debug;output:stderr;pid:no run: | cp ${{github.workspace}}/latest_version/build/bin/Debug/umf.dll ${{github.workspace}}/tag_version/build/bin/Debug/umf.dll - ctest -C Debug --verbose -E test_memoryProvider + ctest -C Debug --verbose -E "test_memoryProvider" + $env:Path = "${{github.workspace}}/tag_version/build/bin/Debug;${{env.VCPKG_BIN_PATH}};$env:Path" + test/Debug/test_memoryProvider.exe --gtest_filter="-*Trace" # Browse all folders in the examples directory, build them using the # latest UMF version, and run them, excluding those in the exclude list. 
@@ -368,7 +373,7 @@ jobs: UMF_LOG: level:warning;flush:debug;output:stderr;pid:no LD_LIBRARY_PATH: ${{github.workspace}}/latest_version/build/lib/ run: | - ctest --verbose -E test_memoryProvider + ctest --verbose -E "test_memoryProvider" test/test_memoryProvider --gtest_filter="-*Trace" # Browse all folders in the examples directory, build them using the diff --git a/CMakeLists.txt b/CMakeLists.txt index 0250fe2f6..0102031d4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -203,7 +203,14 @@ else() DEPENDS ${jemalloc_targ_SOURCE_DIR}/configure) if(NOT UMF_QEMU_BUILD) - set(MAKE_ARGUMENTS "-j$(nproc)") + if(CMAKE_GENERATOR STREQUAL "Ninja") + # While CMake is supposed to escape this in the generated build + # files, for some reason, it doesn't do so here. Until it's fixed, + # we just manually escape it for ninja. + set(MAKE_ARGUMENTS "-j$$(nproc)") + else() + set(MAKE_ARGUMENTS "-j$(nproc)") + endif() endif() add_custom_command( diff --git a/ChangeLog b/ChangeLog index 72b7fdec6..c55cb871e 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,12 @@ +Fri Aug 08 2025 Łukasz Stolarczuk + + * Version 1.0.1 + + This patch release contains the following changes: + - make topology_init faster (#1469) + - verify if the provider supports the split operation (#1465) + - fix build failure when building for jemalloc with ninja (#1474) + Mon Jul 21 2025 Łukasz Stolarczuk * Version 1.0.0 diff --git a/src/libumf.c b/src/libumf.c index 62aae14a3..de87935aa 100644 --- a/src/libumf.c +++ b/src/libumf.c @@ -91,7 +91,11 @@ umf_result_t umfInit(void) { if (TRACKER) { LOG_DEBUG("UMF library initialized"); } - +#if !defined(UMF_NO_HWLOC) + // some benchmarks use multiple forks, and topology initialization is very slow + // so if we initialize topology before the first fork, we can get a significant performance gain. 
+ umfGetTopologyReduced(); +#endif return UMF_RESULT_SUCCESS; } diff --git a/src/memspaces/memspace_host_all.c b/src/memspaces/memspace_host_all.c index b83d3a0f1..5ff9e8edd 100644 --- a/src/memspaces/memspace_host_all.c +++ b/src/memspaces/memspace_host_all.c @@ -35,7 +35,7 @@ static umf_result_t umfMemspaceHostAllCreate(umf_memspace_handle_t *hMemspace) { umf_result_t umf_ret = UMF_RESULT_SUCCESS; - hwloc_topology_t topology = umfGetTopology(); + hwloc_topology_t topology = umfGetTopologyReduced(); if (!topology) { // TODO: What would be an approrpiate err? return UMF_RESULT_ERROR_UNKNOWN; diff --git a/src/memtargets/memtarget_numa.c b/src/memtargets/memtarget_numa.c index a0a1e592a..d6431ef9f 100644 --- a/src/memtargets/memtarget_numa.c +++ b/src/memtargets/memtarget_numa.c @@ -218,7 +218,7 @@ static umf_result_t numa_get_capacity(void *memTarget, size_t *capacity) { return UMF_RESULT_ERROR_INVALID_ARGUMENT; } - hwloc_topology_t topology = umfGetTopology(); + hwloc_topology_t topology = umfGetTopologyReduced(); if (!topology) { return UMF_RESULT_ERROR_NOT_SUPPORTED; } diff --git a/src/pool/pool_jemalloc.c b/src/pool/pool_jemalloc.c index 1a029d66d..29e095322 100644 --- a/src/pool/pool_jemalloc.c +++ b/src/pool/pool_jemalloc.c @@ -12,6 +12,8 @@ #include #include "base_alloc_global.h" +#include "memory_provider_internal.h" +#include "provider_tracking.h" #include "utils_common.h" #include "utils_concurrency.h" #include "utils_log.h" @@ -283,8 +285,15 @@ static bool arena_extent_split(extent_hooks_t *extent_hooks, void *addr, jemalloc_memory_pool_t *pool = get_pool_by_arena_index(arena_ind); assert(pool); - return umfMemoryProviderAllocationSplit(pool->provider, addr, size, - size_a) != UMF_RESULT_SUCCESS; + + umf_result_t ret = + umfMemoryProviderAllocationSplit(pool->provider, addr, size, size_a); + if (ret != UMF_RESULT_SUCCESS) { + LOG_ERR("memory provider failed to split a memory region, while " + "jemalloc requires that"); + } + + return ret != 
UMF_RESULT_SUCCESS; } // arena_extent_merge - an extent merge function conforms to the extent_merge_t type and optionally @@ -435,11 +444,45 @@ static void *op_aligned_alloc(void *pool, size_t size, size_t alignment) { return ptr; } +// Verify if the memory provider supports the split() operation, +// because jemalloc pool requires that. +static umf_result_t verify_split(umf_memory_provider_handle_t provider) { + // Retrieve the upstream memory provider + umf_memory_provider_handle_t upstream_provider = NULL; + umfTrackingMemoryProviderGetUpstreamProvider( + umfMemoryProviderGetPriv(provider), &upstream_provider); + + size_t page_size = 0; + umf_result_t ret = + umfMemoryProviderGetMinPageSize(upstream_provider, NULL, &page_size); + if (ret != UMF_RESULT_SUCCESS) { + return ret; + } + + size_t size = 2 * page_size; // use double the page size for the split test + if (UMF_RESULT_ERROR_NOT_SUPPORTED == + umfMemoryProviderAllocationSplit(upstream_provider, (void *)size, size, + page_size)) { + LOG_ERR("memory provider does not support the split operation, while " + "jemalloc pool requires that"); + return UMF_RESULT_ERROR_NOT_SUPPORTED; + } + + return UMF_RESULT_SUCCESS; +} + static umf_result_t op_initialize(umf_memory_provider_handle_t provider, const void *params, void **out_pool) { assert(provider); assert(out_pool); + // Verify if the memory provider supports the split() operation, + // because jemalloc pool requires that. 
+ umf_result_t ret = verify_split(provider); + if (ret != UMF_RESULT_SUCCESS) { + return ret; + } + extent_hooks_t *pHooks = &arena_extent_hooks; size_t unsigned_size = sizeof(unsigned); int n_arenas_set_from_params = 0; diff --git a/src/provider/provider_os_memory.c b/src/provider/provider_os_memory.c index 7e003484f..c83277cb7 100644 --- a/src/provider/provider_os_memory.c +++ b/src/provider/provider_os_memory.c @@ -24,6 +24,7 @@ #include "ctl/ctl_internal.h" #include "libumf.h" #include "provider_os_memory_internal.h" +#include "topology.h" #include "utils_assert.h" #include "utils_common.h" #include "utils_concurrency.h" @@ -561,27 +562,20 @@ static umf_result_t os_initialize(const void *params, void **provider) { snprintf(os_provider->name, sizeof(os_provider->name), "%s", in_params->name); - int r = hwloc_topology_init(&os_provider->topo); - if (r) { - LOG_ERR("HWLOC topology init failed"); - ret = UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; - goto err_free_os_provider; - } - - r = hwloc_topology_load(os_provider->topo); - if (r) { + os_provider->topo = umfGetTopologyReduced(); + if (!os_provider->topo) { os_store_last_native_error(UMF_OS_RESULT_ERROR_TOPO_DISCOVERY_FAILED, 0); LOG_ERR("HWLOC topology discovery failed"); ret = UMF_RESULT_ERROR_MEMORY_PROVIDER_SPECIFIC; - goto err_destroy_hwloc_topology; + goto err_free_os_provider; } os_provider->fd_offset_map = critnib_new(NULL, NULL); if (!os_provider->fd_offset_map) { LOG_ERR("creating file descriptor offset map failed"); ret = UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; - goto err_destroy_hwloc_topology; + goto err_free_os_provider; } ret = translate_params(in_params, os_provider); @@ -624,8 +618,6 @@ static umf_result_t os_initialize(const void *params, void **provider) { free_bitmaps(os_provider); err_destroy_critnib: critnib_delete(os_provider->fd_offset_map); -err_destroy_hwloc_topology: - hwloc_topology_destroy(os_provider->topo); err_free_os_provider: umf_ba_global_free(os_provider); return ret; @@ -649,7 +641,7 @@ static umf_result_t 
os_finalize(void *provider) { if (os_provider->nodeset_str_buf) { umf_ba_global_free(os_provider->nodeset_str_buf); } - hwloc_topology_destroy(os_provider->topo); + umf_ba_global_free(os_provider); return UMF_RESULT_SUCCESS; } diff --git a/src/topology.c b/src/topology.c index eab7992ce..da7666f87 100644 --- a/src/topology.c +++ b/src/topology.c @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2024 Intel Corporation + * Copyright (C) 2024-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -13,7 +13,9 @@ #include "utils_log.h" static hwloc_topology_t topology = NULL; +static hwloc_topology_t topology_reduced = NULL; static UTIL_ONCE_FLAG topology_initialized = UTIL_ONCE_FLAG_INIT; +static UTIL_ONCE_FLAG topology_reduced_initialized = UTIL_ONCE_FLAG_INIT; void umfDestroyTopology(void) { if (topology) { @@ -24,22 +26,55 @@ void umfDestroyTopology(void) { memcpy(&topology_initialized, &is_initialized, sizeof(topology_initialized)); } + if (topology_reduced) { + hwloc_topology_destroy(topology_reduced); + + // portable version of "topology_reduced_initialized = UTIL_ONCE_FLAG_INIT;" + static UTIL_ONCE_FLAG is_initialized = UTIL_ONCE_FLAG_INIT; + memcpy(&topology_reduced_initialized, &is_initialized, + sizeof(topology_reduced_initialized)); + } } -static void umfCreateTopology(void) { - if (hwloc_topology_init(&topology)) { +static void umfCreateTopologyHelper(bool reduced, + hwloc_topology_t *topology_ptr) { + if (hwloc_topology_init(topology_ptr)) { LOG_ERR("Failed to initialize topology"); - topology = NULL; + *topology_ptr = NULL; return; } - if (hwloc_topology_load(topology)) { + if (reduced) { + // Set the topology to only include NUMA nodes and memory + // to improve performance of the topology load on large systems + if (hwloc_topology_set_all_types_filter(*topology_ptr, + HWLOC_TYPE_FILTER_KEEP_NONE)) { + LOG_ERR("Failed to set topology filter"); + 
hwloc_topology_destroy(*topology_ptr); + *topology_ptr = NULL; + return; + } + } + if (hwloc_topology_load(*topology_ptr)) { LOG_ERR("Failed to initialize topology"); - hwloc_topology_destroy(topology); - topology = NULL; + hwloc_topology_destroy(*topology_ptr); + *topology_ptr = NULL; } } +static void umfCreateTopology(void) { + umfCreateTopologyHelper(false, &topology); +} + +static void umfCreateTopologyReduced(void) { + umfCreateTopologyHelper(true, &topology_reduced); +} + +hwloc_topology_t umfGetTopologyReduced(void) { + utils_init_once(&topology_reduced_initialized, umfCreateTopologyReduced); + return topology_reduced; +} + hwloc_topology_t umfGetTopology(void) { utils_init_once(&topology_initialized, umfCreateTopology); return topology; diff --git a/src/topology.h b/src/topology.h index c20defda7..5d5982a0c 100644 --- a/src/topology.h +++ b/src/topology.h @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2024 Intel Corporation + * Copyright (C) 2024-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -17,6 +17,7 @@ extern "C" { #endif hwloc_topology_t umfGetTopology(void); +hwloc_topology_t umfGetTopologyReduced(void); void umfDestroyTopology(void); #ifdef __cplusplus diff --git a/test/common/provider.hpp b/test/common/provider.hpp index c9206b5f6..b46c92305 100644 --- a/test/common/provider.hpp +++ b/test/common/provider.hpp @@ -66,23 +66,23 @@ auto wrapProviderUnique(umf_memory_provider_handle_t hProvider) { typedef struct provider_base_t { umf_result_t initialize() noexcept { return UMF_RESULT_SUCCESS; }; umf_result_t alloc(size_t, size_t, void **) noexcept { - return UMF_RESULT_ERROR_UNKNOWN; + return UMF_RESULT_ERROR_NOT_SUPPORTED; } umf_result_t free([[maybe_unused]] void *ptr, [[maybe_unused]] size_t size) noexcept { - return UMF_RESULT_ERROR_UNKNOWN; + return UMF_RESULT_ERROR_NOT_SUPPORTED; } umf_result_t get_last_native_error(const char **, int32_t *) noexcept { - return UMF_RESULT_ERROR_UNKNOWN; + return UMF_RESULT_ERROR_NOT_SUPPORTED; } umf_result_t get_recommended_page_size([[maybe_unused]] size_t size, [[maybe_unused]] size_t *pageSize) noexcept { - return UMF_RESULT_ERROR_UNKNOWN; + return UMF_RESULT_ERROR_NOT_SUPPORTED; } umf_result_t get_min_page_size([[maybe_unused]] const void *ptr, [[maybe_unused]] size_t *pageSize) noexcept { - return UMF_RESULT_ERROR_UNKNOWN; + return UMF_RESULT_ERROR_NOT_SUPPORTED; } umf_result_t get_name(const char **name) noexcept { *name = "base"; @@ -90,45 +90,45 @@ typedef struct provider_base_t { } umf_result_t ext_purge_lazy([[maybe_unused]] void *ptr, [[maybe_unused]] size_t size) noexcept { - return UMF_RESULT_ERROR_UNKNOWN; + return UMF_RESULT_ERROR_NOT_SUPPORTED; } umf_result_t ext_purge_force([[maybe_unused]] void *ptr, [[maybe_unused]] size_t size) noexcept { - return UMF_RESULT_ERROR_UNKNOWN; + return UMF_RESULT_ERROR_NOT_SUPPORTED; } umf_result_t ext_allocation_merge([[maybe_unused]] void *lowPtr, [[maybe_unused]] void *highPtr, [[maybe_unused]] 
size_t totalSize) { - return UMF_RESULT_ERROR_UNKNOWN; + return UMF_RESULT_ERROR_NOT_SUPPORTED; } umf_result_t ext_allocation_split([[maybe_unused]] void *ptr, [[maybe_unused]] size_t totalSize, [[maybe_unused]] size_t firstSize) { - return UMF_RESULT_ERROR_UNKNOWN; + return UMF_RESULT_ERROR_NOT_SUPPORTED; } umf_result_t ext_get_ipc_handle_size([[maybe_unused]] size_t *size) noexcept { - return UMF_RESULT_ERROR_UNKNOWN; + return UMF_RESULT_ERROR_NOT_SUPPORTED; } umf_result_t ext_get_ipc_handle([[maybe_unused]] const void *ptr, [[maybe_unused]] size_t size, [[maybe_unused]] void *providerIpcData) noexcept { - return UMF_RESULT_ERROR_UNKNOWN; + return UMF_RESULT_ERROR_NOT_SUPPORTED; } umf_result_t ext_put_ipc_handle([[maybe_unused]] void *providerIpcData) noexcept { - return UMF_RESULT_ERROR_UNKNOWN; + return UMF_RESULT_ERROR_NOT_SUPPORTED; } umf_result_t ext_open_ipc_handle([[maybe_unused]] void *providerIpcData, [[maybe_unused]] void **ptr) noexcept { - return UMF_RESULT_ERROR_UNKNOWN; + return UMF_RESULT_ERROR_NOT_SUPPORTED; } umf_result_t ext_close_ipc_handle([[maybe_unused]] void *ptr, [[maybe_unused]] size_t size) noexcept { - return UMF_RESULT_ERROR_UNKNOWN; + return UMF_RESULT_ERROR_NOT_SUPPORTED; } umf_result_t ext_ctl([[maybe_unused]] umf_ctl_query_source_t source, diff --git a/test/pools/jemalloc_pool.cpp b/test/pools/jemalloc_pool.cpp index 61115cc71..c87b049c1 100644 --- a/test/pools/jemalloc_pool.cpp +++ b/test/pools/jemalloc_pool.cpp @@ -208,6 +208,25 @@ TEST_F(test, jemallocPoolName) { umfJemallocPoolParamsDestroy(params); } +TEST_F(test, jemallocProviderDoesNotSupportSplit) { + umf_jemalloc_pool_params_handle_t params = nullptr; + umf_result_t res = umfJemallocPoolParamsCreate(&params); + EXPECT_EQ(res, UMF_RESULT_SUCCESS); + + umf_memory_provider_handle_t ba_provider; + umf_result_t ret = + umfMemoryProviderCreate(&BA_GLOBAL_PROVIDER_OPS, nullptr, &ba_provider); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + + umf_memory_pool_handle_t pool = nullptr; + res = 
umfPoolCreate(umfJemallocPoolOps(), ba_provider, params, 0, &pool); + EXPECT_EQ(res, UMF_RESULT_ERROR_NOT_SUPPORTED); + EXPECT_EQ(pool, nullptr); + + umfMemoryProviderDestroy(ba_provider); + umfJemallocPoolParamsDestroy(params); +} + TEST_F(test, jemallocPoolCustomName) { umf_jemalloc_pool_params_handle_t params = nullptr; umf_result_t res = umfJemallocPoolParamsCreate(&params);