Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,7 @@ add_library(
src/shuffler/postbox.cpp
src/shuffler/shuffler.cpp
src/statistics.cpp
src/system_info.cpp
src/topology_discovery.cpp
src/utils.cpp
)
Expand Down
12 changes: 1 addition & 11 deletions cpp/include/rapidsmpf/bootstrap/utils.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES.
* SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES.
* SPDX-License-Identifier: Apache-2.0
*/

Expand All @@ -24,16 +24,6 @@ namespace rapidsmpf::bootstrap {
*/
std::string get_current_cpu_affinity();

/**
* @brief Get current NUMA node(s) for memory binding.
*
* Queries the NUMA node associated with the CPU the current process is running on.
* This is a best-effort approach and may not be accurate in all cases.
*
* @return Vector of NUMA node IDs. Empty if NUMA is not available or detection fails.
*/
std::vector<int> get_current_numa_nodes();

/**
* @brief Get UCX_NET_DEVICES from environment.
*
Expand Down
6 changes: 3 additions & 3 deletions cpp/include/rapidsmpf/memory/pinned_memory_resource.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@

#include <rapidsmpf/error.hpp>
#include <rapidsmpf/memory/host_memory_resource.hpp>
#include <rapidsmpf/memory/pinned_memory_resource.hpp>
#include <rapidsmpf/system_info.hpp>
#include <rapidsmpf/utils.hpp>


Expand Down Expand Up @@ -87,7 +87,7 @@ class PinnedMemoryResource final : public HostMemoryResource {
* @throws rapidsmpf::cuda_error If pinned host memory pools are not supported by
* the current CUDA version or if CUDA initialization fails.
*/
PinnedMemoryResource(int numa_id = get_current_numa_node_id());
PinnedMemoryResource(int numa_id = get_current_numa_node());

/**
* @brief Create a pinned memory resource if the system supports pinned memory.
Expand All @@ -101,7 +101,7 @@ class PinnedMemoryResource final : public HostMemoryResource {
* @see PinnedMemoryResource::PinnedMemoryResource
*/
static std::shared_ptr<PinnedMemoryResource> make_if_available(
int numa_id = get_current_numa_node_id()
int numa_id = get_current_numa_node()
);

~PinnedMemoryResource() override;
Expand Down
83 changes: 83 additions & 0 deletions cpp/include/rapidsmpf/system_info.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
/**
* SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES.
* SPDX-License-Identifier: Apache-2.0
*/
#pragma once

#include <cstdint>
#include <vector>

namespace rapidsmpf {

/**
* @brief Get the total amount of system memory.
*
* The value is computed as the system page size multiplied by the number of
* physical pages, both obtained through `sysconf`. It is queried on the first
* call and cached, so subsequent calls return the same value without querying
* the system again.
*
* @return Total host memory in bytes.
*
* @note On WSL and in containerized environments, the returned value
* reflects the memory visible to the Linux kernel instance, which may
* differ from the physical memory of the host.
*
* @note Terminates the process if `sysconf(_SC_PAGE_SIZE)` or
* `sysconf(_SC_PHYS_PAGES)` fails.
*/
std::uint64_t get_total_host_memory() noexcept;

/**
* @brief Get the NUMA node ID associated with the calling CPU thread.
*
* A NUMA (Non-Uniform Memory Access) node represents a group of CPU cores and
* memory that have faster access to each other than to memory attached to
* other nodes. On NUMA systems, binding allocations and threads to the same
* NUMA node can significantly reduce memory access latency and improve
* bandwidth.
*
* This function returns the NUMA node of the CPU on which the calling thread
* was executing at the time of the first call, as determined by the operating
* system's CPU and memory topology.
*
* @note The node is determined once, on the first call, and cached for the
* lifetime of the process. Later calls return that same value even if the
* thread has since migrated to a CPU on another node, or if the call is made
* from a different thread.
*
* If NUMA support is not available on the system or cannot be queried, the
* function returns 0, which corresponds to the single implicit NUMA node on
* non-NUMA systems. The same applies when the library is built without NUMA
* support.
*
* @return The NUMA node ID of the calling thread, or 0 if NUMA is unavailable.
*/
int get_current_numa_node() noexcept;

/**
* @brief Get current NUMA node(s) for memory binding.
*
* Queries the NUMA node associated with the CPU on which the calling thread is
* currently executing. This is a best-effort approach and may not be accurate
* in all cases.
*
* Since processes are typically scheduled on CPUs that are local to their
* memory, using the CPU's NUMA node (via `numa_node_of_cpu`) provides a
* reasonable approximation that works well in practice for topology-aware
* binding scenarios. This intentionally avoids querying the process memory
* binding policy programmatically.
*
* If NUMA support is not available or the NUMA node cannot be determined, the
* function returns a vector containing a single element, `0`, which corresponds
* to the single implicit NUMA node on non-NUMA systems.
*
* @note The query is performed on every call; the result is not cached, so it
* may differ between calls if the thread migrates between CPUs.
*
* @note The function is `noexcept` but allocates the returned vector; an
* allocation failure therefore terminates the process.
*
* @return Vector of NUMA node IDs associated with the calling thread. The
* vector is never empty.
*/
std::vector<int> get_current_numa_nodes() noexcept;

/**
* @brief Get the total amount of host memory for a NUMA node.
*
* @param numa_id
* NUMA node for which to query the total host memory. Defaults to the
* current NUMA node as returned by `get_current_numa_node()`.
*
* @note If NUMA support is not available or the node size cannot be
* determined, this function falls back to returning the total host memory.
* The same fallback applies when the library is built without NUMA support,
* in which case `numa_id` is ignored.
*
* @return Total host memory of the NUMA node in bytes.
*/
std::uint64_t get_numa_node_host_memory(int numa_id = get_current_numa_node()) noexcept;


} // namespace rapidsmpf
29 changes: 8 additions & 21 deletions cpp/include/rapidsmpf/utils.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES.
* SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES.
* SPDX-License-Identifier: Apache-2.0
*/
#pragma once
Expand All @@ -17,11 +17,17 @@
#include <utility>
#include <vector>

#include <cuda.h>
#include <cuda_runtime_api.h>

namespace rapidsmpf {

/** @brief Helper macro to check if the CUDA version is at least the specified version.
*
* @param version The minimum CUDA version to check against. Must be in the format of
* MAJOR*1000 + MINOR*10.
*/
#define RAPIDSMPF_CUDA_VERSION_AT_LEAST(version) (CUDART_VERSION >= version)

/// Alias for high-resolution clock from the chrono library.
using Clock = std::chrono::high_resolution_clock;
/// Alias for a duration type representing time in seconds as a double.
Expand Down Expand Up @@ -382,25 +388,6 @@ struct overloaded : Ts... {
using Ts::operator()...;
};

/// @brief Helper macro to check if the CUDA version is at least the specified version.
/// @param version The minimum CUDA version to check against. Must be in the format of
/// MAJOR*1000 + MINOR*10.
#define RAPIDSMPF_CUDA_VERSION_AT_LEAST(version) (CUDART_VERSION >= version)


/**
* @brief Gets the NUMA node ID of the current CPU process.
*
* @note This function is only available if built with NUMA support. (See
* `RAPIDSMPF_NUMA_SUPPORT` CMake option.)
*
* @return The NUMA node ID of the current CPU process.
*
* @throws std::runtime_error If built with NUMA support but libnuma is not available
* at runtime or if the NUMA node ID cannot be retrieved.
*/
int get_current_numa_node_id();

/**
* @brief Backport of `std::ranges::contains` from C++23 for C++20.
*
Expand Down
4 changes: 1 addition & 3 deletions cpp/src/bootstrap/bootstrap.cpp
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
/**
* SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES.
* SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES.
* SPDX-License-Identifier: Apache-2.0
*/

#include <chrono>
#include <cstdlib>
#include <sstream>
#include <stdexcept>
#include <string_view>

Expand Down
25 changes: 1 addition & 24 deletions cpp/src/bootstrap/utils.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES.
* SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES.
* SPDX-License-Identifier: Apache-2.0
*/

Expand Down Expand Up @@ -72,29 +72,6 @@ std::string get_current_cpu_affinity() {
return oss.str();
}

// Best-effort lookup of the NUMA node of the CPU the caller is running on.
// Returns a one-element vector holding that node, or an empty vector when the
// build has no NUMA support, libnuma reports NUMA as unavailable, or the
// CPU / node lookup fails.
std::vector<int> get_current_numa_nodes() {
#if RAPIDSMPF_HAVE_NUMA
    if (numa_available() != -1) {
        // Processes are typically bound to CPUs on the same NUMA node as their
        // memory, so the node of the current CPU (numa_node_of_cpu) is a
        // reasonable approximation for topology-aware binding. This
        // deliberately avoids reading the memory binding policy for now.
        int const current_cpu = sched_getcpu();
        if (current_cpu >= 0) {
            int const node = numa_node_of_cpu(current_cpu);
            if (node >= 0) {
                return std::vector<int>{node};
            }
        }
    }
#endif
    return std::vector<int>{};
}

std::string get_ucx_net_devices() {
char* env = std::getenv("UCX_NET_DEVICES");
return env ? std::string(env) : std::string();
Expand Down
86 changes: 86 additions & 0 deletions cpp/src/system_info.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
/**
* SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES.
* SPDX-License-Identifier: Apache-2.0
*/


#include <exception>
#include <iostream>

#include <sched.h>
#include <unistd.h>

#include <rapidsmpf/system_info.hpp>


#if RAPIDSMPF_HAVE_NUMA
#include <numa.h>
#endif

namespace rapidsmpf {

// Returns the total host memory in bytes (page size times physical page
// count). The value is computed on the first call and cached afterwards.
// If either sysconf query fails, an error is printed to stderr and the
// process is terminated.
std::uint64_t get_total_host_memory() noexcept {
    static std::uint64_t const total_bytes = []() -> std::uint64_t {
        long const bytes_per_page = ::sysconf(_SC_PAGE_SIZE);
        long const num_pages = ::sysconf(_SC_PHYS_PAGES);

        bool const query_failed = (bytes_per_page == -1) || (num_pages == -1);
        if (query_failed) {
            std::cerr << "get_total_host_memory() - fatal error: "
                      << "sysconf(_SC_PAGE_SIZE/_SC_PHYS_PAGES) failed" << std::endl;
            std::terminate();
        }

        auto const page_bytes = static_cast<std::uint64_t>(bytes_per_page);
        auto const page_count = static_cast<std::uint64_t>(num_pages);
        return page_bytes * page_count;
    }();
    return total_bytes;
}

// Returns the NUMA node of the CPU the calling thread was running on at the
// time of the first call. The result is cached in a function-local static, so
// every later call returns that same node.
//
// Returns 0 (the single implicit node of a non-NUMA system) when:
//   - the build has no NUMA support,
//   - libnuma reports that NUMA is unavailable,
//   - the current CPU cannot be determined, or
//   - the CPU cannot be mapped to a node.
//
// The result is therefore never negative, which matters because callers pass
// it on as a node ID (e.g. as a default argument).
int get_current_numa_node() noexcept {
#if RAPIDSMPF_HAVE_NUMA
    static int const cached_node = [] {
        if (numa_available() == -1) {
            return 0;
        }
        // sched_getcpu() returns -1 on failure; do not feed that to libnuma.
        int const cpu = ::sched_getcpu();
        if (cpu < 0) {
            return 0;
        }
        // numa_node_of_cpu() returns -1 when the CPU has no known node.
        int const node = numa_node_of_cpu(cpu);
        return node >= 0 ? node : 0;
    }();
    return cached_node;
#else
    return 0;
#endif
}

// Returns a one-element vector with the NUMA node of the CPU the calling
// thread is currently running on. Falls back to {0} when the build has no NUMA
// support, NUMA is unavailable, or the CPU / node lookup fails. The lookup is
// repeated on every call.
std::vector<int> get_current_numa_nodes() noexcept {
#if RAPIDSMPF_HAVE_NUMA
    int const current_cpu = ::sched_getcpu();
    if (numa_available() != -1 && current_cpu >= 0) {
        int const node = numa_node_of_cpu(current_cpu);
        if (node >= 0) {
            return std::vector<int>{node};
        }
    }
#endif
    // Nothing could be determined: report the implicit node 0.
    return std::vector<int>{0};
}

// Returns the total memory, in bytes, of NUMA node `numa_id`.
// Falls back to get_total_host_memory() when the build has no NUMA support
// (in which case `numa_id` is unused), when NUMA is unavailable, or when
// numa_node_size64() reports failure (-1).
std::uint64_t get_numa_node_host_memory([[maybe_unused]] int numa_id) noexcept {
#if RAPIDSMPF_HAVE_NUMA
    if (numa_available() != -1) {
        // numa_node_size64() also reports the free memory; that value is not used.
        long long free_bytes_unused = 0;
        long long const node_bytes = numa_node_size64(numa_id, &free_bytes_unused);
        if (node_bytes != -1) {
            return static_cast<std::uint64_t>(node_bytes);
        }
    }
#endif
    return get_total_host_memory();
}

} // namespace rapidsmpf
27 changes: 2 additions & 25 deletions cpp/src/utils.cpp
Original file line number Diff line number Diff line change
@@ -1,14 +1,10 @@
/**
* SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES.
* SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES.
* SPDX-License-Identifier: Apache-2.0
*/

#include <algorithm>

#if RAPIDSMPF_HAVE_NUMA
#include <numa.h>
#include <sched.h>
#endif
#include <ranges>

#include <rapidsmpf/error.hpp>
#include <rapidsmpf/utils.hpp>
Expand Down Expand Up @@ -75,23 +71,4 @@ bool parse_string(std::string const& value) {
throw std::invalid_argument("cannot parse \"" + std::string{value} + "\"");
}

// Returns the NUMA node ID of the CPU the caller is running on. With NUMA
// support compiled in, the ID is resolved on the first successful call and
// cached; std::runtime_error is raised if NUMA is unavailable or the node
// cannot be determined. Without NUMA support the function returns 0.
int get_current_numa_node_id() {
#if RAPIDSMPF_HAVE_NUMA
    static int const cached_node_id = []() -> int {
        RAPIDSMPF_EXPECTS(
            numa_available() != -1, "NUMA is not available", std::runtime_error
        );
        int const current_cpu = sched_getcpu();
        int const node = numa_node_of_cpu(current_cpu);
        RAPIDSMPF_EXPECTS(node >= 0, "failed to get NUMA node ID", std::runtime_error);
        return node;
    }();
    return cached_node_id;
#else
    return 0;
#endif
}

} // namespace rapidsmpf
1 change: 1 addition & 0 deletions cpp/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ target_sources(
test_shuffler.cpp
test_spill_manager.cpp
test_statistics.cpp
test_system_utils.cpp
test_utils.cpp
)

Expand Down
Loading