Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 5 additions & 9 deletions .github/workflows/pr_push.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,19 +19,15 @@ permissions:
jobs:
CodeChecks:
uses: ./.github/workflows/reusable_checks.yml
FastBuild:
name: Fast builds
needs: [CodeChecks]
uses: ./.github/workflows/reusable_fast.yml
Build:
name: Basic builds
needs: [FastBuild]

uses: ./.github/workflows/reusable_basic.yml
DevDax:
needs: [FastBuild]

uses: ./.github/workflows/reusable_dax.yml
MultiNuma:
needs: [FastBuild]

uses: ./.github/workflows/reusable_multi_numa.yml
L0:
needs: [Build]
Expand All @@ -56,10 +52,10 @@ jobs:
runner: "CUDA"
shared_lib: "['ON']"
Sanitizers:
needs: [FastBuild]

uses: ./.github/workflows/reusable_sanitizers.yml
QEMU:
needs: [FastBuild]

uses: ./.github/workflows/reusable_qemu.yml
with:
short_run: true
Expand Down
2 changes: 2 additions & 0 deletions include/umf/providers/provider_os_memory.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ typedef enum umf_numa_mode_t {
/// umf_numa_split_partition_t can be passed in umf_os_memory_provider_params_t structure
/// to specify other distribution.
UMF_NUMA_MODE_SPLIT,

/// The memory is allocated on the node of the CPU that triggered the
/// allocation. If this mode is specified, nodemask must be NULL and
/// maxnode must be 0.
Expand All @@ -58,6 +59,7 @@ typedef struct umf_numa_split_partition_t {
/// The weight of the partition, representing the proportion of
/// the allocation that should be assigned to this NUMA node.
unsigned weight;

/// The NUMA node where the pages assigned to this partition will be bound.
unsigned target;
} umf_numa_split_partition_t;
Expand Down
113 changes: 113 additions & 0 deletions src/memtargets/memtarget_numa.c
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,8 @@ static umf_result_t numa_get_capacity(void *memTarget, size_t *capacity) {
return UMF_RESULT_ERROR_INVALID_ARGUMENT;
}

#if defined(_WIN32) || defined(__APPLE__)

hwloc_topology_t topology = umfGetTopology();
if (!topology) {
return UMF_RESULT_ERROR_NOT_SUPPORTED;
Expand All @@ -234,6 +236,44 @@ static umf_result_t numa_get_capacity(void *memTarget, size_t *capacity) {
}

*capacity = numaNode->attr->numanode.local_memory;

#else // Linux

struct numa_memtarget_t *numaTarget = (struct numa_memtarget_t *)memTarget;
unsigned node = numaTarget->physical_id;

char path[256];
snprintf(path, sizeof(path), "/sys/devices/system/node/node%u/meminfo",
node);
FILE *file = fopen(path, "r");
if (!file) {
LOG_PDEBUG("Opening sysfs file %s failed", path);
return UMF_RESULT_ERROR_INVALID_ARGUMENT;
}

char line[256];
size_t node_size = 0;
while (fgets(line, sizeof(line), file)) {
// search for the MemTotal line
if (strncmp(line, "Node ", 5) == 0 &&
sscanf(line, "Node %u MemTotal: %zu kB", &node, &node_size) == 2 &&
node == numaTarget->physical_id) {
// convert kB to bytes
node_size *= 1024;
break;
}
}
fclose(file);

if (node_size == 0) {
LOG_ERR("Failed to find MemTotal for node %u", numaTarget->physical_id);
return UMF_RESULT_ERROR_INVALID_ARGUMENT;
}

*capacity = (size_t)node_size;

#endif

return UMF_RESULT_SUCCESS;
}

Expand All @@ -254,9 +294,28 @@ static size_t memattr_get_worst_value(memattr_type_t type) {
}
}

#if !defined(_WIN32) && !defined(__APPLE__)

/*
 * Returns the most favorable value an attribute of the given type can take:
 * SIZE_MAX for bandwidth and 0 for latency.
 *
 * Any other type is a programming error: it trips the assertion in debug
 * builds and yields 0 when assertions are compiled out.
 */
static size_t memattr_get_best_value(memattr_type_t type) {
    if (type == MEMATTR_TYPE_BANDWIDTH) {
        return SIZE_MAX;
    }

    if (type != MEMATTR_TYPE_LATENCY) {
        assert(0); // Should not be reachable
    }

    // Both the latency case and the (unreachable) fallback produce 0.
    return 0;
}

#endif // !defined(_WIN32) && !defined(__APPLE__)

static umf_result_t query_attribute_value(void *srcMemoryTarget,
void *dstMemoryTarget, size_t *value,
memattr_type_t type) {

#if defined(_WIN32) || defined(__APPLE__)

hwloc_topology_t topology = umfGetTopology();
if (!topology) {
LOG_PERR("Retrieving cached topology failed");
Expand Down Expand Up @@ -315,6 +374,60 @@ static umf_result_t query_attribute_value(void *srcMemoryTarget,

*value = memAttrValue;

#else

struct numa_memtarget_t *srcNumaTarget =
(struct numa_memtarget_t *)srcMemoryTarget;
struct numa_memtarget_t *dstNumaTarget =
(struct numa_memtarget_t *)dstMemoryTarget;

if (srcNumaTarget->physical_id == dstNumaTarget->physical_id) {
// If both targets are the same, we return the best possible value.
*value = memattr_get_best_value(type);
return UMF_RESULT_SUCCESS;
}

// For Linux, we use sysfs to query the bandwidth and latency.
char path[256];
if (type == MEMATTR_TYPE_BANDWIDTH) {
snprintf(path, sizeof(path),
"/sys/devices/system/node/node%u/node%u/memory_bandwidth",
srcNumaTarget->physical_id, dstNumaTarget->physical_id);
} else if (type == MEMATTR_TYPE_LATENCY) {
snprintf(path, sizeof(path),
"/sys/devices/system/node/node%u/node%u/memory_latency",
srcNumaTarget->physical_id, dstNumaTarget->physical_id);
} else {
assert(0); // Shouldn't be reachable.
return UMF_RESULT_ERROR_INVALID_ARGUMENT;
}
FILE *file = fopen(path, "r");
if (!file) {
LOG_PDEBUG("Opening sysfs file %s failed", path);
*value = memattr_get_worst_value(type);
return UMF_RESULT_SUCCESS;
}

char line[64];
if (!fgets(line, sizeof(line), file)) {
LOG_PDEBUG("Reading sysfs file %s failed", path);
fclose(file);
*value = memattr_get_worst_value(type);
return UMF_RESULT_SUCCESS;
}
fclose(file);
char *endptr;
long long val = strtoll(line, &endptr, 10);
if (endptr == line || *endptr != '\n' || val < 0) {
LOG_PDEBUG("Parsing sysfs file %s failed", path);
*value = memattr_get_worst_value(type);
return UMF_RESULT_SUCCESS;
}

*value = (size_t)val;

#endif // _WIN32 || __APPLE__

return UMF_RESULT_SUCCESS;
}

Expand Down
81 changes: 79 additions & 2 deletions src/provider/provider_os_memory.c
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,16 @@
#include <assert.h>
#include <errno.h>
#include <limits.h>

#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#if !defined(_WIN32) && !defined(__APPLE__)
#include <numaif.h>
#include <sys/syscall.h>
#endif

#include <umf.h>
#include <umf/base.h>
#include <umf/memory_provider.h>
Expand All @@ -24,6 +29,7 @@
#include "ctl/ctl_internal.h"
#include "libumf.h"
#include "provider_os_memory_internal.h"
#include "topology.h"
#include "utils_assert.h"
#include "utils_common.h"
#include "utils_concurrency.h"
Expand All @@ -32,8 +38,8 @@
#define CTL_PROVIDER_TYPE os_memory_provider_t
#include "provider_ctl_stats_impl.h"

#define MAX_NUMNODES 1024
#define NODESET_STR_BUF_LEN 1024

#define TLS_MSG_BUF_LEN 1024

static const char *DEFAULT_NAME = "OS";
Expand Down Expand Up @@ -152,8 +158,14 @@ static umf_result_t initialize_nodeset(os_memory_provider_t *os_provider,
// Hwloc_set_area_membind fails if empty nodeset is passed so
// if no node is specified, just pass all available nodes.
// For modes where no node is needed, they will be ignored anyway.

#if defined(_WIN32) || defined(__APPLE__)
out_nodeset[0] = hwloc_bitmap_dup(
hwloc_topology_get_complete_nodeset(os_provider->topo));
#else
out_nodeset[0] = hwloc_bitmap_dup(umfGetTopology2());
#endif

if (!out_nodeset[0]) {
goto err_free_list;
}
Expand Down Expand Up @@ -518,6 +530,11 @@ translate_params(const umf_os_memory_provider_params_t *in_params,

provider->numa_flags =
getHwlocMembindFlags(in_params->numa_mode, is_dedicated_node_bind);

#if !defined(_WIN32) && !defined(__APPLE__)
provider->dedicated = is_dedicated_node_bind;
#endif

provider->mode = in_params->numa_mode;
provider->part_size = in_params->part_size;

Expand Down Expand Up @@ -561,6 +578,11 @@ static umf_result_t os_initialize(const void *params, void **provider) {
snprintf(os_provider->name, sizeof(os_provider->name), "%s",
in_params->name);

#if defined(_WIN32) || defined(__APPLE__)

//struct timespec ts_init_start, ts_init_end;
//clock_gettime(CLOCK_MONOTONIC, &ts_init_start);

int r = hwloc_topology_init(&os_provider->topo);
if (r) {
LOG_ERR("HWLOC topology init failed");
Expand All @@ -577,6 +599,13 @@ static umf_result_t os_initialize(const void *params, void **provider) {
goto err_destroy_hwloc_topology;
}

//clock_gettime(CLOCK_MONOTONIC, &ts_init_end);
//LOG_FATAL("HWLOC topology initialized in %ld.%09ld seconds",
// ts_init_end.tv_sec - ts_init_start.tv_sec,
// ts_init_end.tv_nsec - ts_init_start.tv_nsec);

#endif // _WIN32 || __APPLE__

os_provider->fd_offset_map = critnib_new(NULL, NULL);
if (!os_provider->fd_offset_map) {
LOG_ERR("creating file descriptor offset map failed");
Expand Down Expand Up @@ -625,8 +654,11 @@ static umf_result_t os_initialize(const void *params, void **provider) {
err_destroy_critnib:
critnib_delete(os_provider->fd_offset_map);
err_destroy_hwloc_topology:

#if defined(_WIN32) || defined(__APPLE__)
hwloc_topology_destroy(os_provider->topo);
err_free_os_provider:
#endif
umf_ba_global_free(os_provider);
return ret;
}
Expand All @@ -649,7 +681,10 @@ static umf_result_t os_finalize(void *provider) {
if (os_provider->nodeset_str_buf) {
umf_ba_global_free(os_provider->nodeset_str_buf);
}

#if defined(_WIN32) || defined(__APPLE__)
hwloc_topology_destroy(os_provider->topo);
#endif
umf_ba_global_free(os_provider);
return UMF_RESULT_SUCCESS;
}
Expand Down Expand Up @@ -1012,10 +1047,52 @@ static umf_result_t os_alloc(void *provider, size_t size, size_t alignment,

do {
errno = 0;
ret = 0;

#if defined(_WIN32) || defined(__APPLE__)
ret = hwloc_set_area_membind(os_provider->topo, membind.addr,
membind.bind_size, membind.bitmap,
os_provider->numa_policy,
os_provider->numa_flags);
#else // !_WIN32 && !__APPLE__

// NOTE: could we done this

// on Linux, use mbind syscall directly instead of hwloc
unsigned long nodemask = 0;
int maxnode = 8 * sizeof(nodemask); // up to 64 nodes
if (membind.bitmap) {
for (int i = 0; i < maxnode; ++i) {
if (hwloc_bitmap_isset(membind.bitmap, i)) {
nodemask |= (1UL << i);
}
}
}

int mbind_mode = MPOL_DEFAULT;
if (os_provider->mode == UMF_NUMA_MODE_INTERLEAVE &&
os_provider->dedicated == 0) {
mbind_mode = MPOL_INTERLEAVE;
} else if (os_provider->mode == UMF_NUMA_MODE_SPLIT) {
mbind_mode = MPOL_BIND;
} else if (os_provider->mode == UMF_NUMA_MODE_LOCAL) {
mbind_mode = MPOL_LOCAL;
nodemask = 0;
} else if (os_provider->mode == UMF_NUMA_MODE_PREFERRED) {
mbind_mode = MPOL_BIND;
} else if (os_provider->mode == UMF_NUMA_MODE_BIND ||
os_provider->dedicated) {
mbind_mode = MPOL_BIND;
}

unsigned long mbind_flags = 0;
if (os_provider->dedicated) {
mbind_flags |= MPOL_MF_STRICT;
}

ret = syscall(__NR_mbind, membind.addr, membind.bind_size,
mbind_mode, &nodemask, maxnode, mbind_flags);
#endif // !_WIN32 && !__APPLE__

if (ret) {
os_store_last_native_error(UMF_OS_RESULT_ERROR_BIND_FAILED,
Expand Down
6 changes: 6 additions & 0 deletions src/provider/provider_os_memory_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,13 @@ typedef struct os_memory_provider_t {
unsigned partitions_len;
size_t partitions_weight_sum;

#if defined(_WIN32) || defined(__APPLE__)
hwloc_topology_t topo;
#else
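// NOTE: on Linux we avoid keeping a hwloc_topology_t here because of its
// long initialization time; instead we only record whether the binding
// targets a dedicated node (used when calling mbind directly).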
int dedicated;
#endif

char name[64];

Expand Down
Loading
Loading