Skip to content

Commit 273195f

Browse files
committed
x
1 parent 67e6263 commit 273195f

File tree

8 files changed

+285
-14
lines changed

8 files changed

+285
-14
lines changed

.github/workflows/pr_push.yml

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -19,19 +19,15 @@ permissions:
1919
jobs:
2020
CodeChecks:
2121
uses: ./.github/workflows/reusable_checks.yml
22-
FastBuild:
23-
name: Fast builds
24-
needs: [CodeChecks]
25-
uses: ./.github/workflows/reusable_fast.yml
2622
Build:
2723
name: Basic builds
28-
needs: [FastBuild]
24+
2925
uses: ./.github/workflows/reusable_basic.yml
3026
DevDax:
31-
needs: [FastBuild]
27+
3228
uses: ./.github/workflows/reusable_dax.yml
3329
MultiNuma:
34-
needs: [FastBuild]
30+
3531
uses: ./.github/workflows/reusable_multi_numa.yml
3632
L0:
3733
needs: [Build]
@@ -56,10 +52,10 @@ jobs:
5652
runner: "CUDA"
5753
shared_lib: "['ON']"
5854
Sanitizers:
59-
needs: [FastBuild]
55+
6056
uses: ./.github/workflows/reusable_sanitizers.yml
6157
QEMU:
62-
needs: [FastBuild]
58+
6359
uses: ./.github/workflows/reusable_qemu.yml
6460
with:
6561
short_run: true

include/umf/providers/provider_os_memory.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ typedef enum umf_numa_mode_t {
4646
/// umf_numa_split_partition_t can be passed in umf_os_memory_provider_params_t structure
4747
/// to specify other distribution.
4848
UMF_NUMA_MODE_SPLIT,
49+
4950
/// The memory is allocated on the node of the CPU that triggered the
5051
/// allocation. If this mode is specified, nodemask must be NULL and
5152
/// maxnode must be 0.
@@ -58,6 +59,7 @@ typedef struct umf_numa_split_partition_t {
5859
/// The weight of the partition, representing the proportion of
5960
/// the allocation that should be assigned to this NUMA node.
6061
unsigned weight;
62+
6163
/// The NUMA node where the pages assigned to this partition will be bound.
6264
unsigned target;
6365
} umf_numa_split_partition_t;

src/memtargets/memtarget_numa.c

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -218,6 +218,8 @@ static umf_result_t numa_get_capacity(void *memTarget, size_t *capacity) {
218218
return UMF_RESULT_ERROR_INVALID_ARGUMENT;
219219
}
220220

221+
#if defined(_WIN32) || defined(__APPLE__)
222+
221223
hwloc_topology_t topology = umfGetTopology();
222224
if (!topology) {
223225
return UMF_RESULT_ERROR_NOT_SUPPORTED;
@@ -234,6 +236,44 @@ static umf_result_t numa_get_capacity(void *memTarget, size_t *capacity) {
234236
}
235237

236238
*capacity = numaNode->attr->numanode.local_memory;
239+
240+
#else // Linux
241+
242+
struct numa_memtarget_t *numaTarget = (struct numa_memtarget_t *)memTarget;
243+
unsigned node = numaTarget->physical_id;
244+
245+
char path[256];
246+
snprintf(path, sizeof(path), "/sys/devices/system/node/node%u/meminfo",
247+
node);
248+
FILE *file = fopen(path, "r");
249+
if (!file) {
250+
LOG_PDEBUG("Opening sysfs file %s failed", path);
251+
return UMF_RESULT_ERROR_INVALID_ARGUMENT;
252+
}
253+
254+
char line[256];
255+
size_t node_size = 0;
256+
while (fgets(line, sizeof(line), file)) {
257+
// search for the MemTotal line
258+
if (strncmp(line, "Node ", 5) == 0 &&
259+
sscanf(line, "Node %u MemTotal: %zu kB", &node, &node_size) == 2 &&
260+
node == numaTarget->physical_id) {
261+
// convert kB to bytes
262+
node_size *= 1024;
263+
break;
264+
}
265+
}
266+
fclose(file);
267+
268+
if (node_size == 0) {
269+
LOG_ERR("Failed to find MemTotal for node %u", numaTarget->physical_id);
270+
return UMF_RESULT_ERROR_INVALID_ARGUMENT;
271+
}
272+
273+
*capacity = (size_t)node_size;
274+
275+
#endif
276+
237277
return UMF_RESULT_SUCCESS;
238278
}
239279

@@ -254,9 +294,28 @@ static size_t memattr_get_worst_value(memattr_type_t type) {
254294
}
255295
}
256296

297+
#if !defined(_WIN32) && !defined(__APPLE__)
298+
299+
// Returns the best value an attribute of the given type can take:
// SIZE_MAX for MEMATTR_TYPE_BANDWIDTH and 0 for MEMATTR_TYPE_LATENCY.
// Any other type is treated as a programming error: it trips assert(0),
// and the function falls through to return 0 if the assertion does not abort.
static size_t memattr_get_best_value(memattr_type_t type) {
300+
switch (type) {
301+
case MEMATTR_TYPE_BANDWIDTH:
302+
return SIZE_MAX;
303+
case MEMATTR_TYPE_LATENCY:
304+
return 0;
305+
default:
306+
assert(0); // Should not be reachable
307+
return 0;
308+
}
309+
}
310+
311+
#endif // !defined(_WIN32) && !defined(__APPLE__)
312+
257313
static umf_result_t query_attribute_value(void *srcMemoryTarget,
258314
void *dstMemoryTarget, size_t *value,
259315
memattr_type_t type) {
316+
317+
#if defined(_WIN32) || defined(__APPLE__)
318+
260319
hwloc_topology_t topology = umfGetTopology();
261320
if (!topology) {
262321
LOG_PERR("Retrieving cached topology failed");
@@ -315,6 +374,60 @@ static umf_result_t query_attribute_value(void *srcMemoryTarget,
315374

316375
*value = memAttrValue;
317376

377+
#else
378+
379+
struct numa_memtarget_t *srcNumaTarget =
380+
(struct numa_memtarget_t *)srcMemoryTarget;
381+
struct numa_memtarget_t *dstNumaTarget =
382+
(struct numa_memtarget_t *)dstMemoryTarget;
383+
384+
if (srcNumaTarget->physical_id == dstNumaTarget->physical_id) {
385+
// If both targets are the same, we return the best possible value.
386+
*value = memattr_get_best_value(type);
387+
return UMF_RESULT_SUCCESS;
388+
}
389+
390+
// For Linux, we use sysfs to query the bandwidth and latency.
391+
char path[256];
392+
if (type == MEMATTR_TYPE_BANDWIDTH) {
393+
snprintf(path, sizeof(path),
394+
"/sys/devices/system/node/node%u/node%u/memory_bandwidth",
395+
srcNumaTarget->physical_id, dstNumaTarget->physical_id);
396+
} else if (type == MEMATTR_TYPE_LATENCY) {
397+
snprintf(path, sizeof(path),
398+
"/sys/devices/system/node/node%u/node%u/memory_latency",
399+
srcNumaTarget->physical_id, dstNumaTarget->physical_id);
400+
} else {
401+
assert(0); // Shouldn't be reachable.
402+
return UMF_RESULT_ERROR_INVALID_ARGUMENT;
403+
}
404+
FILE *file = fopen(path, "r");
405+
if (!file) {
406+
LOG_PDEBUG("Opening sysfs file %s failed", path);
407+
*value = memattr_get_worst_value(type);
408+
return UMF_RESULT_SUCCESS;
409+
}
410+
411+
char line[64];
412+
if (!fgets(line, sizeof(line), file)) {
413+
LOG_PDEBUG("Reading sysfs file %s failed", path);
414+
fclose(file);
415+
*value = memattr_get_worst_value(type);
416+
return UMF_RESULT_SUCCESS;
417+
}
418+
fclose(file);
419+
char *endptr;
420+
long long val = strtoll(line, &endptr, 10);
421+
if (endptr == line || *endptr != '\n' || val < 0) {
422+
LOG_PDEBUG("Parsing sysfs file %s failed", path);
423+
*value = memattr_get_worst_value(type);
424+
return UMF_RESULT_SUCCESS;
425+
}
426+
427+
*value = (size_t)val;
428+
429+
#endif // _WIN32 || __APPLE__
430+
318431
return UMF_RESULT_SUCCESS;
319432
}
320433

src/provider/provider_os_memory.c

Lines changed: 79 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,16 @@
88
#include <assert.h>
99
#include <errno.h>
1010
#include <limits.h>
11-
1211
#include <stddef.h>
1312
#include <stdio.h>
1413
#include <stdlib.h>
1514
#include <string.h>
15+
16+
#if !defined(_WIN32) && !defined(__APPLE__)
17+
#include <numaif.h>
18+
#include <sys/syscall.h>
19+
#endif
20+
1621
#include <umf.h>
1722
#include <umf/base.h>
1823
#include <umf/memory_provider.h>
@@ -24,6 +29,7 @@
2429
#include "ctl/ctl_internal.h"
2530
#include "libumf.h"
2631
#include "provider_os_memory_internal.h"
32+
#include "topology.h"
2733
#include "utils_assert.h"
2834
#include "utils_common.h"
2935
#include "utils_concurrency.h"
@@ -32,8 +38,8 @@
3238
#define CTL_PROVIDER_TYPE os_memory_provider_t
3339
#include "provider_ctl_stats_impl.h"
3440

41+
#define MAX_NUMNODES 1024
3542
#define NODESET_STR_BUF_LEN 1024
36-
3743
#define TLS_MSG_BUF_LEN 1024
3844

3945
static const char *DEFAULT_NAME = "OS";
@@ -152,8 +158,14 @@ static umf_result_t initialize_nodeset(os_memory_provider_t *os_provider,
152158
// hwloc_set_area_membind fails if an empty nodeset is passed, so
153159
// if no node is specified, just pass all available nodes.
154160
// For modes where no node is needed, they will be ignored anyway.
161+
162+
#if defined(_WIN32) || defined(__APPLE__)
155163
out_nodeset[0] = hwloc_bitmap_dup(
156164
hwloc_topology_get_complete_nodeset(os_provider->topo));
165+
#else
166+
out_nodeset[0] = hwloc_bitmap_dup(umfGetTopology2());
167+
#endif
168+
157169
if (!out_nodeset[0]) {
158170
goto err_free_list;
159171
}
@@ -518,6 +530,11 @@ translate_params(const umf_os_memory_provider_params_t *in_params,
518530

519531
provider->numa_flags =
520532
getHwlocMembindFlags(in_params->numa_mode, is_dedicated_node_bind);
533+
534+
#if !defined(_WIN32) && !defined(__APPLE__)
535+
provider->dedicated = is_dedicated_node_bind;
536+
#endif
537+
521538
provider->mode = in_params->numa_mode;
522539
provider->part_size = in_params->part_size;
523540

@@ -561,6 +578,11 @@ static umf_result_t os_initialize(const void *params, void **provider) {
561578
snprintf(os_provider->name, sizeof(os_provider->name), "%s",
562579
in_params->name);
563580

581+
#if defined(_WIN32) || defined(__APPLE__)
582+
583+
//struct timespec ts_init_start, ts_init_end;
584+
//clock_gettime(CLOCK_MONOTONIC, &ts_init_start);
585+
564586
int r = hwloc_topology_init(&os_provider->topo);
565587
if (r) {
566588
LOG_ERR("HWLOC topology init failed");
@@ -577,6 +599,13 @@ static umf_result_t os_initialize(const void *params, void **provider) {
577599
goto err_destroy_hwloc_topology;
578600
}
579601

602+
//clock_gettime(CLOCK_MONOTONIC, &ts_init_end);
603+
//LOG_FATAL("HWLOC topology initialized in %ld.%09ld seconds",
604+
// ts_init_end.tv_sec - ts_init_start.tv_sec,
605+
// ts_init_end.tv_nsec - ts_init_start.tv_nsec);
606+
607+
#endif // _WIN32 || __APPLE__
608+
580609
os_provider->fd_offset_map = critnib_new(NULL, NULL);
581610
if (!os_provider->fd_offset_map) {
582611
LOG_ERR("creating file descriptor offset map failed");
@@ -625,8 +654,11 @@ static umf_result_t os_initialize(const void *params, void **provider) {
625654
err_destroy_critnib:
626655
critnib_delete(os_provider->fd_offset_map);
627656
err_destroy_hwloc_topology:
657+
658+
#if defined(_WIN32) || defined(__APPLE__)
628659
hwloc_topology_destroy(os_provider->topo);
629660
err_free_os_provider:
661+
#endif
630662
umf_ba_global_free(os_provider);
631663
return ret;
632664
}
@@ -649,7 +681,10 @@ static umf_result_t os_finalize(void *provider) {
649681
if (os_provider->nodeset_str_buf) {
650682
umf_ba_global_free(os_provider->nodeset_str_buf);
651683
}
684+
685+
#if defined(_WIN32) || defined(__APPLE__)
652686
hwloc_topology_destroy(os_provider->topo);
687+
#endif
653688
umf_ba_global_free(os_provider);
654689
return UMF_RESULT_SUCCESS;
655690
}
@@ -1012,10 +1047,52 @@ static umf_result_t os_alloc(void *provider, size_t size, size_t alignment,
10121047

10131048
do {
10141049
errno = 0;
1050+
ret = 0;
1051+
1052+
#if defined(_WIN32) || defined(__APPLE__)
10151053
ret = hwloc_set_area_membind(os_provider->topo, membind.addr,
10161054
membind.bind_size, membind.bitmap,
10171055
os_provider->numa_policy,
10181056
os_provider->numa_flags);
1057+
#else // !_WIN32 && !__APPLE__
1058+
1059+
// NOTE: could we done this
1060+
1061+
// on Linux, use mbind syscall directly instead of hwloc
1062+
unsigned long nodemask = 0;
1063+
int maxnode = 8 * sizeof(nodemask); // up to 64 nodes
1064+
if (membind.bitmap) {
1065+
for (int i = 0; i < maxnode; ++i) {
1066+
if (hwloc_bitmap_isset(membind.bitmap, i)) {
1067+
nodemask |= (1UL << i);
1068+
}
1069+
}
1070+
}
1071+
1072+
int mbind_mode = MPOL_DEFAULT;
1073+
if (os_provider->mode == UMF_NUMA_MODE_INTERLEAVE &&
1074+
os_provider->dedicated == 0) {
1075+
mbind_mode = MPOL_INTERLEAVE;
1076+
} else if (os_provider->mode == UMF_NUMA_MODE_SPLIT) {
1077+
mbind_mode = MPOL_BIND;
1078+
} else if (os_provider->mode == UMF_NUMA_MODE_LOCAL) {
1079+
mbind_mode = MPOL_LOCAL;
1080+
nodemask = 0;
1081+
} else if (os_provider->mode == UMF_NUMA_MODE_PREFERRED) {
1082+
mbind_mode = MPOL_BIND;
1083+
} else if (os_provider->mode == UMF_NUMA_MODE_BIND ||
1084+
os_provider->dedicated) {
1085+
mbind_mode = MPOL_BIND;
1086+
}
1087+
1088+
unsigned long mbind_flags = 0;
1089+
if (os_provider->dedicated) {
1090+
mbind_flags |= MPOL_MF_STRICT;
1091+
}
1092+
1093+
ret = syscall(__NR_mbind, membind.addr, membind.bind_size,
1094+
mbind_mode, &nodemask, maxnode, mbind_flags);
1095+
#endif // !_WIN32 && !__APPLE__
10191096

10201097
if (ret) {
10211098
os_store_last_native_error(UMF_OS_RESULT_ERROR_BIND_FAILED,

src/provider/provider_os_memory_internal.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,13 @@ typedef struct os_memory_provider_t {
6868
unsigned partitions_len;
6969
size_t partitions_weight_sum;
7070

71+
#if defined(_WIN32) || defined(__APPLE__)
7172
hwloc_topology_t topo;
73+
#else
74+
// NOTE: on Linux we don't want to use hwloc_topology_t directly because
75+
// of its long initialization time
76+
int dedicated;
77+
#endif
7278

7379
char name[64];
7480

0 commit comments

Comments
 (0)