Skip to content

Commit e8d1ee0

Browse files
committed
x
1 parent 3c015d4 commit e8d1ee0

File tree

6 files changed

+157
-27
lines changed

6 files changed

+157
-27
lines changed

include/umf/providers/provider_os_memory.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ typedef enum umf_numa_mode_t {
4646
/// umf_numa_split_partition_t can be passed in umf_os_memory_provider_params_t structure
4747
/// to specify other distribution.
4848
UMF_NUMA_MODE_SPLIT,
49+
4950
/// The memory is allocated on the node of the CPU that triggered the
5051
/// allocation. If this mode is specified, nodemask must be NULL and
5152
/// maxnode must be 0.

src/libumf.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,8 @@ umf_result_t umfInit(void) {
4444
utils_mutex_lock(&initMutex);
4545

4646
if (umfRefCount == 0) {
47+
LOG_FATAL("umfInit");
48+
4749
utils_log_init();
4850
umf_result_t umf_result = umfMemoryTrackerCreate(&TRACKER);
4951
if (umf_result != UMF_RESULT_SUCCESS) {

src/provider/provider_os_memory.c

Lines changed: 119 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,13 @@
88
#include <assert.h>
99
#include <errno.h>
1010
#include <limits.h>
11-
11+
#include <numaif.h>
1212
#include <stddef.h>
1313
#include <stdio.h>
1414
#include <stdlib.h>
1515
#include <string.h>
16+
#include <sys/syscall.h>
17+
1618
#include <umf.h>
1719
#include <umf/base.h>
1820
#include <umf/memory_provider.h>
@@ -32,8 +34,8 @@
3234
#define CTL_PROVIDER_TYPE os_memory_provider_t
3335
#include "provider_ctl_stats_impl.h"
3436

37+
#define MAX_NUMNODES 1024
3538
#define NODESET_STR_BUF_LEN 1024
36-
3739
#define TLS_MSG_BUF_LEN 1024
3840

3941
typedef struct umf_os_memory_provider_params_t {
@@ -149,11 +151,47 @@ static umf_result_t initialize_nodeset(os_memory_provider_t *os_provider,
149151
// Hwloc_set_area_membind fails if empty nodeset is passed so
150152
// if no node is specified, just pass all available nodes.
151153
// For modes where no node is needed, they will be ignored anyway.
152-
out_nodeset[0] = hwloc_bitmap_dup(
153-
hwloc_topology_get_complete_nodeset(os_provider->topo));
154+
155+
size_t *nodes = umf_ba_global_alloc(sizeof(size_t) * MAX_NUMNODES);
156+
if (!nodes) {
157+
umf_ba_global_free(out_nodeset);
158+
os_provider->nodeset_len = 0;
159+
return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY;
160+
}
161+
162+
size_t num = 0;
163+
int ret = utils_get_complete_nodeset(nodes, MAX_NUMNODES, &num);
164+
if (ret < 0) {
165+
umf_ba_global_free(out_nodeset);
166+
os_provider->nodeset_len = 0;
167+
umf_ba_global_free(nodes);
168+
return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY;
169+
}
170+
171+
hwloc_bitmap_t complete_nodeset = hwloc_bitmap_alloc();
172+
if (!complete_nodeset) {
173+
umf_ba_global_free(out_nodeset);
174+
os_provider->nodeset_len = 0;
175+
umf_ba_global_free(nodes);
176+
return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY;
177+
}
178+
179+
for (size_t i = 0; i < num; i++) {
180+
if (hwloc_bitmap_set(complete_nodeset, (int)nodes[i])) {
181+
umf_ba_global_free(out_nodeset);
182+
os_provider->nodeset_len = 0;
183+
hwloc_bitmap_free(complete_nodeset);
184+
umf_ba_global_free(nodes);
185+
return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY;
186+
}
187+
}
188+
umf_ba_global_free(nodes);
189+
190+
out_nodeset[0] = hwloc_bitmap_dup(complete_nodeset);
154191
if (!out_nodeset[0]) {
155192
goto err_free_list;
156193
}
194+
hwloc_bitmap_free(complete_nodeset);
157195
return UMF_RESULT_SUCCESS;
158196
}
159197

@@ -515,6 +553,11 @@ translate_params(const umf_os_memory_provider_params_t *in_params,
515553

516554
provider->numa_flags =
517555
getHwlocMembindFlags(in_params->numa_mode, is_dedicated_node_bind);
556+
557+
//
558+
provider->mbind_mode = in_params->numa_mode;
559+
provider->dedicated = is_dedicated_node_bind;
560+
518561
provider->mode = in_params->numa_mode;
519562
provider->part_size = in_params->part_size;
520563

@@ -556,21 +599,34 @@ static umf_result_t os_initialize(const void *params, void **provider) {
556599

557600
memset(os_provider, 0, sizeof(*os_provider));
558601

559-
int r = hwloc_topology_init(&os_provider->topo);
560-
if (r) {
561-
LOG_ERR("HWLOC topology init failed");
562-
ret = UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY;
563-
goto err_free_os_provider;
564-
}
565-
566-
r = hwloc_topology_load(os_provider->topo);
567-
if (r) {
568-
os_store_last_native_error(UMF_OS_RESULT_ERROR_TOPO_DISCOVERY_FAILED,
569-
0);
570-
LOG_ERR("HWLOC topology discovery failed");
571-
ret = UMF_RESULT_ERROR_MEMORY_PROVIDER_SPECIFIC;
572-
goto err_destroy_hwloc_topology;
573-
}
602+
//struct timespec ts_init_start, ts_init_end;
603+
//clock_gettime(CLOCK_MONOTONIC, &ts_init_start);
604+
605+
//int r = hwloc_topology_init(&os_provider->topo);
606+
//if (r) {
607+
// LOG_ERR("HWLOC topology init failed");
608+
// ret = UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY;
609+
// goto err_free_os_provider;
610+
//}
611+
612+
//hwloc_topology_set_all_types_filter(os_provider->topo,
613+
// HWLOC_TYPE_FILTER_KEEP_NONE);
614+
//hwloc_topology_set_type_filter(os_provider->topo, HWLOC_OBJ_CORE,
615+
// HWLOC_TYPE_FILTER_KEEP_ALL);
616+
617+
//r = hwloc_topology_load(os_provider->topo);
618+
//if (r) {
619+
// os_store_last_native_error(UMF_OS_RESULT_ERROR_TOPO_DISCOVERY_FAILED,
620+
// 0);
621+
// LOG_ERR("HWLOC topology discovery failed");
622+
// ret = UMF_RESULT_ERROR_MEMORY_PROVIDER_SPECIFIC;
623+
// goto err_destroy_hwloc_topology;
624+
//}
625+
626+
//clock_gettime(CLOCK_MONOTONIC, &ts_init_end);
627+
//LOG_FATAL("HWLOC topology initialized in %ld.%09ld seconds",
628+
// ts_init_end.tv_sec - ts_init_start.tv_sec,
629+
// ts_init_end.tv_nsec - ts_init_start.tv_nsec);
574630

575631
os_provider->fd_offset_map = critnib_new(NULL, NULL);
576632
if (!os_provider->fd_offset_map) {
@@ -620,8 +676,8 @@ static umf_result_t os_initialize(const void *params, void **provider) {
620676
err_destroy_critnib:
621677
critnib_delete(os_provider->fd_offset_map);
622678
err_destroy_hwloc_topology:
623-
hwloc_topology_destroy(os_provider->topo);
624-
err_free_os_provider:
679+
// hwloc_topology_destroy(os_provider->topo);
680+
//err_free_os_provider:
625681
umf_ba_global_free(os_provider);
626682
return ret;
627683
}
@@ -644,7 +700,7 @@ static umf_result_t os_finalize(void *provider) {
644700
if (os_provider->nodeset_str_buf) {
645701
umf_ba_global_free(os_provider->nodeset_str_buf);
646702
}
647-
hwloc_topology_destroy(os_provider->topo);
703+
//hwloc_topology_destroy(os_provider->topo);
648704
umf_ba_global_free(os_provider);
649705
return UMF_RESULT_SUCCESS;
650706
}
@@ -1007,10 +1063,47 @@ static umf_result_t os_alloc(void *provider, size_t size, size_t alignment,
10071063

10081064
do {
10091065
errno = 0;
1010-
ret = hwloc_set_area_membind(os_provider->topo, membind.addr,
1011-
membind.bind_size, membind.bitmap,
1012-
os_provider->numa_policy,
1013-
os_provider->numa_flags);
1066+
ret = 0;
1067+
1068+
/* Use mbind syscall directly instead of hwloc */
1069+
/* Requires: #include <numaif.h> */
1070+
unsigned long nodemask = 0;
1071+
int maxnode = 8 * sizeof(nodemask); /* up to 64 nodes */
1072+
/* Convert hwloc_bitmap_t to nodemask (assume bitmap fits in ulong) */
1073+
if (membind.bitmap) {
1074+
for (int i = 0; i < maxnode; ++i) {
1075+
if (hwloc_bitmap_isset(membind.bitmap, i)) {
1076+
nodemask |= (1UL << i);
1077+
}
1078+
}
1079+
}
1080+
1081+
int mode = MPOL_DEFAULT;
1082+
if (os_provider->mbind_mode == UMF_NUMA_MODE_INTERLEAVE &&
1083+
os_provider->dedicated == 0) {
1084+
mode = MPOL_INTERLEAVE;
1085+
} else if (os_provider->mbind_mode == UMF_NUMA_MODE_SPLIT) {
1086+
mode = MPOL_BIND;
1087+
} else if (os_provider->mbind_mode == UMF_NUMA_MODE_LOCAL) {
1088+
mode = MPOL_LOCAL;
1089+
nodemask = 0; // MPOL_LOCAL does not use nodemask
1090+
} else if (os_provider->mbind_mode == UMF_NUMA_MODE_PREFERRED) {
1091+
mode = MPOL_BIND;
1092+
} else if (os_provider->mbind_mode == UMF_NUMA_MODE_BIND ||
1093+
os_provider->dedicated) {
1094+
mode = MPOL_BIND;
1095+
}
1096+
1097+
unsigned long mbind_flags = 0;
1098+
if (os_provider->dedicated) {
1099+
mbind_flags |= MPOL_MF_STRICT;
1100+
}
1101+
1102+
ret = syscall(__NR_mbind, membind.addr, membind.bind_size, mode,
1103+
&nodemask, maxnode, mbind_flags);
1104+
1105+
// ret = mbind(addr, membind.bind_size, mode, &nodemask, maxnode,
1106+
// mbind_flags);
10141107

10151108
if (ret) {
10161109
os_store_last_native_error(UMF_OS_RESULT_ERROR_BIND_FAILED,

src/provider/provider_os_memory_internal.h

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,9 +55,14 @@ typedef struct os_memory_provider_t {
5555
hwloc_bitmap_t *nodeset;
5656
unsigned nodeset_len;
5757
char *nodeset_str_buf;
58+
5859
hwloc_membind_policy_t numa_policy;
5960
int numa_flags; // combination of hwloc flags
6061

62+
//
63+
umf_numa_mode_t mbind_mode;
64+
int dedicated;
65+
6166
size_t part_size;
6267
uint64_t alloc_sum; // sum of all allocations - used for manual interleaving
6368

@@ -68,7 +73,8 @@ typedef struct os_memory_provider_t {
6873
unsigned partitions_len;
6974
size_t partitions_weight_sum;
7075

71-
hwloc_topology_t topo;
76+
//
77+
//hwloc_topology_t topo;
7278

7379
ctl_stats_t stats;
7480
} os_memory_provider_t;

src/utils/utils_common.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,8 @@ size_t utils_max(size_t a, size_t b);
188188

189189
size_t utils_min(size_t a, size_t b);
190190

191+
int utils_get_complete_nodeset(size_t *ids, size_t size, size_t *);
192+
191193
#ifdef __cplusplus
192194
}
193195
#endif

src/utils/utils_linux_common.c

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
*
88
*/
99

10+
#include <dirent.h>
1011
#include <errno.h>
1112
#include <fcntl.h>
1213
#include <stdbool.h>
@@ -240,3 +241,28 @@ int utils_create_anonymous_fd(void) {
240241

241242
return fd;
242243
}
244+
245+
#include <stdlib.h>
246+
#include <string.h>
247+
/*
 * Collect the IDs of the NUMA nodes listed under /sys/devices/system/node/.
 *
 * Every directory entry named "node<N>", where <N> is a plain decimal number
 * smaller than nodes_size, contributes N to the output array. Entries are
 * stored in the order readdir() returns them (not sorted).
 *
 * nodes      - output array with room for at least nodes_size elements
 * nodes_size - capacity of the nodes array; also the exclusive upper bound
 *              for accepted node IDs
 * num        - output: number of IDs written to nodes; always reset to 0
 *              on entry, so the caller does not have to pre-initialize it
 *
 * Returns 0 on success and -1 if an argument is NULL or the sysfs directory
 * cannot be opened (errno is left as set by opendir() in that case).
 */
int utils_get_complete_nodeset(size_t *nodes, size_t nodes_size, size_t *num) {
    if (!nodes || !num) {
        return -1;
    }

    /* Treat num as a pure output so a stale value can never become an index. */
    *num = 0;

    DIR *dir = opendir("/sys/devices/system/node/");
    if (!dir) {
        return -1;
    }

    size_t count = 0;
    struct dirent *entry;

    /* Stop as soon as the output array is full - never write past it. */
    while (count < nodes_size && (entry = readdir(dir)) != NULL) {
        if (strncmp(entry->d_name, "node", 4) != 0) {
            continue;
        }

        const char *suffix = entry->d_name + 4;

        /*
         * Require the suffix to start with a digit: strtol() would otherwise
         * accept an empty suffix (parsed as 0), leading whitespace or a sign.
         */
        if (suffix[0] < '0' || suffix[0] > '9') {
            continue;
        }

        char *endptr = NULL;
        long node_id = strtol(suffix, &endptr, 10);

        /* Reject trailing garbage and IDs outside [0, nodes_size). */
        if (*endptr != '\0' || node_id < 0 || (size_t)node_id >= nodes_size) {
            continue;
        }

        nodes[count++] = (size_t)node_id;
    }

    closedir(dir);

    *num = count;
    return 0;
}

0 commit comments

Comments
 (0)