Skip to content

Commit 728d51f

Browse files
committed
btl/vader: modify how the max attachment address is determined
This PR removes the constant defining the max attachment address and replaces it with the largest address that shows up in /proc/self/maps. This should address issues found on AARCH64, where the max address may differ based on the configuration. Since the calculated max address may differ between processes, the max address is sent as part of the modex and stored in the endpoint data. Signed-off-by: Nathan Hjelm <[email protected]>
1 parent f86f805 commit 728d51f

File tree

6 files changed

+52
-11
lines changed

6 files changed

+52
-11
lines changed

opal/mca/btl/vader/btl_vader.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
* Copyright (c) 2015 Mellanox Technologies. All rights reserved.
1818
* Copyright (c) 2018 Triad National Security, LLC. All rights
1919
* reserved.
20+
* Copyright (c) 2020 Google, LLC. All rights reserved.
2021
*
2122
* $COPYRIGHT$
2223
*
@@ -82,6 +83,7 @@ union vader_modex_t {
8283
struct vader_modex_xpmem_t {
8384
xpmem_segid_t seg_id;
8485
void *segment_base;
86+
uintptr_t address_max;
8587
} xpmem;
8688
#endif
8789
struct vader_modex_other_t {
@@ -113,6 +115,7 @@ struct mca_btl_vader_component_t {
113115
int vader_free_list_inc; /**< number of elements to alloc when growing free lists */
114116
#if OPAL_BTL_VADER_HAVE_XPMEM
115117
xpmem_segid_t my_seg_id; /**< this rank's xpmem segment id */
118+
uintptr_t my_address_max; /**< largest address */
116119
mca_rcache_base_vma_module_t *vma_module; /**< registration cache for xpmem segments */
117120
#endif
118121
opal_shmem_ds_t seg_ds; /**< this rank's shared memory segment (when not using xpmem) */

opal/mca/btl/vader/btl_vader_component.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
* Copyright (c) 2018 Amazon.com, Inc. or its affiliates. All Rights reserved.
2222
* Copyright (c) 2018 Triad National Security, LLC. All rights
2323
* reserved.
24-
* Copyright (c) 2019 Google, Inc. All rights reserved.
24+
* Copyright (c) 2019-2020 Google, Inc. All rights reserved.
2525
* $COPYRIGHT$
2626
*
2727
* Additional copyrights may follow
@@ -383,6 +383,7 @@ static int mca_btl_base_vader_modex_send (void)
383383
if (MCA_BTL_VADER_XPMEM == mca_btl_vader_component.single_copy_mechanism) {
384384
modex.xpmem.seg_id = mca_btl_vader_component.my_seg_id;
385385
modex.xpmem.segment_base = mca_btl_vader_component.my_segment;
386+
modex.xpmem.address_max = mca_btl_vader_component.my_address_max;
386387

387388
modex_size = sizeof (modex.xpmem);
388389
} else {

opal/mca/btl/vader/btl_vader_endpoint.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@ typedef struct mca_btl_base_endpoint_t {
7878
#if OPAL_BTL_VADER_HAVE_XPMEM
7979
struct {
8080
xpmem_apid_t apid; /**< xpmem apid for remote peer */
81+
uintptr_t address_max; /**< largest address that can be attached */
8182
} xpmem;
8283
#endif
8384
struct {

opal/mca/btl/vader/btl_vader_module.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
* and Technology (RIST). All rights reserved.
2020
* Copyright (c) 2018-2019 Triad National Security, LLC. All rights
2121
* reserved.
22+
* Copyright (c) 2020 Google, LLC. All rights reserved.
2223
* $COPYRIGHT$
2324
*
2425
* Additional copyrights may follow
@@ -216,6 +217,7 @@ static int init_vader_endpoint (struct mca_btl_base_endpoint_t *ep, struct opal_
216217
if (MCA_BTL_VADER_XPMEM == mca_btl_vader_component.single_copy_mechanism) {
217218
/* always use xpmem if it is available */
218219
ep->segment_data.xpmem.apid = xpmem_get (modex->xpmem.seg_id, XPMEM_RDWR, XPMEM_PERMIT_MODE, (void *) 0666);
220+
ep->segment_data.xpmem.address_max = modex->xpmem.address_max;
219221
(void) vader_get_registation (ep, modex->xpmem.segment_base, mca_btl_vader_component.segment_size,
220222
MCA_RCACHE_FLAGS_PERSIST, (void **) &ep->segment_base);
221223
} else {

opal/mca/btl/vader/btl_vader_xpmem.c

Lines changed: 43 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,47 @@
2222

2323
int mca_btl_vader_xpmem_init (void)
2424
{
25-
mca_btl_vader_component.my_seg_id = xpmem_make (0, VADER_MAX_ADDRESS, XPMEM_PERMIT_MODE, (void *)0666);
25+
/* Any attachment that goes past the Linux TASK_SIZE will always fail. To prevent this we need to
26+
* determine the value of TASK_SIZE. On x86_64 the value was hard-coded in vader to be
27+
* 0x7ffffffff000ul but this approach does not work with AARCH64 (and possibly other architectures).
28+
* Since there is really no way to directly determine the value we can (in all cases?) look through
29+
* the mapping for this process to determine what the largest address is. This should be the top
30+
* of the stack. No heap allocations should be larger than this value. Since the largest address
31+
* may differ between processes the value must be shared as part of the modex and stored in the
32+
* endpoint. */
33+
FILE *fh = fopen("/proc/self/maps", "r");
34+
if (NULL == fh) {
35+
BTL_ERROR(("could not open /proc/self/maps for reading. disabling XPMEM"));
36+
return OPAL_ERR_NOT_AVAILABLE;
37+
}
38+
39+
char buffer[1024];
40+
uintptr_t address_max = 0;
41+
while (fgets(buffer, sizeof(buffer), fh)) {
42+
uintptr_t low, high;
43+
char *tmp;
44+
/* each line of /proc/self/maps starts with low-high in hexadecimal (without a 0x) */
45+
low = strtoul(buffer, &tmp, 16);
46+
high = strtoul(tmp+1, NULL, 16);
47+
if (address_max < high) {
48+
address_max = high;
49+
}
50+
}
51+
52+
fclose (fh);
53+
54+
if (0 == address_max) {
55+
BTL_ERROR(("could not determine the address max"));
56+
return OPAL_ERR_NOT_AVAILABLE;
57+
}
58+
59+
/* save the calculated maximum */
60+
mca_btl_vader_component.my_address_max = address_max - 1;
61+
62+
/* it is safe to use XPMEM_MAXADDR_SIZE here (which is always (size_t)-1) even though
63+
* it is not safe for attach */
64+
mca_btl_vader_component.my_seg_id = xpmem_make (0, XPMEM_MAXADDR_SIZE, XPMEM_PERMIT_MODE,
65+
(void *)0666);
2666
if (-1 == mca_btl_vader_component.my_seg_id) {
2767
return OPAL_ERR_NOT_AVAILABLE;
2868
}
@@ -110,8 +150,8 @@ mca_rcache_base_registration_t *vader_get_registation (struct mca_btl_base_endpo
110150

111151
base = OPAL_DOWN_ALIGN((uintptr_t) rem_ptr, attach_align, uintptr_t);
112152
bound = OPAL_ALIGN((uintptr_t) rem_ptr + size - 1, attach_align, uintptr_t) + 1;
113-
if (OPAL_UNLIKELY(bound > VADER_MAX_ADDRESS)) {
114-
bound = VADER_MAX_ADDRESS;
153+
if (OPAL_UNLIKELY(bound > ep->segment_data.xpmem.address_max)) {
154+
bound = ep->segment_data.xpmem.address_max;
115155
}
116156

117157
check_ctx.base = base;

opal/mca/btl/vader/btl_vader_xpmem.h

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
* Copyright (c) 2013-2014 Los Alamos National Security, LLC. All rights
44
* reserved.
55
* Copyright (c) 2016 ARM, Inc. All rights reserved.
6+
* Copyright (c) 2020 Google, LLC. All rights reserved.
67
* $COPYRIGHT$
78
*
89
* Additional copyrights may follow
@@ -32,13 +33,6 @@
3233
/* look up the remote pointer in the peer rcache and attach if
3334
* necessary */
3435

35-
/* largest address we can attach to using xpmem */
36-
#if defined(__x86_64__)
37-
#define VADER_MAX_ADDRESS ((uintptr_t)0x7ffffffff000ul)
38-
#else
39-
#define VADER_MAX_ADDRESS XPMEM_MAXADDR_SIZE
40-
#endif
41-
4236
struct mca_btl_base_endpoint_t;
4337

4438
int mca_btl_vader_xpmem_init (void);

0 commit comments

Comments
 (0)