Skip to content

Commit 61f96b3

Browse files
authored
Merge pull request #7283 from hjelmn/fix_issues_in_both_vader_and_opal_interval_tree_t_that_were_causing_issue_6524
Fix issues in both vader and opal interval tree t that were causing issue 6524
2 parents b3fe523 + f86f805 commit 61f96b3

File tree

2 files changed

+90
-34
lines changed

2 files changed

+90
-34
lines changed

opal/class/opal_interval_tree.c

Lines changed: 42 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
* All rights reserved.
1313
* Copyright (c) 2015-2018 Los Alamos National Security, LLC. All rights
1414
* reserved.
15+
* Copyright (c) 2020 Google, LLC. All rights reserved.
1516
* $COPYRIGHT$
1617
*
1718
* Additional copyrights may follow
@@ -39,7 +40,7 @@ static opal_interval_tree_node_t *opal_interval_tree_next (opal_interval_tree_t
3940
opal_interval_tree_node_t *node);
4041
static opal_interval_tree_node_t * opal_interval_tree_find_node(opal_interval_tree_t *tree,
4142
uint64_t low, uint64_t high,
42-
bool exact, void *data);
43+
void *data);
4344

4445
static opal_interval_tree_node_t *left_rotate (opal_interval_tree_t *tree, opal_interval_tree_node_t *x);
4546
static opal_interval_tree_node_t *right_rotate (opal_interval_tree_t *tree, opal_interval_tree_node_t *x);
@@ -355,31 +356,54 @@ int opal_interval_tree_insert (opal_interval_tree_t *tree, void *value, uint64_t
355356
return OPAL_SUCCESS;
356357
}
357358

359+
static int opal_interval_tree_compare_node(opal_interval_tree_node_t *node, uint64_t low, uint64_t high, void *data) {
360+
if ((data && node->low == low && node->high == high && node->data == data) ||
361+
(!data && node->low <= low && node->high >= high)) {
362+
return 0;
363+
}
364+
if (node->low > low) {
365+
return -1;
366+
}
367+
if (node->low < low) {
368+
return 1;
369+
}
370+
if (node->high < high) {
371+
return -1;
372+
}
373+
if (node->high > high) {
374+
return 1;
375+
}
376+
if (node->data > data) {
377+
return -1;
378+
}
379+
return 1;
380+
}
381+
358382
static opal_interval_tree_node_t *opal_interval_tree_find_interval(opal_interval_tree_t *tree, opal_interval_tree_node_t *node, uint64_t low,
359-
uint64_t high, bool exact, void *data)
383+
uint64_t high, void *data)
360384
{
361385
if (node == &tree->nill) {
362386
return NULL;
363387
}
364388

365-
if (((exact && node->low == low && node->high == high) || (!exact && node->low <= low && node->high >= high)) &&
366-
(!data || node->data == data)) {
389+
int check = opal_interval_tree_compare_node(node, low, high, data);
390+
if (0 == check) {
367391
return node;
368392
}
369393

370-
if (low <= node->low) {
371-
return opal_interval_tree_find_interval (tree, node->left, low, high, exact, data);
394+
if (-1 == check) {
395+
return opal_interval_tree_find_interval (tree, node->left, low, high, data);
372396
}
373397

374-
return opal_interval_tree_find_interval (tree, node->right, low, high, exact, data);
398+
return opal_interval_tree_find_interval (tree, node->right, low, high, data);
375399
}
376400

377401
/* Finds the node in the tree based on the key and returns a pointer
378402
* to the node. This is a bit a code duplication, but this has to be fast
379403
* so we go ahead with the duplication */
380-
static opal_interval_tree_node_t *opal_interval_tree_find_node(opal_interval_tree_t *tree, uint64_t low, uint64_t high, bool exact, void *data)
404+
static opal_interval_tree_node_t *opal_interval_tree_find_node(opal_interval_tree_t *tree, uint64_t low, uint64_t high, void *data)
381405
{
382-
return opal_interval_tree_find_interval (tree, tree->root.left, low, high, exact, data);
406+
return opal_interval_tree_find_interval (tree, tree->root.left, low, high, data);
383407
}
384408

385409
void *opal_interval_tree_find_overlapping (opal_interval_tree_t *tree, uint64_t low, uint64_t high)
@@ -388,7 +412,7 @@ void *opal_interval_tree_find_overlapping (opal_interval_tree_t *tree, uint64_t
388412
opal_interval_tree_node_t *node;
389413

390414
token = opal_interval_tree_reader_get_token (tree);
391-
node = opal_interval_tree_find_node (tree, low, high, true, NULL);
415+
node = opal_interval_tree_find_node (tree, low, high, NULL);
392416
opal_interval_tree_reader_return_token (tree, token);
393417

394418
return node ? node->data : NULL;
@@ -536,7 +560,7 @@ int opal_interval_tree_delete (opal_interval_tree_t *tree, uint64_t low, uint64_
536560
opal_interval_tree_node_t *node;
537561

538562
opal_interval_tree_write_lock (tree);
539-
node = opal_interval_tree_find_node (tree, low, high, true, data);
563+
node = opal_interval_tree_find_node (tree, low, high, data);
540564
if (NULL == node) {
541565
opal_interval_tree_write_unlock (tree);
542566
return OPAL_ERR_NOT_FOUND;
@@ -618,18 +642,23 @@ static void opal_interval_tree_insert_node (opal_interval_tree_t *tree, opal_int
618642
node->right = nill;
619643

620644
/* find the leaf where we will insert the node */
645+
int check = -1;
621646
while (n != nill) {
647+
check = opal_interval_tree_compare_node(n, node->low, node->high, node->data);
648+
/* node already exists */
649+
assert (0 != check);
650+
622651
if (n->max < node->high) {
623652
n->max = node->high;
624653
}
625654

626655
parent = n;
627-
n = ((node->low < n->low) ? n->left : n->right);
656+
n = (-1 == check) ? n->left : n->right;
628657
assert (nill == n || n->parent == parent);
629658
}
630659

631660
/* place it on either the left or the right */
632-
if ((node->low < parent->low)) {
661+
if (-1 == check) {
633662
parent->left = node;
634663
} else {
635664
parent->right = node;

opal/mca/btl/vader/btl_vader_xpmem.c

Lines changed: 48 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
* Copyright (c) 2014 The University of Tennessee and The University
66
* of Tennessee Research Foundation. All rights
77
* reserved.
8+
* Copyright (c) 2020 Google, LLC. All rights reserved.
89
* $COPYRIGHT$
910
*
1011
* Additional copyrights may follow
@@ -44,19 +45,34 @@ static int vader_check_reg (mca_rcache_base_registration_t *reg, void *ctx)
4445
{
4546
vader_check_reg_ctx_t *vader_ctx = (vader_check_reg_ctx_t *) ctx;
4647

47-
if ((intptr_t) reg->alloc_base != vader_ctx->ep->peer_smp_rank ||
48-
(reg->flags & MCA_RCACHE_FLAGS_PERSIST)) {
48+
if ((intptr_t) reg->alloc_base != vader_ctx->ep->peer_smp_rank) {
4949
/* ignore this registration */
5050
return OPAL_SUCCESS;
5151
}
5252

5353
vader_ctx->reg[0] = reg;
5454

5555
if (vader_ctx->bound <= (uintptr_t) reg->bound && vader_ctx->base >= (uintptr_t) reg->base) {
56-
opal_atomic_add (&reg->ref_count, 1);
56+
if (0 == opal_atomic_fetch_add_32 (&reg->ref_count, 1)) {
57+
/* registration is being deleted by a thread in vader_return_registration. the
58+
* VMA tree implementation will block in mca_rcache_delete until we finish
59+
* iterating over the VMA tree so it is safe to just ignore this registration
60+
* and continue. */
61+
vader_ctx->reg[0] = NULL;
62+
return OPAL_SUCCESS;
63+
}
5764
return 1;
5865
}
5966

67+
if (MCA_RCACHE_FLAGS_INVALID & opal_atomic_fetch_or_32(&reg->flags, MCA_RCACHE_FLAGS_INVALID)) {
68+
/* another thread has already marked this registration as invalid. ignore and continue. */
69+
vader_ctx->reg[0] = NULL;
70+
return OPAL_SUCCESS;
71+
}
72+
73+
/* let the caller know we found an overlapping registration that can be coalesced into
74+
* the requested interval. the caller will remove the last reference and delete the
75+
* registration. */
6076
return 2;
6177
}
6278

@@ -67,8 +83,12 @@ void vader_return_registration (mca_rcache_base_registration_t *reg, struct mca_
6783

6884
ref_count = opal_atomic_add_fetch_32 (&reg->ref_count, -1);
6985
if (OPAL_UNLIKELY(0 == ref_count && !(reg->flags & MCA_RCACHE_FLAGS_PERSIST))) {
70-
mca_rcache_base_vma_delete (vma_module, reg);
71-
86+
#if OPAL_DEBUG
87+
int ret = mca_rcache_base_vma_delete (vma_module, reg);
88+
assert (OPAL_SUCCESS == ret);
89+
#else
90+
(void) mca_rcache_base_vma_delete (vma_module, reg);
91+
#endif
7292
opal_memchecker_base_mem_noaccess (reg->rcache_context, (uintptr_t)(reg->bound - reg->base));
7393
(void)xpmem_detach (reg->rcache_context);
7494
OBJ_RELEASE (reg);
@@ -100,16 +120,9 @@ mca_rcache_base_registration_t *vader_get_registation (struct mca_btl_base_endpo
100120
/* several segments may match the base pointer */
101121
rc = mca_rcache_base_vma_iterate (vma_module, (void *) base, bound - base, true, vader_check_reg, &check_ctx);
102122
if (2 == rc) {
103-
/* remove this pointer from the rcache and decrement its reference count
104-
(so it is detached later) */
105-
mca_rcache_base_vma_delete (vma_module, reg);
106-
107-
/* start the new segment from the lower of the two bases */
108-
base = (uintptr_t) reg->base < base ? (uintptr_t) reg->base : base;
109-
110-
/* remove the last reference to this registration */
111-
vader_return_registration (reg, ep);
112-
123+
bound = bound < (uintptr_t) reg->bound ? (uintptr_t) reg->bound : bound;
124+
base = base > (uintptr_t) reg->base ? (uintptr_t) reg->base : base;
125+
vader_return_registration(reg, ep);
113126
reg = NULL;
114127
}
115128

@@ -151,25 +164,39 @@ mca_rcache_base_registration_t *vader_get_registation (struct mca_btl_base_endpo
151164
return reg;
152165
}
153166

167+
struct vader_cleanup_reg_ctx {
168+
mca_btl_vader_endpoint_t *ep;
169+
opal_list_t *registrations;
170+
};
171+
154172
static int mca_btl_vader_endpoint_xpmem_rcache_cleanup (mca_rcache_base_registration_t *reg, void *ctx)
155173
{
156-
mca_btl_vader_endpoint_t *ep = (mca_btl_vader_endpoint_t *) ctx;
157-
if ((intptr_t) reg->alloc_base == ep->peer_smp_rank) {
158-
/* otherwise dereg will fail on assert */
159-
reg->ref_count = 0;
160-
OBJ_RELEASE(reg);
174+
struct vader_cleanup_reg_ctx *cleanup_ctx = (struct vader_cleanup_reg_ctx *) ctx;
175+
if ((intptr_t) reg->alloc_base == cleanup_ctx->ep->peer_smp_rank) {
176+
opal_list_append(cleanup_ctx->registrations, &reg->super.super);
161177
}
162178

163179
return OPAL_SUCCESS;
164180
}
165181

166182
void mca_btl_vader_xpmem_cleanup_endpoint (struct mca_btl_base_endpoint_t *ep)
167183
{
184+
mca_rcache_base_registration_t *reg;
185+
opal_list_t registrations;
186+
struct vader_cleanup_reg_ctx cleanup_ctx = {.ep = ep, .registrations = &registrations};
187+
188+
OBJ_CONSTRUCT(&registrations, opal_list_t);
189+
168190
/* clean out the registration cache */
169191
(void) mca_rcache_base_vma_iterate (mca_btl_vader_component.vma_module,
170192
NULL, (size_t) -1, true,
171193
mca_btl_vader_endpoint_xpmem_rcache_cleanup,
172-
(void *) ep);
194+
(void *) &cleanup_ctx);
195+
while (NULL != (reg = (mca_rcache_base_registration_t *) opal_list_remove_first(&registrations))) {
196+
vader_return_registration (reg, ep);
197+
}
198+
OBJ_DESTRUCT(&registrations);
199+
173200
if (ep->segment_base) {
174201
xpmem_release (ep->segment_data.xpmem.apid);
175202
ep->segment_data.xpmem.apid = 0;

0 commit comments

Comments
 (0)