Skip to content

Commit 38d9b10

Browse files
committed
rcache/base: update VMA tree to use opal_interval_tree_t
This commit replaces the current VMA tree implementation with one that uses the new opal_interval_tree_t class. Since the VMA tree lock is no longer used, this commit also updates rcache/grdma and btl/vader to take more care when searching for existing registrations.

Signed-off-by: Nathan Hjelm <[email protected]>
1 parent 7163fc9 commit 38d9b10

File tree

9 files changed

+226
-788
lines changed

9 files changed

+226
-788
lines changed

opal/mca/btl/vader/btl_vader_xpmem.c

Lines changed: 28 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
22
/*
3-
* Copyright (c) 2011-2014 Los Alamos National Security, LLC. All rights
3+
* Copyright (c) 2011-2018 Los Alamos National Security, LLC. All rights
44
* reserved.
55
* Copyright (c) 2014 The University of Tennessee and The University
66
* of Tennessee Research Foundation. All rights
@@ -33,7 +33,6 @@ int mca_btl_vader_xpmem_init (void)
3333
}
3434

3535
struct vader_check_reg_ctx_t {
36-
mca_rcache_base_vma_module_t *vma_module;
3736
mca_btl_base_endpoint_t *ep;
3837
mca_rcache_base_registration_t **reg;
3938
uintptr_t base;
@@ -58,13 +57,24 @@ static int vader_check_reg (mca_rcache_base_registration_t *reg, void *ctx)
5857
return 1;
5958
}
6059

61-
/* remove this pointer from the rcache and decrement its reference count
62-
(so it is detached later) */
63-
mca_rcache_base_vma_delete (vader_ctx->vma_module, reg);
64-
6560
return 2;
6661
}
6762

63+
void vader_return_registration (mca_rcache_base_registration_t *reg, struct mca_btl_base_endpoint_t *ep)
64+
{
65+
mca_rcache_base_vma_module_t *vma_module = mca_btl_vader_component.vma_module;
66+
int32_t ref_count;
67+
68+
ref_count = opal_atomic_add_fetch_32 (&reg->ref_count, -1);
69+
if (OPAL_UNLIKELY(0 == ref_count && !(reg->flags & MCA_RCACHE_FLAGS_PERSIST))) {
70+
mca_rcache_base_vma_delete (vma_module, reg);
71+
72+
opal_memchecker_base_mem_noaccess (reg->rcache_context, (uintptr_t)(reg->bound - reg->base));
73+
(void)xpmem_detach (reg->rcache_context);
74+
OBJ_RELEASE (reg);
75+
}
76+
}
77+
6878
/* look up the remote pointer in the peer rcache and attach if
6979
* necessary */
7080
mca_rcache_base_registration_t *vader_get_registation (struct mca_btl_base_endpoint_t *ep, void *rem_ptr,
@@ -73,7 +83,7 @@ mca_rcache_base_registration_t *vader_get_registation (struct mca_btl_base_endpo
7383
mca_rcache_base_vma_module_t *vma_module = mca_btl_vader_component.vma_module;
7484
uint64_t attach_align = 1 << mca_btl_vader_component.log_attach_align;
7585
mca_rcache_base_registration_t *reg = NULL;
76-
vader_check_reg_ctx_t check_ctx = {.ep = ep, .reg = &reg, .vma_module = vma_module};
86+
vader_check_reg_ctx_t check_ctx = {.ep = ep, .reg = &reg};
7787
xpmem_addr_t xpmem_addr;
7888
uintptr_t base, bound;
7989
int rc;
@@ -88,16 +98,17 @@ mca_rcache_base_registration_t *vader_get_registation (struct mca_btl_base_endpo
8898
check_ctx.bound = bound;
8999

90100
/* several segments may match the base pointer */
91-
rc = mca_rcache_base_vma_iterate (vma_module, (void *) base, bound - base, vader_check_reg, &check_ctx);
101+
rc = mca_rcache_base_vma_iterate (vma_module, (void *) base, bound - base, true, vader_check_reg, &check_ctx);
92102
if (2 == rc) {
103+
/* remove this pointer from the rcache and decrement its reference count
104+
(so it is detached later) */
105+
mca_rcache_base_vma_delete (vma_module, reg);
106+
93107
/* start the new segment from the lower of the two bases */
94108
base = (uintptr_t) reg->base < base ? (uintptr_t) reg->base : base;
95109

96-
if (OPAL_LIKELY(0 == opal_atomic_add_fetch_32 (&reg->ref_count, -1))) {
97-
/* this pointer is not in use */
98-
(void) xpmem_detach (reg->rcache_context);
99-
OBJ_RELEASE(reg);
100-
}
110+
/* remove the last reference to this registration */
111+
vader_return_registration (reg, ep);
101112

102113
reg = NULL;
103114
}
@@ -127,7 +138,9 @@ mca_rcache_base_registration_t *vader_get_registation (struct mca_btl_base_endpo
127138

128139
opal_memchecker_base_mem_defined (reg->rcache_context, bound - base);
129140

130-
mca_rcache_base_vma_insert (vma_module, reg, 0);
141+
if (!(flags & MCA_RCACHE_FLAGS_PERSIST)) {
142+
mca_rcache_base_vma_insert (vma_module, reg, 0);
143+
}
131144
}
132145
}
133146

@@ -138,30 +151,13 @@ mca_rcache_base_registration_t *vader_get_registation (struct mca_btl_base_endpo
138151
return reg;
139152
}
140153

141-
void vader_return_registration (mca_rcache_base_registration_t *reg, struct mca_btl_base_endpoint_t *ep)
142-
{
143-
mca_rcache_base_vma_module_t *vma_module = mca_btl_vader_component.vma_module;
144-
int32_t ref_count;
145-
146-
ref_count = opal_atomic_add_fetch_32 (&reg->ref_count, -1);
147-
if (OPAL_UNLIKELY(0 == ref_count && !(reg->flags & MCA_RCACHE_FLAGS_PERSIST))) {
148-
/* protect rcache access */
149-
mca_rcache_base_vma_delete (vma_module, reg);
150-
151-
opal_memchecker_base_mem_noaccess (reg->rcache_context, (uintptr_t)(reg->bound - reg->base));
152-
(void)xpmem_detach (reg->rcache_context);
153-
OBJ_RELEASE (reg);
154-
}
155-
}
156-
157154
static int mca_btl_vader_endpoint_xpmem_rcache_cleanup (mca_rcache_base_registration_t *reg, void *ctx)
158155
{
159156
mca_rcache_base_vma_module_t *vma_module = mca_btl_vader_component.vma_module;
160157
mca_btl_vader_endpoint_t *ep = (mca_btl_vader_endpoint_t *) ctx;
161158
if ((intptr_t) reg->alloc_base == ep->peer_smp_rank) {
162159
/* otherwise dereg will fail on assert */
163160
reg->ref_count = 0;
164-
(void) mca_rcache_base_vma_delete (vma_module, reg);
165161
OBJ_RELEASE(reg);
166162
}
167163

@@ -172,7 +168,7 @@ void mca_btl_vader_xpmem_cleanup_endpoint (struct mca_btl_base_endpoint_t *ep)
172168
{
173169
/* clean out the registration cache */
174170
(void) mca_rcache_base_vma_iterate (mca_btl_vader_component.vma_module,
175-
NULL, (size_t) -1,
171+
NULL, (size_t) -1, true,
176172
mca_btl_vader_endpoint_xpmem_rcache_cleanup,
177173
(void *) ep);
178174
if (ep->segment_base) {

opal/mca/rcache/base/rcache_base_frame.c

Lines changed: 1 addition & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
* University of Stuttgart. All rights reserved.
1111
* Copyright (c) 2004-2005 The Regents of the University of California.
1212
* All rights reserved.
13-
* Copyright (c) 2012-2016 Los Alamos National Security, LLC.
13+
* Copyright (c) 2012-2018 Los Alamos National Security, LLC.
1414
* All rights reserved
1515
* Copyright (c) 2015-2016 Research Organization for Information Science
1616
* and Technology (RIST). All rights reserved.
@@ -73,11 +73,6 @@ OBJ_CLASS_INSTANCE(mca_rcache_base_registration_t, opal_free_list_item_t,
7373
* Global variables
7474
*/
7575
opal_list_t mca_rcache_base_modules = {{0}};
76-
opal_free_list_t mca_rcache_base_vma_tree_items = {{{0}}};
77-
bool mca_rcache_base_vma_tree_items_inited = false;
78-
unsigned int mca_rcache_base_vma_tree_items_min = TREE_ITEMS_MIN;
79-
int mca_rcache_base_vma_tree_items_max = TREE_ITEMS_MAX;
80-
unsigned int mca_rcache_base_vma_tree_items_inc = TREE_ITEMS_INC;
8176

8277
OBJ_CLASS_INSTANCE(mca_rcache_base_selected_module_t, opal_list_item_t, NULL, NULL);
8378

@@ -114,9 +109,6 @@ static int mca_rcache_base_close(void)
114109
(void) mca_base_framework_close (&opal_memory_base_framework);
115110
}
116111

117-
OBJ_DESTRUCT(&mca_rcache_base_vma_tree_items);
118-
mca_rcache_base_vma_tree_items_inited = false;
119-
120112
/* All done */
121113
/* Close all remaining available components */
122114
return mca_base_framework_components_close(&opal_rcache_base_framework, NULL);
@@ -133,37 +125,12 @@ static int mca_rcache_base_open(mca_base_open_flag_t flags)
133125

134126
OBJ_CONSTRUCT(&mca_rcache_base_modules, opal_list_t);
135127

136-
/* the free list is only initialized when a VMA tree is created */
137-
OBJ_CONSTRUCT(&mca_rcache_base_vma_tree_items, opal_free_list_t);
138-
139128
/* Open up all available components */
140129
return mca_base_framework_components_open(&opal_rcache_base_framework, flags);
141130
}
142131

143132
static int mca_rcache_base_register_mca_variables (mca_base_register_flag_t flags)
144133
{
145-
146-
mca_rcache_base_vma_tree_items_min = TREE_ITEMS_MIN;
147-
(void) mca_base_framework_var_register (&opal_rcache_base_framework, "vma_tree_items_min",
148-
"Minimum number of VMA tree items to allocate (default: "
149-
STRINGIFY(TREE_ITEMS_MIN) ")", MCA_BASE_VAR_TYPE_UNSIGNED_INT,
150-
NULL, MCA_BASE_VAR_BIND_NO_OBJECT, 0, OPAL_INFO_LVL_6,
151-
MCA_BASE_VAR_SCOPE_READONLY, &mca_rcache_base_vma_tree_items_min);
152-
153-
mca_rcache_base_vma_tree_items_max = TREE_ITEMS_MAX;
154-
(void) mca_base_framework_var_register (&opal_rcache_base_framework, "vma_tree_items_max",
155-
"Maximum number of VMA tree items to allocate (default: "
156-
STRINGIFY(TREE_ITEMS_MAX) ", -1: unlimited)", MCA_BASE_VAR_TYPE_INT,
157-
NULL, MCA_BASE_VAR_BIND_NO_OBJECT, 0, OPAL_INFO_LVL_6,
158-
MCA_BASE_VAR_SCOPE_READONLY, &mca_rcache_base_vma_tree_items_max);
159-
160-
mca_rcache_base_vma_tree_items_inc = TREE_ITEMS_INC;
161-
(void) mca_base_framework_var_register (&opal_rcache_base_framework, "vma_tree_items_inc",
162-
"Number of VMA tree items to allocate at a time (default: "
163-
STRINGIFY(TREE_ITEMS_INC) ")", MCA_BASE_VAR_TYPE_UNSIGNED_INT,
164-
NULL, MCA_BASE_VAR_BIND_NO_OBJECT, 0, OPAL_INFO_LVL_6,
165-
MCA_BASE_VAR_SCOPE_READONLY, &mca_rcache_base_vma_tree_items_inc);
166-
167134
return OPAL_SUCCESS;
168135
}
169136

opal/mca/rcache/base/rcache_base_vma.c

Lines changed: 8 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
* Copyright (c) 2009-2013 Cisco Systems, Inc. All rights reserved.
1515
* Copyright (c) 2009 IBM Corporation. All rights reserved.
1616
* Copyright (c) 2013 NVIDIA Corporation. All rights reserved.
17-
* Copyright (c) 2015-2017 Los Alamos National Security, LLC. All rights
17+
* Copyright (c) 2015-2018 Los Alamos National Security, LLC. All rights
1818
* reserved.
1919
*
2020
* $COPYRIGHT$
@@ -53,14 +53,6 @@ OBJ_CLASS_INSTANCE(mca_rcache_base_vma_module_t, opal_object_t,
5353

5454
mca_rcache_base_vma_module_t *mca_rcache_base_vma_module_alloc (void)
5555
{
56-
if (!mca_rcache_base_vma_tree_items_inited) {
57-
opal_free_list_init (&mca_rcache_base_vma_tree_items, sizeof (mca_rcache_base_vma_item_t),
58-
8, OBJ_CLASS(mca_rcache_base_vma_item_t), 0, 8,
59-
mca_rcache_base_vma_tree_items_min, mca_rcache_base_vma_tree_items_max,
60-
mca_rcache_base_vma_tree_items_inc, NULL, 0, NULL, NULL, NULL);
61-
mca_rcache_base_vma_tree_items_inited = true;
62-
}
63-
6456
return OBJ_NEW(mca_rcache_base_vma_module_t);
6557
}
6658

@@ -154,15 +146,20 @@ int mca_rcache_base_vma_delete (mca_rcache_base_vma_module_t *vma_module,
154146
}
155147

156148
int mca_rcache_base_vma_iterate (mca_rcache_base_vma_module_t *vma_module,
157-
unsigned char *base, size_t size,
149+
unsigned char *base, size_t size, bool partial_ok,
158150
int (*callback_fn) (struct mca_rcache_base_registration_t *, void *),
159151
void *ctx)
160152
{
161-
return mca_rcache_base_vma_tree_iterate (vma_module, base, size, callback_fn, ctx);
153+
return mca_rcache_base_vma_tree_iterate (vma_module, base, size, partial_ok, callback_fn, ctx);
162154
}
163155

164156
void mca_rcache_base_vma_dump_range (mca_rcache_base_vma_module_t *vma_module,
165157
unsigned char *base, size_t size, char *msg)
166158
{
167159
mca_rcache_base_vma_tree_dump_range (vma_module, base, size, msg);
168160
}
161+
162+
size_t mca_rcache_base_vma_size (mca_rcache_base_vma_module_t *vma_module)
163+
{
164+
return mca_rcache_base_vma_tree_size (vma_module);
165+
}

opal/mca/rcache/base/rcache_base_vma.h

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
*
1414
* Copyright (c) 2006 Voltaire. All rights reserved.
1515
* Copyright (c) 2009 IBM Corporation. All rights reserved.
16-
* Copyright (c) 2015-2017 Los Alamos National Security, LLC. All rights
16+
* Copyright (c) 2015-2018 Los Alamos National Security, LLC. All rights
1717
* reserved.
1818
*
1919
* $COPYRIGHT$
@@ -33,7 +33,7 @@
3333

3434
#include "opal_config.h"
3535
#include "opal/class/opal_list.h"
36-
#include "opal/class/opal_rb_tree.h"
36+
#include "opal/class/opal_interval_tree.h"
3737
#include "opal/class/opal_lifo.h"
3838

3939
BEGIN_C_DECLS
@@ -42,7 +42,7 @@ struct mca_rcache_base_registration_t;
4242

4343
struct mca_rcache_base_vma_module_t {
4444
opal_object_t super;
45-
opal_rb_tree_t rb_tree;
45+
opal_interval_tree_t tree;
4646
opal_list_t vma_list;
4747
opal_lifo_t vma_gc_lifo;
4848
size_t reg_cur_cache_size;
@@ -77,6 +77,7 @@ void mca_rcache_base_vma_dump_range (mca_rcache_base_vma_module_t *vma_module,
7777
* @param[in] vma_module vma tree
7878
* @param[in] base base address of region
7979
* @param[in] size size of region
80+
* @param[in] partial_ok partial overlap of range is ok
8081
* @param[in] callback_fn function to call for each matching registration handle
8182
* @param[in] ctx callback context
8283
*
@@ -87,10 +88,12 @@ void mca_rcache_base_vma_dump_range (mca_rcache_base_vma_module_t *vma_module,
8788
* other than OPAL_SUCCESS.
8889
*/
8990
int mca_rcache_base_vma_iterate (mca_rcache_base_vma_module_t *vma_module,
90-
unsigned char *base, size_t size,
91+
unsigned char *base, size_t size, bool partial_ok,
9192
int (*callback_fn) (struct mca_rcache_base_registration_t *, void *),
9293
void *ctx);
9394

95+
size_t mca_rcache_base_vma_size (mca_rcache_base_vma_module_t *vma_module);
96+
9497
END_C_DECLS
9598

9699
#endif /* MCA_RCACHE_BASE_VMA_H */

0 commit comments

Comments
 (0)