Skip to content

Commit 64d5539

Browse files
authored
Merge pull request #2383 from hjelmn/v2.x_btl_self
btl/self: rewrite to decrease memory usage
2 parents e439bda + 99e4b2e commit 64d5539

File tree

6 files changed

+236
-407
lines changed

6 files changed

+236
-407
lines changed

opal/mca/btl/self/btl_self.c

Lines changed: 130 additions & 127 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
* Copyright (c) 2004-2005 The Regents of the University of California.
1212
* All rights reserved.
1313
* Copyright (c) 2012-2013 Inria. All rights reserved.
14-
* Copyright (c) 2014-2015 Los Alamos National Security, LLC. All rights
14+
* Copyright (c) 2014-2016 Los Alamos National Security, LLC. All rights
1515
* reserved.
1616
* $COPYRIGHT$
1717
*
@@ -24,68 +24,55 @@
2424

2525
#include <string.h>
2626
#include <stdlib.h>
27-
#include <sys/types.h>
28-
#include <sys/stat.h>
29-
#include <fcntl.h>
30-
#include <errno.h>
3127

3228
#include "opal/class/opal_bitmap.h"
3329
#include "opal/datatype/opal_convertor.h"
34-
#include "opal/sys/atomic.h"
35-
#include "opal/mca/btl/btl.h"
36-
#include "opal/mca/mpool/base/base.h"
3730
#include "btl_self.h"
3831
#include "btl_self_frag.h"
3932
#include "opal/util/proc.h"
4033

41-
static int mca_btl_self_put (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, void *local_address,
42-
uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle,
43-
mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
44-
int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata);
45-
46-
static int mca_btl_self_get (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, void *local_address,
47-
uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle,
48-
mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
49-
int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata);
50-
51-
mca_btl_base_module_t mca_btl_self = {
52-
.btl_component = &mca_btl_self_component.super,
53-
.btl_add_procs = mca_btl_self_add_procs,
54-
.btl_del_procs = mca_btl_self_del_procs,
55-
.btl_finalize = mca_btl_self_finalize,
56-
.btl_alloc = mca_btl_self_alloc,
57-
.btl_free = mca_btl_self_free,
58-
.btl_prepare_src = mca_btl_self_prepare_src,
59-
.btl_send = mca_btl_self_send,
60-
.btl_put = mca_btl_self_put,
61-
.btl_get = mca_btl_self_get,
62-
.btl_dump = mca_btl_base_dump,
63-
.btl_ft_event = NULL
64-
};
65-
66-
67-
int mca_btl_self_add_procs( struct mca_btl_base_module_t* btl,
68-
size_t nprocs,
69-
struct opal_proc_t **procs,
70-
struct mca_btl_base_endpoint_t **peers,
71-
opal_bitmap_t* reachability )
34+
/**
35+
* PML->BTL notification of change in the process list.
36+
* PML->BTL Notification that a receive fragment has been matched.
37+
* Called for a message that is sent from a process with the virtual
38+
* address of the shared memory segment being different than that of
39+
* the receiver.
40+
*
41+
* @param btl (IN)
42+
* @param proc (IN)
43+
* @param peer (OUT)
44+
* @return OPAL_SUCCESS or error status on failure.
45+
*
46+
*/
47+
static int mca_btl_self_add_procs (struct mca_btl_base_module_t *btl, size_t nprocs,
48+
struct opal_proc_t **procs,
49+
struct mca_btl_base_endpoint_t **peers,
50+
opal_bitmap_t* reachability)
7251
{
73-
int i;
74-
75-
for( i = 0; i < (int)nprocs; i++ ) {
52+
for (int i = 0; i < (int)nprocs; i++ ) {
7653
if( 0 == opal_compare_proc(procs[i]->proc_name, OPAL_PROC_MY_NAME) ) {
7754
opal_bitmap_set_bit( reachability, i );
55+
/* need to return something to keep the bml from ignoring us */
56+
peers[i] = (struct mca_btl_base_endpoint_t *) 1;
7857
break; /* there will always be only one ... */
7958
}
8059
}
60+
8161
return OPAL_SUCCESS;
8262
}
8363

84-
85-
int mca_btl_self_del_procs( struct mca_btl_base_module_t* btl,
86-
size_t nprocs,
87-
struct opal_proc_t **procs,
88-
struct mca_btl_base_endpoint_t **peers )
64+
/**
65+
* PML->BTL notification of change in the process list.
66+
*
67+
* @param btl (IN) BTL instance
68+
* @param proc (IN) Peer process
69+
* @param peer (IN) Peer addressing information.
70+
* @return Status indicating if cleanup was successful
71+
*
72+
*/
73+
static int mca_btl_self_del_procs (struct mca_btl_base_module_t *btl, size_t nprocs,
74+
struct opal_proc_t **procs,
75+
struct mca_btl_base_endpoint_t **peers)
8976
{
9077
return OPAL_SUCCESS;
9178
}
@@ -104,7 +91,7 @@ int mca_btl_self_del_procs( struct mca_btl_base_module_t* btl,
10491
*
10592
*/
10693

107-
int mca_btl_self_finalize(struct mca_btl_base_module_t* btl)
94+
static int mca_btl_self_finalize(struct mca_btl_base_module_t* btl)
10895
{
10996
return OPAL_SUCCESS;
11097
}
@@ -116,29 +103,29 @@ int mca_btl_self_finalize(struct mca_btl_base_module_t* btl)
116103
* @param btl (IN) BTL module
117104
* @param size (IN) Request segment size.
118105
*/
119-
mca_btl_base_descriptor_t* mca_btl_self_alloc(
120-
struct mca_btl_base_module_t* btl,
121-
struct mca_btl_base_endpoint_t* endpoint,
122-
uint8_t order,
123-
size_t size,
124-
uint32_t flags)
106+
static mca_btl_base_descriptor_t *mca_btl_self_alloc (struct mca_btl_base_module_t *btl,
107+
struct mca_btl_base_endpoint_t *endpoint,
108+
uint8_t order, size_t size, uint32_t flags)
125109
{
126-
mca_btl_self_frag_t* frag = NULL;
110+
mca_btl_self_frag_t *frag = NULL;
127111

128-
if(size <= mca_btl_self.btl_eager_limit) {
112+
if (size <= MCA_BTL_SELF_MAX_INLINE_SIZE) {
113+
MCA_BTL_SELF_FRAG_ALLOC_RDMA(frag);
114+
} else if (size <= mca_btl_self.btl_eager_limit) {
129115
MCA_BTL_SELF_FRAG_ALLOC_EAGER(frag);
130116
} else if (size <= btl->btl_max_send_size) {
131117
MCA_BTL_SELF_FRAG_ALLOC_SEND(frag);
132118
}
119+
133120
if( OPAL_UNLIKELY(NULL == frag) ) {
134121
return NULL;
135122
}
136123

137-
frag->segment.seg_len = size;
138-
frag->base.des_flags = flags;
139-
frag->base.des_segments = &(frag->segment);
124+
frag->segments[0].seg_len = size;
140125
frag->base.des_segment_count = 1;
141-
return (mca_btl_base_descriptor_t*)frag;
126+
frag->base.des_flags = flags;
127+
128+
return &frag->base;
142129
}
143130

144131
/**
@@ -147,90 +134,57 @@ mca_btl_base_descriptor_t* mca_btl_self_alloc(
147134
* @param btl (IN) BTL module
148135
* @param segment (IN) Allocated segment.
149136
*/
150-
int mca_btl_self_free( struct mca_btl_base_module_t* btl,
151-
mca_btl_base_descriptor_t* des )
137+
static int mca_btl_self_free (struct mca_btl_base_module_t *btl, mca_btl_base_descriptor_t *des)
152138
{
153-
mca_btl_self_frag_t* frag = (mca_btl_self_frag_t*)des;
154-
155-
frag->base.des_segments = NULL;
156-
frag->base.des_segment_count = 0;
139+
MCA_BTL_SELF_FRAG_RETURN((mca_btl_self_frag_t *) des);
157140

158-
if(frag->size == mca_btl_self.btl_eager_limit) {
159-
MCA_BTL_SELF_FRAG_RETURN_EAGER(frag);
160-
} else if (frag->size == mca_btl_self.btl_max_send_size) {
161-
MCA_BTL_SELF_FRAG_RETURN_SEND(frag);
162-
} else {
163-
MCA_BTL_SELF_FRAG_RETURN_RDMA(frag);
164-
}
165141
return OPAL_SUCCESS;
166142
}
167143

168144

169145
/**
170-
* Prepare data for send/put
146+
* Prepare data for send
171147
*
172148
* @param btl (IN) BTL module
173149
*/
174-
struct mca_btl_base_descriptor_t*
175-
mca_btl_self_prepare_src( struct mca_btl_base_module_t* btl,
176-
struct mca_btl_base_endpoint_t* endpoint,
177-
struct opal_convertor_t* convertor,
178-
uint8_t order,
179-
size_t reserve,
180-
size_t* size,
181-
uint32_t flags )
150+
static struct mca_btl_base_descriptor_t *mca_btl_self_prepare_src (struct mca_btl_base_module_t* btl,
151+
struct mca_btl_base_endpoint_t *endpoint,
152+
struct opal_convertor_t *convertor,
153+
uint8_t order, size_t reserve,
154+
size_t *size, uint32_t flags)
182155
{
183-
mca_btl_self_frag_t* frag;
184-
struct iovec iov;
185-
uint32_t iov_count = 1;
186-
size_t max_data = *size;
187-
int rc;
156+
bool inline_send = !opal_convertor_need_buffers(convertor);
157+
size_t buffer_len = reserve + (inline_send ? 0 : *size);
158+
mca_btl_self_frag_t *frag;
188159

189-
/* non-contiguous data */
190-
if( opal_convertor_need_buffers(convertor) ||
191-
max_data < mca_btl_self.btl_max_send_size ||
192-
reserve != 0 ) {
193-
194-
MCA_BTL_SELF_FRAG_ALLOC_SEND(frag);
195-
if(OPAL_UNLIKELY(NULL == frag)) {
196-
return NULL;
197-
}
160+
frag = (mca_btl_self_frag_t *) mca_btl_self_alloc (btl, endpoint, order, buffer_len, flags);
161+
if (OPAL_UNLIKELY(NULL == frag)) {
162+
return NULL;
163+
}
198164

199-
if(reserve + max_data > frag->size) {
200-
max_data = frag->size - reserve;
201-
}
202-
iov.iov_len = max_data;
203-
iov.iov_base = (IOVBASE_TYPE*)((unsigned char*)(frag+1) + reserve);
165+
/* non-contiguous data */
166+
if (OPAL_UNLIKELY(!inline_send)) {
167+
struct iovec iov = {.iov_len = *size, .iov_base = (IOVBASE_TYPE *) ((uintptr_t) frag->data + reserve)};
168+
size_t max_data = *size;
169+
uint32_t iov_count = 1;
170+
int rc;
204171

205-
rc = opal_convertor_pack(convertor, &iov, &iov_count, &max_data );
172+
rc = opal_convertor_pack (convertor, &iov, &iov_count, &max_data);
206173
if(rc < 0) {
207-
MCA_BTL_SELF_FRAG_RETURN_SEND(frag);
174+
mca_btl_self_free (btl, &frag->base);
208175
return NULL;
209176
}
210-
frag->segment.seg_addr.pval = frag+1;
211-
frag->segment.seg_len = reserve + max_data;
177+
212178
*size = max_data;
213179
} else {
214-
MCA_BTL_SELF_FRAG_ALLOC_RDMA(frag);
215-
if(OPAL_UNLIKELY(NULL == frag)) {
216-
return NULL;
217-
}
218-
iov.iov_len = max_data;
219-
iov.iov_base = NULL;
180+
void *data_ptr;
220181

221-
/* convertor should return offset into users buffer */
222-
rc = opal_convertor_pack(convertor, &iov, &iov_count, &max_data );
223-
if(rc < 0) {
224-
MCA_BTL_SELF_FRAG_RETURN_RDMA(frag);
225-
return NULL;
226-
}
227-
frag->segment.seg_addr.lval = (uint64_t)(uintptr_t) iov.iov_base;
228-
frag->segment.seg_len = max_data;
229-
*size = max_data;
182+
opal_convertor_get_current_pointer (convertor, &data_ptr);
183+
184+
frag->segments[1].seg_addr.pval = data_ptr;
185+
frag->segments[1].seg_len = *size;
186+
frag->base.des_segment_count = 2;
230187
}
231-
frag->base.des_flags = flags;
232-
frag->base.des_segments = &frag->segment;
233-
frag->base.des_segment_count = 1;
234188

235189
return &frag->base;
236190
}
@@ -242,10 +196,10 @@ mca_btl_self_prepare_src( struct mca_btl_base_module_t* btl,
242196
* @param peer (IN) BTL peer addressing
243197
*/
244198

245-
int mca_btl_self_send( struct mca_btl_base_module_t* btl,
246-
struct mca_btl_base_endpoint_t* endpoint,
247-
struct mca_btl_base_descriptor_t* des,
248-
mca_btl_base_tag_t tag )
199+
static int mca_btl_self_send (struct mca_btl_base_module_t *btl,
200+
struct mca_btl_base_endpoint_t *endpoint,
201+
struct mca_btl_base_descriptor_t *des,
202+
mca_btl_base_tag_t tag)
249203
{
250204
mca_btl_active_message_callback_t* reg;
251205
int btl_ownership = (des->des_flags & MCA_BTL_DES_FLAGS_BTL_OWNERSHIP);
@@ -264,6 +218,39 @@ int mca_btl_self_send( struct mca_btl_base_module_t* btl,
264218
return 1;
265219
}
266220

221+
static int mca_btl_self_sendi (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint,
222+
struct opal_convertor_t *convertor, void *header, size_t header_size,
223+
size_t payload_size, uint8_t order, uint32_t flags, mca_btl_base_tag_t tag,
224+
mca_btl_base_descriptor_t **descriptor)
225+
{
226+
mca_btl_base_descriptor_t *frag;
227+
228+
if (!payload_size || !opal_convertor_need_buffers(convertor)) {
229+
void *data_ptr = NULL;
230+
if (payload_size) {
231+
opal_convertor_get_current_pointer (convertor, &data_ptr);
232+
}
233+
234+
mca_btl_base_segment_t segments[2] = {{.seg_addr.pval = header, .seg_len = header_size},
235+
{.seg_addr.pval = data_ptr, .seg_len = payload_size}};
236+
mca_btl_base_descriptor_t des = {.des_segments = segments, .des_segment_count = payload_size ? 2 : 1,
237+
.des_flags = 0};
238+
239+
(void) mca_btl_self_send (btl, endpoint, &des, tag);
240+
return OPAL_SUCCESS;
241+
}
242+
243+
frag = mca_btl_self_prepare_src (btl, endpoint, convertor, order, header_size, &payload_size,
244+
flags | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP);
245+
if (NULL == frag) {
246+
*descriptor = NULL;
247+
return OPAL_ERR_OUT_OF_RESOURCE;
248+
}
249+
250+
memcpy (frag->des_segments[0].seg_addr.pval, header, header_size);
251+
(void) mca_btl_self_send (btl, endpoint, frag, tag);
252+
return OPAL_SUCCESS;
253+
}
267254

268255
static int mca_btl_self_put (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, void *local_address,
269256
uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle,
@@ -288,3 +275,19 @@ static int mca_btl_self_get (mca_btl_base_module_t *btl, struct mca_btl_base_end
288275

289276
return OPAL_SUCCESS;
290277
}
278+
279+
/* btl self module */
280+
mca_btl_base_module_t mca_btl_self = {
281+
.btl_component = &mca_btl_self_component.super,
282+
.btl_add_procs = mca_btl_self_add_procs,
283+
.btl_del_procs = mca_btl_self_del_procs,
284+
.btl_finalize = mca_btl_self_finalize,
285+
.btl_alloc = mca_btl_self_alloc,
286+
.btl_free = mca_btl_self_free,
287+
.btl_prepare_src = mca_btl_self_prepare_src,
288+
.btl_send = mca_btl_self_send,
289+
.btl_sendi = mca_btl_self_sendi,
290+
.btl_put = mca_btl_self_put,
291+
.btl_get = mca_btl_self_get,
292+
.btl_dump = mca_btl_base_dump,
293+
};

0 commit comments

Comments
 (0)