Skip to content

Commit b90da99

Browse files
authored
Merge pull request #1895 from PDeveze/Patchs-on-btl-portals4
btl/portals4: Take into account the limitation of portals4 (max_msg_size)
2 parents 3be6052 + 6d6ec66 commit b90da99

File tree

4 files changed

+95
-58
lines changed

4 files changed

+95
-58
lines changed

opal/mca/btl/portals4/btl_portals4.c

Lines changed: 57 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,6 @@ btl_portals4_init_interface(void)
9999

100100
/* Create recv_idx portal table entry */
101101
ret = PtlPTAlloc(portals4_btl->portals_ni_h,
102-
PTL_PT_ONLY_USE_ONCE |
103102
PTL_PT_ONLY_TRUNCATE,
104103
portals4_btl->recv_eq_h,
105104
REQ_BTL_TABLE_ID,
@@ -429,7 +428,7 @@ mca_btl_portals4_add_procs(struct mca_btl_base_module_t* btl_base,
429428
opal_bitmap_set_bit(reachable, i);
430429

431430
OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
432-
"add_procs: rank=%x nid=%x pid=%x for NI %d\n",
431+
"add_procs: rank=%lx nid=%x pid=%x for NI %d",
433432
i,
434433
btl_peer_data[i]->ptl_proc.phys.nid,
435434
btl_peer_data[i]->ptl_proc.phys.pid,
@@ -591,7 +590,7 @@ mca_btl_portals4_prepare_src(struct mca_btl_base_module_t* btl_base,
591590
ret = opal_convertor_pack(convertor, &iov, &iov_count, &max_data );
592591

593592
*size = max_data;
594-
if ( ret < 0 ) {
593+
if (ret < 0) {
595594
mca_btl_portals4_free(btl_base, (mca_btl_base_descriptor_t *) frag);
596595
return NULL;
597596
}
@@ -624,65 +623,75 @@ mca_btl_portals4_register_mem(mca_btl_base_module_t *btl_base,
624623
}
625624

626625
handle->key = OPAL_THREAD_ADD64(&(portals4_btl->portals_rdma_key), 1);
626+
handle->remote_offset = 0;
627627

628628
OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
629-
"mca_btl_portals4_register_mem NI=%d base=%p size=%ld handle=%p key=%ld\n",
630-
portals4_btl->interface_num, base, size, (void *)handle, handle->key));
631-
632-
if (MCA_BTL_FLAGS_PUT == flags) {
633-
/* create a match entry */
634-
me.start = base;
635-
me.length = size;
636-
me.ct_handle = PTL_CT_NONE;
637-
me.min_free = 0;
638-
me.uid = PTL_UID_ANY;
639-
me.options = PTL_ME_OP_GET | PTL_ME_USE_ONCE |
640-
PTL_ME_EVENT_LINK_DISABLE |
641-
PTL_ME_EVENT_COMM_DISABLE |
642-
PTL_ME_EVENT_UNLINK_DISABLE;
643-
644-
if (mca_btl_portals4_component.use_logical) {
645-
me.match_id.rank = endpoint->ptl_proc.rank;
646-
} else {
647-
me.match_id.phys.nid = endpoint->ptl_proc.phys.nid;
648-
me.match_id.phys.pid = endpoint->ptl_proc.phys.pid;
649-
}
650-
me.match_bits = handle->key;
651-
me.ignore_bits = BTL_PORTALS4_PROTOCOL_MASK |
652-
BTL_PORTALS4_CONTEXT_MASK |
653-
BTL_PORTALS4_SOURCE_MASK;
654-
me.ignore_bits = 0;
629+
"mca_btl_portals4_register_mem NI=%d base=%p size=%ld handle=%p key=%ld flags=%d",
630+
portals4_btl->interface_num, base, size, (void *)handle, handle->key, flags));
631+
632+
/* create a match entry */
633+
me.start = base;
634+
me.length = size;
635+
me.ct_handle = PTL_CT_NONE;
636+
me.min_free = 0;
637+
me.uid = PTL_UID_ANY;
638+
me.options = PTL_ME_OP_GET |
639+
PTL_ME_EVENT_LINK_DISABLE |
640+
PTL_ME_EVENT_COMM_DISABLE |
641+
PTL_ME_EVENT_UNLINK_DISABLE;
642+
643+
if (mca_btl_portals4_component.use_logical) {
644+
me.match_id.rank = endpoint->ptl_proc.rank;
645+
} else {
646+
me.match_id.phys.nid = endpoint->ptl_proc.phys.nid;
647+
me.match_id.phys.pid = endpoint->ptl_proc.phys.pid;
648+
}
649+
me.match_bits = handle->key;
650+
me.ignore_bits = BTL_PORTALS4_PROTOCOL_MASK |
651+
BTL_PORTALS4_CONTEXT_MASK |
652+
BTL_PORTALS4_SOURCE_MASK;
653+
me.ignore_bits = 0;
655654

656-
ret = PtlMEAppend(portals4_btl->portals_ni_h,
657-
portals4_btl->recv_idx,
658-
&me,
659-
PTL_PRIORITY_LIST,
660-
handle,
661-
&(handle->me_h));
662-
if (PTL_OK != ret) {
663-
opal_output_verbose(1, opal_btl_base_framework.framework_output,
664-
"%s:%d: PtlMEAppend failed: %d\n",
665-
__FILE__, __LINE__, ret);
666-
OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, -1);
667-
return NULL;
668-
}
669-
OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
670-
"PtlMEAppend (mca_btl_portals4_register_mem) handle=%p, me_h=%d start=%p length=%ld rank=%x nid=%x pid=%x match_bits=%lx\n",
671-
(void *)handle, handle->me_h, me.start, me.length,
672-
me.match_id.rank, me.match_id.phys.nid, me.match_id.phys.pid, me.match_bits));
655+
ret = PtlMEAppend(portals4_btl->portals_ni_h,
656+
portals4_btl->recv_idx,
657+
&me,
658+
PTL_PRIORITY_LIST,
659+
handle,
660+
&(handle->me_h));
661+
if (PTL_OK != ret) {
662+
opal_output_verbose(1, opal_btl_base_framework.framework_output,
663+
"%s:%d: PtlMEAppend failed: %d\n",
664+
__FILE__, __LINE__, ret);
665+
OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, -1);
666+
return NULL;
673667
}
668+
OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
669+
"PtlMEAppend (mca_btl_portals4_register_mem) handle=%p, me_h=%d start=%p length=%ld rank=%x nid=%x pid=%x match_bits=%lx\n",
670+
(void *)handle, handle->me_h, me.start, me.length,
671+
me.match_id.rank, me.match_id.phys.nid, me.match_id.phys.pid, me.match_bits));
674672
return handle;
675673
}
676674

677675
int
678676
mca_btl_portals4_deregister_mem(mca_btl_base_module_t *btl_base,
679677
mca_btl_base_registration_handle_t *handle)
680678
{
679+
int ret;
681680
struct mca_btl_portals4_module_t *portals4_btl = (struct mca_btl_portals4_module_t*) btl_base;
682681

683682
OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
684-
"mca_btl_portals4_deregister_mem NI=%d handle=%p key=%ld\n",
685-
portals4_btl->interface_num, (void *)handle, handle->key));
683+
"mca_btl_portals4_deregister_mem NI=%d handle=%p key=%ld me_h=%d\n",
684+
portals4_btl->interface_num, (void *)handle, handle->key, handle->me_h));
685+
686+
if (!PtlHandleIsEqual(handle->me_h, PTL_INVALID_HANDLE)) {
687+
ret = PtlMEUnlink(handle->me_h);
688+
if (PTL_OK != ret) {
689+
opal_output_verbose(1, opal_btl_base_framework.framework_output,
690+
"%s:%d: PtlMEUnlink failed: %d\n",__FILE__, __LINE__, ret);
691+
return OPAL_ERROR;
692+
}
693+
handle->me_h = PTL_INVALID_HANDLE;
694+
}
686695

687696
free(handle);
688697

opal/mca/btl/portals4/btl_portals4.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,9 @@ struct mca_btl_portals4_component_t {
7979

8080
/** Event queue handles table used in PtlEQPoll */
8181
ptl_handle_eq_t *eqs_h;
82+
83+
/** Upper limit for message sizes */
84+
unsigned long portals_max_msg_size;
8285
};
8386

8487
typedef struct mca_btl_portals4_component_t mca_btl_portals4_component_t;
@@ -255,6 +258,8 @@ struct mca_btl_base_registration_handle_t {
255258
ptl_match_bits_t key;
256259
/** Portals4 me_h */
257260
ptl_handle_me_t me_h;
261+
/** Remote offset */
262+
ptl_size_t remote_offset;
258263
};
259264

260265
/*

opal/mca/btl/portals4/btl_portals4_component.c

Lines changed: 29 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,18 @@ mca_btl_portals4_component_register(void)
201201
OPAL_INFO_LVL_5,
202202
MCA_BASE_VAR_SCOPE_READONLY,
203203
&(mca_btl_portals4_component.portals_recv_mds_size));
204+
205+
mca_btl_portals4_component.portals_max_msg_size = PTL_SIZE_MAX;
206+
(void) mca_base_component_var_register(&mca_btl_portals4_component.super.btl_version,
207+
"max_msg_size",
208+
"Max size supported by portals4 (above that, a message is cut into messages less than that size)",
209+
MCA_BASE_VAR_TYPE_UNSIGNED_LONG,
210+
NULL,
211+
0,
212+
0,
213+
OPAL_INFO_LVL_5,
214+
MCA_BASE_VAR_SCOPE_READONLY,
215+
&(mca_btl_portals4_component.portals_max_msg_size));
204216
return OPAL_SUCCESS;
205217
}
206218

@@ -216,6 +228,8 @@ mca_btl_portals4_component_open(void)
216228
mca_btl_portals4_module.super.btl_eager_limit = 32 * 1024;
217229
mca_btl_portals4_module.super.btl_rndv_eager_limit = 32 * 1024;
218230
mca_btl_portals4_module.super.btl_max_send_size = 64 * 1024;
231+
if (mca_btl_portals4_module.super.btl_max_send_size > mca_btl_portals4_component.portals_max_msg_size)
232+
mca_btl_portals4_module.super.btl_max_send_size = mca_btl_portals4_component.portals_max_msg_size;
219233
mca_btl_portals4_module.super.btl_rdma_pipeline_send_length = 64 * 1024;
220234
mca_btl_portals4_module.super.btl_rdma_pipeline_frag_size = INT_MAX;
221235
mca_btl_portals4_module.super.btl_min_rdma_pipeline_size = 0;
@@ -227,6 +241,8 @@ mca_btl_portals4_component_open(void)
227241
mca_btl_portals4_module.super.btl_registration_handle_size = sizeof (mca_btl_base_registration_handle_t);
228242

229243
mca_btl_portals4_module.super.btl_get_limit = SIZE_MAX;
244+
if (mca_btl_portals4_module.super.btl_get_limit > mca_btl_portals4_component.portals_max_msg_size)
245+
mca_btl_portals4_module.super.btl_get_limit = mca_btl_portals4_component.portals_max_msg_size;
230246
mca_btl_portals4_module.super.btl_put_limit = 0; /* not implemented */
231247
mca_btl_portals4_module.super.btl_get_alignment = 0;
232248
mca_btl_portals4_module.super.btl_put_alignment = 0;
@@ -293,6 +309,7 @@ static mca_btl_base_module_t** mca_btl_portals4_component_init(int *num_btls,
293309
mca_btl_base_module_t **btls = NULL;
294310
unsigned int ret, interface;
295311
ptl_handle_ni_t *portals4_nis_h = NULL;
312+
ptl_ni_limits_t portals4_ni_limits ;
296313
ptl_process_t *ptl_process_ids = NULL;
297314

298315
opal_output_verbose(50, opal_btl_base_framework.framework_output, "mca_btl_portals4_component_init\n");
@@ -325,22 +342,30 @@ static mca_btl_base_module_t** mca_btl_portals4_component_init(int *num_btls,
325342
PTL_NI_LOGICAL | PTL_NI_MATCHING,
326343
PTL_PID_ANY, /* let library assign our pid */
327344
NULL, /* no desired limits */
328-
NULL, /* actual limits */
345+
&portals4_ni_limits, /* actual limits */
329346
&portals4_nis_h[*num_btls] /* our interface handle */
330347
);
331348
else ret = PtlNIInit((1 == mca_btl_portals4_component.max_btls) ? PTL_IFACE_DEFAULT : interface,
332349
PTL_NI_PHYSICAL | PTL_NI_MATCHING,
333350
PTL_PID_ANY, /* let library assign our pid */
334351
NULL, /* no desired limits */
335-
NULL, /* actual limits */
352+
&portals4_ni_limits, /* actual limits */
336353
&portals4_nis_h[*num_btls] /* our interface handle */
337354
);
338355
if (PTL_OK != ret) {
339356
opal_output_verbose(90, opal_btl_base_framework.framework_output,
340357
"%s:%d: PtlNIInit failed for NI %d: %d\n", __FILE__, __LINE__, interface, ret);
341358
}
342359
else {
343-
OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, "PtlNIInit OK for NI %d\n", *num_btls));
360+
if (mca_btl_portals4_component.portals_max_msg_size > portals4_ni_limits.max_msg_size)
361+
mca_btl_portals4_component.portals_max_msg_size = portals4_ni_limits.max_msg_size;
362+
if (mca_btl_portals4_module.super.btl_max_send_size > portals4_ni_limits.max_msg_size)
363+
mca_btl_portals4_module.super.btl_max_send_size = portals4_ni_limits.max_msg_size;
364+
if (mca_btl_portals4_module.super.btl_get_limit > portals4_ni_limits.max_msg_size)
365+
mca_btl_portals4_module.super.btl_get_limit = portals4_ni_limits.max_msg_size;
366+
OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, "PtlNIInit OK for NI %d max_msg_size=%ld",
367+
*num_btls, mca_btl_portals4_component.portals_max_msg_size));
368+
344369
(*num_btls)++;
345370
}
346371
}
@@ -698,7 +723,7 @@ mca_btl_portals4_component_progress(void)
698723
frag->peer_proc,
699724
portals4_btl->recv_idx,
700725
frag->match_bits, /* match bits */
701-
0,
726+
0, // Warning : should be ev.remote_offset but it is not defined,
702727
frag);
703728
if (OPAL_UNLIKELY(PTL_OK != ret)) {
704729
opal_output_verbose(1, opal_btl_base_framework.framework_output,

opal/mca/btl/portals4/btl_portals4_rdma.c

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,6 @@ mca_btl_portals4_get(struct mca_btl_base_module_t* btl_base,
5050
{
5151
mca_btl_portals4_module_t *portals4_btl = (mca_btl_portals4_module_t *) btl_base;
5252
mca_btl_portals4_frag_t *frag = NULL;
53-
ptl_md_t md;
5453
int ret;
5554

5655
/* reserve space in the event queue for rdma operations immediately */
@@ -83,25 +82,24 @@ mca_btl_portals4_get(struct mca_btl_base_module_t* btl_base,
8382
frag->length = size;
8483
frag->peer_proc = btl_peer->ptl_proc;
8584

86-
OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, "PtlGet start=%p length=%ld nid=%x pid=%x match_bits=%lx\n",
87-
md.start, md.length, btl_peer->ptl_proc.phys.nid, btl_peer->ptl_proc.phys.pid, frag->match_bits));
85+
OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, "PtlGet offset=%p length=%ld remote_offset=%p nid=%x pid=%x match_bits=%lx",
86+
local_address, size, (void*)local_handle->remote_offset, btl_peer->ptl_proc.phys.nid, btl_peer->ptl_proc.phys.pid, frag->match_bits));
8887

8988
ret = PtlGet(portals4_btl->send_md_h,
9089
(ptl_size_t) local_address,
9190
size,
9291
btl_peer->ptl_proc,
9392
portals4_btl->recv_idx,
9493
frag->match_bits, /* match bits */
95-
0,
94+
local_handle->remote_offset,
9695
frag);
9796
if (OPAL_UNLIKELY(PTL_OK != ret)) {
9897
opal_output_verbose(1, opal_btl_base_framework.framework_output,
9998
"%s:%d: PtlGet failed: %d",
10099
__FILE__, __LINE__, ret);
101100
return OPAL_ERROR;
102101
}
103-
OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, "SUCCESS: PtlGet start=%p length=%ld nid=%x pid=%x match_bits=%lx\n",
104-
md.start, md.length, btl_peer->ptl_proc.phys.nid, btl_peer->ptl_proc.phys.pid, frag->match_bits));
102+
local_handle->remote_offset += size;
105103

106104
return OPAL_SUCCESS;
107105
}

0 commit comments

Comments
 (0)