Skip to content

Commit 210534a

Browse files
authored
Merge pull request #1850 from PDeveze/Patchs-on-mtl-portals4
Patches on mtl portals4
2 parents 4bc5048 + 9cac32b commit 210534a

File tree

6 files changed

+74
-33
lines changed

6 files changed

+74
-33
lines changed

ompi/mca/mtl/portals4/mtl_portals4.c

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -86,9 +86,11 @@ portals4_init_interface(void)
8686

8787
/* Create send and long message (read) portal table entries */
8888
ret = PtlPTAlloc(ompi_mtl_portals4.ni_h,
89+
#if OMPI_MTL_PORTALS4_FLOW_CONTROL
90+
PTL_PT_FLOWCTRL |
91+
#endif
8992
PTL_PT_ONLY_USE_ONCE |
90-
PTL_PT_ONLY_TRUNCATE |
91-
PTL_PT_FLOWCTRL,
93+
PTL_PT_ONLY_TRUNCATE,
9294
ompi_mtl_portals4.recv_eq_h,
9395
REQ_RECV_TABLE_ID,
9496
&ompi_mtl_portals4.recv_idx);

ompi/mca/mtl/portals4/mtl_portals4.h

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -42,23 +42,23 @@ struct mca_mtl_portals4_module_t {
4242
mca_mtl_base_module_t base;
4343

4444
/* add_procs() can get called multiple times. This prevents multiple calls to portals4_init_interface(). */
45-
int need_init;
45+
int32_t need_init;
4646

4747
/* Use the logical to physical table to accelerate portals4 addressing: 1 (true) : 0 (false) */
48-
int use_logical;
48+
int32_t use_logical;
4949
/* Use flow control: 1 (true) : 0 (false) */
50-
int use_flowctl;
50+
int32_t use_flowctl;
5151

5252
/** Eager limit; messages greater than this use a rendezvous protocol */
53-
unsigned long long eager_limit;
53+
uint64_t eager_limit;
5454
/** Size of short message blocks */
55-
unsigned long long recv_short_size;
55+
uint64_t recv_short_size;
5656
/** Number of short message blocks which should be created during startup */
57-
int recv_short_num;
57+
uint32_t recv_short_num;
5858
/** Length of the send event queues */
59-
int send_queue_size;
59+
uint32_t send_queue_size;
6060
/** Length of the receive event queues */
61-
int recv_queue_size;
61+
uint32_t recv_queue_size;
6262
/** Protocol for long message transfer */
6363
enum { eager, rndv } protocol;
6464

ompi/mca/mtl/portals4/mtl_portals4_component.c

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -224,7 +224,7 @@ ompi_mtl_portals4_component_open(void)
224224
ompi_mtl_portals4.send_eq_h = PTL_INVALID_HANDLE;
225225
ompi_mtl_portals4.recv_eq_h = PTL_INVALID_HANDLE;
226226
ompi_mtl_portals4.zero_md_h = PTL_INVALID_HANDLE;
227-
227+
ompi_mtl_portals4.send_md_h = PTL_INVALID_HANDLE;
228228
ompi_mtl_portals4.long_overflow_me_h = PTL_INVALID_HANDLE;
229229
ompi_mtl_portals4.recv_idx = (ptl_pt_index_t) ~0UL;
230230
ompi_mtl_portals4.read_idx = (ptl_pt_index_t) ~0UL;
@@ -277,6 +277,7 @@ ompi_mtl_portals4_component_init(bool enable_progress_threads,
277277
{
278278
int ret;
279279
ptl_process_t id;
280+
ptl_ni_limits_t actual_limits;
280281

281282
if (enable_mpi_threads && ompi_mpi_thread_multiple) {
282283
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
@@ -298,13 +299,13 @@ ompi_mtl_portals4_component_init(bool enable_progress_threads,
298299
PTL_NI_LOGICAL | PTL_NI_MATCHING,
299300
PTL_PID_ANY,
300301
NULL,
301-
NULL,
302+
&actual_limits,
302303
&ompi_mtl_portals4.ni_h);
303304
else ret = PtlNIInit(PTL_IFACE_DEFAULT,
304305
PTL_NI_PHYSICAL | PTL_NI_MATCHING,
305306
PTL_PID_ANY,
306307
NULL,
307-
NULL,
308+
&actual_limits,
308309
&ompi_mtl_portals4.ni_h);
309310
if (PTL_OK != ret) {
310311
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
@@ -313,6 +314,25 @@ ompi_mtl_portals4_component_init(bool enable_progress_threads,
313314
goto error;
314315
}
315316

317+
if (ompi_comm_rank(MPI_COMM_WORLD) == 0) {
318+
opal_output_verbose(10, ompi_mtl_base_framework.framework_output, "max_entries=%d", actual_limits.max_entries);
319+
opal_output_verbose(10, ompi_mtl_base_framework.framework_output, "max_unexpected_headers=%d", actual_limits.max_unexpected_headers);
320+
opal_output_verbose(10, ompi_mtl_base_framework.framework_output, "max_mds=%d", actual_limits.max_mds);
321+
opal_output_verbose(10, ompi_mtl_base_framework.framework_output, "max_eqs=%d", actual_limits.max_eqs);
322+
opal_output_verbose(10, ompi_mtl_base_framework.framework_output, "max_cts=%d", actual_limits.max_cts);
323+
opal_output_verbose(10, ompi_mtl_base_framework.framework_output, "max_pt_index=%d", actual_limits.max_pt_index);
324+
opal_output_verbose(10, ompi_mtl_base_framework.framework_output, "max_iovecs=%d", actual_limits.max_iovecs);
325+
opal_output_verbose(10, ompi_mtl_base_framework.framework_output, "max_list_size=%d", actual_limits.max_list_size);
326+
opal_output_verbose(10, ompi_mtl_base_framework.framework_output, "max_triggered_ops=%d", actual_limits.max_triggered_ops);
327+
opal_output_verbose(10, ompi_mtl_base_framework.framework_output, "max_msg_size=%ld", actual_limits.max_msg_size);
328+
opal_output_verbose(10, ompi_mtl_base_framework.framework_output, "max_atomic_size=%ld", actual_limits.max_atomic_size);
329+
opal_output_verbose(10, ompi_mtl_base_framework.framework_output, "max_fetch_atomic_size=%ld", actual_limits.max_fetch_atomic_size);
330+
opal_output_verbose(10, ompi_mtl_base_framework.framework_output, "max_waw_ordered_size=%ld", actual_limits.max_waw_ordered_size);
331+
opal_output_verbose(10, ompi_mtl_base_framework.framework_output, "max_war_ordered_size=%ld", actual_limits.max_war_ordered_size);
332+
opal_output_verbose(10, ompi_mtl_base_framework.framework_output, "max_volatile_size=%ld", actual_limits.max_volatile_size);
333+
opal_output_verbose(10, ompi_mtl_base_framework.framework_output, "features=%u", actual_limits.features);
334+
}
335+
316336
ret = PtlGetUid(ompi_mtl_portals4.ni_h, &ompi_mtl_portals4.uid);
317337
if (PTL_OK != ret) {
318338
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
@@ -344,6 +364,7 @@ ompi_mtl_portals4_component_init(bool enable_progress_threads,
344364
"My nid,pid = %x,%x",
345365
id.phys.nid, id.phys.pid));
346366

367+
ompi_mtl_portals4.base.mtl_max_tag = MTL_PORTALS4_MAX_TAG;
347368
return &ompi_mtl_portals4.base;
348369

349370
error:

ompi/mca/mtl/portals4/mtl_portals4_flowctl.c

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -219,15 +219,16 @@ ompi_mtl_portals4_flowctl_init(void)
219219
int
220220
ompi_mtl_portals4_flowctl_fini(void)
221221
{
222-
PtlPTFree(ompi_mtl_portals4.ni_h, ompi_mtl_portals4.flowctl_idx);
223-
PtlCTFree(ompi_mtl_portals4.flowctl.trigger_ct_h);
224222
PtlMEUnlink(ompi_mtl_portals4.flowctl.trigger_me_h);
225-
PtlCTFree(ompi_mtl_portals4.flowctl.alert_ct_h);
223+
PtlCTFree(ompi_mtl_portals4.flowctl.trigger_ct_h);
226224
PtlMEUnlink(ompi_mtl_portals4.flowctl.alert_me_h);
227-
PtlCTFree(ompi_mtl_portals4.flowctl.fanin_ct_h);
225+
PtlCTFree(ompi_mtl_portals4.flowctl.alert_ct_h);
228226
PtlMEUnlink(ompi_mtl_portals4.flowctl.fanin_me_h);
229-
PtlCTFree(ompi_mtl_portals4.flowctl.fanout_ct_h);
227+
PtlCTFree(ompi_mtl_portals4.flowctl.fanin_ct_h);
230228
PtlMEUnlink(ompi_mtl_portals4.flowctl.fanout_me_h);
229+
PtlCTFree(ompi_mtl_portals4.flowctl.fanout_ct_h);
230+
231+
PtlPTFree(ompi_mtl_portals4.ni_h, ompi_mtl_portals4.flowctl_idx);
231232

232233
return OMPI_SUCCESS;
233234
}

ompi/mca/mtl/portals4/mtl_portals4_recv.c

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,7 @@ ompi_mtl_portals4_recv_progress(ptl_event_t *ev,
9191
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
9292
"%s:%d: PTL_EVENT_PUT with ni_fail_type: %d",
9393
__FILE__, __LINE__, ev->ni_fail_type);
94+
ret = PTL_FAIL;
9495
goto callback_error;
9596
}
9697

@@ -118,7 +119,7 @@ ompi_mtl_portals4_recv_progress(ptl_event_t *ev,
118119
to pull the second part of the message. */
119120
ret = read_msg((char*) ptl_request->delivery_ptr + ompi_mtl_portals4.eager_limit,
120121
((msg_length > ptl_request->delivery_len) ?
121-
ptl_request->delivery_len : msg_length) - ompi_mtl_portals4.eager_limit,
122+
ptl_request->delivery_len : msg_length) - ompi_mtl_portals4.eager_limit,
122123
ev->initiator,
123124
ev->hdr_data,
124125
ompi_mtl_portals4.eager_limit,
@@ -159,6 +160,7 @@ ompi_mtl_portals4_recv_progress(ptl_event_t *ev,
159160
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
160161
"%s:%d: PTL_EVENT_REPLY with ni_fail_type: %d",
161162
__FILE__, __LINE__, ev->ni_fail_type);
163+
ret = PTL_FAIL;
162164
goto callback_error;
163165
}
164166

@@ -204,6 +206,7 @@ ompi_mtl_portals4_recv_progress(ptl_event_t *ev,
204206
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
205207
"%s:%d: PTL_EVENT_PUT_OVERFLOW with ni_fail_type: %d",
206208
__FILE__, __LINE__, ev->ni_fail_type);
209+
ret = PTL_FAIL;
207210
goto callback_error;
208211
}
209212

@@ -285,7 +288,7 @@ ompi_mtl_portals4_recv_progress(ptl_event_t *ev,
285288

286289
ret = read_msg((char*) ptl_request->delivery_ptr + ev->mlength,
287290
((msg_length > ptl_request->delivery_len) ?
288-
ptl_request->delivery_len : msg_length) - ev->mlength,
291+
ptl_request->delivery_len : msg_length) - ev->mlength,
289292
ev->initiator,
290293
ev->hdr_data,
291294
ev->mlength,

ompi/mca/mtl/portals4/mtl_portals4_recv_short.c

Lines changed: 27 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ static int
3737
ompi_mtl_portals4_recv_block_progress(ptl_event_t *ev,
3838
ompi_mtl_portals4_base_request_t* ptl_base_request)
3939
{
40+
int ret = OMPI_SUCCESS;
4041
ompi_mtl_portals4_recv_short_request_t *ptl_request =
4142
(ompi_mtl_portals4_recv_short_request_t*) ptl_base_request;
4243
ompi_mtl_portals4_recv_short_block_t *block = ptl_request->block;
@@ -58,10 +59,10 @@ ompi_mtl_portals4_recv_block_progress(ptl_event_t *ev,
5859
opal_list_remove_item(&ompi_mtl_portals4.recv_short_blocks,
5960
&block->base);
6061
OPAL_THREAD_UNLOCK(&ompi_mtl_portals4.short_block_mutex);
61-
ompi_mtl_portals4_recv_short_block_free(block);
62+
ret = ompi_mtl_portals4_recv_short_block_free(block);
6263
} else {
6364
OPAL_THREAD_UNLOCK(&ompi_mtl_portals4.short_block_mutex);
64-
ompi_mtl_portals4_activate_block(block);
65+
ret = ompi_mtl_portals4_activate_block(block);
6566
}
6667
break;
6768

@@ -76,6 +77,7 @@ ompi_mtl_portals4_recv_block_progress(ptl_event_t *ev,
7677
break;
7778

7879
case PTL_EVENT_AUTO_UNLINK:
80+
block->me_h = PTL_INVALID_HANDLE;
7981
OPAL_THREAD_LOCK(&ompi_mtl_portals4.short_block_mutex);
8082
switch (block->status) {
8183
case BLOCK_STATUS_ACTIVATED: /* Normal case */
@@ -89,12 +91,12 @@ ompi_mtl_portals4_recv_block_progress(ptl_event_t *ev,
8991
opal_list_remove_item(&ompi_mtl_portals4.recv_short_blocks,
9092
&block->base);
9193
OPAL_THREAD_UNLOCK(&ompi_mtl_portals4.short_block_mutex);
92-
ompi_mtl_portals4_recv_short_block_free(block);
94+
ret = ompi_mtl_portals4_recv_short_block_free(block);
9395
} else {
9496
OPAL_THREAD_UNLOCK(&ompi_mtl_portals4.short_block_mutex);
9597
OPAL_OUTPUT_VERBOSE((10, ompi_mtl_base_framework.framework_output,
9698
"mtl:portals4 PTL_EVENT_AUTO_UNLINK received after PTL_EVENT_AUTO_FREE"));
97-
ompi_mtl_portals4_activate_block(block);
99+
ret = ompi_mtl_portals4_activate_block(block);
98100
}
99101
break;
100102

@@ -133,7 +135,7 @@ ompi_mtl_portals4_recv_block_progress(ptl_event_t *ev,
133135
break;
134136
}
135137

136-
return OMPI_SUCCESS;
138+
return ret;
137139
}
138140

139141

@@ -193,6 +195,7 @@ ompi_mtl_portals4_activate_block(ompi_mtl_portals4_recv_short_block_t *block)
193195
me.uid = ompi_mtl_portals4.uid;
194196
me.options =
195197
PTL_ME_OP_PUT |
198+
PTL_ME_EVENT_COMM_DISABLE |
196199
PTL_ME_MANAGE_LOCAL |
197200
PTL_ME_MAY_ALIGN;
198201
if (ompi_mtl_portals4.use_logical) {
@@ -227,7 +230,8 @@ ompi_mtl_portals4_activate_block(ompi_mtl_portals4_recv_short_block_t *block)
227230
int
228231
ompi_mtl_portals4_recv_short_init(void)
229232
{
230-
int i;
233+
int ret = OMPI_SUCCESS;
234+
uint32_t i;
231235

232236
OBJ_CONSTRUCT(&ompi_mtl_portals4.short_block_mutex, opal_mutex_t);
233237
OBJ_CONSTRUCT(&(ompi_mtl_portals4.recv_short_blocks), opal_list_t);
@@ -241,46 +245,56 @@ ompi_mtl_portals4_recv_short_init(void)
241245
}
242246
opal_list_append(&ompi_mtl_portals4.recv_short_blocks,
243247
&block->base);
244-
ompi_mtl_portals4_activate_block(block);
248+
ret = ompi_mtl_portals4_activate_block(block);
245249
}
246250

247-
return OMPI_SUCCESS;
251+
return ret;
248252
}
249253

250254

251255
int
252256
ompi_mtl_portals4_recv_short_fini(void)
253257
{
254258
opal_list_item_t *item;
259+
int ret = OMPI_SUCCESS;
255260

256261
OPAL_THREAD_LOCK(&ompi_mtl_portals4.short_block_mutex);
257262
while (NULL != (item = opal_list_remove_first(&ompi_mtl_portals4.recv_short_blocks))) {
258263
ompi_mtl_portals4_recv_short_block_t *block =
259264
(ompi_mtl_portals4_recv_short_block_t*) item;
260-
ompi_mtl_portals4_recv_short_block_free(block);
265+
ret = ompi_mtl_portals4_recv_short_block_free(block);
266+
ompi_mtl_portals4.active_recv_short_blocks--;
261267
}
262268
OPAL_THREAD_UNLOCK(&ompi_mtl_portals4.short_block_mutex);
263269

264-
return OMPI_SUCCESS;
270+
return ret;
265271
}
266272

267273

268274
int
269275
ompi_mtl_portals4_recv_short_link(int count)
270276
{
277+
int ret = OMPI_SUCCESS;
271278
int active = ompi_mtl_portals4.active_recv_short_blocks;
272279
int i;
273280

274281
if (active < count) {
275282
for (i = 0 ; i < (count - active) ; ++i) {
276283
ompi_mtl_portals4_recv_short_block_t *block =
277-
ompi_mtl_portals4_recv_short_block_alloc(false);
284+
ompi_mtl_portals4_recv_short_block_alloc(true);
278285
if (NULL == block) {
279286
return OMPI_ERR_OUT_OF_RESOURCE;
280287
}
281-
ompi_mtl_portals4_activate_block(block);
288+
OPAL_THREAD_LOCK(&ompi_mtl_portals4.short_block_mutex);
289+
opal_list_append(&ompi_mtl_portals4.recv_short_blocks,
290+
&block->base);
291+
OPAL_OUTPUT_VERBOSE((10, ompi_mtl_base_framework.framework_output,
292+
"recv_short_link: total=%d active=%d",
293+
(int) opal_list_get_size(&ompi_mtl_portals4.recv_short_blocks), ompi_mtl_portals4.active_recv_short_blocks));
294+
OPAL_THREAD_UNLOCK(&ompi_mtl_portals4.short_block_mutex);
295+
ret = ompi_mtl_portals4_activate_block(block);
282296
}
283297
}
284298

285-
return OMPI_SUCCESS;
299+
return ret;
286300
}

0 commit comments

Comments
 (0)